From 208ea5ec95eb88e137451a8e0b658f1df7c1dbca Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Thu, 18 Feb 2021 17:23:17 -0800 Subject: [PATCH 01/31] gold standard initial commit --- .../approved-plans-v1_4/q1/explain.txt | 43 ++ .../approved-plans-v1_4/q1/simplified.txt | 58 ++ .../approved-plans-v1_4/q10/explain.txt | 49 ++ .../approved-plans-v1_4/q10/simplified.txt | 65 ++ .../approved-plans-v1_4/q11/explain.txt | 81 +++ .../approved-plans-v1_4/q11/simplified.txt | 109 ++++ .../approved-plans-v1_4/q12/explain.txt | 24 + .../approved-plans-v1_4/q12/simplified.txt | 34 + .../approved-plans-v1_4/q13/explain.txt | 37 ++ .../approved-plans-v1_4/q13/simplified.txt | 49 ++ .../approved-plans-v1_4/q14a/explain.txt | 192 ++++++ .../approved-plans-v1_4/q14a/simplified.txt | 267 ++++++++ .../approved-plans-v1_4/q14b/explain.txt | 166 +++++ .../approved-plans-v1_4/q14b/simplified.txt | 226 +++++++ .../approved-plans-v1_4/q15/explain.txt | 26 + .../approved-plans-v1_4/q15/simplified.txt | 34 + .../approved-plans-v1_4/q16/explain.txt | 37 ++ .../approved-plans-v1_4/q16/simplified.txt | 51 ++ .../approved-plans-v1_4/q17/explain.txt | 50 ++ .../approved-plans-v1_4/q17/simplified.txt | 66 ++ .../approved-plans-v1_4/q18/explain.txt | 45 ++ .../approved-plans-v1_4/q18/simplified.txt | 59 ++ .../approved-plans-v1_4/q19/explain.txt | 38 ++ .../approved-plans-v1_4/q19/simplified.txt | 50 ++ .../approved-plans-v1_4/q2/explain.txt | 36 ++ .../approved-plans-v1_4/q2/simplified.txt | 52 ++ .../approved-plans-v1_4/q20/explain.txt | 24 + .../approved-plans-v1_4/q20/simplified.txt | 34 + .../approved-plans-v1_4/q21/explain.txt | 27 + .../approved-plans-v1_4/q21/simplified.txt | 35 ++ .../approved-plans-v1_4/q22/explain.txt | 27 + .../approved-plans-v1_4/q22/simplified.txt | 35 ++ .../approved-plans-v1_4/q23a/explain.txt | 89 +++ .../approved-plans-v1_4/q23a/simplified.txt | 122 ++++ .../approved-plans-v1_4/q23b/explain.txt | 101 +++ .../approved-plans-v1_4/q23b/simplified.txt | 138 ++++ .../approved-plans-v1_4/q24a/explain.txt | 82 +++ .../approved-plans-v1_4/q24a/simplified.txt | 111 ++++ .../approved-plans-v1_4/q24b/explain.txt | 82 +++ .../approved-plans-v1_4/q24b/simplified.txt | 111 ++++ .../approved-plans-v1_4/q25/explain.txt | 50 ++ .../approved-plans-v1_4/q25/simplified.txt | 66 ++ .../approved-plans-v1_4/q26/explain.txt | 32 + .../approved-plans-v1_4/q26/simplified.txt | 42 ++ .../approved-plans-v1_4/q27/explain.txt | 33 + .../approved-plans-v1_4/q27/simplified.txt | 43 ++ .../approved-plans-v1_4/q28/explain.txt | 66 ++ .../approved-plans-v1_4/q28/simplified.txt | 95 +++ .../approved-plans-v1_4/q29/explain.txt | 50 ++ .../approved-plans-v1_4/q29/simplified.txt | 66 ++ .../approved-plans-v1_4/q3/explain.txt | 20 + .../approved-plans-v1_4/q3/simplified.txt | 26 + .../approved-plans-v1_4/q30/explain.txt | 52 ++ .../approved-plans-v1_4/q30/simplified.txt | 70 +++ .../approved-plans-v1_4/q31/explain.txt | 100 +++ .../approved-plans-v1_4/q31/simplified.txt | 140 +++++ .../approved-plans-v1_4/q32/explain.txt | 33 + .../approved-plans-v1_4/q32/simplified.txt | 43 ++ .../approved-plans-v1_4/q33/explain.txt | 65 ++ .../approved-plans-v1_4/q33/simplified.txt | 91 +++ .../approved-plans-v1_4/q34/explain.txt | 34 + .../approved-plans-v1_4/q34/simplified.txt | 46 ++ .../approved-plans-v1_4/q35/explain.txt | 49 ++ .../approved-plans-v1_4/q35/simplified.txt | 65 ++ .../approved-plans-v1_4/q36/explain.txt | 31 + .../approved-plans-v1_4/q36/simplified.txt | 43 ++ .../approved-plans-v1_4/q37/explain.txt | 26 + .../approved-plans-v1_4/q37/simplified.txt | 34 + .../approved-plans-v1_4/q38/explain.txt | 55 ++ .../approved-plans-v1_4/q38/simplified.txt | 75 +++ .../approved-plans-v1_4/q39a/explain.txt | 51 ++ .../approved-plans-v1_4/q39a/simplified.txt | 69 ++ .../approved-plans-v1_4/q39b/explain.txt | 51 ++ .../approved-plans-v1_4/q39b/simplified.txt | 69 ++ .../approved-plans-v1_4/q4/explain.txt | 124 ++++ .../approved-plans-v1_4/q4/simplified.txt | 165 +++++ .../approved-plans-v1_4/q40/explain.txt | 32 + .../approved-plans-v1_4/q40/simplified.txt | 42 ++ .../approved-plans-v1_4/q41/explain.txt | 19 + .../approved-plans-v1_4/q41/simplified.txt | 25 + .../approved-plans-v1_4/q42/explain.txt | 20 + .../approved-plans-v1_4/q42/simplified.txt | 26 + .../approved-plans-v1_4/q43/explain.txt | 20 + .../approved-plans-v1_4/q43/simplified.txt | 26 + .../approved-plans-v1_4/q44/explain.txt | 50 ++ .../approved-plans-v1_4/q44/simplified.txt | 72 +++ .../approved-plans-v1_4/q45/explain.txt | 39 ++ .../approved-plans-v1_4/q45/simplified.txt | 51 ++ .../approved-plans-v1_4/q46/explain.txt | 41 ++ .../approved-plans-v1_4/q46/simplified.txt | 54 ++ .../approved-plans-v1_4/q47/explain.txt | 51 ++ .../approved-plans-v1_4/q47/simplified.txt | 77 +++ .../approved-plans-v1_4/q48/explain.txt | 31 + .../approved-plans-v1_4/q48/simplified.txt | 41 ++ .../approved-plans-v1_4/q49/explain.txt | 75 +++ .../approved-plans-v1_4/q49/simplified.txt | 115 ++++ .../approved-plans-v1_4/q5/explain.txt | 76 +++ .../approved-plans-v1_4/q5/simplified.txt | 110 ++++ .../approved-plans-v1_4/q50/explain.txt | 32 + .../approved-plans-v1_4/q50/simplified.txt | 42 ++ .../approved-plans-v1_4/q51/explain.txt | 41 ++ .../approved-plans-v1_4/q51/simplified.txt | 67 ++ .../approved-plans-v1_4/q52/explain.txt | 20 + .../approved-plans-v1_4/q52/simplified.txt | 26 + .../approved-plans-v1_4/q53/explain.txt | 31 + .../approved-plans-v1_4/q53/simplified.txt | 43 ++ .../approved-plans-v1_4/q54/explain.txt | 88 +++ .../approved-plans-v1_4/q54/simplified.txt | 123 ++++ .../approved-plans-v1_4/q55/explain.txt | 20 + .../approved-plans-v1_4/q55/simplified.txt | 26 + .../approved-plans-v1_4/q56/explain.txt | 65 ++ .../approved-plans-v1_4/q56/simplified.txt | 91 +++ .../approved-plans-v1_4/q57/explain.txt | 51 ++ .../approved-plans-v1_4/q57/simplified.txt | 77 +++ .../approved-plans-v1_4/q58/explain.txt | 101 +++ .../approved-plans-v1_4/q58/simplified.txt | 133 ++++ .../approved-plans-v1_4/q59/explain.txt | 43 ++ .../approved-plans-v1_4/q59/simplified.txt | 58 ++ .../approved-plans-v1_4/q6/explain.txt | 58 ++ .../approved-plans-v1_4/q6/simplified.txt | 78 +++ .../approved-plans-v1_4/q60/explain.txt | 65 ++ .../approved-plans-v1_4/q60/simplified.txt | 91 +++ .../approved-plans-v1_4/q61/explain.txt | 68 ++ .../approved-plans-v1_4/q61/simplified.txt | 92 +++ .../approved-plans-v1_4/q62/explain.txt | 32 + .../approved-plans-v1_4/q62/simplified.txt | 42 ++ .../approved-plans-v1_4/q63/explain.txt | 31 + .../approved-plans-v1_4/q63/simplified.txt | 43 ++ .../approved-plans-v1_4/q64/explain.txt | 170 +++++ .../approved-plans-v1_4/q64/simplified.txt | 229 +++++++ .../approved-plans-v1_4/q65/explain.txt | 42 ++ .../approved-plans-v1_4/q65/simplified.txt | 57 ++ .../approved-plans-v1_4/q66/explain.txt | 54 ++ .../approved-plans-v1_4/q66/simplified.txt | 75 +++ .../approved-plans-v1_4/q67/explain.txt | 31 + .../approved-plans-v1_4/q67/simplified.txt | 43 ++ .../approved-plans-v1_4/q68/explain.txt | 41 ++ .../approved-plans-v1_4/q68/simplified.txt | 54 ++ .../approved-plans-v1_4/q69/explain.txt | 48 ++ .../approved-plans-v1_4/q69/simplified.txt | 64 ++ .../approved-plans-v1_4/q7/explain.txt | 32 + .../approved-plans-v1_4/q7/simplified.txt | 42 ++ .../approved-plans-v1_4/q70/explain.txt | 46 ++ .../approved-plans-v1_4/q70/simplified.txt | 65 ++ .../approved-plans-v1_4/q71/explain.txt | 40 ++ .../approved-plans-v1_4/q71/simplified.txt | 56 ++ .../approved-plans-v1_4/q72/explain.txt | 68 ++ .../approved-plans-v1_4/q72/simplified.txt | 90 +++ .../approved-plans-v1_4/q73/explain.txt | 34 + .../approved-plans-v1_4/q73/simplified.txt | 46 ++ .../approved-plans-v1_4/q74/explain.txt | 80 +++ .../approved-plans-v1_4/q74/simplified.txt | 108 ++++ .../approved-plans-v1_4/q75/explain.txt | 117 ++++ .../approved-plans-v1_4/q75/simplified.txt | 167 +++++ .../approved-plans-v1_4/q76/explain.txt | 39 ++ .../approved-plans-v1_4/q76/simplified.txt | 53 ++ .../approved-plans-v1_4/q77/explain.txt | 100 +++ .../approved-plans-v1_4/q77/simplified.txt | 141 +++++ .../approved-plans-v1_4/q78/explain.txt | 61 ++ .../approved-plans-v1_4/q78/simplified.txt | 81 +++ .../approved-plans-v1_4/q79/explain.txt | 32 + .../approved-plans-v1_4/q79/simplified.txt | 42 ++ .../approved-plans-v1_4/q8/explain.txt | 45 ++ .../approved-plans-v1_4/q8/simplified.txt | 61 ++ .../approved-plans-v1_4/q80/explain.txt | 97 +++ .../approved-plans-v1_4/q80/simplified.txt | 133 ++++ .../approved-plans-v1_4/q81/explain.txt | 52 ++ .../approved-plans-v1_4/q81/simplified.txt | 70 +++ .../approved-plans-v1_4/q82/explain.txt | 26 + .../approved-plans-v1_4/q82/simplified.txt | 34 + .../approved-plans-v1_4/q83/explain.txt | 69 ++ .../approved-plans-v1_4/q83/simplified.txt | 94 +++ .../approved-plans-v1_4/q84/explain.txt | 35 ++ .../approved-plans-v1_4/q84/simplified.txt | 45 ++ .../approved-plans-v1_4/q85/explain.txt | 50 ++ .../approved-plans-v1_4/q85/simplified.txt | 66 ++ .../approved-plans-v1_4/q86/explain.txt | 25 + .../approved-plans-v1_4/q86/simplified.txt | 35 ++ .../approved-plans-v1_4/q87/explain.txt | 54 ++ .../approved-plans-v1_4/q87/simplified.txt | 74 +++ .../approved-plans-v1_4/q88/explain.txt | 165 +++++ .../approved-plans-v1_4/q88/simplified.txt | 222 +++++++ .../approved-plans-v1_4/q89/explain.txt | 31 + .../approved-plans-v1_4/q89/simplified.txt | 43 ++ .../approved-plans-v1_4/q9/explain.txt | 109 ++++ .../approved-plans-v1_4/q9/simplified.txt | 154 +++++ .../approved-plans-v1_4/q90/explain.txt | 47 ++ .../approved-plans-v1_4/q90/simplified.txt | 64 ++ .../approved-plans-v1_4/q91/explain.txt | 45 ++ .../approved-plans-v1_4/q91/simplified.txt | 61 ++ .../approved-plans-v1_4/q92/explain.txt | 33 + .../approved-plans-v1_4/q92/simplified.txt | 44 ++ .../approved-plans-v1_4/q93/explain.txt | 18 + .../approved-plans-v1_4/q93/simplified.txt | 24 + .../approved-plans-v1_4/q94/explain.txt | 37 ++ .../approved-plans-v1_4/q94/simplified.txt | 51 ++ .../approved-plans-v1_4/q95/explain.txt | 54 ++ .../approved-plans-v1_4/q95/simplified.txt | 73 +++ .../approved-plans-v1_4/q96/explain.txt | 26 + .../approved-plans-v1_4/q96/simplified.txt | 34 + .../approved-plans-v1_4/q97/explain.txt | 32 + .../approved-plans-v1_4/q97/simplified.txt | 48 ++ .../approved-plans-v1_4/q98/explain.txt | 26 + .../approved-plans-v1_4/q98/simplified.txt | 38 ++ .../approved-plans-v1_4/q99/explain.txt | 32 + .../approved-plans-v1_4/q99/simplified.txt | 42 ++ src/test/resources/tpcds/q1.sql | 19 + src/test/resources/tpcds/q10.sql | 57 ++ src/test/resources/tpcds/q11.sql | 68 ++ src/test/resources/tpcds/q12.sql | 22 + src/test/resources/tpcds/q13.sql | 49 ++ src/test/resources/tpcds/q14a.sql | 120 ++++ src/test/resources/tpcds/q14b.sql | 95 +++ src/test/resources/tpcds/q15.sql | 15 + src/test/resources/tpcds/q16.sql | 23 + src/test/resources/tpcds/q17.sql | 33 + src/test/resources/tpcds/q18.sql | 28 + src/test/resources/tpcds/q19.sql | 19 + src/test/resources/tpcds/q2.sql | 81 +++ src/test/resources/tpcds/q20.sql | 18 + src/test/resources/tpcds/q21.sql | 25 + src/test/resources/tpcds/q22.sql | 14 + src/test/resources/tpcds/q23a.sql | 53 ++ src/test/resources/tpcds/q23b.sql | 68 ++ src/test/resources/tpcds/q24a.sql | 34 + src/test/resources/tpcds/q24b.sql | 34 + src/test/resources/tpcds/q25.sql | 33 + src/test/resources/tpcds/q26.sql | 19 + src/test/resources/tpcds/q27.sql | 21 + src/test/resources/tpcds/q28.sql | 56 ++ src/test/resources/tpcds/q29.sql | 32 + src/test/resources/tpcds/q3.sql | 13 + src/test/resources/tpcds/q30.sql | 35 ++ src/test/resources/tpcds/q31.sql | 60 ++ src/test/resources/tpcds/q32.sql | 15 + src/test/resources/tpcds/q33.sql | 65 ++ src/test/resources/tpcds/q34.sql | 32 + src/test/resources/tpcds/q35.sql | 46 ++ src/test/resources/tpcds/q36.sql | 26 + src/test/resources/tpcds/q37.sql | 15 + src/test/resources/tpcds/q38.sql | 30 + src/test/resources/tpcds/q39a.sql | 47 ++ src/test/resources/tpcds/q39b.sql | 48 ++ src/test/resources/tpcds/q4.sql | 120 ++++ src/test/resources/tpcds/q40.sql | 25 + src/test/resources/tpcds/q41.sql | 49 ++ src/test/resources/tpcds/q42.sql | 18 + src/test/resources/tpcds/q43.sql | 33 + src/test/resources/tpcds/q44.sql | 46 ++ src/test/resources/tpcds/q45.sql | 21 + src/test/resources/tpcds/q46.sql | 32 + src/test/resources/tpcds/q47.sql | 63 ++ src/test/resources/tpcds/q48.sql | 63 ++ src/test/resources/tpcds/q49.sql | 126 ++++ src/test/resources/tpcds/q5.sql | 131 ++++ src/test/resources/tpcds/q50.sql | 47 ++ src/test/resources/tpcds/q51.sql | 55 ++ src/test/resources/tpcds/q52.sql | 14 + src/test/resources/tpcds/q53.sql | 30 + src/test/resources/tpcds/q54.sql | 61 ++ src/test/resources/tpcds/q55.sql | 13 + src/test/resources/tpcds/q56.sql | 65 ++ src/test/resources/tpcds/q57.sql | 56 ++ src/test/resources/tpcds/q58.sql | 59 ++ src/test/resources/tpcds/q59.sql | 75 +++ src/test/resources/tpcds/q6.sql | 21 + src/test/resources/tpcds/q60.sql | 62 ++ src/test/resources/tpcds/q61.sql | 33 + src/test/resources/tpcds/q62.sql | 35 ++ src/test/resources/tpcds/q63.sql | 31 + src/test/resources/tpcds/q64.sql | 92 +++ src/test/resources/tpcds/q65.sql | 33 + src/test/resources/tpcds/q66.sql | 240 +++++++ src/test/resources/tpcds/q67.sql | 38 ++ src/test/resources/tpcds/q68.sql | 34 + src/test/resources/tpcds/q69.sql | 38 ++ src/test/resources/tpcds/q7.sql | 19 + src/test/resources/tpcds/q70.sql | 38 ++ src/test/resources/tpcds/q71.sql | 44 ++ src/test/resources/tpcds/q72.sql | 33 + src/test/resources/tpcds/q73.sql | 30 + src/test/resources/tpcds/q74.sql | 58 ++ src/test/resources/tpcds/q75.sql | 76 +++ src/test/resources/tpcds/q76.sql | 47 ++ src/test/resources/tpcds/q77.sql | 100 +++ src/test/resources/tpcds/q78.sql | 64 ++ src/test/resources/tpcds/q79.sql | 27 + src/test/resources/tpcds/q8.sql | 87 +++ src/test/resources/tpcds/q80.sql | 94 +++ src/test/resources/tpcds/q81.sql | 38 ++ src/test/resources/tpcds/q82.sql | 15 + src/test/resources/tpcds/q83.sql | 56 ++ src/test/resources/tpcds/q84.sql | 19 + src/test/resources/tpcds/q85.sql | 82 +++ src/test/resources/tpcds/q86.sql | 24 + src/test/resources/tpcds/q87.sql | 28 + src/test/resources/tpcds/q88.sql | 122 ++++ src/test/resources/tpcds/q89.sql | 30 + src/test/resources/tpcds/q9.sql | 48 ++ src/test/resources/tpcds/q90.sql | 19 + src/test/resources/tpcds/q91.sql | 23 + src/test/resources/tpcds/q92.sql | 16 + src/test/resources/tpcds/q93.sql | 19 + src/test/resources/tpcds/q94.sql | 23 + src/test/resources/tpcds/q95.sql | 29 + src/test/resources/tpcds/q96.sql | 11 + src/test/resources/tpcds/q97.sql | 30 + src/test/resources/tpcds/q98.sql | 21 + src/test/resources/tpcds/q99.sql | 34 + .../goldstandard/PlanStabilitySuite.scala | 277 +++++++++ .../hyperspace/goldstandard/TPCDSBase.scala | 588 ++++++++++++++++++ 311 files changed, 18665 insertions(+) create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt create mode 100644 src/test/resources/tpcds/q1.sql create mode 100644 src/test/resources/tpcds/q10.sql create mode 100644 src/test/resources/tpcds/q11.sql create mode 100644 src/test/resources/tpcds/q12.sql create mode 100644 src/test/resources/tpcds/q13.sql create mode 100644 src/test/resources/tpcds/q14a.sql create mode 100644 src/test/resources/tpcds/q14b.sql create mode 100644 src/test/resources/tpcds/q15.sql create mode 100644 src/test/resources/tpcds/q16.sql create mode 100644 src/test/resources/tpcds/q17.sql create mode 100644 src/test/resources/tpcds/q18.sql create mode 100644 src/test/resources/tpcds/q19.sql create mode 100644 src/test/resources/tpcds/q2.sql create mode 100644 src/test/resources/tpcds/q20.sql create mode 100644 src/test/resources/tpcds/q21.sql create mode 100644 src/test/resources/tpcds/q22.sql create mode 100644 src/test/resources/tpcds/q23a.sql create mode 100644 src/test/resources/tpcds/q23b.sql create mode 100644 src/test/resources/tpcds/q24a.sql create mode 100644 src/test/resources/tpcds/q24b.sql create mode 100644 src/test/resources/tpcds/q25.sql create mode 100644 src/test/resources/tpcds/q26.sql create mode 100644 src/test/resources/tpcds/q27.sql create mode 100644 src/test/resources/tpcds/q28.sql create mode 100644 src/test/resources/tpcds/q29.sql create mode 100644 src/test/resources/tpcds/q3.sql create mode 100644 src/test/resources/tpcds/q30.sql create mode 100644 src/test/resources/tpcds/q31.sql create mode 100644 src/test/resources/tpcds/q32.sql create mode 100644 src/test/resources/tpcds/q33.sql create mode 100644 src/test/resources/tpcds/q34.sql create mode 100644 src/test/resources/tpcds/q35.sql create mode 100644 src/test/resources/tpcds/q36.sql create mode 100644 src/test/resources/tpcds/q37.sql create mode 100644 src/test/resources/tpcds/q38.sql create mode 100644 src/test/resources/tpcds/q39a.sql create mode 100644 src/test/resources/tpcds/q39b.sql create mode 100644 src/test/resources/tpcds/q4.sql create mode 100644 src/test/resources/tpcds/q40.sql create mode 100644 src/test/resources/tpcds/q41.sql create mode 100644 src/test/resources/tpcds/q42.sql create mode 100644 src/test/resources/tpcds/q43.sql create mode 100644 src/test/resources/tpcds/q44.sql create mode 100644 src/test/resources/tpcds/q45.sql create mode 100644 src/test/resources/tpcds/q46.sql create mode 100644 src/test/resources/tpcds/q47.sql create mode 100644 src/test/resources/tpcds/q48.sql create mode 100644 src/test/resources/tpcds/q49.sql create mode 100644 src/test/resources/tpcds/q5.sql create mode 100644 src/test/resources/tpcds/q50.sql create mode 100644 src/test/resources/tpcds/q51.sql create mode 100644 src/test/resources/tpcds/q52.sql create mode 100644 src/test/resources/tpcds/q53.sql create mode 100644 src/test/resources/tpcds/q54.sql create mode 100644 src/test/resources/tpcds/q55.sql create mode 100644 src/test/resources/tpcds/q56.sql create mode 100644 src/test/resources/tpcds/q57.sql create mode 100644 src/test/resources/tpcds/q58.sql create mode 100644 src/test/resources/tpcds/q59.sql create mode 100644 src/test/resources/tpcds/q6.sql create mode 100644 src/test/resources/tpcds/q60.sql create mode 100644 src/test/resources/tpcds/q61.sql create mode 100644 src/test/resources/tpcds/q62.sql create mode 100644 src/test/resources/tpcds/q63.sql create mode 100644 src/test/resources/tpcds/q64.sql create mode 100644 src/test/resources/tpcds/q65.sql create mode 100644 src/test/resources/tpcds/q66.sql create mode 100644 src/test/resources/tpcds/q67.sql create mode 100644 src/test/resources/tpcds/q68.sql create mode 100644 src/test/resources/tpcds/q69.sql create mode 100644 src/test/resources/tpcds/q7.sql create mode 100644 src/test/resources/tpcds/q70.sql create mode 100644 src/test/resources/tpcds/q71.sql create mode 100644 src/test/resources/tpcds/q72.sql create mode 100644 src/test/resources/tpcds/q73.sql create mode 100644 src/test/resources/tpcds/q74.sql create mode 100644 src/test/resources/tpcds/q75.sql create mode 100644 src/test/resources/tpcds/q76.sql create mode 100644 src/test/resources/tpcds/q77.sql create mode 100644 src/test/resources/tpcds/q78.sql create mode 100644 src/test/resources/tpcds/q79.sql create mode 100644 src/test/resources/tpcds/q8.sql create mode 100644 src/test/resources/tpcds/q80.sql create mode 100644 src/test/resources/tpcds/q81.sql create mode 100644 src/test/resources/tpcds/q82.sql create mode 100644 src/test/resources/tpcds/q83.sql create mode 100644 src/test/resources/tpcds/q84.sql create mode 100644 src/test/resources/tpcds/q85.sql create mode 100644 src/test/resources/tpcds/q86.sql create mode 100644 src/test/resources/tpcds/q87.sql create mode 100644 src/test/resources/tpcds/q88.sql create mode 100644 src/test/resources/tpcds/q89.sql create mode 100644 src/test/resources/tpcds/q9.sql create mode 100644 src/test/resources/tpcds/q90.sql create mode 100644 src/test/resources/tpcds/q91.sql create mode 100644 src/test/resources/tpcds/q92.sql create mode 100644 src/test/resources/tpcds/q93.sql create mode 100644 src/test/resources/tpcds/q94.sql create mode 100644 src/test/resources/tpcds/q95.sql create mode 100644 src/test/resources/tpcds/q96.sql create mode 100644 src/test/resources/tpcds/q97.sql create mode 100644 src/test/resources/tpcds/q98.sql create mode 100644 src/test/resources/tpcds/q99.sql create mode 100644 src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala create mode 100644 src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt new file mode 100644 index 000000000..97885fe7f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt @@ -0,0 +1,43 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST], output=[c_customer_id#1]) ++- *(9) Project [c_customer_id#1] + +- *(9) BroadcastHashJoin [ctr_customer_sk#2], [cast(c_customer_sk#3 as bigint)], Inner, BuildRight + :- *(9) Project [ctr_customer_sk#2] + : +- *(9) BroadcastHashJoin [ctr_store_sk#4], [cast(s_store_sk#5 as bigint)], Inner, BuildRight + : :- *(9) Project [ctr_customer_sk#2, ctr_store_sk#4] + : : +- *(9) BroadcastHashJoin [ctr_store_sk#4], [ctr_store_sk#4#6], Inner, BuildRight, (cast(ctr_total_return#7 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#8) + : : :- *(9) Filter isnotnull(ctr_total_return#7) + : : : +- *(9) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[sum(UnscaledValue(sr_return_amt#11))]) + : : : +- Exchange hashpartitioning(sr_customer_sk#9, sr_store_sk#10, 200) + : : : +- *(2) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[partial_sum(UnscaledValue(sr_return_amt#11))]) + : : : +- *(2) Project [sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] + : : : +- *(2) BroadcastHashJoin [sr_returned_date_sk#12], [cast(d_date_sk#13 as bigint)], Inner, BuildRight + : : : :- *(2) Project [sr_returned_date_sk#12, sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] + : : : : +- *(2) Filter ((isnotnull(sr_returned_date_sk#12) && isnotnull(sr_store_sk#10)) && isnotnull(sr_customer_sk#9)) + : : : : +- *(2) FileScan parquet default.store_returns[sr_returned_date_sk#12,sr_customer_sk#9,sr_store_sk#10,sr_return_amt#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk), IsNotNull(sr_customer_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true])) + : : +- *(6) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#8) + : : +- *(6) HashAggregate(keys=[ctr_store_sk#4], functions=[avg(ctr_total_return#7)]) + : : +- Exchange hashpartitioning(ctr_store_sk#4, 200) + : : +- *(5) HashAggregate(keys=[ctr_store_sk#4], functions=[partial_avg(ctr_total_return#7)]) + : : +- *(5) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[sum(UnscaledValue(sr_return_amt#11))]) + : : +- Exchange hashpartitioning(sr_customer_sk#9, sr_store_sk#10, 200) + : : +- *(4) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[partial_sum(UnscaledValue(sr_return_amt#11))]) + : : +- *(4) Project [sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] + : : +- *(4) BroadcastHashJoin [sr_returned_date_sk#12], [cast(d_date_sk#13 as bigint)], Inner, BuildRight + : : :- *(4) Project [sr_returned_date_sk#12, sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] + : : : +- *(4) Filter (isnotnull(sr_returned_date_sk#12) && isnotnull(sr_store_sk#10)) + : : : +- *(4) FileScan parquet default.store_returns[sr_returned_date_sk#12,sr_customer_sk#9,sr_store_sk#10,sr_return_amt#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [c_customer_sk#3, c_customer_id#1] + +- *(8) Filter isnotnull(c_customer_sk#3) + +- *(8) FileScan parquet default.customer[c_customer_sk#3,c_customer_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt new file mode 100644 index 000000000..6104e2ac9 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt @@ -0,0 +1,58 @@ +TakeOrderedAndProject [c_customer_id] + WholeStageCodegen + Project [c_customer_id] + BroadcastHashJoin [ctr_customer_sk,c_customer_sk] + Project [ctr_customer_sk] + BroadcastHashJoin [ctr_store_sk,s_store_sk] + Project [ctr_customer_sk,ctr_store_sk] + BroadcastHashJoin [ctr_store_sk,ctr_store_skL,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + Filter [ctr_total_return] + HashAggregate [sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] [ctr_customer_sk,ctr_total_return,ctr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] + InputAdapter + Exchange [sr_customer_sk,sr_store_sk] #1 + WholeStageCodegen + HashAggregate [sr_customer_sk,sum,sr_return_amt,sum,sr_store_sk] [sum,sum] + Project [sr_customer_sk,sr_store_sk,sr_return_amt] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] + Filter [sr_returned_date_sk,sr_store_sk,sr_customer_sk] + Scan parquet default.store_returns [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + HashAggregate [ctr_store_sk,sum,count,avg(ctr_total_return)] [avg(ctr_total_return),ctr_store_skL,sum,count,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + InputAdapter + Exchange [ctr_store_sk] #4 + WholeStageCodegen + HashAggregate [ctr_store_sk,ctr_total_return,sum,count,sum,count] [sum,count,sum,count] + HashAggregate [sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] [sum(UnscaledValue(sr_return_amt)),ctr_store_sk,ctr_total_return,sum] + InputAdapter + Exchange [sr_customer_sk,sr_store_sk] #5 + WholeStageCodegen + HashAggregate [sum,sr_customer_sk,sum,sr_return_amt,sr_store_sk] [sum,sum] + Project [sr_customer_sk,sr_store_sk,sr_return_amt] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] + Filter [sr_returned_date_sk,sr_store_sk] + Scan parquet default.store_returns [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [s_store_sk] + Filter [s_state,s_store_sk] + Scan parquet default.store [s_store_sk,s_state] [s_store_sk,s_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [c_customer_sk,c_customer_id] + Filter [c_customer_sk] + Scan parquet default.customer [c_customer_sk,c_customer_id] [c_customer_sk,c_customer_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt new file mode 100644 index 000000000..a108b1a61 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt @@ -0,0 +1,49 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[cd_gender#1 ASC NULLS FIRST,cd_marital_status#2 ASC NULLS FIRST,cd_education_status#3 ASC NULLS FIRST,cd_purchase_estimate#4 ASC NULLS FIRST,cd_credit_rating#5 ASC NULLS FIRST,cd_dep_count#6 ASC NULLS FIRST,cd_dep_employed_count#7 ASC NULLS FIRST,cd_dep_college_count#8 ASC NULLS FIRST], output=[cd_gender#1,cd_marital_status#2,cd_education_status#3,cnt1#9,cd_purchase_estimate#4,cnt2#10,cd_credit_rating#5,cnt3#11,cd_dep_count#6,cnt4#12,cd_dep_employed_count#7,cnt5#13,cd_dep_college_count#8,cnt6#14]) ++- *(10) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8], functions=[count(1)]) + +- Exchange hashpartitioning(cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8, 200) + +- *(9) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8], functions=[partial_count(1)]) + +- *(9) Project [cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8] + +- *(9) BroadcastHashJoin [c_current_cdemo_sk#15], [cd_demo_sk#16], Inner, BuildRight + :- *(9) Project [c_current_cdemo_sk#15] + : +- *(9) BroadcastHashJoin [c_current_addr_sk#17], [ca_address_sk#18], Inner, BuildRight + : :- *(9) Project [c_current_cdemo_sk#15, c_current_addr_sk#17] + : : +- *(9) Filter (exists#19 || exists#20) + : : +- *(9) BroadcastHashJoin [c_customer_sk#21], [cs_ship_customer_sk#22], ExistenceJoin(exists#20), BuildRight + : : :- *(9) BroadcastHashJoin [c_customer_sk#21], [ws_bill_customer_sk#23], ExistenceJoin(exists#19), BuildRight + : : : :- *(9) BroadcastHashJoin [c_customer_sk#21], [ss_customer_sk#24], LeftSemi, BuildRight + : : : : :- *(9) Project [c_customer_sk#21, c_current_cdemo_sk#15, c_current_addr_sk#17] + : : : : : +- *(9) Filter (isnotnull(c_current_addr_sk#17) && isnotnull(c_current_cdemo_sk#15)) + : : : : : +- *(9) FileScan parquet default.customer[c_customer_sk#21,c_current_cdemo_sk#15,c_current_addr_sk#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [ss_customer_sk#24] + : : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#25], [d_date_sk#26], Inner, BuildRight + : : : : :- *(2) Project [ss_sold_date_sk#25, ss_customer_sk#24] + : : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#25) + : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#25,ss_customer_sk#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [d_date_sk#26] + : : : : +- *(1) Filter (((((isnotnull(d_year#27) && isnotnull(d_moy#28)) && (d_year#27 = 2002)) && (d_moy#28 >= 1)) && (d_moy#28 <= 4)) && isnotnull(d_date_sk#26)) + : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#26,d_year#27,d_moy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,1), LessThan..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [ws_bill_customer_sk#23] + : : : +- *(4) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#26], Inner, BuildRight + : : : :- *(4) Project [ws_sold_date_sk#29, ws_bill_customer_sk#23] + : : : : +- *(4) Filter isnotnull(ws_sold_date_sk#29) + : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#29,ws_bill_customer_sk#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [cs_ship_customer_sk#22] + : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#30], [d_date_sk#26], Inner, BuildRight + : : :- *(6) Project [cs_sold_date_sk#30, cs_ship_customer_sk#22] + : : : +- *(6) Filter isnotnull(cs_sold_date_sk#30) + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#30,cs_ship_customer_sk#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [ca_address_sk#18] + : +- *(7) Filter (ca_county#31 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) && isnotnull(ca_address_sk#18)) + : +- *(7) FileScan parquet default.customer_address[ca_address_sk#18,ca_county#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [In(ca_county, [Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County]), IsNo..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [cd_demo_sk#16, cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8] + +- *(8) Filter isnotnull(cd_demo_sk#16) + +- *(8) FileScan parquet default.customer_demographics[cd_demo_sk#16,cd_gender#1,cd_marital_status#2,cd_education_status#3,cd_purchase_estimate#4,cd_credit_rating#5,cd_dep_count#6,cd_dep_employed_count#7,cd_dep_college_count#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct 0.00) THEN CheckOverflow((promote_precision(year_total#5) / promote_precision(year_total#4)), DecimalType(38,20)) ELSE null END > CASE WHEN (year_total#6 > 0.00) THEN CheckOverflow((promote_precision(year_total#7) / promote_precision(year_total#6)), DecimalType(38,20)) ELSE null END) + :- *(17) Project [customer_id#2, year_total#6, customer_preferred_cust_flag#1, year_total#7, year_total#4] + : +- *(17) BroadcastHashJoin [customer_id#2], [customer_id#8], Inner, BuildRight + : :- *(17) Project [customer_id#2, year_total#6, customer_preferred_cust_flag#1, year_total#7] + : : +- *(17) BroadcastHashJoin [customer_id#2], [customer_id#9], Inner, BuildRight + : : :- Union + : : : :- *(4) Filter (isnotnull(year_total#6) && (year_total#6 > 0.00)) + : : : : +- *(4) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) + : : : : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, 200) + : : : : +- *(3) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) + : : : : +- *(3) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_ext_discount_amt#19, ss_ext_list_price#18, d_year#13] + : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight + : : : : :- *(3) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_sold_date_sk#20, ss_ext_discount_amt#19, ss_ext_list_price#18] + : : : : : +- *(3) BroadcastHashJoin [c_customer_sk#22], [ss_customer_sk#23], Inner, BuildRight + : : : : : :- *(3) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] + : : : : : : +- *(3) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) + : : : : : : +- *(3) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : +- LocalTableScan , [customer_id#24, year_total#25] + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : +- Union + : : :- *(8) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) + : : : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, 200) + : : : +- *(7) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) + : : : +- *(7) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_ext_discount_amt#19, ss_ext_list_price#18, d_year#13] + : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight + : : : :- *(7) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_sold_date_sk#20, ss_ext_discount_amt#19, ss_ext_list_price#18] + : : : : +- *(7) BroadcastHashJoin [c_customer_sk#22], [ss_customer_sk#23], Inner, BuildRight + : : : : :- *(7) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] + : : : : : +- *(7) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) + : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : +- LocalTableScan , [customer_id#24, customer_preferred_cust_flag#26, year_total#25] + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- Union + : :- LocalTableScan , [customer_id#8, year_total#4] + : +- *(12) Filter (isnotnull(year_total#25) && (year_total#25 > 0.00)) + : +- *(12) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) + : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13, 200) + : +- *(11) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) + : +- *(11) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_ext_discount_amt#28, ws_ext_list_price#27, d_year#13] + : +- *(11) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#21], Inner, BuildRight + : :- *(11) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_sold_date_sk#29, ws_ext_discount_amt#28, ws_ext_list_price#27] + : : +- *(11) BroadcastHashJoin [c_customer_sk#22], [ws_bill_customer_sk#30], Inner, BuildRight + : : :- *(11) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] + : : : +- *(11) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) + : : : +- *(11) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#3, year_total#5] + +- *(16) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) + +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13, 200) + +- *(15) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) + +- *(15) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_ext_discount_amt#28, ws_ext_list_price#27, d_year#13] + +- *(15) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#21], Inner, BuildRight + :- *(15) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_sold_date_sk#29, ws_ext_discount_amt#28, ws_ext_list_price#27] + : +- *(15) BroadcastHashJoin [c_customer_sk#22], [ws_bill_customer_sk#30], Inner, BuildRight + : :- *(15) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] + : : +- *(15) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) + : : +- *(15) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [i_item_sk#15, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] + : +- *(1) Filter (i_category#1 IN (Sports,Books,Home) && isnotnull(i_item_sk#15)) + : +- *(1) FileScan parquet default.item[i_item_sk#15,i_item_id#3,i_item_desc#4,i_current_price#6,i_class#2,i_category#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)], ReadSchema: struct= 10644)) && (d_date#16 <= 10674)) && isnotnull(d_date_sk#13)) + +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt new file mode 100644 index 000000000..497db4cde --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt @@ -0,0 +1,34 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,itemrevenue,i_category,revenueratio,i_current_price,i_class] + WholeStageCodegen + Project [i_item_id,i_item_desc,itemrevenue,_we0,i_category,i_current_price,i_class,_w0] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,i_category,sum,i_current_price,sum(UnscaledValue(ws_ext_sales_price)),i_class] [itemrevenue,_w1,sum,sum(UnscaledValue(ws_ext_sales_price)),_w0] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_current_price,i_class] #2 + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,ws_ext_sales_price,i_category,sum,i_current_price,i_class,sum] [sum,sum] + Project [i_class,i_current_price,ws_ext_sales_price,i_category,i_item_desc,i_item_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [i_class,i_current_price,ws_ext_sales_price,ws_sold_date_sk,i_category,i_item_desc,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + Filter [i_category,i_item_sk] + Scan parquet default.item [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt new file mode 100644 index 000000000..1e61c9ee5 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt @@ -0,0 +1,37 @@ +== Physical Plan == +*(7) HashAggregate(keys=[], functions=[avg(cast(ss_quantity#1 as bigint)), avg(UnscaledValue(ss_ext_sales_price#2)), avg(UnscaledValue(ss_ext_wholesale_cost#3)), sum(UnscaledValue(ss_ext_wholesale_cost#3))]) ++- Exchange SinglePartition + +- *(6) HashAggregate(keys=[], functions=[partial_avg(cast(ss_quantity#1 as bigint)), partial_avg(UnscaledValue(ss_ext_sales_price#2)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#3)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#3))]) + +- *(6) Project [ss_quantity#1, ss_ext_sales_price#2, ss_ext_wholesale_cost#3] + +- *(6) BroadcastHashJoin [ss_hdemo_sk#4], [hd_demo_sk#5], Inner, BuildRight, (((((((cd_marital_status#6 = M) && (cd_education_status#7 = Advanced Degree)) && (ss_sales_price#8 >= 100.00)) && (ss_sales_price#8 <= 150.00)) && (hd_dep_count#9 = 3)) || (((((cd_marital_status#6 = S) && (cd_education_status#7 = College)) && (ss_sales_price#8 >= 50.00)) && (ss_sales_price#8 <= 100.00)) && (hd_dep_count#9 = 1))) || (((((cd_marital_status#6 = W) && (cd_education_status#7 = 2 yr Degree)) && (ss_sales_price#8 >= 150.00)) && (ss_sales_price#8 <= 200.00)) && (hd_dep_count#9 = 1))) + :- *(6) Project [ss_hdemo_sk#4, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3, cd_marital_status#6, cd_education_status#7] + : +- *(6) BroadcastHashJoin [ss_cdemo_sk#10], [cd_demo_sk#11], Inner, BuildRight + : :- *(6) Project [ss_cdemo_sk#10, ss_hdemo_sk#4, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3] + : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + : : :- *(6) Project [ss_sold_date_sk#12, ss_cdemo_sk#10, ss_hdemo_sk#4, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3] + : : : +- *(6) BroadcastHashJoin [ss_addr_sk#14], [ca_address_sk#15], Inner, BuildRight, ((((ca_state#16 IN (TX,OH) && (ss_net_profit#17 >= 100.00)) && (ss_net_profit#17 <= 200.00)) || ((ca_state#16 IN (OR,NM,KY) && (ss_net_profit#17 >= 150.00)) && (ss_net_profit#17 <= 300.00))) || ((ca_state#16 IN (VA,TX,MS) && (ss_net_profit#17 >= 50.00)) && (ss_net_profit#17 <= 250.00))) + : : : :- *(6) Project [ss_sold_date_sk#12, ss_cdemo_sk#10, ss_hdemo_sk#4, ss_addr_sk#14, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3, ss_net_profit#17] + : : : : +- *(6) BroadcastHashJoin [ss_store_sk#18], [s_store_sk#19], Inner, BuildRight + : : : : :- *(6) Project [ss_sold_date_sk#12, ss_cdemo_sk#10, ss_hdemo_sk#4, ss_addr_sk#14, ss_store_sk#18, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3, ss_net_profit#17] + : : : : : +- *(6) Filter ((((isnotnull(ss_store_sk#18) && isnotnull(ss_addr_sk#14)) && isnotnull(ss_sold_date_sk#12)) && isnotnull(ss_cdemo_sk#10)) && isnotnull(ss_hdemo_sk#4)) + : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_cdemo_sk#10,ss_hdemo_sk#4,ss_addr_sk#14,ss_store_sk#18,ss_quantity#1,ss_sales_price#8,ss_ext_sales_price#2,ss_ext_wholesale_cost#3,ss_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_cdemo_sk..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [ca_address_sk#15, ca_state#16] + : : : +- *(2) Filter ((isnotnull(ca_country#20) && (ca_country#20 = United States)) && isnotnull(ca_address_sk#15)) + : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#16,ca_country#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [d_date_sk#13] + : : +- *(3) Filter ((isnotnull(d_year#21) && (d_year#21 = 2001)) && isnotnull(d_date_sk#13)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#13,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [cd_demo_sk#11, cd_marital_status#6, cd_education_status#7] + : +- *(4) Filter isnotnull(cd_demo_sk#11) + : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#11,cd_marital_status#6,cd_education_status#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [hd_demo_sk#5, hd_dep_count#9] + +- *(5) Filter isnotnull(hd_demo_sk#5) + +- *(5) FileScan parquet default.household_demographics[hd_demo_sk#5,hd_dep_count#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt new file mode 100644 index 000000000..0b02236c0 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt @@ -0,0 +1,49 @@ +WholeStageCodegen + HashAggregate [sum,sum,sum(UnscaledValue(ss_ext_wholesale_cost)),sum,avg(cast(ss_quantity as bigint)),count,sum,avg(UnscaledValue(ss_ext_sales_price)),count,avg(UnscaledValue(ss_ext_wholesale_cost)),count] [sum,avg(ss_ext_sales_price),sum,sum(UnscaledValue(ss_ext_wholesale_cost)),sum,sum(ss_ext_wholesale_cost),avg(ss_ext_wholesale_cost),avg(cast(ss_quantity as bigint)),avg(ss_quantity),count,sum,avg(UnscaledValue(ss_ext_sales_price)),count,avg(UnscaledValue(ss_ext_wholesale_cost)),count] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [ss_ext_wholesale_cost,count,sum,sum,count,sum,count,sum,sum,count,sum,sum,sum,ss_ext_sales_price,count,ss_quantity,count] [count,sum,sum,count,sum,count,sum,sum,count,sum,sum,sum,count,count] + Project [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [hd_dep_count,ss_hdemo_sk,ss_sales_price,cd_education_status,hd_demo_sk,cd_marital_status] + Project [ss_quantity,cd_marital_status,ss_ext_sales_price,ss_sales_price,ss_hdemo_sk,cd_education_status,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_quantity,ss_ext_sales_price,ss_sales_price,ss_cdemo_sk,ss_hdemo_sk,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_ext_sales_price,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] + Project [ss_addr_sk,ss_quantity,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_addr_sk,ss_quantity,ss_store_sk,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] + Filter [ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_quantity,ss_store_sk,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] [ss_addr_sk,ss_quantity,ss_store_sk,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk] [s_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ca_address_sk,ca_state] + Filter [ca_country,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] [ca_address_sk,ca_state,ca_country] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [cd_demo_sk,cd_marital_status,cd_education_status] + Filter [cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [hd_demo_sk,hd_dep_count] + Filter [hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] [hd_demo_sk,hd_dep_count] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt new file mode 100644 index 000000000..27a206912 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt @@ -0,0 +1,192 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 ASC NULLS FIRST,i_class_id#3 ASC NULLS FIRST,i_category_id#4 ASC NULLS FIRST], output=[channel#1,i_brand_id#2,i_class_id#3,i_category_id#4,sum(sales)#5,sum(number_sales)#6]) ++- *(80) HashAggregate(keys=[channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7], functions=[sum(sales#8), sum(number_sales#9)]) + +- Exchange hashpartitioning(channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7, 200) + +- *(79) HashAggregate(keys=[channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7], functions=[partial_sum(sales#8), partial_sum(number_sales#9)]) + +- *(79) Expand [List(sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, i_category_id#13, 0), List(sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, null, 1), List(sales#8, number_sales#9, channel#10, i_brand_id#11, null, null, 3), List(sales#8, number_sales#9, channel#10, null, null, null, 7), List(sales#8, number_sales#9, null, null, null, null, 15)], [sales#8, number_sales#9, channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7] + +- Union + :- *(26) Project [sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, i_category_id#13] + : +- *(26) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2)))#16) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2)))#16 as decimal(32,6)) > cast(Subquery subquery1662 as decimal(32,6)))) + : : +- Subquery subquery1662 + : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Exchange SinglePartition + : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Union + : : :- *(2) Project [ss_quantity#14 AS quantity#17, ss_list_price#15 AS list_price#18] + : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : : :- *(2) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15] + : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#19) + : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#20] + : : : +- *(1) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : :- *(4) Project [cs_quantity#22 AS quantity#23, cs_list_price#24 AS list_price#25] + : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight + : : : :- *(4) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24] + : : : : +- *(4) Filter isnotnull(cs_sold_date_sk#26) + : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [ws_quantity#27 AS quantity#28, ws_list_price#29 AS list_price#30] + : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight + : : :- *(6) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29] + : : : +- *(6) Filter isnotnull(ws_sold_date_sk#31) + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(26) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2))), count(1)]) + : +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 200) + : +- *(25) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) + : +- *(25) Project [ss_quantity#14, ss_list_price#15, i_brand_id#11, i_class_id#12, i_category_id#13] + : +- *(25) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : :- *(25) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15, i_brand_id#11, i_class_id#12, i_category_id#13] + : : +- *(25) BroadcastHashJoin [ss_item_sk#32], [i_item_sk#33], Inner, BuildRight + : : :- *(25) BroadcastHashJoin [ss_item_sk#32], [ss_item_sk#34], LeftSemi, BuildRight + : : : :- *(25) Project [ss_sold_date_sk#19, ss_item_sk#32, ss_quantity#14, ss_list_price#15] + : : : : +- *(25) Filter (isnotnull(ss_item_sk#32) && isnotnull(ss_sold_date_sk#19)) + : : : : +- *(25) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#32,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(11) Project [i_item_sk#33 AS ss_item_sk#34] + : : : +- *(11) BroadcastHashJoin [i_brand_id#11, i_class_id#12, i_category_id#13], [brand_id#35, class_id#36, category_id#37], Inner, BuildRight + : : : :- *(11) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : +- *(11) Filter ((isnotnull(i_class_id#12) && isnotnull(i_brand_id#11)) && isnotnull(i_category_id#13)) + : : : : +- *(11) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) + : : : +- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) + : : : +- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) + : : : +- *(10) BroadcastHashJoin [coalesce(brand_id#35, 0), coalesce(class_id#36, 0), coalesce(category_id#37, 0)], [coalesce(i_brand_id#11, 0), coalesce(i_class_id#12, 0), coalesce(i_category_id#13, 0)], LeftSemi, BuildRight, (((brand_id#35 <=> i_brand_id#11) && (class_id#36 <=> i_class_id#12)) && (category_id#37 <=> i_category_id#13)) + : : : :- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) + : : : : +- Exchange hashpartitioning(brand_id#35, class_id#36, category_id#37, 200) + : : : : +- *(6) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) + : : : : +- *(6) BroadcastHashJoin [coalesce(brand_id#35, 0), coalesce(class_id#36, 0), coalesce(category_id#37, 0)], [coalesce(i_brand_id#11, 0), coalesce(i_class_id#12, 0), coalesce(i_category_id#13, 0)], LeftSemi, BuildRight, (((brand_id#35 <=> i_brand_id#11) && (class_id#36 <=> i_class_id#12)) && (category_id#37 <=> i_category_id#13)) + : : : : :- *(6) Project [i_brand_id#11 AS brand_id#35, i_class_id#12 AS class_id#36, i_category_id#13 AS category_id#37] + : : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : : : : :- *(6) Project [ss_sold_date_sk#19, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : : : +- *(6) BroadcastHashJoin [ss_item_sk#32], [i_item_sk#33], Inner, BuildRight + : : : : : : :- *(6) Project [ss_sold_date_sk#19, ss_item_sk#32] + : : : : : : : +- *(6) Filter (isnotnull(ss_item_sk#32) && isnotnull(ss_sold_date_sk#19)) + : : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : +- *(1) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#33) && isnotnull(i_class_id#12)) && isnotnull(i_brand_id#11)) && isnotnull(i_category_id#13)) + : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(2) Project [d_date_sk#20] + : : : : : +- *(2) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) + : : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) + : : : : +- *(5) Project [i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight + : : : : :- *(5) Project [cs_sold_date_sk#26, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : : +- *(5) BroadcastHashJoin [cs_item_sk#38], [i_item_sk#33], Inner, BuildRight + : : : : : :- *(5) Project [cs_sold_date_sk#26, cs_item_sk#38] + : : : : : : +- *(5) Filter (isnotnull(cs_item_sk#38) && isnotnull(cs_sold_date_sk#26)) + : : : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_item_sk#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(3) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : : +- *(3) Filter isnotnull(i_item_sk#33) + : : : : : +- *(3) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) + : : : +- *(9) Project [i_brand_id#11, i_class_id#12, i_category_id#13] + : : : +- *(9) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight + : : : :- *(9) Project [ws_sold_date_sk#31, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : +- *(9) BroadcastHashJoin [ws_item_sk#39], [i_item_sk#33], Inner, BuildRight + : : : : :- *(9) Project [ws_sold_date_sk#31, ws_item_sk#39] + : : : : : +- *(9) Filter (isnotnull(ws_item_sk#39) && isnotnull(ws_sold_date_sk#31)) + : : : : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_item_sk#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(23) BroadcastHashJoin [i_item_sk#33], [ss_item_sk#34], LeftSemi, BuildRight + : : :- *(23) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : +- *(23) Filter isnotnull(i_item_sk#33) + : : : +- *(23) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : +- ReusedExchange [ss_item_sk#34], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(24) Project [d_date_sk#20] + : +- *(24) Filter ((((isnotnull(d_year#21) && isnotnull(d_moy#40)) && (d_year#21 = 2001)) && (d_moy#40 = 11)) && isnotnull(d_date_sk#20)) + : +- *(24) FileScan parquet default.date_dim[d_date_sk#20,d_year#21,d_moy#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct + :- *(52) Project [sales#41, number_sales#42, channel#43, i_brand_id#11, i_class_id#12, i_category_id#13] + : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44) && (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44 as decimal(32,6)) > cast(Subquery subquery1667 as decimal(32,6)))) + : : +- Subquery subquery1667 + : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Exchange SinglePartition + : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Union + : : :- *(2) Project [ss_quantity#14 AS quantity#17, ss_list_price#15 AS list_price#18] + : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : : :- *(2) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15] + : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#19) + : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#20] + : : : +- *(1) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : :- *(4) Project [cs_quantity#22 AS quantity#23, cs_list_price#24 AS list_price#25] + : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight + : : : :- *(4) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24] + : : : : +- *(4) Filter isnotnull(cs_sold_date_sk#26) + : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [ws_quantity#27 AS quantity#28, ws_list_price#29 AS list_price#30] + : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight + : : :- *(6) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29] + : : : +- *(6) Filter isnotnull(ws_sold_date_sk#31) + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(52) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2))), count(1)]) + : +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 200) + : +- *(51) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) + : +- *(51) Project [cs_quantity#22, cs_list_price#24, i_brand_id#11, i_class_id#12, i_category_id#13] + : +- *(51) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight + : :- *(51) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24, i_brand_id#11, i_class_id#12, i_category_id#13] + : : +- *(51) BroadcastHashJoin [cs_item_sk#38], [i_item_sk#33], Inner, BuildRight + : : :- *(51) BroadcastHashJoin [cs_item_sk#38], [ss_item_sk#34], LeftSemi, BuildRight + : : : :- *(51) Project [cs_sold_date_sk#26, cs_item_sk#38, cs_quantity#22, cs_list_price#24] + : : : : +- *(51) Filter (isnotnull(cs_item_sk#38) && isnotnull(cs_sold_date_sk#26)) + : : : : +- *(51) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_item_sk#38,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [ss_item_sk#34], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(78) Project [sales#45, number_sales#46, channel#47, i_brand_id#11, i_class_id#12, i_category_id#13] + +- *(78) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2)))#48) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2)))#48 as decimal(32,6)) > cast(Subquery subquery1672 as decimal(32,6)))) + : +- Subquery subquery1672 + : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) + : +- Exchange SinglePartition + : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) + : +- Union + : :- *(2) Project [ss_quantity#14 AS quantity#17, ss_list_price#15 AS list_price#18] + : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : :- *(2) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15] + : : : +- *(2) Filter isnotnull(ss_sold_date_sk#19) + : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [d_date_sk#20] + : : +- *(1) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : :- *(4) Project [cs_quantity#22 AS quantity#23, cs_list_price#24 AS list_price#25] + : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight + : : :- *(4) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24] + : : : +- *(4) Filter isnotnull(cs_sold_date_sk#26) + : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [ws_quantity#27 AS quantity#28, ws_list_price#29 AS list_price#30] + : +- *(6) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight + : :- *(6) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29] + : : +- *(6) Filter isnotnull(ws_sold_date_sk#31) + : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(78) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2))), count(1)]) + +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 200) + +- *(77) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) + +- *(77) Project [ws_quantity#27, ws_list_price#29, i_brand_id#11, i_class_id#12, i_category_id#13] + +- *(77) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight + :- *(77) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29, i_brand_id#11, i_class_id#12, i_category_id#13] + : +- *(77) BroadcastHashJoin [ws_item_sk#39], [i_item_sk#33], Inner, BuildRight + : :- *(77) BroadcastHashJoin [ws_item_sk#39], [ss_item_sk#34], LeftSemi, BuildRight + : : :- *(77) Project [ws_sold_date_sk#31, ws_item_sk#39, ws_quantity#27, ws_list_price#29] + : : : +- *(77) Filter (isnotnull(ws_item_sk#39) && isnotnull(ws_sold_date_sk#31)) + : : : +- *(77) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_item_sk#39,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [ss_item_sk#34], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt new file mode 100644 index 000000000..93bae0792 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt @@ -0,0 +1,267 @@ +TakeOrderedAndProject [sum(sales),i_brand_id,sum(number_sales),channel,i_category_id,i_class_id] + WholeStageCodegen + HashAggregate [sum(sales),spark_grouping_id,i_brand_id,sum,sum,channel,i_category_id,i_class_id,sum(number_salesL)] [sum(sales),sum(sales),sum(number_sales),sum,sum,sum(number_salesL)] + InputAdapter + Exchange [spark_grouping_id,i_brand_id,channel,i_category_id,i_class_id] #1 + WholeStageCodegen + HashAggregate [sales,spark_grouping_id,sum,i_brand_id,sum,number_sales,sum,sum,channel,i_category_id,i_class_id] [sum,sum,sum,sum] + Expand [channel,sales,i_category_id,number_sales,i_class_id,i_brand_id] + InputAdapter + Union + WholeStageCodegen + Project [channel,sales,i_category_id,number_sales,i_class_id,i_brand_id] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] + Subquery #1 + WholeStageCodegen + HashAggregate [sum,count,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2)))] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] + InputAdapter + Exchange #13 + WholeStageCodegen + HashAggregate [count,list_price,sum,quantity,sum,count] [sum,count,sum,count] + InputAdapter + Union + WholeStageCodegen + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_list_price] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #14 + WholeStageCodegen + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] + Filter [ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #14 + HashAggregate [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,i_category_id,i_brand_id,sum,i_class_id] [sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,number_sales,channel,sum] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #2 + WholeStageCodegen + HashAggregate [count,count,sum,ss_list_price,i_category_id,i_brand_id,ss_quantity,sum,i_class_id] [sum,count,sum,count] + Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,i_class_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,ss_sold_date_sk,i_class_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_item_sk] + BroadcastHashJoin [category_id,class_id,i_category_id,i_brand_id,i_class_id,brand_id] + Project [i_item_sk,i_brand_id,i_class_id,i_category_id] + Filter [i_class_id,i_brand_id,i_category_id] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + HashAggregate [brand_id,class_id,category_id] + HashAggregate [brand_id,class_id,category_id] + BroadcastHashJoin [class_id,i_class_id,i_category_id,brand_id,i_brand_id,category_id] + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #5 + WholeStageCodegen + HashAggregate [brand_id,class_id,category_id] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,class_id,brand_id,category_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_sold_date_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] [ss_sold_date_sk,ss_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [i_item_sk,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk,i_class_id,i_brand_id,i_category_id] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_sold_date_sk,cs_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk] [cs_sold_date_sk,cs_item_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [i_item_sk,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #7 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_sold_date_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk] [ws_sold_date_sk,ws_item_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] #9 + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #7 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [ss_item_sk] [ss_item_sk] #3 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + WholeStageCodegen + Project [sales,channel,i_category_id,i_brand_id,number_sales,i_class_id] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] + Subquery #2 + WholeStageCodegen + HashAggregate [sum,count,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2)))] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] + InputAdapter + Exchange #13 + WholeStageCodegen + HashAggregate [count,list_price,sum,quantity,sum,count] [sum,count,sum,count] + InputAdapter + Union + WholeStageCodegen + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_list_price] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #14 + WholeStageCodegen + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] + Filter [ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #14 + HashAggregate [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sum,i_category_id,i_brand_id,count,count(1),i_class_id] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sales,channel,sum,number_sales,count,count(1)] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #15 + WholeStageCodegen + HashAggregate [sum,cs_list_price,sum,i_category_id,count,i_brand_id,cs_quantity,count,i_class_id] [sum,count,sum,count] + Project [cs_quantity,i_category_id,i_brand_id,i_class_id,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_quantity,i_category_id,cs_sold_date_sk,i_brand_id,i_class_id,cs_list_price] + BroadcastHashJoin [cs_item_sk,i_item_sk] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] + Filter [cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] + InputAdapter + ReusedExchange [ss_item_sk] [ss_item_sk] #3 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #12 + WholeStageCodegen + Project [number_sales,i_category_id,i_brand_id,i_class_id,sales,channel] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] + Subquery #3 + WholeStageCodegen + HashAggregate [sum,count,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2)))] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] + InputAdapter + Exchange #13 + WholeStageCodegen + HashAggregate [count,list_price,sum,quantity,sum,count] [sum,count,sum,count] + InputAdapter + Union + WholeStageCodegen + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_list_price] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #14 + WholeStageCodegen + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] + Filter [ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #14 + HashAggregate [count(1),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum,count,i_category_id,i_brand_id,i_class_id] [count(1),number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum,count,sales,channel] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #16 + WholeStageCodegen + HashAggregate [count,sum,sum,count,i_category_id,i_brand_id,i_class_id,ws_list_price,ws_quantity] [sum,count,sum,count] + Project [ws_quantity,i_category_id,ws_list_price,i_brand_id,i_class_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_quantity,i_category_id,ws_list_price,ws_sold_date_sk,i_brand_id,i_class_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + BroadcastHashJoin [ws_item_sk,ss_item_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [ss_item_sk] [ss_item_sk] #3 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #12 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt new file mode 100644 index 000000000..b3130331d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt @@ -0,0 +1,166 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 ASC NULLS FIRST,i_class_id#3 ASC NULLS FIRST,i_category_id#4 ASC NULLS FIRST], output=[channel#1,i_brand_id#2,i_class_id#3,i_category_id#4,sales#5,number_sales#6,channel#7,i_brand_id#8,i_class_id#9,i_category_id#10,sales#11,number_sales#12]) ++- *(52) BroadcastHashJoin [i_brand_id#2, i_class_id#3, i_category_id#4], [i_brand_id#8, i_class_id#9, i_category_id#10], Inner, BuildRight + :- *(52) Project [channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, sales#5, number_sales#6] + : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#15) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#15 as decimal(32,6)) > cast(Subquery subquery1884 as decimal(32,6)))) + : : +- Subquery subquery1884 + : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Exchange SinglePartition + : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Union + : : :- *(2) Project [ss_quantity#13 AS quantity#16, ss_list_price#14 AS list_price#17] + : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + : : : :- *(2) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14] + : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#18) + : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#19] + : : : +- *(1) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : :- *(4) Project [cs_quantity#21 AS quantity#22, cs_list_price#23 AS list_price#24] + : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#19], Inner, BuildRight + : : : :- *(4) Project [cs_sold_date_sk#25, cs_quantity#21, cs_list_price#23] + : : : : +- *(4) Filter isnotnull(cs_sold_date_sk#25) + : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_quantity#21,cs_list_price#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [ws_quantity#26 AS quantity#27, ws_list_price#28 AS list_price#29] + : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#30], [d_date_sk#19], Inner, BuildRight + : : :- *(6) Project [ws_sold_date_sk#30, ws_quantity#26, ws_list_price#28] + : : : +- *(6) Filter isnotnull(ws_sold_date_sk#30) + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_quantity#26,ws_list_price#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(52) HashAggregate(keys=[i_brand_id#2, i_class_id#3, i_category_id#4], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), count(1)]) + : +- Exchange hashpartitioning(i_brand_id#2, i_class_id#3, i_category_id#4, 200) + : +- *(25) HashAggregate(keys=[i_brand_id#2, i_class_id#3, i_category_id#4], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) + : +- *(25) Project [ss_quantity#13, ss_list_price#14, i_brand_id#2, i_class_id#3, i_category_id#4] + : +- *(25) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + : :- *(25) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14, i_brand_id#2, i_class_id#3, i_category_id#4] + : : +- *(25) BroadcastHashJoin [ss_item_sk#31], [i_item_sk#32], Inner, BuildRight + : : :- *(25) BroadcastHashJoin [ss_item_sk#31], [ss_item_sk#33], LeftSemi, BuildRight + : : : :- *(25) Project [ss_sold_date_sk#18, ss_item_sk#31, ss_quantity#13, ss_list_price#14] + : : : : +- *(25) Filter (isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#18)) + : : : : +- *(25) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(11) Project [i_item_sk#32 AS ss_item_sk#33] + : : : +- *(11) BroadcastHashJoin [i_brand_id#2, i_class_id#3, i_category_id#4], [brand_id#34, class_id#35, category_id#36], Inner, BuildRight + : : : :- *(11) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : +- *(11) Filter ((isnotnull(i_class_id#3) && isnotnull(i_brand_id#2)) && isnotnull(i_category_id#4)) + : : : : +- *(11) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) + : : : +- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) + : : : +- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) + : : : +- *(10) BroadcastHashJoin [coalesce(brand_id#34, 0), coalesce(class_id#35, 0), coalesce(category_id#36, 0)], [coalesce(i_brand_id#2, 0), coalesce(i_class_id#3, 0), coalesce(i_category_id#4, 0)], LeftSemi, BuildRight, (((brand_id#34 <=> i_brand_id#2) && (class_id#35 <=> i_class_id#3)) && (category_id#36 <=> i_category_id#4)) + : : : :- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) + : : : : +- Exchange hashpartitioning(brand_id#34, class_id#35, category_id#36, 200) + : : : : +- *(6) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) + : : : : +- *(6) BroadcastHashJoin [coalesce(brand_id#34, 0), coalesce(class_id#35, 0), coalesce(category_id#36, 0)], [coalesce(i_brand_id#2, 0), coalesce(i_class_id#3, 0), coalesce(i_category_id#4, 0)], LeftSemi, BuildRight, (((brand_id#34 <=> i_brand_id#2) && (class_id#35 <=> i_class_id#3)) && (category_id#36 <=> i_category_id#4)) + : : : : :- *(6) Project [i_brand_id#2 AS brand_id#34, i_class_id#3 AS class_id#35, i_category_id#4 AS category_id#36] + : : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + : : : : : :- *(6) Project [ss_sold_date_sk#18, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : : : +- *(6) BroadcastHashJoin [ss_item_sk#31], [i_item_sk#32], Inner, BuildRight + : : : : : : :- *(6) Project [ss_sold_date_sk#18, ss_item_sk#31] + : : : : : : : +- *(6) Filter (isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#18)) + : : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : +- *(1) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_class_id#3)) && isnotnull(i_brand_id#2)) && isnotnull(i_category_id#4)) + : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(2) Project [d_date_sk#19] + : : : : : +- *(2) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) + : : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) + : : : : +- *(5) Project [i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#19], Inner, BuildRight + : : : : :- *(5) Project [cs_sold_date_sk#25, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : : +- *(5) BroadcastHashJoin [cs_item_sk#37], [i_item_sk#32], Inner, BuildRight + : : : : : :- *(5) Project [cs_sold_date_sk#25, cs_item_sk#37] + : : : : : : +- *(5) Filter (isnotnull(cs_item_sk#37) && isnotnull(cs_sold_date_sk#25)) + : : : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_item_sk#37] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(3) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : : +- *(3) Filter isnotnull(i_item_sk#32) + : : : : : +- *(3) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) + : : : +- *(9) Project [i_brand_id#2, i_class_id#3, i_category_id#4] + : : : +- *(9) BroadcastHashJoin [ws_sold_date_sk#30], [d_date_sk#19], Inner, BuildRight + : : : :- *(9) Project [ws_sold_date_sk#30, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : +- *(9) BroadcastHashJoin [ws_item_sk#38], [i_item_sk#32], Inner, BuildRight + : : : : :- *(9) Project [ws_sold_date_sk#30, ws_item_sk#38] + : : : : : +- *(9) Filter (isnotnull(ws_item_sk#38) && isnotnull(ws_sold_date_sk#30)) + : : : : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_item_sk#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : : +- ReusedExchange [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(23) BroadcastHashJoin [i_item_sk#32], [ss_item_sk#33], LeftSemi, BuildRight + : : :- *(23) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : +- *(23) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_class_id#3)) && isnotnull(i_category_id#4)) && isnotnull(i_brand_id#2)) + : : : +- *(23) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_brand_id)], ReadSchema: struct + : : +- ReusedExchange [ss_item_sk#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(24) Project [d_date_sk#19] + : +- *(24) Filter ((isnotnull(d_week_seq#39) && (d_week_seq#39 = Subquery subquery1883)) && isnotnull(d_date_sk#19)) + : : +- Subquery subquery1883 + : : +- *(1) Project [d_week_seq#39] + : : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + : : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct + : +- *(24) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct + : +- Subquery subquery1883 + : +- *(1) Project [d_week_seq#39] + : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true])) + +- *(51) Project [channel#7, i_brand_id#8, i_class_id#9, i_category_id#10, sales#11, number_sales#12] + +- *(51) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42 as decimal(32,6)) > cast(Subquery subquery1890 as decimal(32,6)))) + : +- Subquery subquery1890 + : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) + : +- Exchange SinglePartition + : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) + : +- Union + : :- *(2) Project [ss_quantity#13 AS quantity#16, ss_list_price#14 AS list_price#17] + : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + : : :- *(2) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14] + : : : +- *(2) Filter isnotnull(ss_sold_date_sk#18) + : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [d_date_sk#19] + : : +- *(1) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : :- *(4) Project [cs_quantity#21 AS quantity#22, cs_list_price#23 AS list_price#24] + : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#19], Inner, BuildRight + : : :- *(4) Project [cs_sold_date_sk#25, cs_quantity#21, cs_list_price#23] + : : : +- *(4) Filter isnotnull(cs_sold_date_sk#25) + : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_quantity#21,cs_list_price#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [ws_quantity#26 AS quantity#27, ws_list_price#28 AS list_price#29] + : +- *(6) BroadcastHashJoin [ws_sold_date_sk#30], [d_date_sk#19], Inner, BuildRight + : :- *(6) Project [ws_sold_date_sk#30, ws_quantity#26, ws_list_price#28] + : : +- *(6) Filter isnotnull(ws_sold_date_sk#30) + : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_quantity#26,ws_list_price#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(51) HashAggregate(keys=[i_brand_id#8, i_class_id#9, i_category_id#10], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), count(1)]) + +- Exchange hashpartitioning(i_brand_id#8, i_class_id#9, i_category_id#10, 200) + +- *(50) HashAggregate(keys=[i_brand_id#8, i_class_id#9, i_category_id#10], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) + +- *(50) Project [ss_quantity#13, ss_list_price#14, i_brand_id#8, i_class_id#9, i_category_id#10] + +- *(50) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + :- *(50) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14, i_brand_id#8, i_class_id#9, i_category_id#10] + : +- *(50) BroadcastHashJoin [ss_item_sk#31], [i_item_sk#43], Inner, BuildRight + : :- *(50) BroadcastHashJoin [ss_item_sk#31], [ss_item_sk#33], LeftSemi, BuildRight + : : :- *(50) Project [ss_sold_date_sk#18, ss_item_sk#31, ss_quantity#13, ss_list_price#14] + : : : +- *(50) Filter (isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#18)) + : : : +- *(50) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [ss_item_sk#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [i_item_sk#43, i_brand_id#8, i_class_id#9, i_category_id#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(49) Project [d_date_sk#19] + +- *(49) Filter ((isnotnull(d_week_seq#39) && (d_week_seq#39 = Subquery subquery1889)) && isnotnull(d_date_sk#19)) + : +- Subquery subquery1889 + : +- *(1) Project [d_week_seq#39] + : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct + +- *(49) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct + +- Subquery subquery1889 + +- *(1) Project [d_week_seq#39] + +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt new file mode 100644 index 000000000..d0ac00bfe --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt @@ -0,0 +1,226 @@ +TakeOrderedAndProject [i_brand_id,i_category_id,number_sales,i_class_id,i_brand_id,sales,sales,number_sales,i_category_id,channel,channel,i_class_id] + WholeStageCodegen + BroadcastHashJoin [i_category_id,i_category_id,i_brand_id,i_class_id,i_brand_id,i_class_id] + Project [number_sales,i_category_id,channel,sales,i_brand_id,i_class_id] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] + Subquery #2 + WholeStageCodegen + HashAggregate [sum,count,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2)))] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] + InputAdapter + Exchange #12 + WholeStageCodegen + HashAggregate [quantity,sum,count,list_price,count,sum] [sum,count,sum,count] + InputAdapter + Union + WholeStageCodegen + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_list_price] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #13 + WholeStageCodegen + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] + Filter [ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #13 + HashAggregate [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),sum,i_category_id,i_brand_id,count,i_class_id] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),number_sales,count(1),sum,channel,sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #1 + WholeStageCodegen + HashAggregate [sum,sum,ss_list_price,i_category_id,i_brand_id,ss_quantity,count,i_class_id,count] [sum,count,sum,count] + Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,i_class_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,ss_sold_date_sk,i_class_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [i_item_sk] + BroadcastHashJoin [brand_id,category_id,i_category_id,i_brand_id,class_id,i_class_id] + Project [i_item_sk,i_brand_id,i_class_id,i_category_id] + Filter [i_class_id,i_brand_id,i_category_id] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + HashAggregate [brand_id,class_id,category_id] + HashAggregate [brand_id,class_id,category_id] + BroadcastHashJoin [i_class_id,i_category_id,class_id,category_id,i_brand_id,brand_id] + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #4 + WholeStageCodegen + HashAggregate [brand_id,class_id,category_id] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,class_id,category_id,brand_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_sold_date_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] [ss_sold_date_sk,ss_item_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [i_item_sk,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk,i_class_id,i_brand_id,i_category_id] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_sold_date_sk,cs_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk] [cs_sold_date_sk,cs_item_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [i_item_sk,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #6 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_sold_date_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk] [ws_sold_date_sk,ws_item_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] #8 + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #6 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_brand_id,i_class_id,i_category_id] + Filter [i_item_sk,i_class_id,i_category_id,i_brand_id] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [ss_item_sk] [ss_item_sk] #2 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen + Project [d_date_sk] + Filter [d_week_seq,d_date_sk] + Subquery #1 + WholeStageCodegen + Project [d_week_seq] + Filter [d_year,d_dom,d_moy] + Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] + Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] + Subquery #1 + WholeStageCodegen + Project [d_week_seq] + Filter [d_year,d_dom,d_moy] + Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen + Project [i_brand_id,i_class_id,sales,number_sales,i_category_id,channel] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] + Subquery #4 + WholeStageCodegen + HashAggregate [sum,count,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2)))] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] + InputAdapter + Exchange #12 + WholeStageCodegen + HashAggregate [quantity,sum,count,list_price,count,sum] [sum,count,sum,count] + InputAdapter + Union + WholeStageCodegen + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_list_price] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #13 + WholeStageCodegen + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] + Filter [ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #13 + HashAggregate [i_brand_id,sum,i_class_id,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,i_category_id] [sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),channel] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #15 + WholeStageCodegen + HashAggregate [i_brand_id,sum,sum,i_class_id,count,count,ss_list_price,i_category_id,ss_quantity] [sum,count,sum,count] + Project [ss_quantity,i_category_id,ss_list_price,i_brand_id,i_class_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,i_category_id,ss_list_price,i_brand_id,ss_sold_date_sk,i_class_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + InputAdapter + ReusedExchange [ss_item_sk] [ss_item_sk] #2 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen + Project [d_date_sk] + Filter [d_week_seq,d_date_sk] + Subquery #3 + WholeStageCodegen + Project [d_week_seq] + Filter [d_year,d_dom,d_moy] + Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] + Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] + Subquery #3 + WholeStageCodegen + Project [d_week_seq] + Filter [d_year,d_dom,d_moy] + Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt new file mode 100644 index 000000000..e74a4a6e2 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt @@ -0,0 +1,26 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[ca_zip#1 ASC NULLS FIRST], output=[ca_zip#1,sum(cs_sales_price)#2]) ++- *(5) HashAggregate(keys=[ca_zip#1], functions=[sum(UnscaledValue(cs_sales_price#3))]) + +- Exchange hashpartitioning(ca_zip#1, 200) + +- *(4) HashAggregate(keys=[ca_zip#1], functions=[partial_sum(UnscaledValue(cs_sales_price#3))]) + +- *(4) Project [cs_sales_price#3, ca_zip#1] + +- *(4) BroadcastHashJoin [cs_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + :- *(4) Project [cs_sold_date_sk#4, cs_sales_price#3, ca_zip#1] + : +- *(4) BroadcastHashJoin [c_current_addr_sk#6], [ca_address_sk#7], Inner, BuildRight, ((substring(ca_zip#1, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) || ca_state#8 IN (CA,WA,GA)) || (cs_sales_price#3 > 500.00)) + : :- *(4) Project [cs_sold_date_sk#4, cs_sales_price#3, c_current_addr_sk#6] + : : +- *(4) BroadcastHashJoin [cs_bill_customer_sk#9], [c_customer_sk#10], Inner, BuildRight + : : :- *(4) Project [cs_sold_date_sk#4, cs_bill_customer_sk#9, cs_sales_price#3] + : : : +- *(4) Filter (isnotnull(cs_bill_customer_sk#9) && isnotnull(cs_sold_date_sk#4)) + : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#4,cs_bill_customer_sk#9,cs_sales_price#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [c_customer_sk#10, c_current_addr_sk#6] + : : +- *(1) Filter (isnotnull(c_customer_sk#10) && isnotnull(c_current_addr_sk#6)) + : : +- *(1) FileScan parquet default.customer[c_customer_sk#10,c_current_addr_sk#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [ca_address_sk#7, ca_state#8, ca_zip#1] + : +- *(2) Filter isnotnull(ca_address_sk#7) + : +- *(2) FileScan parquet default.customer_address[ca_address_sk#7,ca_state#8,ca_zip#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [d_date_sk#5] + +- *(3) Filter ((((isnotnull(d_qoy#11) && isnotnull(d_year#12)) && (d_qoy#11 = 2)) && (d_year#12 = 2001)) && isnotnull(d_date_sk#5)) + +- *(3) FileScan parquet default.date_dim[d_date_sk#5,d_year#12,d_qoy#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/simplified.txt new file mode 100644 index 000000000..5d72165cb --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/simplified.txt @@ -0,0 +1,34 @@ +TakeOrderedAndProject [ca_zip,sum(cs_sales_price)] + WholeStageCodegen + HashAggregate [ca_zip,sum,sum(UnscaledValue(cs_sales_price))] [sum(UnscaledValue(cs_sales_price)),sum(cs_sales_price),sum] + InputAdapter + Exchange [ca_zip] #1 + WholeStageCodegen + HashAggregate [ca_zip,cs_sales_price,sum,sum] [sum,sum] + Project [cs_sales_price,ca_zip] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_sales_price,ca_zip] + BroadcastHashJoin [ca_zip,cs_sales_price,ca_address_sk,c_current_addr_sk,ca_state] + Project [cs_sold_date_sk,cs_sales_price,c_current_addr_sk] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_sales_price] + Filter [cs_bill_customer_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_sales_price] [cs_sold_date_sk,cs_bill_customer_sk,cs_sales_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [c_customer_sk,c_current_addr_sk] + Filter [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ca_address_sk,ca_state,ca_zip] + Filter [ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state,ca_zip] [ca_address_sk,ca_state,ca_zip] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_qoy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt new file mode 100644 index 000000000..f1ebfcb6e --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt @@ -0,0 +1,37 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], output=[order count #1,total shipping cost #2,total net profit #3]) ++- *(8) HashAggregate(keys=[], functions=[sum(UnscaledValue(cs_ext_ship_cost#4)), sum(UnscaledValue(cs_net_profit#5)), count(distinct cs_order_number#6)]) + +- Exchange SinglePartition + +- *(7) HashAggregate(keys=[], functions=[merge_sum(UnscaledValue(cs_ext_ship_cost#4)), merge_sum(UnscaledValue(cs_net_profit#5)), partial_count(distinct cs_order_number#6)]) + +- *(7) HashAggregate(keys=[cs_order_number#6], functions=[merge_sum(UnscaledValue(cs_ext_ship_cost#4)), merge_sum(UnscaledValue(cs_net_profit#5))]) + +- Exchange hashpartitioning(cs_order_number#6, 200) + +- *(6) HashAggregate(keys=[cs_order_number#6], functions=[partial_sum(UnscaledValue(cs_ext_ship_cost#4)), partial_sum(UnscaledValue(cs_net_profit#5))]) + +- *(6) Project [cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] + +- *(6) BroadcastHashJoin [cs_call_center_sk#7], [cc_call_center_sk#8], Inner, BuildRight + :- *(6) Project [cs_call_center_sk#7, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] + : +- *(6) BroadcastHashJoin [cs_ship_addr_sk#9], [ca_address_sk#10], Inner, BuildRight + : :- *(6) Project [cs_ship_addr_sk#9, cs_call_center_sk#7, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] + : : +- *(6) BroadcastHashJoin [cs_ship_date_sk#11], [d_date_sk#12], Inner, BuildRight + : : :- *(6) BroadcastHashJoin [cs_order_number#6], [cr_order_number#13], LeftAnti, BuildRight + : : : :- *(6) Project [cs_ship_date_sk#11, cs_ship_addr_sk#9, cs_call_center_sk#7, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] + : : : : +- *(6) BroadcastHashJoin [cs_order_number#6], [cs_order_number#6#14], LeftSemi, BuildRight, NOT (cs_warehouse_sk#15 = cs_warehouse_sk#15#16) + : : : : :- *(6) Project [cs_ship_date_sk#11, cs_ship_addr_sk#9, cs_call_center_sk#7, cs_warehouse_sk#15, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] + : : : : : +- *(6) Filter ((isnotnull(cs_ship_date_sk#11) && isnotnull(cs_ship_addr_sk#9)) && isnotnull(cs_call_center_sk#7)) + : : : : : +- *(6) FileScan parquet default.catalog_sales[cs_ship_date_sk#11,cs_ship_addr_sk#9,cs_call_center_sk#7,cs_warehouse_sk#15,cs_order_number#6,cs_ext_ship_cost#4,cs_net_profit#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_ship_date_sk), IsNotNull(cs_ship_addr_sk), IsNotNull(cs_call_center_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) FileScan parquet default.catalog_returns[cr_order_number#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [d_date_sk#12] + : : +- *(3) Filter (((isnotnull(d_date#17) && (cast(d_date#17 as string) >= 2002-02-01)) && (d_date#17 <= 11779)) && isnotnull(d_date_sk#12)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2002-04-02), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [ca_address_sk#10] + : +- *(4) Filter ((isnotnull(ca_state#18) && (ca_state#18 = GA)) && isnotnull(ca_address_sk#10)) + : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [cc_call_center_sk#8] + +- *(5) Filter ((isnotnull(cc_county#19) && (cc_county#19 = Williamson County)) && isnotnull(cc_call_center_sk#8)) + +- *(5) FileScan parquet default.call_center[cc_call_center_sk#8,cc_county#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_county), EqualTo(cc_county,Williamson County), IsNotNull(cc_call_center_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt new file mode 100644 index 000000000..57b72893e --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt @@ -0,0 +1,51 @@ +TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] + WholeStageCodegen + HashAggregate [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),total net profit ,order count ,count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),total shipping cost ] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),cs_order_number] [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] + HashAggregate [sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),cs_order_number] [sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] + InputAdapter + Exchange [cs_order_number] #2 + WholeStageCodegen + HashAggregate [cs_ext_ship_cost,sum(UnscaledValue(cs_net_profit)),sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),cs_order_number,cs_net_profit] [sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] + Project [cs_order_number,cs_ext_ship_cost,cs_net_profit] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + BroadcastHashJoin [cs_ship_addr_sk,ca_address_sk] + Project [cs_ship_addr_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + BroadcastHashJoin [cs_order_number,cr_order_number] + Project [cs_ship_addr_sk,cs_ship_date_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] + BroadcastHashJoin [cs_order_number,cs_order_number,cs_warehouse_sk,cs_warehouse_sk] + Project [cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] + Filter [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk] + Scan parquet default.catalog_sales [cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] [cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [cs_warehouse_sk,cs_order_number] + Scan parquet default.catalog_sales [cs_warehouse_sk,cs_order_number] [cs_warehouse_sk,cs_order_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Scan parquet default.catalog_returns [cr_order_number] [cr_order_number] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [cc_call_center_sk] + Filter [cc_county,cc_call_center_sk] + Scan parquet default.call_center [cc_call_center_sk,cc_county] [cc_call_center_sk,cc_county] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt new file mode 100644 index 000000000..62f104f27 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt @@ -0,0 +1,50 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST,s_state#3 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,s_state#3,store_sales_quantitycount#4,store_sales_quantityave#5,store_sales_quantitystdev#6,store_sales_quantitycov#7,as_store_returns_quantitycount#8,as_store_returns_quantityave#9,as_store_returns_quantitystdev#10,store_returns_quantitycov#11,catalog_sales_quantitycount#12,catalog_sales_quantityave#13,catalog_sales_quantitystdev#14,catalog_sales_quantitycov#15]) ++- *(9) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_state#3], functions=[count(ss_quantity#16), avg(cast(ss_quantity#16 as bigint)), stddev_samp(cast(ss_quantity#16 as double)), count(sr_return_quantity#17), avg(cast(sr_return_quantity#17 as bigint)), stddev_samp(cast(sr_return_quantity#17 as double)), count(cs_quantity#18), avg(cast(cs_quantity#18 as bigint)), stddev_samp(cast(cs_quantity#18 as double))]) + +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, s_state#3, 200) + +- *(8) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_state#3], functions=[partial_count(ss_quantity#16), partial_avg(cast(ss_quantity#16 as bigint)), partial_stddev_samp(cast(ss_quantity#16 as double)), partial_count(sr_return_quantity#17), partial_avg(cast(sr_return_quantity#17 as bigint)), partial_stddev_samp(cast(sr_return_quantity#17 as double)), partial_count(cs_quantity#18), partial_avg(cast(cs_quantity#18 as bigint)), partial_stddev_samp(cast(cs_quantity#18 as double))]) + +- *(8) Project [ss_quantity#16, sr_return_quantity#17, cs_quantity#18, s_state#3, i_item_id#1, i_item_desc#2] + +- *(8) BroadcastHashJoin [ss_item_sk#19], [i_item_sk#20], Inner, BuildRight + :- *(8) Project [ss_item_sk#19, ss_quantity#16, sr_return_quantity#17, cs_quantity#18, s_state#3] + : +- *(8) BroadcastHashJoin [ss_store_sk#21], [s_store_sk#22], Inner, BuildRight + : :- *(8) Project [ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_return_quantity#17, cs_quantity#18] + : : +- *(8) BroadcastHashJoin [cs_sold_date_sk#23], [d_date_sk#24], Inner, BuildRight + : : :- *(8) Project [ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_return_quantity#17, cs_sold_date_sk#23, cs_quantity#18] + : : : +- *(8) BroadcastHashJoin [sr_returned_date_sk#25], [cast(d_date_sk#26 as bigint)], Inner, BuildRight + : : : :- *(8) Project [ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_returned_date_sk#25, sr_return_quantity#17, cs_sold_date_sk#23, cs_quantity#18] + : : : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#27], [d_date_sk#28], Inner, BuildRight + : : : : :- *(8) Project [ss_sold_date_sk#27, ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_returned_date_sk#25, sr_return_quantity#17, cs_sold_date_sk#23, cs_quantity#18] + : : : : : +- *(8) BroadcastHashJoin [sr_customer_sk#29, sr_item_sk#30], [cast(cs_bill_customer_sk#31 as bigint), cast(cs_item_sk#32 as bigint)], Inner, BuildRight + : : : : : :- *(8) Project [ss_sold_date_sk#27, ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_returned_date_sk#25, sr_item_sk#30, sr_customer_sk#29, sr_return_quantity#17] + : : : : : : +- *(8) BroadcastHashJoin [cast(ss_customer_sk#33 as bigint), cast(ss_item_sk#19 as bigint), cast(ss_ticket_number#34 as bigint)], [sr_customer_sk#29, sr_item_sk#30, sr_ticket_number#35], Inner, BuildRight + : : : : : : :- *(8) Project [ss_sold_date_sk#27, ss_item_sk#19, ss_customer_sk#33, ss_store_sk#21, ss_ticket_number#34, ss_quantity#16] + : : : : : : : +- *(8) Filter ((((isnotnull(ss_ticket_number#34) && isnotnull(ss_item_sk#19)) && isnotnull(ss_customer_sk#33)) && isnotnull(ss_sold_date_sk#27)) && isnotnull(ss_store_sk#21)) + : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#27,ss_item_sk#19,ss_customer_sk#33,ss_store_sk#21,ss_ticket_number#34,ss_quantity#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(3) Project [d_date_sk#28] + : : : : +- *(3) Filter ((isnotnull(d_quarter_name#36) && (d_quarter_name#36 = 2001Q1)) && isnotnull(d_date_sk#28)) + : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#28,d_quarter_name#36] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_quarter_name), EqualTo(d_quarter_name,2001Q1), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [d_date_sk#26] + : : : +- *(4) Filter (d_quarter_name#37 IN (2001Q1,2001Q2,2001Q3) && isnotnull(d_date_sk#26)) + : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#26,d_quarter_name#37] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(5) Project [d_date_sk#24] + : : +- *(5) Filter (d_quarter_name#38 IN (2001Q1,2001Q2,2001Q3) && isnotnull(d_date_sk#24)) + : : +- *(5) FileScan parquet default.date_dim[d_date_sk#24,d_quarter_name#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [s_store_sk#22, s_state#3] + : +- *(6) Filter isnotnull(s_store_sk#22) + : +- *(6) FileScan parquet default.store[s_store_sk#22,s_state#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) Project [i_item_sk#20, i_item_id#1, i_item_desc#2] + +- *(7) Filter isnotnull(i_item_sk#20) + +- *(7) FileScan parquet default.item[i_item_sk#20,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt new file mode 100644 index 000000000..454790ddb --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt @@ -0,0 +1,66 @@ +TakeOrderedAndProject [as_store_returns_quantityave,catalog_sales_quantitycount,i_item_id,i_item_desc,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitystdev,catalog_sales_quantitycov,s_state,store_returns_quantitycov,catalog_sales_quantityave,store_sales_quantitycov,as_store_returns_quantitystdev,as_store_returns_quantitycount,catalog_sales_quantitystdev] + WholeStageCodegen + HashAggregate [count,avg(cast(ss_quantity as bigint)),i_item_id,stddev_samp(cast(ss_quantity as double)),i_item_desc,stddev_samp(cast(sr_return_quantity as double)),m2,avg(cast(sr_return_quantity as bigint)),n,count,m2,avg,avg,s_state,count(sr_return_quantity),sum,avg,n,count,m2,count(cs_quantity),sum,stddev_samp(cast(cs_quantity as double)),avg(cast(cs_quantity as bigint)),count,n,sum,count,count(ss_quantity),count] [count,avg(cast(ss_quantity as bigint)),as_store_returns_quantityave,catalog_sales_quantitycount,stddev_samp(cast(ss_quantity as double)),stddev_samp(cast(sr_return_quantity as double)),m2,avg(cast(sr_return_quantity as bigint)),n,count,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitystdev,m2,avg,avg,catalog_sales_quantitycov,count(sr_return_quantity),store_returns_quantitycov,sum,catalog_sales_quantityave,store_sales_quantitycov,avg,n,count,as_store_returns_quantitystdev,m2,count(cs_quantity),sum,stddev_samp(cast(cs_quantity as double)),avg(cast(cs_quantity as bigint)),count,as_store_returns_quantitycount,n,sum,count,count(ss_quantity),count,catalog_sales_quantitystdev] + InputAdapter + Exchange [i_item_id,i_item_desc,s_state] #1 + WholeStageCodegen + HashAggregate [count,m2,i_item_id,m2,i_item_desc,m2,count,avg,avg,m2,count,n,count,sum,n,sum,n,count,m2,avg,avg,s_state,avg,sum,sr_return_quantity,avg,count,count,count,n,count,m2,sum,count,ss_quantity,cs_quantity,n,sum,count,n,sum,count] [count,m2,m2,m2,count,avg,avg,m2,count,n,count,sum,n,sum,n,count,m2,avg,avg,avg,sum,avg,count,count,count,n,count,m2,sum,count,n,sum,count,n,sum,count] + Project [ss_quantity,cs_quantity,sr_return_quantity,s_state,i_item_desc,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,sr_return_quantity,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,sr_returned_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,ss_sold_date_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + Project [ss_quantity,ss_item_sk,ss_store_sk,sr_return_quantity,sr_customer_sk,sr_item_sk,ss_sold_date_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] + Project [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] + Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] + Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] + Scan parquet default.store_returns [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_quarter_name,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_quarter_name] [d_date_sk,d_quarter_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_quarter_name,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_quarter_name] [d_date_sk,d_quarter_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [d_date_sk] + Filter [d_quarter_name,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_quarter_name] [d_date_sk,d_quarter_name] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [s_store_sk,s_state] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_state] [s_store_sk,s_state] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [i_item_sk,i_item_id,i_item_desc] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] [i_item_sk,i_item_id,i_item_desc] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt new file mode 100644 index 000000000..9c1bcddcf --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt @@ -0,0 +1,45 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[ca_country#1 ASC NULLS FIRST,ca_state#2 ASC NULLS FIRST,ca_county#3 ASC NULLS FIRST,i_item_id#4 ASC NULLS FIRST], output=[i_item_id#4,ca_country#1,ca_state#2,ca_county#3,agg1#5,agg2#6,agg3#7,agg4#8,agg5#9,agg6#10,agg7#11]) ++- *(8) HashAggregate(keys=[i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12], functions=[avg(cast(cs_quantity#13 as decimal(12,2))), avg(cast(cs_list_price#14 as decimal(12,2))), avg(cast(cs_coupon_amt#15 as decimal(12,2))), avg(cast(cs_sales_price#16 as decimal(12,2))), avg(cast(cs_net_profit#17 as decimal(12,2))), avg(cast(c_birth_year#18 as decimal(12,2))), avg(cast(cd_dep_count#19 as decimal(12,2)))]) + +- Exchange hashpartitioning(i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12, 200) + +- *(7) HashAggregate(keys=[i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12], functions=[partial_avg(cast(cs_quantity#13 as decimal(12,2))), partial_avg(cast(cs_list_price#14 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#15 as decimal(12,2))), partial_avg(cast(cs_sales_price#16 as decimal(12,2))), partial_avg(cast(cs_net_profit#17 as decimal(12,2))), partial_avg(cast(c_birth_year#18 as decimal(12,2))), partial_avg(cast(cd_dep_count#19 as decimal(12,2)))]) + +- *(7) Expand [List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, ca_country#21, ca_state#22, ca_county#23, 0), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, ca_country#21, ca_state#22, null, 1), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, ca_country#21, null, null, 3), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, null, null, null, 7), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, null, null, null, null, 15)], [cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12] + +- *(7) Project [cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#24 AS i_item_id#20, ca_country#25 AS ca_country#21, ca_state#26 AS ca_state#22, ca_county#27 AS ca_county#23] + +- *(7) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#29], Inner, BuildRight + :- *(7) Project [cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, ca_county#27, ca_state#26, ca_country#25] + : +- *(7) BroadcastHashJoin [cs_sold_date_sk#30], [d_date_sk#31], Inner, BuildRight + : :- *(7) Project [cs_sold_date_sk#30, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, ca_county#27, ca_state#26, ca_country#25] + : : +- *(7) BroadcastHashJoin [c_current_addr_sk#32], [ca_address_sk#33], Inner, BuildRight + : : :- *(7) Project [cs_sold_date_sk#30, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_current_addr_sk#32, c_birth_year#18] + : : : +- *(7) BroadcastHashJoin [c_current_cdemo_sk#34], [cd_demo_sk#35], Inner, BuildRight + : : : :- *(7) Project [cs_sold_date_sk#30, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_current_cdemo_sk#34, c_current_addr_sk#32, c_birth_year#18] + : : : : +- *(7) BroadcastHashJoin [cs_bill_customer_sk#36], [c_customer_sk#37], Inner, BuildRight + : : : : :- *(7) Project [cs_sold_date_sk#30, cs_bill_customer_sk#36, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19] + : : : : : +- *(7) BroadcastHashJoin [cs_bill_cdemo_sk#38], [cd_demo_sk#39], Inner, BuildRight + : : : : : :- *(7) Project [cs_sold_date_sk#30, cs_bill_customer_sk#36, cs_bill_cdemo_sk#38, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17] + : : : : : : +- *(7) Filter (((isnotnull(cs_bill_cdemo_sk#38) && isnotnull(cs_bill_customer_sk#36)) && isnotnull(cs_sold_date_sk#30)) && isnotnull(cs_item_sk#28)) + : : : : : : +- *(7) FileScan parquet default.catalog_sales[cs_sold_date_sk#30,cs_bill_customer_sk#36,cs_bill_cdemo_sk#38,cs_item_sk#28,cs_quantity#13,cs_list_price#14,cs_sales_price#16,cs_coupon_amt#15,cs_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk), IsNotNu..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [c_customer_sk#37, c_current_cdemo_sk#34, c_current_addr_sk#32, c_birth_year#18] + : : : : +- *(2) Filter (((c_birth_month#42 IN (1,6,8,9,12,2) && isnotnull(c_customer_sk#37)) && isnotnull(c_current_cdemo_sk#34)) && isnotnull(c_current_addr_sk#32)) + : : : : +- *(2) FileScan parquet default.customer[c_customer_sk#37,c_current_cdemo_sk#34,c_current_addr_sk#32,c_birth_month#42,c_birth_year#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [In(c_birth_month, [1,6,8,9,12,2]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNo..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(4) Project [ca_address_sk#33, ca_county#27, ca_state#26, ca_country#25] + : : +- *(4) Filter (ca_state#26 IN (MS,IN,ND,OK,NM,VA) && isnotnull(ca_address_sk#33)) + : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#33,ca_county#27,ca_state#26,ca_country#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [In(ca_state, [MS,IN,ND,OK,NM,VA]), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(5) Project [d_date_sk#31] + : +- *(5) Filter ((isnotnull(d_year#43) && (d_year#43 = 1998)) && isnotnull(d_date_sk#31)) + : +- *(5) FileScan parquet default.date_dim[d_date_sk#31,d_year#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(6) Project [i_item_sk#29, i_item_id#24] + +- *(6) Filter isnotnull(i_item_sk#29) + +- *(6) FileScan parquet default.item[i_item_sk#29,i_item_id#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt new file mode 100644 index 000000000..d045bd95e --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt @@ -0,0 +1,59 @@ +TakeOrderedAndProject [ca_country,agg7,agg6,ca_county,agg2,agg1,agg4,agg3,ca_state,agg5,i_item_id] + WholeStageCodegen + HashAggregate [ca_country,avg(cast(cs_quantity as decimal(12,2))),count,sum,avg(cast(cs_coupon_amt as decimal(12,2))),sum,sum,ca_county,avg(cast(cs_list_price as decimal(12,2))),sum,sum,count,sum,count,avg(cast(cs_net_profit as decimal(12,2))),spark_grouping_id,sum,count,ca_state,avg(cast(cd_dep_count as decimal(12,2))),count,avg(cast(cs_sales_price as decimal(12,2))),count,count,avg(cast(c_birth_year as decimal(12,2))),i_item_id] [avg(cast(cs_quantity as decimal(12,2))),count,agg7,sum,avg(cast(cs_coupon_amt as decimal(12,2))),sum,sum,agg6,avg(cast(cs_list_price as decimal(12,2))),agg2,agg1,sum,sum,count,sum,count,agg4,agg3,avg(cast(cs_net_profit as decimal(12,2))),sum,count,agg5,avg(cast(cd_dep_count as decimal(12,2))),count,avg(cast(cs_sales_price as decimal(12,2))),count,count,avg(cast(c_birth_year as decimal(12,2)))] + InputAdapter + Exchange [ca_country,ca_county,spark_grouping_id,ca_state,i_item_id] #1 + WholeStageCodegen + HashAggregate [ca_country,cs_list_price,cs_coupon_amt,sum,count,sum,count,count,sum,sum,count,sum,sum,count,sum,count,ca_county,sum,sum,count,sum,count,count,sum,sum,sum,spark_grouping_id,cs_net_profit,sum,count,ca_state,cs_sales_price,cs_quantity,cd_dep_count,count,count,c_birth_year,count,count,i_item_id] [sum,count,sum,count,count,sum,sum,count,sum,sum,count,sum,count,sum,sum,count,sum,count,count,sum,sum,sum,sum,count,count,count,count,count] + Expand [cs_list_price,cs_coupon_amt,ca_country,i_item_id,ca_county,cs_net_profit,cs_sales_price,cs_quantity,cd_dep_count,ca_state,c_birth_year] + Project [cs_list_price,cs_coupon_amt,i_item_id,ca_county,ca_country,cs_net_profit,cs_sales_price,cs_quantity,cd_dep_count,c_birth_year,ca_state] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [ca_state,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cd_dep_count,ca_county,cs_item_sk,cs_net_profit,ca_country,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [ca_state,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cs_sold_date_sk,cd_dep_count,ca_county,cs_item_sk,cs_net_profit,ca_country,cs_list_price] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cs_sold_date_sk,cd_dep_count,cs_item_sk,cs_net_profit,cs_list_price] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,c_current_addr_sk,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cs_sold_date_sk,cd_dep_count,cs_item_sk,cs_net_profit,cs_list_price] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_coupon_amt,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cd_dep_count,cs_item_sk,cs_net_profit,cs_list_price] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Project [cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_net_profit,cs_list_price] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk] + Scan parquet default.catalog_sales [cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_net_profit,cs_list_price] [cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_net_profit,cs_list_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [cd_demo_sk,cd_dep_count] + Filter [cd_gender,cd_education_status,cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + Filter [c_birth_month,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + Scan parquet default.customer [c_current_cdemo_sk,c_current_addr_sk,c_birth_year,c_customer_sk,c_birth_month] [c_current_cdemo_sk,c_current_addr_sk,c_birth_year,c_customer_sk,c_birth_month] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [cd_demo_sk] + Filter [cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk] [cd_demo_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [ca_address_sk,ca_county,ca_state,ca_country] + Filter [ca_state,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] [ca_address_sk,ca_county,ca_state,ca_country] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt new file mode 100644 index 000000000..97442ee59 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt @@ -0,0 +1,38 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[ext_price#1 DESC NULLS LAST,brand#2 ASC NULLS FIRST,brand_id#3 ASC NULLS FIRST,i_manufact_id#4 ASC NULLS FIRST,i_manufact#5 ASC NULLS FIRST], output=[brand_id#3,brand#2,i_manufact_id#4,i_manufact#5,ext_price#1]) ++- *(7) HashAggregate(keys=[i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5], functions=[sum(UnscaledValue(ss_ext_sales_price#8))]) + +- Exchange hashpartitioning(i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5, 200) + +- *(6) HashAggregate(keys=[i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#8))]) + +- *(6) Project [ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5] + +- *(6) BroadcastHashJoin [ss_store_sk#9], [s_store_sk#10], Inner, BuildRight, NOT (substring(ca_zip#11, 1, 5) = substring(s_zip#12, 1, 5)) + :- *(6) Project [ss_store_sk#9, ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5, ca_zip#11] + : +- *(6) BroadcastHashJoin [c_current_addr_sk#13], [ca_address_sk#14], Inner, BuildRight + : :- *(6) Project [ss_store_sk#9, ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5, c_current_addr_sk#13] + : : +- *(6) BroadcastHashJoin [ss_customer_sk#15], [c_customer_sk#16], Inner, BuildRight + : : :- *(6) Project [ss_customer_sk#15, ss_store_sk#9, ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5] + : : : +- *(6) BroadcastHashJoin [ss_item_sk#17], [i_item_sk#18], Inner, BuildRight + : : : :- *(6) Project [ss_item_sk#17, ss_customer_sk#15, ss_store_sk#9, ss_ext_sales_price#8] + : : : : +- *(6) BroadcastHashJoin [d_date_sk#19], [ss_sold_date_sk#20], Inner, BuildRight + : : : : :- *(6) Project [d_date_sk#19] + : : : : : +- *(6) Filter ((((isnotnull(d_moy#21) && isnotnull(d_year#22)) && (d_moy#21 = 11)) && (d_year#22 = 1998)) && isnotnull(d_date_sk#19)) + : : : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#19,d_year#22,d_moy#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [ss_sold_date_sk#20, ss_item_sk#17, ss_customer_sk#15, ss_store_sk#9, ss_ext_sales_price#8] + : : : : +- *(1) Filter (((isnotnull(ss_sold_date_sk#20) && isnotnull(ss_item_sk#17)) && isnotnull(ss_customer_sk#15)) && isnotnull(ss_store_sk#9)) + : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#20,ss_item_sk#17,ss_customer_sk#15,ss_store_sk#9,ss_ext_sales_price#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [ca_address_sk#14, ca_zip#11] + : +- *(4) Filter (isnotnull(ca_address_sk#14) && isnotnull(ca_zip#11)) + : +- *(4) FileScan parquet default.customer_address[ca_address_sk#14,ca_zip#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [s_store_sk#10, s_zip#12] + +- *(5) Filter (isnotnull(s_zip#12) && isnotnull(s_store_sk#10)) + +- *(5) FileScan parquet default.store[s_store_sk#10,s_zip#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt new file mode 100644 index 000000000..c008da8ae --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt @@ -0,0 +1,50 @@ +TakeOrderedAndProject [ext_price,brand_id,i_manufact,i_manufact_id,brand] + WholeStageCodegen + HashAggregate [i_manufact,sum(UnscaledValue(ss_ext_sales_price)),i_brand,i_brand_id,i_manufact_id,sum] [ext_price,brand_id,sum(UnscaledValue(ss_ext_sales_price)),brand,sum] + InputAdapter + Exchange [i_brand,i_brand_id,i_manufact_id,i_manufact] #1 + WholeStageCodegen + HashAggregate [i_manufact,i_brand,sum,ss_ext_sales_price,i_brand_id,i_manufact_id,sum] [sum,sum] + Project [i_manufact,i_manufact_id,ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_store_sk,s_store_sk,ca_zip,s_zip] + Project [i_manufact,i_manufact_id,ca_zip,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,i_manufact,i_manufact_id,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [i_manufact,i_manufact_id,ss_store_sk,ss_customer_sk,ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_manufact,i_manufact_id,i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] + Scan parquet default.item [i_manufact,i_manufact_id,i_item_sk,i_manager_id,i_brand_id,i_brand] [i_manufact,i_manufact_id,i_item_sk,i_manager_id,i_brand_id,i_brand] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [c_customer_sk,c_current_addr_sk] + Filter [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [ca_address_sk,ca_zip] + Filter [ca_address_sk,ca_zip] + Scan parquet default.customer_address [ca_address_sk,ca_zip] [ca_address_sk,ca_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [s_store_sk,s_zip] + Filter [s_zip,s_store_sk] + Scan parquet default.store [s_store_sk,s_zip] [s_store_sk,s_zip] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt new file mode 100644 index 000000000..aff2e2bdb --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt @@ -0,0 +1,36 @@ +== Physical Plan == +*(13) Sort [d_week_seq1#1 ASC NULLS FIRST], true, 0 ++- Exchange rangepartitioning(d_week_seq1#1 ASC NULLS FIRST, 200) + +- *(12) Project [d_week_seq1#1, round(CheckOverflow((promote_precision(sun_sales1#2) / promote_precision(sun_sales2#3)), DecimalType(37,20)), 2) AS round((sun_sales1 / sun_sales2), 2)#4, round(CheckOverflow((promote_precision(mon_sales1#5) / promote_precision(mon_sales2#6)), DecimalType(37,20)), 2) AS round((mon_sales1 / mon_sales2), 2)#7, round(CheckOverflow((promote_precision(tue_sales1#8) / promote_precision(tue_sales2#9)), DecimalType(37,20)), 2) AS round((tue_sales1 / tue_sales2), 2)#10, round(CheckOverflow((promote_precision(wed_sales1#11) / promote_precision(wed_sales2#12)), DecimalType(37,20)), 2) AS round((wed_sales1 / wed_sales2), 2)#13, round(CheckOverflow((promote_precision(thu_sales1#14) / promote_precision(thu_sales2#15)), DecimalType(37,20)), 2) AS round((thu_sales1 / thu_sales2), 2)#16, round(CheckOverflow((promote_precision(fri_sales1#17) / promote_precision(fri_sales2#18)), DecimalType(37,20)), 2) AS round((fri_sales1 / fri_sales2), 2)#19, round(CheckOverflow((promote_precision(sat_sales1#20) / promote_precision(sat_sales2#21)), DecimalType(37,20)), 2) AS round((sat_sales1 / sat_sales2), 2)#22] + +- *(12) BroadcastHashJoin [d_week_seq1#1], [(d_week_seq2#23 - 53)], Inner, BuildRight + :- *(12) Project [d_week_seq#24 AS d_week_seq1#1, sun_sales#25 AS sun_sales1#2, mon_sales#26 AS mon_sales1#5, tue_sales#27 AS tue_sales1#8, wed_sales#28 AS wed_sales1#11, thu_sales#29 AS thu_sales1#14, fri_sales#30 AS fri_sales1#17, sat_sales#31 AS sat_sales1#20] + : +- *(12) BroadcastHashJoin [d_week_seq#24], [d_week_seq#32], Inner, BuildRight + : :- *(12) HashAggregate(keys=[d_week_seq#24], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#33 = Sunday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Monday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Tuesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Wednesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Thursday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Friday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Saturday) THEN sales_price#34 ELSE null END))]) + : : +- Exchange hashpartitioning(d_week_seq#24, 200) + : : +- *(4) HashAggregate(keys=[d_week_seq#24], functions=[partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Sunday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Monday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Tuesday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Wednesday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Thursday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Friday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Saturday) THEN sales_price#34 ELSE null END))]) + : : +- *(4) Project [sales_price#34, d_week_seq#24, d_day_name#33] + : : +- *(4) BroadcastHashJoin [sold_date_sk#35], [d_date_sk#36], Inner, BuildRight + : : :- Union + : : : :- *(1) Project [ws_sold_date_sk#37 AS sold_date_sk#35, ws_ext_sales_price#38 AS sales_price#34] + : : : : +- *(1) Filter isnotnull(ws_sold_date_sk#37) + : : : : +- *(1) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_ext_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : +- *(2) Project [cs_sold_date_sk#39 AS sold_date_sk#40, cs_ext_sales_price#41 AS sales_price#42] + : : : +- *(2) Filter isnotnull(cs_sold_date_sk#39) + : : : +- *(2) FileScan parquet default.catalog_sales[cs_sold_date_sk#39,cs_ext_sales_price#41] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [d_date_sk#36, d_week_seq#24, d_day_name#33] + : : +- *(3) Filter (isnotnull(d_date_sk#36) && isnotnull(d_week_seq#24)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#36,d_week_seq#24,d_day_name#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(5) Project [d_week_seq#32] + : +- *(5) Filter ((isnotnull(d_year#43) && (d_year#43 = 2001)) && isnotnull(d_week_seq#32)) + : +- *(5) FileScan parquet default.date_dim[d_week_seq#32,d_year#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_week_seq)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint))) + +- *(11) Project [d_week_seq#24 AS d_week_seq2#23, sun_sales#25 AS sun_sales2#3, mon_sales#26 AS mon_sales2#6, tue_sales#27 AS tue_sales2#9, wed_sales#28 AS wed_sales2#12, thu_sales#29 AS thu_sales2#15, fri_sales#30 AS fri_sales2#18, sat_sales#31 AS sat_sales2#21] + +- *(11) BroadcastHashJoin [d_week_seq#24], [d_week_seq#44], Inner, BuildRight + :- *(11) HashAggregate(keys=[d_week_seq#24], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#33 = Sunday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Monday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Tuesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Wednesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Thursday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Friday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Saturday) THEN sales_price#34 ELSE null END))]) + : +- ReusedExchange [d_week_seq#24, sum#45, sum#46, sum#47, sum#48, sum#49, sum#50, sum#51], Exchange hashpartitioning(d_week_seq#24, 200) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(10) Project [d_week_seq#44] + +- *(10) Filter ((isnotnull(d_year#52) && (d_year#52 = 2002)) && isnotnull(d_week_seq#44)) + +- *(10) FileScan parquet default.date_dim[d_week_seq#44,d_year#52] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt new file mode 100644 index 000000000..a3121688c --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt @@ -0,0 +1,52 @@ +WholeStageCodegen + Sort [d_week_seq1] + InputAdapter + Exchange [d_week_seq1] #1 + WholeStageCodegen + Project [wed_sales2,mon_sales1,sat_sales2,d_week_seq1,fri_sales2,thu_sales2,sat_sales1,tue_sales1,sun_sales2,sun_sales1,tue_sales2,wed_sales1,mon_sales2,thu_sales1,fri_sales1] + BroadcastHashJoin [d_week_seq1,d_week_seq2] + Project [sun_sales,thu_sales,sat_sales,wed_sales,fri_sales,tue_sales,d_week_seq,mon_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + HashAggregate [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),mon_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum,thu_sales,sum,fri_sales,tue_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sun_sales,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum,sum,sat_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum,wed_sales] + InputAdapter + Exchange [d_week_seq] #2 + WholeStageCodegen + HashAggregate [d_day_name,sum,sum,sum,sum,sum,sum,sum,d_week_seq,sum,sum,sum,sum,sales_price,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,d_week_seq,d_day_name] + BroadcastHashJoin [sold_date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen + Project [ws_sold_date_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_ext_sales_price] + WholeStageCodegen + Project [cs_sold_date_sk,cs_ext_sales_price] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk,d_week_seq,d_day_name] + Filter [d_date_sk,d_week_seq] + Scan parquet default.date_dim [d_date_sk,d_week_seq,d_day_name] [d_date_sk,d_week_seq,d_day_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_week_seq] + Filter [d_year,d_week_seq] + Scan parquet default.date_dim [d_week_seq,d_year] [d_week_seq,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [sun_sales,thu_sales,sat_sales,wed_sales,fri_sales,tue_sales,d_week_seq,mon_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + HashAggregate [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum] [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),mon_sales,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),thu_sales,fri_sales,tue_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum,sun_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sat_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum,wed_sales] + InputAdapter + ReusedExchange [sum,sum,sum,sum,d_week_seq,sum,sum,sum] [sum,sum,sum,sum,d_week_seq,sum,sum,sum] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [d_week_seq] + Filter [d_year,d_week_seq] + Scan parquet default.date_dim [d_week_seq,d_year] [d_week_seq,d_year] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt new file mode 100644 index 000000000..186e8daef --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt @@ -0,0 +1,24 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[i_category#1 ASC NULLS FIRST,i_class#2 ASC NULLS FIRST,i_item_id#3 ASC NULLS FIRST,i_item_desc#4 ASC NULLS FIRST,revenueratio#5 ASC NULLS FIRST], output=[i_item_desc#4,i_category#1,i_class#2,i_current_price#6,itemrevenue#7,revenueratio#5]) ++- *(6) Project [i_item_desc#4, i_category#1, i_class#2, i_current_price#6, itemrevenue#7, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#8) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#9)), DecimalType(38,17)) AS revenueratio#5, i_item_id#3] + +- Window [sum(_w1#10) windowspecdefinition(i_class#2, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#9], [i_class#2] + +- *(5) Sort [i_class#2 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(i_class#2, 200) + +- *(4) HashAggregate(keys=[i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6], functions=[sum(UnscaledValue(cs_ext_sales_price#11))]) + +- Exchange hashpartitioning(i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6, 200) + +- *(3) HashAggregate(keys=[i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#11))]) + +- *(3) Project [cs_ext_sales_price#11, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] + +- *(3) BroadcastHashJoin [cs_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + :- *(3) Project [cs_sold_date_sk#12, cs_ext_sales_price#11, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] + : +- *(3) BroadcastHashJoin [cs_item_sk#14], [i_item_sk#15], Inner, BuildRight + : :- *(3) Project [cs_sold_date_sk#12, cs_item_sk#14, cs_ext_sales_price#11] + : : +- *(3) Filter (isnotnull(cs_item_sk#14) && isnotnull(cs_sold_date_sk#12)) + : : +- *(3) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_item_sk#14,cs_ext_sales_price#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [i_item_sk#15, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] + : +- *(1) Filter (i_category#1 IN (Sports,Books,Home) && isnotnull(i_item_sk#15)) + : +- *(1) FileScan parquet default.item[i_item_sk#15,i_item_id#3,i_item_desc#4,i_current_price#6,i_class#2,i_category#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)], ReadSchema: struct= 10644)) && (d_date#16 <= 10674)) && isnotnull(d_date_sk#13)) + +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt new file mode 100644 index 000000000..ebfef4216 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt @@ -0,0 +1,34 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,itemrevenue,revenueratio,i_category,i_current_price,i_class] + WholeStageCodegen + Project [i_item_id,i_item_desc,itemrevenue,_we0,i_category,_w0,i_current_price,i_class] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen + Sort [i_class] + InputAdapter + Exchange [i_class] #1 + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,sum(UnscaledValue(cs_ext_sales_price)),i_category,sum,i_current_price,i_class] [itemrevenue,sum(UnscaledValue(cs_ext_sales_price)),_w1,sum,_w0] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_current_price,i_class] #2 + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,sum,i_category,sum,i_current_price,cs_ext_sales_price,i_class] [sum,sum] + Project [i_class,i_current_price,i_category,i_item_desc,cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [i_class,i_current_price,cs_sold_date_sk,i_category,i_item_desc,cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] + Filter [cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + Filter [i_category,i_item_sk] + Scan parquet default.item [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt new file mode 100644 index 000000000..2950af5ff --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt @@ -0,0 +1,27 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[w_warehouse_name#1 ASC NULLS FIRST,i_item_id#2 ASC NULLS FIRST], output=[w_warehouse_name#1,i_item_id#2,inv_before#3,inv_after#4]) ++- *(5) Filter ((CASE WHEN (inv_before#3 > 0) THEN (cast(inv_after#4 as double) / cast(inv_before#3 as double)) ELSE null END >= 0.666667) && (CASE WHEN (inv_before#3 > 0) THEN (cast(inv_after#4 as double) / cast(inv_before#3 as double)) ELSE null END <= 1.5)) + +- *(5) HashAggregate(keys=[w_warehouse_name#1, i_item_id#2], functions=[sum(cast(CASE WHEN (d_date#5 < 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint)), sum(cast(CASE WHEN (d_date#5 >= 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint))]) + +- Exchange hashpartitioning(w_warehouse_name#1, i_item_id#2, 200) + +- *(4) HashAggregate(keys=[w_warehouse_name#1, i_item_id#2], functions=[partial_sum(cast(CASE WHEN (d_date#5 < 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (d_date#5 >= 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint))]) + +- *(4) Project [inv_quantity_on_hand#6, w_warehouse_name#1, i_item_id#2, d_date#5] + +- *(4) BroadcastHashJoin [inv_date_sk#7], [d_date_sk#8], Inner, BuildRight + :- *(4) Project [inv_date_sk#7, inv_quantity_on_hand#6, w_warehouse_name#1, i_item_id#2] + : +- *(4) BroadcastHashJoin [inv_item_sk#9], [i_item_sk#10], Inner, BuildRight + : :- *(4) Project [inv_date_sk#7, inv_item_sk#9, inv_quantity_on_hand#6, w_warehouse_name#1] + : : +- *(4) BroadcastHashJoin [inv_warehouse_sk#11], [w_warehouse_sk#12], Inner, BuildRight + : : :- *(4) Project [inv_date_sk#7, inv_item_sk#9, inv_warehouse_sk#11, inv_quantity_on_hand#6] + : : : +- *(4) Filter ((isnotnull(inv_warehouse_sk#11) && isnotnull(inv_item_sk#9)) && isnotnull(inv_date_sk#7)) + : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#7,inv_item_sk#9,inv_warehouse_sk#11,inv_quantity_on_hand#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_warehouse_sk), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [w_warehouse_sk#12, w_warehouse_name#1] + : : +- *(1) Filter isnotnull(w_warehouse_sk#12) + : : +- *(1) FileScan parquet default.warehouse[w_warehouse_sk#12,w_warehouse_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [i_item_sk#10, i_item_id#2] + : +- *(2) Filter (((isnotnull(i_current_price#13) && (i_current_price#13 >= 0.99)) && (i_current_price#13 <= 1.49)) && isnotnull(i_item_sk#10)) + : +- *(2) FileScan parquet default.item[i_item_sk#10,i_item_id#2,i_current_price#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [d_date_sk#8, d_date#5] + +- *(3) Filter (((isnotnull(d_date#5) && (d_date#5 >= 10997)) && (d_date#5 <= 11057)) && isnotnull(d_date_sk#8)) + +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_date#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt new file mode 100644 index 000000000..b74976c2f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt @@ -0,0 +1,35 @@ +TakeOrderedAndProject [w_warehouse_name,i_item_id,inv_before,inv_after] + WholeStageCodegen + Filter [inv_before,inv_after] + HashAggregate [i_item_id,sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum,w_warehouse_name,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum] [sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum,inv_before,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum,inv_after] + InputAdapter + Exchange [w_warehouse_name,i_item_id] #1 + WholeStageCodegen + HashAggregate [i_item_id,sum,d_date,w_warehouse_name,sum,inv_quantity_on_hand,sum,sum] [sum,sum,sum,sum] + Project [inv_quantity_on_hand,w_warehouse_name,i_item_id,d_date] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_date_sk,inv_quantity_on_hand,w_warehouse_name,i_item_id] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Filter [inv_warehouse_sk,inv_item_sk,inv_date_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [w_warehouse_sk,w_warehouse_name] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_current_price,i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id,i_current_price] [i_item_sk,i_item_id,i_current_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk,d_date] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt new file mode 100644 index 000000000..2a265ae4f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt @@ -0,0 +1,27 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[qoh#1 ASC NULLS FIRST,i_product_name#2 ASC NULLS FIRST,i_brand#3 ASC NULLS FIRST,i_class#4 ASC NULLS FIRST,i_category#5 ASC NULLS FIRST], output=[i_product_name#2,i_brand#3,i_class#4,i_category#5,qoh#1]) ++- *(5) HashAggregate(keys=[i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6], functions=[avg(cast(inv_quantity_on_hand#7 as bigint))]) + +- Exchange hashpartitioning(i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6, 200) + +- *(4) HashAggregate(keys=[i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6], functions=[partial_avg(cast(inv_quantity_on_hand#7 as bigint))]) + +- *(4) Expand [List(inv_quantity_on_hand#7, i_product_name#8, i_brand#9, i_class#10, i_category#11, 0), List(inv_quantity_on_hand#7, i_product_name#8, i_brand#9, i_class#10, null, 1), List(inv_quantity_on_hand#7, i_product_name#8, i_brand#9, null, null, 3), List(inv_quantity_on_hand#7, i_product_name#8, null, null, null, 7), List(inv_quantity_on_hand#7, null, null, null, null, 15)], [inv_quantity_on_hand#7, i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6] + +- *(4) Project [inv_quantity_on_hand#7, i_product_name#12 AS i_product_name#8, i_brand#13 AS i_brand#9, i_class#14 AS i_class#10, i_category#15 AS i_category#11] + +- *(4) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#17], Inner, BuildRight + :- *(4) Project [inv_warehouse_sk#16, inv_quantity_on_hand#7, i_brand#13, i_class#14, i_category#15, i_product_name#12] + : +- *(4) BroadcastHashJoin [inv_item_sk#18], [i_item_sk#19], Inner, BuildRight + : :- *(4) Project [inv_item_sk#18, inv_warehouse_sk#16, inv_quantity_on_hand#7] + : : +- *(4) BroadcastHashJoin [inv_date_sk#20], [d_date_sk#21], Inner, BuildRight + : : :- *(4) Project [inv_date_sk#20, inv_item_sk#18, inv_warehouse_sk#16, inv_quantity_on_hand#7] + : : : +- *(4) Filter ((isnotnull(inv_date_sk#20) && isnotnull(inv_item_sk#18)) && isnotnull(inv_warehouse_sk#16)) + : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#20,inv_item_sk#18,inv_warehouse_sk#16,inv_quantity_on_hand#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_date_sk), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [d_date_sk#21] + : : +- *(1) Filter (((isnotnull(d_month_seq#22) && (d_month_seq#22 >= 1200)) && (d_month_seq#22 <= 1211)) && isnotnull(d_date_sk#21)) + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#21,d_month_seq#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [i_item_sk#19, i_brand#13, i_class#14, i_category#15, i_product_name#12] + : +- *(2) Filter isnotnull(i_item_sk#19) + : +- *(2) FileScan parquet default.item[i_item_sk#19,i_brand#13,i_class#14,i_category#15,i_product_name#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [w_warehouse_sk#17] + +- *(3) Filter isnotnull(w_warehouse_sk#17) + +- *(3) FileScan parquet default.warehouse[w_warehouse_sk#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt new file mode 100644 index 000000000..49b575687 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt @@ -0,0 +1,35 @@ +TakeOrderedAndProject [i_category,i_class,i_product_name,i_brand,qoh] + WholeStageCodegen + HashAggregate [avg(cast(inv_quantity_on_hand as bigint)),i_category,count,spark_grouping_id,sum,i_class,i_product_name,i_brand] [avg(cast(inv_quantity_on_hand as bigint)),qoh,sum,count] + InputAdapter + Exchange [i_category,spark_grouping_id,i_class,i_product_name,i_brand] #1 + WholeStageCodegen + HashAggregate [sum,i_category,count,count,spark_grouping_id,sum,i_class,i_product_name,inv_quantity_on_hand,i_brand] [sum,count,sum,count] + Expand [i_product_name,i_class,i_brand,i_category,inv_quantity_on_hand] + Project [i_brand,i_category,inv_quantity_on_hand,i_class,i_product_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [i_class,inv_quantity_on_hand,inv_warehouse_sk,i_product_name,i_category,i_brand] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_class,i_item_sk,i_product_name,i_category,i_brand] + Filter [i_item_sk] + Scan parquet default.item [i_class,i_item_sk,i_product_name,i_category,i_brand] [i_class,i_item_sk,i_product_name,i_category,i_brand] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [w_warehouse_sk] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_sk] [w_warehouse_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt new file mode 100644 index 000000000..5dbd09984 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt @@ -0,0 +1,89 @@ +== Physical Plan == +CollectLimit 100 ++- *(20) HashAggregate(keys=[], functions=[sum(sales#1)]) + +- Exchange SinglePartition + +- *(19) HashAggregate(keys=[], functions=[partial_sum(sales#1)]) + +- Union + :- *(9) Project [CheckOverflow((promote_precision(cast(cast(cs_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#3 as decimal(12,2)))), DecimalType(18,2)) AS sales#1] + : +- *(9) BroadcastHashJoin [cs_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + : :- *(9) Project [cs_sold_date_sk#4, cs_quantity#2, cs_list_price#3] + : : +- *(9) BroadcastHashJoin [cs_bill_customer_sk#6], [c_customer_sk#7], LeftSemi, BuildRight + : : :- *(9) Project [cs_sold_date_sk#4, cs_bill_customer_sk#6, cs_quantity#2, cs_list_price#3] + : : : +- *(9) BroadcastHashJoin [cs_item_sk#8], [item_sk#9], LeftSemi, BuildRight + : : : :- *(9) Project [cs_sold_date_sk#4, cs_bill_customer_sk#6, cs_item_sk#8, cs_quantity#2, cs_list_price#3] + : : : : +- *(9) Filter isnotnull(cs_sold_date_sk#4) + : : : : +- *(9) FileScan parquet default.catalog_sales[cs_sold_date_sk#4,cs_bill_customer_sk#6,cs_item_sk#8,cs_quantity#2,cs_list_price#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct 4) + : : : +- *(4) HashAggregate(keys=[substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14], functions=[count(1)]) + : : : +- Exchange hashpartitioning(substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14, 200) + : : : +- *(3) HashAggregate(keys=[substring(i_item_desc#11, 1, 30) AS substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14], functions=[partial_count(1)]) + : : : +- *(3) Project [d_date#14, i_item_sk#13, i_item_desc#11] + : : : +- *(3) BroadcastHashJoin [ss_item_sk#15], [i_item_sk#13], Inner, BuildRight + : : : :- *(3) Project [ss_item_sk#15, d_date#14] + : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#5], Inner, BuildRight + : : : : :- *(3) Project [ss_sold_date_sk#16, ss_item_sk#15] + : : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#16) && isnotnull(ss_item_sk#15)) + : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_item_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [d_date_sk#5, d_date#14] + : : : : +- *(1) Filter (d_year#17 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#5)) + : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#5,d_date#14,d_year#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [i_item_sk#13, i_item_desc#11] + : : : +- *(2) Filter isnotnull(i_item_sk#13) + : : : +- *(2) FileScan parquet default.item[i_item_sk#13,i_item_desc#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(7) Project [c_customer_sk#7] + : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery3052 as decimal(32,6)))), DecimalType(38,8)))) + : : : +- Subquery subquery3052 + : : : +- *(5) HashAggregate(keys=[], functions=[max(csales#21)]) + : : : +- Exchange SinglePartition + : : : +- *(4) HashAggregate(keys=[], functions=[partial_max(csales#21)]) + : : : +- *(4) HashAggregate(keys=[c_customer_sk#7], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) + : : : +- Exchange hashpartitioning(c_customer_sk#7, 200) + : : : +- *(3) HashAggregate(keys=[c_customer_sk#7], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) + : : : +- *(3) Project [ss_quantity#18, ss_sales_price#19, c_customer_sk#7] + : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#5], Inner, BuildRight + : : : :- *(3) Project [ss_sold_date_sk#16, ss_quantity#18, ss_sales_price#19, c_customer_sk#7] + : : : : +- *(3) BroadcastHashJoin [ss_customer_sk#22], [c_customer_sk#7], Inner, BuildRight + : : : : :- *(3) Project [ss_sold_date_sk#16, ss_customer_sk#22, ss_quantity#18, ss_sales_price#19] + : : : : : +- *(3) Filter (isnotnull(ss_customer_sk#22) && isnotnull(ss_sold_date_sk#16)) + : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_customer_sk#22,ss_quantity#18,ss_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [c_customer_sk#7] + : : : : +- *(1) Filter isnotnull(c_customer_sk#7) + : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [d_date_sk#5] + : : : +- *(2) Filter (d_year#17 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#5)) + : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#5,d_year#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(7) HashAggregate(keys=[c_customer_sk#7], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Exchange hashpartitioning(c_customer_sk#7, 200) + : : +- *(6) HashAggregate(keys=[c_customer_sk#7], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- *(6) Project [ss_quantity#18, ss_sales_price#19, c_customer_sk#7] + : : +- *(6) BroadcastHashJoin [ss_customer_sk#22], [c_customer_sk#7], Inner, BuildRight + : : :- *(6) Project [ss_customer_sk#22, ss_quantity#18, ss_sales_price#19] + : : : +- *(6) Filter isnotnull(ss_customer_sk#22) + : : : +- *(6) FileScan parquet default.store_sales[ss_customer_sk#22,ss_quantity#18,ss_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(5) Project [c_customer_sk#7] + : : +- *(5) Filter isnotnull(c_customer_sk#7) + : : +- *(5) FileScan parquet default.customer[c_customer_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(8) Project [d_date_sk#5] + : +- *(8) Filter ((((isnotnull(d_year#17) && isnotnull(d_moy#23)) && (d_year#17 = 2000)) && (d_moy#23 = 2)) && isnotnull(d_date_sk#5)) + : +- *(8) FileScan parquet default.date_dim[d_date_sk#5,d_year#17,d_moy#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct + +- *(18) Project [CheckOverflow((promote_precision(cast(cast(ws_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#25 as decimal(12,2)))), DecimalType(18,2)) AS sales#26] + +- *(18) BroadcastHashJoin [ws_sold_date_sk#27], [d_date_sk#5], Inner, BuildRight + :- *(18) Project [ws_sold_date_sk#27, ws_quantity#24, ws_list_price#25] + : +- *(18) BroadcastHashJoin [ws_bill_customer_sk#28], [c_customer_sk#7], LeftSemi, BuildRight + : :- *(18) Project [ws_sold_date_sk#27, ws_bill_customer_sk#28, ws_quantity#24, ws_list_price#25] + : : +- *(18) BroadcastHashJoin [ws_item_sk#29], [item_sk#9], LeftSemi, BuildRight + : : :- *(18) Project [ws_sold_date_sk#27, ws_item_sk#29, ws_bill_customer_sk#28, ws_quantity#24, ws_list_price#25] + : : : +- *(18) Filter isnotnull(ws_sold_date_sk#27) + : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#27,ws_item_sk#29,ws_bill_customer_sk#28,ws_quantity#24,ws_list_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct 4) + : : : : +- *(4) HashAggregate(keys=[substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17], functions=[count(1)]) + : : : : +- Exchange hashpartitioning(substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17, 200) + : : : : +- *(3) HashAggregate(keys=[substring(i_item_desc#14, 1, 30) AS substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17], functions=[partial_count(1)]) + : : : : +- *(3) Project [d_date#17, i_item_sk#16, i_item_desc#14] + : : : : +- *(3) BroadcastHashJoin [ss_item_sk#18], [i_item_sk#16], Inner, BuildRight + : : : : :- *(3) Project [ss_item_sk#18, d_date#17] + : : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#7], Inner, BuildRight + : : : : : :- *(3) Project [ss_sold_date_sk#19, ss_item_sk#18] + : : : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#19) && isnotnull(ss_item_sk#18)) + : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(1) Project [d_date_sk#7, d_date#17] + : : : : : +- *(1) Filter (d_year#20 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#7)) + : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#7,d_date#17,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [i_item_sk#16, i_item_desc#14] + : : : : +- *(2) Filter isnotnull(i_item_sk#16) + : : : : +- *(2) FileScan parquet default.item[i_item_sk#16,i_item_desc#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(7) Project [c_customer_sk#9 AS c_customer_sk#9#10] + : : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery3144 as decimal(32,6)))), DecimalType(38,8)))) + : : : : +- Subquery subquery3144 + : : : : +- *(5) HashAggregate(keys=[], functions=[max(csales#24)]) + : : : : +- Exchange SinglePartition + : : : : +- *(4) HashAggregate(keys=[], functions=[partial_max(csales#24)]) + : : : : +- *(4) HashAggregate(keys=[c_customer_sk#9], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) + : : : : +- Exchange hashpartitioning(c_customer_sk#9, 200) + : : : : +- *(3) HashAggregate(keys=[c_customer_sk#9], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) + : : : : +- *(3) Project [ss_quantity#21, ss_sales_price#22, c_customer_sk#9] + : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#7], Inner, BuildRight + : : : : :- *(3) Project [ss_sold_date_sk#19, ss_quantity#21, ss_sales_price#22, c_customer_sk#9] + : : : : : +- *(3) BroadcastHashJoin [ss_customer_sk#25], [c_customer_sk#9], Inner, BuildRight + : : : : : :- *(3) Project [ss_sold_date_sk#19, ss_customer_sk#25, ss_quantity#21, ss_sales_price#22] + : : : : : : +- *(3) Filter (isnotnull(ss_customer_sk#25) && isnotnull(ss_sold_date_sk#19)) + : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_customer_sk#25,ss_quantity#21,ss_sales_price#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(1) Project [c_customer_sk#9] + : : : : : +- *(1) Filter isnotnull(c_customer_sk#9) + : : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [d_date_sk#7] + : : : : +- *(2) Filter (d_year#20 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#7)) + : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- *(7) HashAggregate(keys=[c_customer_sk#9], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) + : : : +- Exchange hashpartitioning(c_customer_sk#9, 200) + : : : +- *(6) HashAggregate(keys=[c_customer_sk#9], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) + : : : +- *(6) Project [ss_quantity#21, ss_sales_price#22, c_customer_sk#9] + : : : +- *(6) BroadcastHashJoin [ss_customer_sk#25], [c_customer_sk#9], Inner, BuildRight + : : : :- *(6) Project [ss_customer_sk#25, ss_quantity#21, ss_sales_price#22] + : : : : +- *(6) Filter isnotnull(ss_customer_sk#25) + : : : : +- *(6) FileScan parquet default.store_sales[ss_customer_sk#25,ss_quantity#21,ss_sales_price#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(5) Project [c_customer_sk#9] + : : : +- *(5) Filter isnotnull(c_customer_sk#9) + : : : +- *(5) FileScan parquet default.customer[c_customer_sk#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(11) BroadcastHashJoin [c_customer_sk#9], [c_customer_sk#9#10], LeftSemi, BuildRight + : : :- *(11) Project [c_customer_sk#9, c_first_name#2, c_last_name#1] + : : : +- *(11) Filter isnotnull(c_customer_sk#9) + : : : +- *(11) FileScan parquet default.customer[c_customer_sk#9,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : +- ReusedExchange [c_customer_sk#9#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(12) Project [d_date_sk#7] + : +- *(12) Filter ((((isnotnull(d_year#20) && isnotnull(d_moy#26)) && (d_year#20 = 2000)) && (d_moy#26 = 2)) && isnotnull(d_date_sk#7)) + : +- *(12) FileScan parquet default.date_dim[d_date_sk#7,d_year#20,d_moy#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct + +- *(28) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#28 as decimal(12,2)))), DecimalType(18,2)))]) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, 200) + +- *(27) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#28 as decimal(12,2)))), DecimalType(18,2)))]) + +- *(27) Project [ws_quantity#27, ws_list_price#28, c_first_name#2, c_last_name#1] + +- *(27) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#7], Inner, BuildRight + :- *(27) Project [ws_sold_date_sk#29, ws_quantity#27, ws_list_price#28, c_first_name#2, c_last_name#1] + : +- *(27) BroadcastHashJoin [ws_bill_customer_sk#30], [c_customer_sk#9], Inner, BuildRight + : :- *(27) BroadcastHashJoin [ws_bill_customer_sk#30], [c_customer_sk#9#31], LeftSemi, BuildRight + : : :- *(27) Project [ws_sold_date_sk#29, ws_bill_customer_sk#30, ws_quantity#27, ws_list_price#28] + : : : +- *(27) BroadcastHashJoin [ws_item_sk#32], [item_sk#12], LeftSemi, BuildRight + : : : :- *(27) Project [ws_sold_date_sk#29, ws_item_sk#32, ws_bill_customer_sk#30, ws_quantity#27, ws_list_price#28] + : : : : +- *(27) Filter (isnotnull(ws_bill_customer_sk#30) && isnotnull(ws_sold_date_sk#29)) + : : : : +- *(27) FileScan parquet default.web_sales[ws_sold_date_sk#29,ws_item_sk#32,ws_bill_customer_sk#30,ws_quantity#27,ws_list_price#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct cast(Subquery subquery3246 as decimal(33,8)))) + : +- Subquery subquery3246 + : +- *(8) HashAggregate(keys=[], functions=[avg(netpaid#5)]) + : +- Exchange SinglePartition + : +- *(7) HashAggregate(keys=[], functions=[partial_avg(netpaid#5)]) + : +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) + : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 200) + : +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) + : +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] + : +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight + : :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] + : : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + : : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight + : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] + : : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight + : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] + : : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight + : : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] + : : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) + : : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] + : : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) + : : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : : +- *(3) Filter isnotnull(i_item_sk#22) + : : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) + : +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] + : +- *(5) Filter isnotnull(ca_zip#18) + : +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct + +- *(8) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[sum(netpaid#5)]) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, 200) + +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[partial_sum(netpaid#5)]) + +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 200) + +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) + +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] + +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight + :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] + : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight + : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] + : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight + : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] + : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight + : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] + : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) + : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] + : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) + : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : +- *(3) Filter ((isnotnull(i_color#9) && (i_color#9 = pale)) && isnotnull(i_item_sk#22)) + : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale), IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) + +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] + +- *(5) Filter isnotnull(ca_zip#18) + +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt new file mode 100644 index 000000000..08aae40a6 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt @@ -0,0 +1,111 @@ +WholeStageCodegen + Project [c_last_name,c_first_name,s_store_name,paid] + Filter [sum(netpaid)] + Subquery #1 + WholeStageCodegen + HashAggregate [sum,count,avg(netpaid)] [avg(netpaid),(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),sum,count] + InputAdapter + Exchange #8 + WholeStageCodegen + HashAggregate [sum,count,count,netpaid,sum] [sum,count,sum,count] + HashAggregate [i_units,i_color,i_size,s_state,sum(UnscaledValue(ss_net_paid)),sum,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #9 + WholeStageCodegen + HashAggregate [i_units,i_color,i_size,ss_net_paid,s_state,sum,i_manager_id,c_last_name,sum,i_current_price,c_first_name,s_store_name,ca_state] [sum,sum] + Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk,s_store_name,s_state,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Filter [i_item_sk] + Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [c_customer_sk,c_first_name,c_last_name,c_birth_country] + Filter [c_customer_sk,c_birth_country] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] [c_customer_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [ca_state,ca_zip,ca_country] + Filter [ca_zip] + Scan parquet default.customer_address [ca_state,ca_zip,ca_country] [ca_state,ca_zip,ca_country] + HashAggregate [c_first_name,s_store_name,c_last_name,sum(netpaid),sum] [sum(netpaid),paid,sum(netpaid),sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen + HashAggregate [c_first_name,s_store_name,c_last_name,sum,netpaid,sum] [sum,sum] + HashAggregate [i_units,i_color,i_size,sum,s_state,sum(UnscaledValue(ss_net_paid)),i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #2 + WholeStageCodegen + HashAggregate [i_units,sum,i_color,i_size,ss_net_paid,sum,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] [sum,sum] + Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk,s_store_name,s_state,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Filter [i_color,i_item_sk] + Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [c_customer_sk,c_first_name,c_last_name,c_birth_country] + Filter [c_customer_sk,c_birth_country] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] [c_customer_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [ca_state,ca_zip,ca_country] + Filter [ca_zip] + Scan parquet default.customer_address [ca_state,ca_zip,ca_country] [ca_state,ca_zip,ca_country] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt new file mode 100644 index 000000000..fb2c9c939 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt @@ -0,0 +1,82 @@ +== Physical Plan == +*(8) Project [c_last_name#1, c_first_name#2, s_store_name#3, paid#4] ++- *(8) Filter (isnotnull(sum(netpaid#5)#6) && (cast(sum(netpaid#5)#6 as decimal(33,8)) > cast(Subquery subquery3290 as decimal(33,8)))) + : +- Subquery subquery3290 + : +- *(8) HashAggregate(keys=[], functions=[avg(netpaid#5)]) + : +- Exchange SinglePartition + : +- *(7) HashAggregate(keys=[], functions=[partial_avg(netpaid#5)]) + : +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) + : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 200) + : +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) + : +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] + : +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight + : :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] + : : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + : : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight + : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] + : : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight + : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] + : : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight + : : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] + : : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) + : : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] + : : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) + : : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : : +- *(3) Filter isnotnull(i_item_sk#22) + : : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) + : +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] + : +- *(5) Filter isnotnull(ca_zip#18) + : +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct + +- *(8) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[sum(netpaid#5)]) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, 200) + +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[partial_sum(netpaid#5)]) + +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 200) + +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) + +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] + +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight + :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] + : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight + : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] + : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight + : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] + : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight + : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] + : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) + : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] + : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) + : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : +- *(3) Filter ((isnotnull(i_color#9) && (i_color#9 = chiffon)) && isnotnull(i_item_sk#22)) + : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_color), EqualTo(i_color,chiffon), IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) + +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] + +- *(5) Filter isnotnull(ca_zip#18) + +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt new file mode 100644 index 000000000..87db312c3 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt @@ -0,0 +1,111 @@ +WholeStageCodegen + Project [c_last_name,c_first_name,s_store_name,paid] + Filter [sum(netpaid)] + Subquery #1 + WholeStageCodegen + HashAggregate [sum,count,avg(netpaid)] [avg(netpaid),(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),sum,count] + InputAdapter + Exchange #8 + WholeStageCodegen + HashAggregate [count,sum,count,netpaid,sum] [sum,count,sum,count] + HashAggregate [i_units,sum,i_color,i_size,s_state,i_manager_id,c_last_name,sum(UnscaledValue(ss_net_paid)),i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #9 + WholeStageCodegen + HashAggregate [i_units,sum,i_color,i_size,ss_net_paid,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,sum,s_store_name,ca_state] [sum,sum] + Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk,s_store_name,s_state,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Filter [i_item_sk] + Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [c_customer_sk,c_first_name,c_last_name,c_birth_country] + Filter [c_customer_sk,c_birth_country] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] [c_customer_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [ca_state,ca_zip,ca_country] + Filter [ca_zip] + Scan parquet default.customer_address [ca_state,ca_zip,ca_country] [ca_state,ca_zip,ca_country] + HashAggregate [c_first_name,s_store_name,c_last_name,sum(netpaid),sum] [sum(netpaid),paid,sum(netpaid),sum] + InputAdapter + Exchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen + HashAggregate [c_first_name,s_store_name,c_last_name,netpaid,sum,sum] [sum,sum] + HashAggregate [i_units,sum,i_color,i_size,s_state,i_manager_id,c_last_name,sum(UnscaledValue(ss_net_paid)),i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + InputAdapter + Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #2 + WholeStageCodegen + HashAggregate [i_units,sum,i_color,i_size,ss_net_paid,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,sum,s_store_name,ca_state] [sum,sum] + Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk,s_store_name,s_state,s_zip] + Filter [s_market_id,s_store_sk,s_zip] + Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Filter [i_color,i_item_sk] + Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [c_customer_sk,c_first_name,c_last_name,c_birth_country] + Filter [c_customer_sk,c_birth_country] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] [c_customer_sk,c_first_name,c_last_name,c_birth_country] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [ca_state,ca_zip,ca_country] + Filter [ca_zip] + Scan parquet default.customer_address [ca_state,ca_zip,ca_country] [ca_state,ca_zip,ca_country] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt new file mode 100644 index 000000000..0548dd397 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt @@ -0,0 +1,50 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST,s_store_id#3 ASC NULLS FIRST,s_store_name#4 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,s_store_id#3,s_store_name#4,store_sales_profit#5,store_returns_loss#6,catalog_sales_profit#7]) ++- *(9) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4], functions=[sum(UnscaledValue(ss_net_profit#8)), sum(UnscaledValue(sr_net_loss#9)), sum(UnscaledValue(cs_net_profit#10))]) + +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4, 200) + +- *(8) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4], functions=[partial_sum(UnscaledValue(ss_net_profit#8)), partial_sum(UnscaledValue(sr_net_loss#9)), partial_sum(UnscaledValue(cs_net_profit#10))]) + +- *(8) Project [ss_net_profit#8, sr_net_loss#9, cs_net_profit#10, s_store_id#3, s_store_name#4, i_item_id#1, i_item_desc#2] + +- *(8) BroadcastHashJoin [ss_item_sk#11], [i_item_sk#12], Inner, BuildRight + :- *(8) Project [ss_item_sk#11, ss_net_profit#8, sr_net_loss#9, cs_net_profit#10, s_store_id#3, s_store_name#4] + : +- *(8) BroadcastHashJoin [ss_store_sk#13], [s_store_sk#14], Inner, BuildRight + : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_net_loss#9, cs_net_profit#10] + : : +- *(8) BroadcastHashJoin [cs_sold_date_sk#15], [d_date_sk#16], Inner, BuildRight + : : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_net_loss#9, cs_sold_date_sk#15, cs_net_profit#10] + : : : +- *(8) BroadcastHashJoin [sr_returned_date_sk#17], [cast(d_date_sk#18 as bigint)], Inner, BuildRight + : : : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_returned_date_sk#17, sr_net_loss#9, cs_sold_date_sk#15, cs_net_profit#10] + : : : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_returned_date_sk#17, sr_net_loss#9, cs_sold_date_sk#15, cs_net_profit#10] + : : : : : +- *(8) BroadcastHashJoin [sr_customer_sk#21, sr_item_sk#22], [cast(cs_bill_customer_sk#23 as bigint), cast(cs_item_sk#24 as bigint)], Inner, BuildRight + : : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_returned_date_sk#17, sr_item_sk#22, sr_customer_sk#21, sr_net_loss#9] + : : : : : : +- *(8) BroadcastHashJoin [cast(ss_customer_sk#25 as bigint), cast(ss_item_sk#11 as bigint), cast(ss_ticket_number#26 as bigint)], [sr_customer_sk#21, sr_item_sk#22, sr_ticket_number#27], Inner, BuildRight + : : : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_customer_sk#25, ss_store_sk#13, ss_ticket_number#26, ss_net_profit#8] + : : : : : : : +- *(8) Filter ((((isnotnull(ss_ticket_number#26) && isnotnull(ss_item_sk#11)) && isnotnull(ss_customer_sk#25)) && isnotnull(ss_sold_date_sk#19)) && isnotnull(ss_store_sk#13)) + : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#11,ss_customer_sk#25,ss_store_sk#13,ss_ticket_number#26,ss_net_profit#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(3) Project [d_date_sk#20] + : : : : +- *(3) Filter ((((isnotnull(d_moy#28) && isnotnull(d_year#29)) && (d_moy#28 = 4)) && (d_year#29 = 2001)) && isnotnull(d_date_sk#20)) + : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_year#29,d_moy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,4), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [d_date_sk#18] + : : : +- *(4) Filter (((((isnotnull(d_year#30) && isnotnull(d_moy#31)) && (d_moy#31 >= 4)) && (d_moy#31 <= 10)) && (d_year#30 = 2001)) && isnotnull(d_date_sk#18)) + : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#30,d_moy#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(5) Project [d_date_sk#16] + : : +- *(5) Filter (((((isnotnull(d_year#32) && isnotnull(d_moy#33)) && (d_moy#33 >= 4)) && (d_moy#33 <= 10)) && (d_year#32 = 2001)) && isnotnull(d_date_sk#16)) + : : +- *(5) FileScan parquet default.date_dim[d_date_sk#16,d_year#32,d_moy#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [s_store_sk#14, s_store_id#3, s_store_name#4] + : +- *(6) Filter isnotnull(s_store_sk#14) + : +- *(6) FileScan parquet default.store[s_store_sk#14,s_store_id#3,s_store_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) Project [i_item_sk#12, i_item_id#1, i_item_desc#2] + +- *(7) Filter isnotnull(i_item_sk#12) + +- *(7) FileScan parquet default.item[i_item_sk#12,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt new file mode 100644 index 000000000..20dec06b1 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt @@ -0,0 +1,66 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,store_sales_profit,catalog_sales_profit,s_store_id,store_returns_loss,s_store_name] + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,sum,sum,s_store_id,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(ss_net_profit)),s_store_name,sum] [sum,store_sales_profit,sum,catalog_sales_profit,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),store_returns_loss,sum(UnscaledValue(ss_net_profit)),sum] + InputAdapter + Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,sum,sum,s_store_id,cs_net_profit,sum,sum,sum,s_store_name,sum,sr_net_loss,ss_net_profit] [sum,sum,sum,sum,sum,sum] + Project [s_store_id,s_store_name,ss_net_profit,cs_net_profit,i_item_desc,sr_net_loss,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_id,s_store_name,ss_item_sk,ss_net_profit,cs_net_profit,sr_net_loss] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_net_profit,cs_net_profit,sr_net_loss] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,cs_sold_date_sk,ss_net_profit,cs_net_profit,sr_net_loss] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,cs_sold_date_sk,ss_net_profit,cs_net_profit,sr_returned_date_sk,sr_net_loss] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,cs_sold_date_sk,ss_net_profit,cs_net_profit,ss_sold_date_sk,sr_returned_date_sk,sr_net_loss] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + Project [ss_item_sk,ss_store_sk,sr_customer_sk,sr_item_sk,ss_net_profit,ss_sold_date_sk,sr_returned_date_sk,sr_net_loss] + BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] + Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_ticket_number] + Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk,sr_net_loss] + Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] + Scan parquet default.store_returns [sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk,sr_net_loss] [sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk,sr_net_loss] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [s_store_sk,s_store_id,s_store_name] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_store_id,s_store_name] [s_store_sk,s_store_id,s_store_name] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [i_item_sk,i_item_id,i_item_desc] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] [i_item_sk,i_item_id,i_item_desc] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt new file mode 100644 index 000000000..ac72aff5b --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt @@ -0,0 +1,32 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[i_item_id#1,agg1#2,agg2#3,agg3#4,agg4#5]) ++- *(6) HashAggregate(keys=[i_item_id#1], functions=[avg(cast(cs_quantity#6 as bigint)), avg(UnscaledValue(cs_list_price#7)), avg(UnscaledValue(cs_coupon_amt#8)), avg(UnscaledValue(cs_sales_price#9))]) + +- Exchange hashpartitioning(i_item_id#1, 200) + +- *(5) HashAggregate(keys=[i_item_id#1], functions=[partial_avg(cast(cs_quantity#6 as bigint)), partial_avg(UnscaledValue(cs_list_price#7)), partial_avg(UnscaledValue(cs_coupon_amt#8)), partial_avg(UnscaledValue(cs_sales_price#9))]) + +- *(5) Project [cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8, i_item_id#1] + +- *(5) BroadcastHashJoin [cs_promo_sk#10], [p_promo_sk#11], Inner, BuildRight + :- *(5) Project [cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8, i_item_id#1] + : +- *(5) BroadcastHashJoin [cs_item_sk#12], [i_item_sk#13], Inner, BuildRight + : :- *(5) Project [cs_item_sk#12, cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8] + : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight + : : :- *(5) Project [cs_sold_date_sk#14, cs_item_sk#12, cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8] + : : : +- *(5) BroadcastHashJoin [cs_bill_cdemo_sk#16], [cd_demo_sk#17], Inner, BuildRight + : : : :- *(5) Project [cs_sold_date_sk#14, cs_bill_cdemo_sk#16, cs_item_sk#12, cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8] + : : : : +- *(5) Filter (((isnotnull(cs_bill_cdemo_sk#16) && isnotnull(cs_sold_date_sk#14)) && isnotnull(cs_item_sk#12)) && isnotnull(cs_promo_sk#10)) + : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#14,cs_bill_cdemo_sk#16,cs_item_sk#12,cs_promo_sk#10,cs_quantity#6,cs_list_price#7,cs_sales_price#9,cs_coupon_amt#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_pro..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [d_date_sk#15] + : : +- *(2) Filter ((isnotnull(d_year#21) && (d_year#21 = 2000)) && isnotnull(d_date_sk#15)) + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#15,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [i_item_sk#13, i_item_id#1] + : +- *(3) Filter isnotnull(i_item_sk#13) + : +- *(3) FileScan parquet default.item[i_item_sk#13,i_item_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [p_promo_sk#11] + +- *(4) Filter (((p_channel_email#22 = N) || (p_channel_event#23 = N)) && isnotnull(p_promo_sk#11)) + +- *(4) FileScan parquet default.promotion[p_promo_sk#11,p_channel_email#22,p_channel_event#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt new file mode 100644 index 000000000..da9dc6cdf --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject [i_item_id,agg3,agg1,agg2,agg4] + WholeStageCodegen + HashAggregate [i_item_id,count,avg(UnscaledValue(cs_list_price)),sum,sum,count,sum,avg(UnscaledValue(cs_sales_price)),avg(UnscaledValue(cs_coupon_amt)),count,avg(cast(cs_quantity as bigint)),sum,count] [count,avg(UnscaledValue(cs_list_price)),sum,sum,count,sum,avg(UnscaledValue(cs_sales_price)),avg(UnscaledValue(cs_coupon_amt)),agg3,count,agg1,agg2,agg4,avg(cast(cs_quantity as bigint)),sum,count] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen + HashAggregate [cs_list_price,cs_coupon_amt,i_item_id,count,count,sum,sum,count,sum,sum,count,count,count,sum,sum,sum,count,cs_sales_price,cs_quantity,sum,count] [count,count,sum,sum,count,sum,sum,count,count,count,sum,sum,sum,count,sum,count] + Project [cs_coupon_amt,cs_quantity,cs_sales_price,cs_list_price,i_item_id] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_promo_sk,cs_coupon_amt,cs_quantity,cs_sales_price,cs_list_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_promo_sk,cs_coupon_amt,cs_quantity,cs_sales_price,cs_item_sk,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_promo_sk,cs_coupon_amt,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Project [cs_promo_sk,cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] + Filter [cs_bill_cdemo_sk,cs_sold_date_sk,cs_item_sk,cs_promo_sk] + Scan parquet default.catalog_sales [cs_promo_sk,cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] [cs_promo_sk,cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [cd_demo_sk] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [p_promo_sk] + Filter [p_channel_email,p_channel_event,p_promo_sk] + Scan parquet default.promotion [p_promo_sk,p_channel_email,p_channel_event] [p_promo_sk,p_channel_email,p_channel_event] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt new file mode 100644 index 000000000..8d5a61224 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt @@ -0,0 +1,33 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,s_state#2 ASC NULLS FIRST], output=[i_item_id#1,s_state#2,g_state#3,agg1#4,agg2#5,agg3#6,agg4#7]) ++- *(6) HashAggregate(keys=[i_item_id#1, s_state#2, spark_grouping_id#8], functions=[avg(cast(ss_quantity#9 as bigint)), avg(UnscaledValue(ss_list_price#10)), avg(UnscaledValue(ss_coupon_amt#11)), avg(UnscaledValue(ss_sales_price#12))]) + +- Exchange hashpartitioning(i_item_id#1, s_state#2, spark_grouping_id#8, 200) + +- *(5) HashAggregate(keys=[i_item_id#1, s_state#2, spark_grouping_id#8], functions=[partial_avg(cast(ss_quantity#9 as bigint)), partial_avg(UnscaledValue(ss_list_price#10)), partial_avg(UnscaledValue(ss_coupon_amt#11)), partial_avg(UnscaledValue(ss_sales_price#12))]) + +- *(5) Expand [List(ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#13, s_state#14, 0), List(ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#13, null, 1), List(ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, null, null, 3)], [ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#1, s_state#2, spark_grouping_id#8] + +- *(5) Project [ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#15 AS i_item_id#13, s_state#16 AS s_state#14] + +- *(5) BroadcastHashJoin [ss_item_sk#17], [i_item_sk#18], Inner, BuildRight + :- *(5) Project [ss_item_sk#17, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, s_state#16] + : +- *(5) BroadcastHashJoin [ss_store_sk#19], [s_store_sk#20], Inner, BuildRight + : :- *(5) Project [ss_item_sk#17, ss_store_sk#19, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11] + : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight + : : :- *(5) Project [ss_sold_date_sk#21, ss_item_sk#17, ss_store_sk#19, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11] + : : : +- *(5) BroadcastHashJoin [ss_cdemo_sk#23], [cd_demo_sk#24], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#21, ss_item_sk#17, ss_cdemo_sk#23, ss_store_sk#19, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11] + : : : : +- *(5) Filter (((isnotnull(ss_cdemo_sk#23) && isnotnull(ss_sold_date_sk#21)) && isnotnull(ss_store_sk#19)) && isnotnull(ss_item_sk#17)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#21,ss_item_sk#17,ss_cdemo_sk#23,ss_store_sk#19,ss_quantity#9,ss_list_price#10,ss_sales_price#12,ss_coupon_amt#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [d_date_sk#22] + : : +- *(2) Filter ((isnotnull(d_year#28) && (d_year#28 = 2002)) && isnotnull(d_date_sk#22)) + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#22,d_year#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [s_store_sk#20, s_state#16] + : +- *(3) Filter ((isnotnull(s_state#16) && (s_state#16 = TN)) && isnotnull(s_store_sk#20)) + : +- *(3) FileScan parquet default.store[s_store_sk#20,s_state#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [i_item_sk#18, i_item_id#15] + +- *(4) Filter isnotnull(i_item_sk#18) + +- *(4) FileScan parquet default.item[i_item_sk#18,i_item_id#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt new file mode 100644 index 000000000..5eead43da --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt @@ -0,0 +1,43 @@ +TakeOrderedAndProject [agg4,i_item_id,s_state,agg3,g_state,agg1,agg2] + WholeStageCodegen + HashAggregate [i_item_id,sum,count,count,avg(UnscaledValue(ss_list_price)),sum,count,avg(cast(ss_quantity as bigint)),sum,s_state,count,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),sum,spark_grouping_id] [agg4,sum,count,count,avg(UnscaledValue(ss_list_price)),sum,count,avg(cast(ss_quantity as bigint)),sum,agg3,count,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),sum,g_state,agg1,agg2] + InputAdapter + Exchange [i_item_id,s_state,spark_grouping_id] #1 + WholeStageCodegen + HashAggregate [i_item_id,sum,sum,count,sum,count,count,sum,sum,count,sum,ss_coupon_amt,count,ss_sales_price,sum,ss_list_price,count,s_state,count,count,ss_quantity,sum,spark_grouping_id] [sum,sum,count,sum,count,count,sum,sum,count,sum,count,sum,count,count,count,sum] + Expand [i_item_id,ss_coupon_amt,ss_sales_price,ss_list_price,ss_quantity,s_state] + Project [i_item_id,ss_coupon_amt,s_state,ss_sales_price,ss_list_price,ss_quantity] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,s_state,ss_sales_price] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk] + Filter [ss_cdemo_sk,ss_sold_date_sk,ss_store_sk,ss_item_sk] + Scan parquet default.store_sales [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk] [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [cd_demo_sk] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk,s_state] + Filter [s_state,s_store_sk] + Scan parquet default.store [s_store_sk,s_state] [s_store_sk,s_state] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt new file mode 100644 index 000000000..2679a8bf5 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt @@ -0,0 +1,66 @@ +== Physical Plan == +CollectLimit 100 ++- BroadcastNestedLoopJoin BuildRight, Inner + :- BroadcastNestedLoopJoin BuildRight, Inner + : :- BroadcastNestedLoopJoin BuildRight, Inner + : : :- BroadcastNestedLoopJoin BuildRight, Inner + : : : :- BroadcastNestedLoopJoin BuildRight, Inner + : : : : :- *(3) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_list_price#1)), count(ss_list_price#1), count(distinct ss_list_price#1)]) + : : : : : +- Exchange SinglePartition + : : : : : +- *(2) HashAggregate(keys=[], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1), partial_count(distinct ss_list_price#1)]) + : : : : : +- *(2) HashAggregate(keys=[ss_list_price#1], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1)]) + : : : : : +- Exchange hashpartitioning(ss_list_price#1, 200) + : : : : : +- *(1) HashAggregate(keys=[ss_list_price#1], functions=[partial_avg(UnscaledValue(ss_list_price#1)), partial_count(ss_list_price#1)]) + : : : : : +- *(1) Project [ss_list_price#1] + : : : : : +- *(1) Filter (((isnotnull(ss_quantity#2) && (ss_quantity#2 >= 0)) && (ss_quantity#2 <= 5)) && ((((ss_list_price#1 >= 8.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 18.00)) || ((ss_coupon_amt#3 >= 459.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 1459.00))) || ((ss_wholesale_cost#4 >= 57.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 77.00)))) + : : : : : +- *(1) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5)], ReadSchema: struct= 6)) && (ss_quantity#2 <= 10)) && ((((ss_list_price#1 >= 90.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 100.00)) || ((ss_coupon_amt#3 >= 2323.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 3323.00))) || ((ss_wholesale_cost#4 >= 31.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 51.00)))) + : : : : +- *(4) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10)], ReadSchema: struct= 11)) && (ss_quantity#2 <= 15)) && ((((ss_list_price#1 >= 142.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 152.00)) || ((ss_coupon_amt#3 >= 12214.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 13214.00))) || ((ss_wholesale_cost#4 >= 79.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 99.00)))) + : : : +- *(7) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15)], ReadSchema: struct= 16)) && (ss_quantity#2 <= 20)) && ((((ss_list_price#1 >= 135.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 145.00)) || ((ss_coupon_amt#3 >= 6071.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 7071.00))) || ((ss_wholesale_cost#4 >= 38.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 58.00)))) + : : +- *(10) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct= 21)) && (ss_quantity#2 <= 25)) && ((((ss_list_price#1 >= 122.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 132.00)) || ((ss_coupon_amt#3 >= 836.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 1836.00))) || ((ss_wholesale_cost#4 >= 17.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 37.00)))) + : +- *(13) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25)], ReadSchema: struct= 26)) && (ss_quantity#2 <= 30)) && ((((ss_list_price#1 >= 154.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 164.00)) || ((ss_coupon_amt#3 >= 7326.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 8326.00))) || ((ss_wholesale_cost#4 >= 7.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 27.00)))) + +- *(16) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(3) Project [d_date_sk#20] + : : : : +- *(3) Filter ((((isnotnull(d_moy#28) && isnotnull(d_year#29)) && (d_moy#28 = 9)) && (d_year#29 = 1999)) && isnotnull(d_date_sk#20)) + : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_year#29,d_moy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,9), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [d_date_sk#18] + : : : +- *(4) Filter (((((isnotnull(d_year#30) && isnotnull(d_moy#31)) && (d_moy#31 >= 9)) && (d_moy#31 <= 12)) && (d_year#30 = 1999)) && isnotnull(d_date_sk#18)) + : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#30,d_moy#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), Equ..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(5) Project [d_date_sk#16] + : : +- *(5) Filter (d_year#32 IN (1999,2000,2001) && isnotnull(d_date_sk#16)) + : : +- *(5) FileScan parquet default.date_dim[d_date_sk#16,d_year#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [s_store_sk#14, s_store_id#3, s_store_name#4] + : +- *(6) Filter isnotnull(s_store_sk#14) + : +- *(6) FileScan parquet default.store[s_store_sk#14,s_store_id#3,s_store_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) Project [i_item_sk#12, i_item_id#1, i_item_desc#2] + +- *(7) Filter isnotnull(i_item_sk#12) + +- *(7) FileScan parquet default.item[i_item_sk#12,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt new file mode 100644 index 000000000..64fa8afa9 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt @@ -0,0 +1,66 @@ +TakeOrderedAndProject [store_returns_quantity,i_item_id,i_item_desc,store_sales_quantity,s_store_id,catalog_sales_quantity,s_store_name] + WholeStageCodegen + HashAggregate [sum(cast(cs_quantity as bigint)),i_item_id,i_item_desc,sum,sum(cast(ss_quantity as bigint)),sum,sum(cast(sr_return_quantity as bigint)),s_store_id,sum,s_store_name] [sum(cast(cs_quantity as bigint)),store_returns_quantity,sum,sum(cast(ss_quantity as bigint)),store_sales_quantity,sum,sum(cast(sr_return_quantity as bigint)),sum,catalog_sales_quantity] + InputAdapter + Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 + WholeStageCodegen + HashAggregate [i_item_id,sum,i_item_desc,sum,sum,sum,s_store_id,sr_return_quantity,sum,sum,ss_quantity,cs_quantity,s_store_name] [sum,sum,sum,sum,sum,sum] + Project [s_store_id,s_store_name,ss_quantity,cs_quantity,sr_return_quantity,i_item_desc,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_id,s_store_name,ss_quantity,ss_item_sk,cs_quantity,sr_return_quantity] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,sr_returned_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,ss_sold_date_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + Project [ss_quantity,ss_item_sk,ss_store_sk,sr_return_quantity,sr_customer_sk,sr_item_sk,ss_sold_date_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] + Project [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] + Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] + Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] + Scan parquet default.store_returns [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [s_store_sk,s_store_id,s_store_name] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_store_id,s_store_name] [s_store_sk,s_store_id,s_store_name] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [i_item_sk,i_item_id,i_item_desc] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] [i_item_sk,i_item_id,i_item_desc] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt new file mode 100644 index 000000000..8d08a8b03 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt @@ -0,0 +1,20 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[d_year#1 ASC NULLS FIRST,sum_agg#2 DESC NULLS LAST,brand_id#3 ASC NULLS FIRST], output=[d_year#1,brand_id#3,brand#4,sum_agg#2]) ++- *(4) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[sum(UnscaledValue(ss_ext_sales_price#7))]) + +- Exchange hashpartitioning(d_year#1, i_brand#5, i_brand_id#6, 200) + +- *(3) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#7))]) + +- *(3) Project [d_year#1, ss_ext_sales_price#7, i_brand_id#6, i_brand#5] + +- *(3) BroadcastHashJoin [ss_item_sk#8], [i_item_sk#9], Inner, BuildRight + :- *(3) Project [d_year#1, ss_item_sk#8, ss_ext_sales_price#7] + : +- *(3) BroadcastHashJoin [d_date_sk#10], [ss_sold_date_sk#11], Inner, BuildRight + : :- *(3) Project [d_date_sk#10, d_year#1] + : : +- *(3) Filter ((isnotnull(d_moy#12) && (d_moy#12 = 11)) && isnotnull(d_date_sk#10)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#10,d_year#1,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [ss_sold_date_sk#11, ss_item_sk#8, ss_ext_sales_price#7] + : +- *(1) Filter (isnotnull(ss_sold_date_sk#11) && isnotnull(ss_item_sk#8)) + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#8,ss_ext_sales_price#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [i_item_sk#9, i_brand_id#6, i_brand#5] + +- *(2) Filter ((isnotnull(i_manufact_id#13) && (i_manufact_id#13 = 128)) && isnotnull(i_item_sk#9)) + +- *(2) FileScan parquet default.item[i_item_sk#9,i_brand_id#6,i_brand#5,i_manufact_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,128), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt new file mode 100644 index 000000000..a8432c52f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [d_year,sum_agg,brand_id,brand] + WholeStageCodegen + HashAggregate [d_year,i_brand,sum(UnscaledValue(ss_ext_sales_price)),i_brand_id,sum] [brand_id,sum(UnscaledValue(ss_ext_sales_price)),sum_agg,sum,brand] + InputAdapter + Exchange [d_year,i_brand,i_brand_id] #1 + WholeStageCodegen + HashAggregate [sum,d_year,i_brand,ss_ext_sales_price,i_brand_id,sum] [sum,sum] + Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [d_year,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_item_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manufact_id,i_item_sk] + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id] [i_item_sk,i_brand_id,i_brand,i_manufact_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt new file mode 100644 index 000000000..758cb7159 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt @@ -0,0 +1,52 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST,c_salutation#2 ASC NULLS FIRST,c_first_name#3 ASC NULLS FIRST,c_last_name#4 ASC NULLS FIRST,c_preferred_cust_flag#5 ASC NULLS FIRST,c_birth_day#6 ASC NULLS FIRST,c_birth_month#7 ASC NULLS FIRST,c_birth_year#8 ASC NULLS FIRST,c_birth_country#9 ASC NULLS FIRST,c_login#10 ASC NULLS FIRST,c_email_address#11 ASC NULLS FIRST,c_last_review_date#12 ASC NULLS FIRST,ctr_total_return#13 ASC NULLS FIRST], output=[c_customer_id#1,c_salutation#2,c_first_name#3,c_last_name#4,c_preferred_cust_flag#5,c_birth_day#6,c_birth_month#7,c_birth_year#8,c_birth_country#9,c_login#10,c_email_address#11,c_last_review_date#12,ctr_total_return#13]) ++- *(11) Project [c_customer_id#1, c_salutation#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_day#6, c_birth_month#7, c_birth_year#8, c_birth_country#9, c_login#10, c_email_address#11, c_last_review_date#12, ctr_total_return#13] + +- *(11) BroadcastHashJoin [c_current_addr_sk#14], [ca_address_sk#15], Inner, BuildRight + :- *(11) Project [ctr_total_return#13, c_customer_id#1, c_current_addr_sk#14, c_salutation#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_day#6, c_birth_month#7, c_birth_year#8, c_birth_country#9, c_login#10, c_email_address#11, c_last_review_date#12] + : +- *(11) BroadcastHashJoin [ctr_customer_sk#16], [cast(c_customer_sk#17 as bigint)], Inner, BuildRight + : :- *(11) Project [ctr_customer_sk#16, ctr_total_return#13] + : : +- *(11) BroadcastHashJoin [ctr_state#18], [ctr_state#18#19], Inner, BuildRight, (cast(ctr_total_return#13 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#20) + : : :- *(11) Filter isnotnull(ctr_total_return#13) + : : : +- *(11) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[sum(UnscaledValue(wr_return_amt#23))]) + : : : +- Exchange hashpartitioning(wr_returning_customer_sk#21, ca_state#22, 200) + : : : +- *(3) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[partial_sum(UnscaledValue(wr_return_amt#23))]) + : : : +- *(3) Project [wr_returning_customer_sk#21, wr_return_amt#23, ca_state#22] + : : : +- *(3) BroadcastHashJoin [wr_returning_addr_sk#24], [cast(ca_address_sk#15 as bigint)], Inner, BuildRight + : : : :- *(3) Project [wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] + : : : : +- *(3) BroadcastHashJoin [wr_returned_date_sk#25], [cast(d_date_sk#26 as bigint)], Inner, BuildRight + : : : : :- *(3) Project [wr_returned_date_sk#25, wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] + : : : : : +- *(3) Filter ((isnotnull(wr_returned_date_sk#25) && isnotnull(wr_returning_addr_sk#24)) && isnotnull(wr_returning_customer_sk#21)) + : : : : : +- *(3) FileScan parquet default.web_returns[wr_returned_date_sk#25,wr_returning_customer_sk#21,wr_returning_addr_sk#24,wr_return_amt#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk), IsNotNull(wr_returning_customer..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [ca_address_sk#15, ca_state#22] + : : : +- *(2) Filter (isnotnull(ca_address_sk#15) && isnotnull(ca_state#22)) + : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) + : : +- *(8) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#20) + : : +- *(8) HashAggregate(keys=[ctr_state#18], functions=[avg(ctr_total_return#13)]) + : : +- Exchange hashpartitioning(ctr_state#18, 200) + : : +- *(7) HashAggregate(keys=[ctr_state#18], functions=[partial_avg(ctr_total_return#13)]) + : : +- *(7) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[sum(UnscaledValue(wr_return_amt#23))]) + : : +- Exchange hashpartitioning(wr_returning_customer_sk#21, ca_state#22, 200) + : : +- *(6) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[partial_sum(UnscaledValue(wr_return_amt#23))]) + : : +- *(6) Project [wr_returning_customer_sk#21, wr_return_amt#23, ca_state#22] + : : +- *(6) BroadcastHashJoin [wr_returning_addr_sk#24], [cast(ca_address_sk#15 as bigint)], Inner, BuildRight + : : :- *(6) Project [wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] + : : : +- *(6) BroadcastHashJoin [wr_returned_date_sk#25], [cast(d_date_sk#26 as bigint)], Inner, BuildRight + : : : :- *(6) Project [wr_returned_date_sk#25, wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] + : : : : +- *(6) Filter (isnotnull(wr_returned_date_sk#25) && isnotnull(wr_returning_addr_sk#24)) + : : : : +- *(6) FileScan parquet default.web_returns[wr_returned_date_sk#25,wr_returning_customer_sk#21,wr_returning_addr_sk#24,wr_return_amt#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt new file mode 100644 index 000000000..1be056d32 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt @@ -0,0 +1,70 @@ +TakeOrderedAndProject [ctr_total_return,c_birth_day,c_login,c_salutation,c_birth_month,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,c_birth_year,c_last_review_date] + WholeStageCodegen + Project [ctr_total_return,c_birth_day,c_login,c_salutation,c_birth_month,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,c_birth_year,c_last_review_date] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_preferred_cust_flag,c_current_addr_sk,ctr_total_return,c_email_address,c_customer_id,c_birth_year,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] + BroadcastHashJoin [ctr_customer_sk,c_customer_sk] + Project [ctr_customer_sk,ctr_total_return] + BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + Filter [ctr_total_return] + HashAggregate [wr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(wr_return_amt))] [ctr_total_return,ctr_state,sum(UnscaledValue(wr_return_amt)),sum,ctr_customer_sk] + InputAdapter + Exchange [wr_returning_customer_sk,ca_state] #1 + WholeStageCodegen + HashAggregate [wr_return_amt,sum,wr_returning_customer_sk,sum,ca_state] [sum,sum] + Project [wr_returning_customer_sk,wr_return_amt,ca_state] + BroadcastHashJoin [wr_returning_addr_sk,ca_address_sk] + Project [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Project [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + Filter [wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] + Scan parquet default.web_returns [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ca_address_sk,ca_state] + Filter [ca_address_sk,ca_state] + Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + HashAggregate [ctr_state,sum,count,avg(ctr_total_return)] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),sum,ctr_state,avg(ctr_total_return),count] + InputAdapter + Exchange [ctr_state] #5 + WholeStageCodegen + HashAggregate [ctr_state,count,sum,ctr_total_return,count,sum] [sum,count,sum,count] + HashAggregate [wr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(wr_return_amt))] [sum(UnscaledValue(wr_return_amt)),ctr_state,ctr_total_return,sum] + InputAdapter + Exchange [wr_returning_customer_sk,ca_state] #6 + WholeStageCodegen + HashAggregate [sum,wr_return_amt,sum,wr_returning_customer_sk,ca_state] [sum,sum] + Project [wr_returning_customer_sk,wr_return_amt,ca_state] + BroadcastHashJoin [wr_returning_addr_sk,ca_address_sk] + Project [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Project [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + Filter [wr_returned_date_sk,wr_returning_addr_sk] + Scan parquet default.web_returns [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #2 + InputAdapter + ReusedExchange [ca_address_sk,ca_state] [ca_address_sk,ca_state] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [c_preferred_cust_flag,c_current_addr_sk,c_email_address,c_customer_id,c_birth_year,c_customer_sk,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] + Filter [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer [c_preferred_cust_flag,c_current_addr_sk,c_email_address,c_customer_id,c_birth_year,c_customer_sk,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] [c_preferred_cust_flag,c_current_addr_sk,c_email_address,c_customer_id,c_birth_year,c_customer_sk,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt new file mode 100644 index 000000000..22739d4c7 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt @@ -0,0 +1,100 @@ +== Physical Plan == +*(25) Sort [ca_county#1 ASC NULLS FIRST], true, 0 ++- Exchange rangepartitioning(ca_county#1 ASC NULLS FIRST, 200) + +- *(24) Project [ca_county#1, d_year#2, CheckOverflow((promote_precision(web_sales#3) / promote_precision(web_sales#4)), DecimalType(37,20)) AS web_q1_q2_increase#5, CheckOverflow((promote_precision(store_sales#6) / promote_precision(store_sales#7)), DecimalType(37,20)) AS store_q1_q2_increase#8, CheckOverflow((promote_precision(web_sales#9) / promote_precision(web_sales#3)), DecimalType(37,20)) AS web_q2_q3_increase#10, CheckOverflow((promote_precision(store_sales#11) / promote_precision(store_sales#6)), DecimalType(37,20)) AS store_q2_q3_increase#12] + +- *(24) BroadcastHashJoin [ca_county#13], [ca_county#14], Inner, BuildRight, (CASE WHEN (web_sales#3 > 0.00) THEN CheckOverflow((promote_precision(web_sales#9) / promote_precision(web_sales#3)), DecimalType(37,20)) ELSE null END > CASE WHEN (store_sales#6 > 0.00) THEN CheckOverflow((promote_precision(store_sales#11) / promote_precision(store_sales#6)), DecimalType(37,20)) ELSE null END) + :- *(24) Project [ca_county#1, d_year#2, store_sales#7, store_sales#6, store_sales#11, ca_county#13, web_sales#4, web_sales#3] + : +- *(24) BroadcastHashJoin [ca_county#13], [ca_county#15], Inner, BuildRight, (CASE WHEN (web_sales#4 > 0.00) THEN CheckOverflow((promote_precision(web_sales#3) / promote_precision(web_sales#4)), DecimalType(37,20)) ELSE null END > CASE WHEN (store_sales#7 > 0.00) THEN CheckOverflow((promote_precision(store_sales#6) / promote_precision(store_sales#7)), DecimalType(37,20)) ELSE null END) + : :- *(24) BroadcastHashJoin [ca_county#1], [ca_county#13], Inner, BuildRight + : : :- *(24) Project [ca_county#1, d_year#2, store_sales#7, store_sales#6, store_sales#11] + : : : +- *(24) BroadcastHashJoin [ca_county#16], [ca_county#17], Inner, BuildRight + : : : :- *(24) BroadcastHashJoin [ca_county#1], [ca_county#16], Inner, BuildRight + : : : : :- *(24) HashAggregate(keys=[ca_county#1, d_qoy#18, d_year#2], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) + : : : : : +- Exchange hashpartitioning(ca_county#1, d_qoy#18, d_year#2, 200) + : : : : : +- *(3) HashAggregate(keys=[ca_county#1, d_qoy#18, d_year#2], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#19))]) + : : : : : +- *(3) Project [ss_ext_sales_price#19, d_year#2, d_qoy#18, ca_county#1] + : : : : : +- *(3) BroadcastHashJoin [ss_addr_sk#20], [ca_address_sk#21], Inner, BuildRight + : : : : : :- *(3) Project [ss_addr_sk#20, ss_ext_sales_price#19, d_year#2, d_qoy#18] + : : : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#23], Inner, BuildRight + : : : : : : :- *(3) Project [ss_sold_date_sk#22, ss_addr_sk#20, ss_ext_sales_price#19] + : : : : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#22) && isnotnull(ss_addr_sk#20)) + : : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct + : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : +- *(1) Project [d_date_sk#23, d_year#2, d_qoy#18] + : : : : : : +- *(1) Filter ((((isnotnull(d_qoy#18) && isnotnull(d_year#2)) && (d_qoy#18 = 1)) && (d_year#2 = 2000)) && isnotnull(d_date_sk#23)) + : : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#23,d_year#2,d_qoy#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,1), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(2) Project [ca_address_sk#21, ca_county#1] + : : : : : +- *(2) Filter (isnotnull(ca_address_sk#21) && isnotnull(ca_county#1)) + : : : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#21,ca_county#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : : : +- *(7) HashAggregate(keys=[ca_county#16, d_qoy#24, d_year#25], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) + : : : : +- Exchange hashpartitioning(ca_county#16, d_qoy#24, d_year#25, 200) + : : : : +- *(6) HashAggregate(keys=[ca_county#16, d_qoy#24, d_year#25], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#19))]) + : : : : +- *(6) Project [ss_ext_sales_price#19, d_year#25, d_qoy#24, ca_county#16] + : : : : +- *(6) BroadcastHashJoin [ss_addr_sk#20], [ca_address_sk#26], Inner, BuildRight + : : : : :- *(6) Project [ss_addr_sk#20, ss_ext_sales_price#19, d_year#25, d_qoy#24] + : : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#27], Inner, BuildRight + : : : : : :- *(6) Project [ss_sold_date_sk#22, ss_addr_sk#20, ss_ext_sales_price#19] + : : : : : : +- *(6) Filter (isnotnull(ss_sold_date_sk#22) && isnotnull(ss_addr_sk#20)) + : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(4) Project [d_date_sk#27, d_year#25, d_qoy#24] + : : : : : +- *(4) Filter ((((isnotnull(d_qoy#24) && isnotnull(d_year#25)) && (d_qoy#24 = 2)) && (d_year#25 = 2000)) && isnotnull(d_date_sk#27)) + : : : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#27,d_year#25,d_qoy#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- ReusedExchange [ca_address_sk#26, ca_county#16], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : : +- *(11) HashAggregate(keys=[ca_county#17, d_qoy#28, d_year#29], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) + : : : +- Exchange hashpartitioning(ca_county#17, d_qoy#28, d_year#29, 200) + : : : +- *(10) HashAggregate(keys=[ca_county#17, d_qoy#28, d_year#29], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#19))]) + : : : +- *(10) Project [ss_ext_sales_price#19, d_year#29, d_qoy#28, ca_county#17] + : : : +- *(10) BroadcastHashJoin [ss_addr_sk#20], [ca_address_sk#30], Inner, BuildRight + : : : :- *(10) Project [ss_addr_sk#20, ss_ext_sales_price#19, d_year#29, d_qoy#28] + : : : : +- *(10) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#31], Inner, BuildRight + : : : : :- *(10) Project [ss_sold_date_sk#22, ss_addr_sk#20, ss_ext_sales_price#19] + : : : : : +- *(10) Filter (isnotnull(ss_sold_date_sk#22) && isnotnull(ss_addr_sk#20)) + : : : : : +- *(10) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(8) Project [d_date_sk#31, d_year#29, d_qoy#28] + : : : : +- *(8) Filter ((((isnotnull(d_qoy#28) && isnotnull(d_year#29)) && (d_qoy#28 = 3)) && (d_year#29 = 2000)) && isnotnull(d_date_sk#31)) + : : : : +- *(8) FileScan parquet default.date_dim[d_date_sk#31,d_year#29,d_qoy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [ca_address_sk#30, ca_county#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : +- *(15) HashAggregate(keys=[ca_county#13, d_qoy#32, d_year#33], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) + : : +- Exchange hashpartitioning(ca_county#13, d_qoy#32, d_year#33, 200) + : : +- *(14) HashAggregate(keys=[ca_county#13, d_qoy#32, d_year#33], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#34))]) + : : +- *(14) Project [ws_ext_sales_price#34, d_year#33, d_qoy#32, ca_county#13] + : : +- *(14) BroadcastHashJoin [ws_bill_addr_sk#35], [ca_address_sk#36], Inner, BuildRight + : : :- *(14) Project [ws_bill_addr_sk#35, ws_ext_sales_price#34, d_year#33, d_qoy#32] + : : : +- *(14) BroadcastHashJoin [ws_sold_date_sk#37], [d_date_sk#38], Inner, BuildRight + : : : :- *(14) Project [ws_sold_date_sk#37, ws_bill_addr_sk#35, ws_ext_sales_price#34] + : : : : +- *(14) Filter (isnotnull(ws_sold_date_sk#37) && isnotnull(ws_bill_addr_sk#35)) + : : : : +- *(14) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#38, d_year#33, d_qoy#32], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- ReusedExchange [ca_address_sk#36, ca_county#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- *(19) HashAggregate(keys=[ca_county#15, d_qoy#39, d_year#40], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) + : +- Exchange hashpartitioning(ca_county#15, d_qoy#39, d_year#40, 200) + : +- *(18) HashAggregate(keys=[ca_county#15, d_qoy#39, d_year#40], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#34))]) + : +- *(18) Project [ws_ext_sales_price#34, d_year#40, d_qoy#39, ca_county#15] + : +- *(18) BroadcastHashJoin [ws_bill_addr_sk#35], [ca_address_sk#41], Inner, BuildRight + : :- *(18) Project [ws_bill_addr_sk#35, ws_ext_sales_price#34, d_year#40, d_qoy#39] + : : +- *(18) BroadcastHashJoin [ws_sold_date_sk#37], [d_date_sk#42], Inner, BuildRight + : : :- *(18) Project [ws_sold_date_sk#37, ws_bill_addr_sk#35, ws_ext_sales_price#34] + : : : +- *(18) Filter (isnotnull(ws_sold_date_sk#37) && isnotnull(ws_bill_addr_sk#35)) + : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#42, d_year#40, d_qoy#39], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [ca_address_sk#41, ca_county#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- *(23) HashAggregate(keys=[ca_county#14, d_qoy#43, d_year#44], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) + +- Exchange hashpartitioning(ca_county#14, d_qoy#43, d_year#44, 200) + +- *(22) HashAggregate(keys=[ca_county#14, d_qoy#43, d_year#44], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#34))]) + +- *(22) Project [ws_ext_sales_price#34, d_year#44, d_qoy#43, ca_county#14] + +- *(22) BroadcastHashJoin [ws_bill_addr_sk#35], [ca_address_sk#45], Inner, BuildRight + :- *(22) Project [ws_bill_addr_sk#35, ws_ext_sales_price#34, d_year#44, d_qoy#43] + : +- *(22) BroadcastHashJoin [ws_sold_date_sk#37], [d_date_sk#46], Inner, BuildRight + : :- *(22) Project [ws_sold_date_sk#37, ws_bill_addr_sk#35, ws_ext_sales_price#34] + : : +- *(22) Filter (isnotnull(ws_sold_date_sk#37) && isnotnull(ws_bill_addr_sk#35)) + : : +- *(22) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#46, d_year#44, d_qoy#43], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [ca_address_sk#45, ca_county#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt new file mode 100644 index 000000000..78c6383a4 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt @@ -0,0 +1,140 @@ +WholeStageCodegen + Sort [ca_county] + InputAdapter + Exchange [ca_county] #1 + WholeStageCodegen + Project [web_sales,store_sales,store_sales,web_sales,d_year,store_sales,web_sales,ca_county] + BroadcastHashJoin [web_sales,ca_county,store_sales,ca_county,store_sales,web_sales] + Project [d_year,store_sales,store_sales,ca_county,web_sales,ca_county,store_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county,store_sales,web_sales,store_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county] + Project [d_year,store_sales,store_sales,ca_county,store_sales] + BroadcastHashJoin [ca_county,ca_county] + BroadcastHashJoin [ca_county,ca_county] + HashAggregate [d_year,d_qoy,ca_county,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #2 + WholeStageCodegen + HashAggregate [d_year,d_qoy,sum,ca_county,sum,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk,d_year,d_qoy] + Filter [d_qoy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [ca_address_sk,ca_county] + Filter [ca_address_sk,ca_county] + Scan parquet default.customer_address [ca_address_sk,ca_county] [ca_address_sk,ca_county] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + HashAggregate [d_qoy,d_year,sum(UnscaledValue(ss_ext_sales_price)),sum,ca_county] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #6 + WholeStageCodegen + HashAggregate [sum,d_qoy,d_year,sum,ss_ext_sales_price,ca_county] [sum,sum] + Project [ss_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [d_date_sk,d_year,d_qoy] + Filter [d_qoy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + InputAdapter + ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + HashAggregate [sum,sum(UnscaledValue(ss_ext_sales_price)),d_year,d_qoy,ca_county] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #9 + WholeStageCodegen + HashAggregate [sum,d_year,d_qoy,sum,ss_ext_sales_price,ca_county] [sum,sum] + Project [ss_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [d_date_sk,d_year,d_qoy] + Filter [d_qoy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + InputAdapter + ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen + HashAggregate [sum,ca_county,d_qoy,d_year,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #12 + WholeStageCodegen + HashAggregate [sum,ws_ext_sales_price,ca_county,d_qoy,d_year,sum] [sum,sum] + Project [ws_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk,ws_bill_addr_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] #3 + InputAdapter + ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen + HashAggregate [d_qoy,ca_county,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #14 + WholeStageCodegen + HashAggregate [d_qoy,ca_county,d_year,ws_ext_sales_price,sum,sum] [sum,sum] + Project [ws_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk,ws_bill_addr_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] #7 + InputAdapter + ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen + HashAggregate [sum,d_qoy,ca_county,d_year,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + InputAdapter + Exchange [ca_county,d_qoy,d_year] #16 + WholeStageCodegen + HashAggregate [sum,sum,d_qoy,ca_county,ws_ext_sales_price,d_year] [sum,sum] + Project [ws_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk,ws_bill_addr_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] #10 + InputAdapter + ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt new file mode 100644 index 000000000..f542e4cef --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt @@ -0,0 +1,33 @@ +== Physical Plan == +CollectLimit 100 ++- *(6) Project [1 AS excess discount amount #1] + +- *(6) BroadcastHashJoin [cs_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight + :- *(6) Project [cs_sold_date_sk#2] + : +- *(6) BroadcastHashJoin [i_item_sk#4], [cs_item_sk#5#6], Inner, BuildRight, (cast(cs_ext_discount_amt#7 as decimal(14,7)) > (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#8) + : :- *(6) Project [cs_sold_date_sk#2, cs_ext_discount_amt#7, i_item_sk#4] + : : +- *(6) BroadcastHashJoin [cs_item_sk#5], [i_item_sk#4], Inner, BuildRight + : : :- *(6) Project [cs_sold_date_sk#2, cs_item_sk#5, cs_ext_discount_amt#7] + : : : +- *(6) Filter ((isnotnull(cs_item_sk#5) && isnotnull(cs_ext_discount_amt#7)) && isnotnull(cs_sold_date_sk#2)) + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#2,cs_item_sk#5,cs_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_ext_discount_amt), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [i_item_sk#4] + : : +- *(1) Filter ((isnotnull(i_manufact_id#9) && (i_manufact_id#9 = 977)) && isnotnull(i_item_sk#4)) + : : +- *(1) FileScan parquet default.item[i_item_sk#4,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,977), IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : +- *(4) Filter isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#8) + : +- *(4) HashAggregate(keys=[cs_item_sk#5], functions=[avg(UnscaledValue(cs_ext_discount_amt#7))]) + : +- Exchange hashpartitioning(cs_item_sk#5, 200) + : +- *(3) HashAggregate(keys=[cs_item_sk#5], functions=[partial_avg(UnscaledValue(cs_ext_discount_amt#7))]) + : +- *(3) Project [cs_item_sk#5, cs_ext_discount_amt#7] + : +- *(3) BroadcastHashJoin [cs_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight + : :- *(3) Project [cs_sold_date_sk#2, cs_item_sk#5, cs_ext_discount_amt#7] + : : +- *(3) Filter (isnotnull(cs_sold_date_sk#2) && isnotnull(cs_item_sk#5)) + : : +- *(3) FileScan parquet default.catalog_sales[cs_sold_date_sk#2,cs_item_sk#5,cs_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#3] + : +- *(2) Filter (((isnotnull(d_date#10) && (cast(d_date#10 as string) >= 2000-01-27])) && (d_date#10 <= 11073)) && isnotnull(d_date_sk#3)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#3,d_date#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [d_date_sk#3] + +- *(5) Filter (((isnotnull(d_date#10) && (cast(d_date#10 as string) >= 2000-01-27)) && (d_date#10 <= 11073)) && isnotnull(d_date_sk#3)) + +- *(5) FileScan parquet default.date_dim[d_date_sk#3,d_date#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt new file mode 100644 index 000000000..39e8073fa --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt @@ -0,0 +1,43 @@ +CollectLimit + WholeStageCodegen + Project + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk] + BroadcastHashJoin [i_item_sk,cs_item_sk,cs_ext_discount_amt,(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))] + Project [cs_sold_date_sk,cs_ext_discount_amt,i_item_sk] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] + Filter [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen + Project [i_item_sk] + Filter [i_manufact_id,i_item_sk] + Scan parquet default.item [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))] + HashAggregate [cs_item_sk,sum,count,avg(UnscaledValue(cs_ext_discount_amt))] [avg(UnscaledValue(cs_ext_discount_amt)),(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),sum,count,cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #3 + WholeStageCodegen + HashAggregate [cs_ext_discount_amt,count,sum,cs_item_sk,sum,count] [sum,count,sum,count] + Project [cs_item_sk,cs_ext_discount_amt] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] + Filter [cs_sold_date_sk,cs_item_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt new file mode 100644 index 000000000..234413bf8 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt @@ -0,0 +1,65 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output=[i_manufact_id#2,total_sales#1]) ++- *(20) HashAggregate(keys=[i_manufact_id#2], functions=[sum(total_sales#3)]) + +- Exchange hashpartitioning(i_manufact_id#2, 200) + +- *(19) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(total_sales#3)]) + +- Union + :- *(6) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- Exchange hashpartitioning(i_manufact_id#2, 200) + : +- *(5) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- *(5) Project [ss_ext_sales_price#4, i_manufact_id#2] + : +- *(5) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight + : :- *(5) Project [ss_item_sk#5, ss_ext_sales_price#4] + : : +- *(5) BroadcastHashJoin [ss_addr_sk#7], [ca_address_sk#8], Inner, BuildRight + : : :- *(5) Project [ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] + : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#9, ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] + : : : : +- *(5) Filter ((isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#7)) && isnotnull(ss_item_sk#5)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#10] + : : : +- *(1) Filter ((((isnotnull(d_year#11) && isnotnull(d_moy#12)) && (d_year#11 = 1998)) && (d_moy#12 = 5)) && isnotnull(d_date_sk#10)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,5), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [ca_address_sk#8] + : : +- *(2) Filter ((isnotnull(ca_gmt_offset#13) && (ca_gmt_offset#13 = -5.00)) && isnotnull(ca_address_sk#8)) + : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) BroadcastHashJoin [i_manufact_id#2], [i_manufact_id#2#14], LeftSemi, BuildRight + : :- *(4) Project [i_item_sk#6, i_manufact_id#2] + : : +- *(4) Filter isnotnull(i_item_sk#6) + : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_manufact_id#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [i_manufact_id#2 AS i_manufact_id#2#14] + : +- *(3) Filter (isnotnull(i_category#15) && (i_category#15 = Electronics)) + : +- *(3) FileScan parquet default.item[i_category#15,i_manufact_id#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Electronics)], ReadSchema: struct + :- *(12) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) + : +- Exchange hashpartitioning(i_manufact_id#2, 200) + : +- *(11) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) + : +- *(11) Project [cs_ext_sales_price#16, i_manufact_id#2] + : +- *(11) BroadcastHashJoin [cs_item_sk#17], [i_item_sk#6], Inner, BuildRight + : :- *(11) Project [cs_item_sk#17, cs_ext_sales_price#16] + : : +- *(11) BroadcastHashJoin [cs_bill_addr_sk#18], [ca_address_sk#8], Inner, BuildRight + : : :- *(11) Project [cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] + : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#19], [d_date_sk#10], Inner, BuildRight + : : : :- *(11) Project [cs_sold_date_sk#19, cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] + : : : : +- *(11) Filter ((isnotnull(cs_sold_date_sk#19) && isnotnull(cs_bill_addr_sk#18)) && isnotnull(cs_item_sk#17)) + : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [i_item_sk#6, i_manufact_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(18) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(ws_ext_sales_price#20))]) + +- Exchange hashpartitioning(i_manufact_id#2, 200) + +- *(17) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#20))]) + +- *(17) Project [ws_ext_sales_price#20, i_manufact_id#2] + +- *(17) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#6], Inner, BuildRight + :- *(17) Project [ws_item_sk#21, ws_ext_sales_price#20] + : +- *(17) BroadcastHashJoin [ws_bill_addr_sk#22], [ca_address_sk#8], Inner, BuildRight + : :- *(17) Project [ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] + : : +- *(17) BroadcastHashJoin [ws_sold_date_sk#23], [d_date_sk#10], Inner, BuildRight + : : :- *(17) Project [ws_sold_date_sk#23, ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] + : : : +- *(17) Filter ((isnotnull(ws_sold_date_sk#23) && isnotnull(ws_bill_addr_sk#22)) && isnotnull(ws_item_sk#21)) + : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [i_item_sk#6, i_manufact_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/simplified.txt new file mode 100644 index 000000000..f3e63e8a8 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/simplified.txt @@ -0,0 +1,91 @@ +TakeOrderedAndProject [total_sales,i_manufact_id] + WholeStageCodegen + HashAggregate [i_manufact_id,sum,sum(total_sales)] [sum(total_sales),total_sales,sum] + InputAdapter + Exchange [i_manufact_id] #1 + WholeStageCodegen + HashAggregate [i_manufact_id,total_sales,sum,sum] [sum,sum] + InputAdapter + Union + WholeStageCodegen + HashAggregate [i_manufact_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_manufact_id] #2 + WholeStageCodegen + HashAggregate [i_manufact_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [ss_ext_sales_price,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_addr_sk,ss_item_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + BroadcastHashJoin [i_manufact_id,i_manufact_id] + Project [i_item_sk,i_manufact_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [i_manufact_id] + Filter [i_category] + Scan parquet default.item [i_category,i_manufact_id] [i_category,i_manufact_id] + WholeStageCodegen + HashAggregate [i_manufact_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_manufact_id] #7 + WholeStageCodegen + HashAggregate [i_manufact_id,cs_ext_sales_price,sum,sum] [sum,sum] + Project [cs_ext_sales_price,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] + Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + Filter [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] #5 + WholeStageCodegen + HashAggregate [i_manufact_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_manufact_id] #8 + WholeStageCodegen + HashAggregate [i_manufact_id,ws_ext_sales_price,sum,sum] [sum,sum] + Project [ws_ext_sales_price,i_manufact_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk,ws_bill_addr_sk,ws_item_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt new file mode 100644 index 000000000..e71e76c27 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt @@ -0,0 +1,34 @@ +== Physical Plan == +*(7) Sort [c_last_name#1 ASC NULLS FIRST, c_first_name#2 ASC NULLS FIRST, c_salutation#3 ASC NULLS FIRST, c_preferred_cust_flag#4 DESC NULLS LAST], true, 0 ++- Exchange rangepartitioning(c_last_name#1 ASC NULLS FIRST, c_first_name#2 ASC NULLS FIRST, c_salutation#3 ASC NULLS FIRST, c_preferred_cust_flag#4 DESC NULLS LAST, 200) + +- *(6) Project [c_last_name#1, c_first_name#2, c_salutation#3, c_preferred_cust_flag#4, ss_ticket_number#5, cnt#6] + +- *(6) BroadcastHashJoin [ss_customer_sk#7], [c_customer_sk#8], Inner, BuildRight + :- *(6) Filter ((cnt#6 >= 15) && (cnt#6 <= 20)) + : +- *(6) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#7], functions=[count(1)]) + : +- Exchange hashpartitioning(ss_ticket_number#5, ss_customer_sk#7, 200) + : +- *(4) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#7], functions=[partial_count(1)]) + : +- *(4) Project [ss_customer_sk#7, ss_ticket_number#5] + : +- *(4) BroadcastHashJoin [ss_hdemo_sk#9], [hd_demo_sk#10], Inner, BuildRight + : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_ticket_number#5] + : : +- *(4) BroadcastHashJoin [ss_store_sk#11], [s_store_sk#12], Inner, BuildRight + : : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#5] + : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + : : : :- *(4) Project [ss_sold_date_sk#13, ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#5] + : : : : +- *(4) Filter (((isnotnull(ss_sold_date_sk#13) && isnotnull(ss_store_sk#11)) && isnotnull(ss_hdemo_sk#9)) && isnotnull(ss_customer_sk#7)) + : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_customer_sk#7,ss_hdemo_sk#9,ss_store_sk#11,ss_ticket_number#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#14] + : : : +- *(1) Filter (((((d_dom#15 >= 1) && (d_dom#15 <= 3)) || ((d_dom#15 >= 25) && (d_dom#15 <= 28))) && d_year#16 IN (1999,2000,2001)) && isnotnull(d_date_sk#14)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_dom#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),Le..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [s_store_sk#12] + : : +- *(2) Filter ((isnotnull(s_county#17) && (s_county#17 = Williamson County)) && isnotnull(s_store_sk#12)) + : : +- *(2) FileScan parquet default.store[s_store_sk#12,s_county#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [hd_demo_sk#10] + : +- *(3) Filter ((((isnotnull(hd_vehicle_count#18) && ((hd_buy_potential#19 = >10000) || (hd_buy_potential#19 = unknown))) && (hd_vehicle_count#18 > 0)) && (CASE WHEN (hd_vehicle_count#18 > 0) THEN (cast(hd_dep_count#20 as double) / cast(hd_vehicle_count#18 as double)) ELSE null END > 1.2)) && isnotnull(hd_demo_sk#10)) + : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#10,hd_buy_potential#19,hd_dep_count#20,hd_vehicle_count#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknow..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [c_customer_sk#8, c_salutation#3, c_first_name#2, c_last_name#1, c_preferred_cust_flag#4] + +- *(5) Filter isnotnull(c_customer_sk#8) + +- *(5) FileScan parquet default.customer[c_customer_sk#8,c_salutation#3,c_first_name#2,c_last_name#1,c_preferred_cust_flag#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [ss_customer_sk#29] + : : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#30], [d_date_sk#31], Inner, BuildRight + : : : : :- *(2) Project [ss_sold_date_sk#30, ss_customer_sk#29] + : : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#30) + : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#30,ss_customer_sk#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [d_date_sk#31] + : : : : +- *(1) Filter ((((isnotnull(d_year#32) && isnotnull(d_qoy#33)) && (d_year#32 = 2002)) && (d_qoy#33 < 4)) && isnotnull(d_date_sk#31)) + : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#31,d_year#32,d_qoy#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [ws_bill_customer_sk#28] + : : : +- *(4) BroadcastHashJoin [ws_sold_date_sk#34], [d_date_sk#31], Inner, BuildRight + : : : :- *(4) Project [ws_sold_date_sk#34, ws_bill_customer_sk#28] + : : : : +- *(4) Filter isnotnull(ws_sold_date_sk#34) + : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#34,ws_bill_customer_sk#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#31], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [cs_ship_customer_sk#27] + : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#35], [d_date_sk#31], Inner, BuildRight + : : :- *(6) Project [cs_sold_date_sk#35, cs_ship_customer_sk#27] + : : : +- *(6) Filter isnotnull(cs_sold_date_sk#35) + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#35,cs_ship_customer_sk#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#31], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [ca_address_sk#23, ca_state#1] + : +- *(7) Filter isnotnull(ca_address_sk#23) + : +- *(7) FileScan parquet default.customer_address[ca_address_sk#23,ca_state#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [cd_demo_sk#21, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6] + +- *(8) Filter isnotnull(cd_demo_sk#21) + +- *(8) FileScan parquet default.customer_demographics[cd_demo_sk#21,cd_gender#2,cd_marital_status#3,cd_dep_count#19,cd_dep_employed_count#5,cd_dep_college_count#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [i_item_sk#19, i_class#15, i_category#14] + : +- *(2) Filter isnotnull(i_item_sk#19) + : +- *(2) FileScan parquet default.item[i_item_sk#19,i_class#15,i_category#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [s_store_sk#17] + +- *(3) Filter ((isnotnull(s_state#23) && (s_state#23 = TN)) && isnotnull(s_store_sk#17)) + +- *(3) FileScan parquet default.store[s_store_sk#17,s_state#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt new file mode 100644 index 000000000..ac9f32c0a --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt @@ -0,0 +1,43 @@ +TakeOrderedAndProject [i_class,i_category,lochierarchy,gross_margin,rank_within_parent] + WholeStageCodegen + Project [i_class,i_category,lochierarchy,gross_margin,rank_within_parent] + InputAdapter + Window [_w3,_w1,_w2] + WholeStageCodegen + Sort [_w1,_w2,_w3] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen + HashAggregate [sum,i_class,i_category,sum,sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),spark_grouping_id] [_w2,sum,_w1,sum,lochierarchy,sum(UnscaledValue(ss_net_profit)),_w3,sum(UnscaledValue(ss_ext_sales_price)),gross_margin] + InputAdapter + Exchange [i_category,i_class,spark_grouping_id] #2 + WholeStageCodegen + HashAggregate [sum,sum,sum,i_class,i_category,sum,ss_ext_sales_price,spark_grouping_id,ss_net_profit] [sum,sum,sum,sum] + Expand [ss_ext_sales_price,ss_net_profit,i_category,i_class] + Project [ss_ext_sales_price,ss_net_profit,i_category,i_class] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_class,ss_store_sk,ss_ext_sales_price,i_category,ss_net_profit] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_item_sk,ss_store_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [i_item_sk,i_class,i_category] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_class,i_category] [i_item_sk,i_class,i_category] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [s_store_sk] + Filter [s_state,s_store_sk] + Scan parquet default.store [s_store_sk,s_state] [s_store_sk,s_state] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt new file mode 100644 index 000000000..d9db47342 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt @@ -0,0 +1,26 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,i_current_price#3]) ++- *(5) HashAggregate(keys=[i_item_id#1, i_item_desc#2, i_current_price#3], functions=[]) + +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, i_current_price#3, 200) + +- *(4) HashAggregate(keys=[i_item_id#1, i_item_desc#2, i_current_price#3], functions=[]) + +- *(4) Project [i_item_id#1, i_item_desc#2, i_current_price#3] + +- *(4) BroadcastHashJoin [i_item_sk#4], [cs_item_sk#5], Inner, BuildRight + :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3] + : +- *(4) BroadcastHashJoin [inv_date_sk#6], [d_date_sk#7], Inner, BuildRight + : :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3, inv_date_sk#6] + : : +- *(4) BroadcastHashJoin [i_item_sk#4], [inv_item_sk#8], Inner, BuildRight + : : :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3] + : : : +- *(4) Filter ((((isnotnull(i_current_price#3) && (i_current_price#3 >= 68.00)) && (cast(i_current_price#3 as decimal(12,2)) <= 98.00)) && i_manufact_id#9 IN (677,940,694,808)) && isnotnull(i_item_sk#4)) + : : : +- *(4) FileScan parquet default.item[i_item_sk#4,i_item_id#1,i_item_desc#2,i_current_price#3,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,68.00), In(i_manufact_id, [677,94..., ReadSchema: struct= 100)) && (inv_quantity_on_hand#10 <= 500)) && isnotnull(inv_item_sk#8)) && isnotnull(inv_date_sk#6)) + : : +- *(1) FileScan parquet default.inventory[inv_date_sk#6,inv_item_sk#8,inv_quantity_on_hand#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(i..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#7] + : +- *(2) Filter (((isnotnull(d_date#11) && (d_date#11 >= 10988)) && (d_date#11 <= 11048)) && isnotnull(d_date_sk#7)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_date#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-01), LessThanOrEqual(d_date,2000-04-01), Is..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [cs_item_sk#5] + +- *(3) Filter isnotnull(cs_item_sk#5) + +- *(3) FileScan parquet default.catalog_sales[cs_item_sk#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt new file mode 100644 index 000000000..afe830f9d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt @@ -0,0 +1,34 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,i_current_price] + InputAdapter + Exchange [i_item_id,i_item_desc,i_current_price] #1 + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,i_current_price] + Project [i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [i_item_sk,cs_item_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [i_current_price,i_item_sk,inv_date_sk,i_item_desc,i_item_id] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + Filter [i_current_price,i_manufact_id,i_item_sk] + Scan parquet default.item [i_current_price,i_manufact_id,i_item_sk,i_item_desc,i_item_id] [i_current_price,i_manufact_id,i_item_sk,i_item_desc,i_item_id] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [inv_date_sk,inv_item_sk] + Filter [inv_quantity_on_hand,inv_item_sk,inv_date_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [cs_item_sk] + Filter [cs_item_sk] + Scan parquet default.catalog_sales [cs_item_sk] [cs_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt new file mode 100644 index 000000000..ac423d3c3 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt @@ -0,0 +1,55 @@ +== Physical Plan == +CollectLimit 100 ++- *(13) HashAggregate(keys=[], functions=[count(1)]) + +- Exchange SinglePartition + +- *(12) HashAggregate(keys=[], functions=[partial_count(1)]) + +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#4, ), coalesce(c_first_name#5, ), coalesce(d_date#6, 0)], LeftSemi, BuildRight, (((c_last_name#1 <=> c_last_name#4) && (c_first_name#2 <=> c_first_name#5)) && (d_date#3 <=> d_date#6)) + :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#7, ), coalesce(c_first_name#8, ), coalesce(d_date#9, 0)], LeftSemi, BuildRight, (((c_last_name#1 <=> c_last_name#7) && (c_first_name#2 <=> c_first_name#8)) && (d_date#3 <=> d_date#9)) + : :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, d_date#3, 200) + : : +- *(3) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : : +- *(3) Project [c_last_name#1, c_first_name#2, d_date#3] + : : +- *(3) BroadcastHashJoin [ss_customer_sk#10], [c_customer_sk#11], Inner, BuildRight + : : :- *(3) Project [ss_customer_sk#10, d_date#3] + : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + : : : :- *(3) Project [ss_sold_date_sk#12, ss_customer_sk#10] + : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#12) && isnotnull(ss_customer_sk#10)) + : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_customer_sk#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#13, d_date#3] + : : : +- *(1) Filter (((isnotnull(d_month_seq#14) && (d_month_seq#14 >= 1200)) && (d_month_seq#14 <= 1211)) && isnotnull(d_date_sk#13)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#13,d_date#3,d_month_seq#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [c_customer_sk#11, c_first_name#2, c_last_name#1] + : : +- *(2) Filter isnotnull(c_customer_sk#11) + : : +- *(2) FileScan parquet default.customer[c_customer_sk#11,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) + : +- *(7) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) + : +- Exchange hashpartitioning(c_last_name#7, c_first_name#8, d_date#9, 200) + : +- *(6) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) + : +- *(6) Project [c_last_name#7, c_first_name#8, d_date#9] + : +- *(6) BroadcastHashJoin [cs_bill_customer_sk#15], [c_customer_sk#16], Inner, BuildRight + : :- *(6) Project [cs_bill_customer_sk#15, d_date#9] + : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + : : :- *(6) Project [cs_sold_date_sk#17, cs_bill_customer_sk#15] + : : : +- *(6) Filter (isnotnull(cs_sold_date_sk#17) && isnotnull(cs_bill_customer_sk#15)) + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#17,cs_bill_customer_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#18, d_date#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [c_customer_sk#16, c_first_name#8, c_last_name#7], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) + +- *(11) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) + +- Exchange hashpartitioning(c_last_name#4, c_first_name#5, d_date#6, 200) + +- *(10) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) + +- *(10) Project [c_last_name#4, c_first_name#5, d_date#6] + +- *(10) BroadcastHashJoin [ws_bill_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + :- *(10) Project [ws_bill_customer_sk#19, d_date#6] + : +- *(10) BroadcastHashJoin [ws_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight + : :- *(10) Project [ws_sold_date_sk#21, ws_bill_customer_sk#19] + : : +- *(10) Filter (isnotnull(ws_sold_date_sk#21) && isnotnull(ws_bill_customer_sk#19)) + : : +- *(10) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#22, d_date#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [c_customer_sk#20, c_first_name#5, c_last_name#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt new file mode 100644 index 000000000..caec9c677 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt @@ -0,0 +1,75 @@ +CollectLimit + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [count,count] [count,count] + HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,d_date,c_first_name] + HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_first_name,d_date,d_date,c_last_name,c_last_name,c_first_name] + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_customer_sk] + Filter [ss_sold_date_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] [ss_sold_date_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] [d_date_sk,d_date,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_sk] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #6 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_bill_customer_sk] + Filter [cs_sold_date_sk,cs_bill_customer_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] [cs_sold_date_sk,cs_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_bill_customer_sk] + Filter [ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] [ws_sold_date_sk,ws_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt new file mode 100644 index 000000000..5d89d65da --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt @@ -0,0 +1,51 @@ +== Physical Plan == +*(11) Sort [w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST], true, 0 ++- Exchange rangepartitioning(w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST, 200) + +- *(10) BroadcastHashJoin [i_item_sk#2, w_warehouse_sk#1], [i_item_sk#9, w_warehouse_sk#10], Inner, BuildRight + :- *(10) Project [w_warehouse_sk#1, i_item_sk#2, d_moy#3, mean#4, CASE WHEN (mean#4 = 0.0) THEN null ELSE (stdev#11 / mean#4) END AS cov#5] + : +- *(10) Filter (CASE WHEN (mean#4 = 0.0) THEN 0.0 ELSE (stdev#11 / mean#4) END > 1.0) + : +- *(10) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) + : +- Exchange hashpartitioning(w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3, 200) + : +- *(4) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) + : +- *(4) Project [inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12, d_moy#3] + : +- *(4) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#15], Inner, BuildRight + : :- *(4) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12] + : : +- *(4) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#1], Inner, BuildRight + : : :- *(4) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#2] + : : : +- *(4) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#2], Inner, BuildRight + : : : :- *(4) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] + : : : : +- *(4) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) + : : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [i_item_sk#2] + : : : +- *(1) Filter isnotnull(i_item_sk#2) + : : : +- *(1) FileScan parquet default.item[i_item_sk#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [w_warehouse_sk#1, w_warehouse_name#12] + : : +- *(2) Filter isnotnull(w_warehouse_sk#1) + : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#1,w_warehouse_name#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [d_date_sk#15, d_moy#3] + : +- *(3) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#3)) && (d_year#18 = 2001)) && (d_moy#3 = 1)) && isnotnull(d_date_sk#15)) + : +- *(3) FileScan parquet default.date_dim[d_date_sk#15,d_year#18,d_moy#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) + +- *(9) Project [w_warehouse_sk#10, i_item_sk#9, d_moy#6, mean#7, CASE WHEN (mean#7 = 0.0) THEN null ELSE (stdev#19 / mean#7) END AS cov#8] + +- *(9) Filter (CASE WHEN (mean#7 = 0.0) THEN 0.0 ELSE (stdev#19 / mean#7) END > 1.0) + +- *(9) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) + +- Exchange hashpartitioning(w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6, 200) + +- *(8) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) + +- *(8) Project [inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20, d_moy#6] + +- *(8) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#21], Inner, BuildRight + :- *(8) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20] + : +- *(8) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#10], Inner, BuildRight + : :- *(8) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#9] + : : +- *(8) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#9], Inner, BuildRight + : : :- *(8) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] + : : : +- *(8) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) + : : : +- *(8) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : +- ReusedExchange [i_item_sk#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [w_warehouse_sk#10, w_warehouse_name#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) Project [d_date_sk#21, d_moy#6] + +- *(7) Filter ((((isnotnull(d_year#22) && isnotnull(d_moy#6)) && (d_year#22 = 2001)) && (d_moy#6 = 2)) && isnotnull(d_date_sk#21)) + +- *(7) FileScan parquet default.date_dim[d_date_sk#21,d_year#22,d_moy#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt new file mode 100644 index 000000000..0fadf4ea4 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt @@ -0,0 +1,69 @@ +WholeStageCodegen + Sort [d_moy,mean,i_item_sk,w_warehouse_sk,d_moy,mean,cov,cov] + InputAdapter + Exchange [d_moy,mean,i_item_sk,w_warehouse_sk,d_moy,mean,cov,cov] #1 + WholeStageCodegen + BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] + Project [d_moy,mean,w_warehouse_sk,i_item_sk,stdev] + Filter [mean,stdev] + HashAggregate [m2,w_warehouse_sk,d_moy,sum,count,n,w_warehouse_name,avg,stddev_samp(cast(inv_quantity_on_hand as double)),i_item_sk,avg(cast(inv_quantity_on_hand as bigint))] [stdev,mean,m2,sum,count,n,avg,stddev_samp(cast(inv_quantity_on_hand as double)),avg(cast(inv_quantity_on_hand as bigint))] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 + WholeStageCodegen + HashAggregate [m2,w_warehouse_sk,d_moy,m2,sum,count,n,w_warehouse_name,sum,avg,i_item_sk,inv_quantity_on_hand,avg,count,n] [m2,m2,sum,count,n,sum,avg,avg,count,n] + Project [d_moy,inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name,inv_date_sk] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk] [i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [w_warehouse_sk,w_warehouse_name] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk,d_moy] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [i_item_sk,d_moy,mean,stdev,w_warehouse_sk] + Filter [mean,stdev] + HashAggregate [m2,i_item_sk,sum,count,w_warehouse_name,n,d_moy,avg,stddev_samp(cast(inv_quantity_on_hand as double)),w_warehouse_sk,avg(cast(inv_quantity_on_hand as bigint))] [m2,sum,count,n,mean,avg,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,avg(cast(inv_quantity_on_hand as bigint))] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 + WholeStageCodegen + HashAggregate [m2,m2,i_item_sk,sum,count,w_warehouse_name,n,d_moy,sum,avg,w_warehouse_sk,inv_quantity_on_hand,avg,count,n] [m2,m2,sum,count,n,sum,avg,avg,count,n] + Project [w_warehouse_sk,inv_quantity_on_hand,w_warehouse_name,i_item_sk,d_moy] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [w_warehouse_sk,inv_quantity_on_hand,w_warehouse_name,i_item_sk,inv_date_sk] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + InputAdapter + ReusedExchange [i_item_sk] [i_item_sk] #3 + InputAdapter + ReusedExchange [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [d_date_sk,d_moy] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt new file mode 100644 index 000000000..bd660d199 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt @@ -0,0 +1,51 @@ +== Physical Plan == +*(11) Sort [w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST], true, 0 ++- Exchange rangepartitioning(w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST, 200) + +- *(10) BroadcastHashJoin [i_item_sk#2, w_warehouse_sk#1], [i_item_sk#9, w_warehouse_sk#10], Inner, BuildRight + :- *(10) Project [w_warehouse_sk#1, i_item_sk#2, d_moy#3, mean#4, CASE WHEN (mean#4 = 0.0) THEN null ELSE (stdev#11 / mean#4) END AS cov#5] + : +- *(10) Filter ((CASE WHEN (mean#4 = 0.0) THEN 0.0 ELSE (stdev#11 / mean#4) END > 1.0) && (CASE WHEN (mean#4 = 0.0) THEN null ELSE (stdev#11 / mean#4) END > 1.5)) + : +- *(10) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) + : +- Exchange hashpartitioning(w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3, 200) + : +- *(4) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) + : +- *(4) Project [inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12, d_moy#3] + : +- *(4) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#15], Inner, BuildRight + : :- *(4) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12] + : : +- *(4) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#1], Inner, BuildRight + : : :- *(4) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#2] + : : : +- *(4) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#2], Inner, BuildRight + : : : :- *(4) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] + : : : : +- *(4) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) + : : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [i_item_sk#2] + : : : +- *(1) Filter isnotnull(i_item_sk#2) + : : : +- *(1) FileScan parquet default.item[i_item_sk#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [w_warehouse_sk#1, w_warehouse_name#12] + : : +- *(2) Filter isnotnull(w_warehouse_sk#1) + : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#1,w_warehouse_name#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [d_date_sk#15, d_moy#3] + : +- *(3) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#3)) && (d_year#18 = 2001)) && (d_moy#3 = 1)) && isnotnull(d_date_sk#15)) + : +- *(3) FileScan parquet default.date_dim[d_date_sk#15,d_year#18,d_moy#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) + +- *(9) Project [w_warehouse_sk#10, i_item_sk#9, d_moy#6, mean#7, CASE WHEN (mean#7 = 0.0) THEN null ELSE (stdev#19 / mean#7) END AS cov#8] + +- *(9) Filter (CASE WHEN (mean#7 = 0.0) THEN 0.0 ELSE (stdev#19 / mean#7) END > 1.0) + +- *(9) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) + +- Exchange hashpartitioning(w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6, 200) + +- *(8) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) + +- *(8) Project [inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20, d_moy#6] + +- *(8) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#21], Inner, BuildRight + :- *(8) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20] + : +- *(8) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#10], Inner, BuildRight + : :- *(8) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#9] + : : +- *(8) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#9], Inner, BuildRight + : : :- *(8) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] + : : : +- *(8) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) + : : : +- *(8) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : +- ReusedExchange [i_item_sk#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [w_warehouse_sk#10, w_warehouse_name#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) Project [d_date_sk#21, d_moy#6] + +- *(7) Filter ((((isnotnull(d_year#22) && isnotnull(d_moy#6)) && (d_year#22 = 2001)) && (d_moy#6 = 2)) && isnotnull(d_date_sk#21)) + +- *(7) FileScan parquet default.date_dim[d_date_sk#21,d_year#22,d_moy#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt new file mode 100644 index 000000000..846b7cca1 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt @@ -0,0 +1,69 @@ +WholeStageCodegen + Sort [i_item_sk,mean,w_warehouse_sk,d_moy,mean,cov,d_moy,cov] + InputAdapter + Exchange [i_item_sk,mean,w_warehouse_sk,d_moy,mean,cov,d_moy,cov] #1 + WholeStageCodegen + BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] + Project [d_moy,w_warehouse_sk,i_item_sk,stdev,mean] + Filter [mean,stdev] + HashAggregate [avg,w_warehouse_sk,d_moy,w_warehouse_name,m2,avg(cast(inv_quantity_on_hand as bigint)),i_item_sk,n,count,sum,stddev_samp(cast(inv_quantity_on_hand as double))] [avg,m2,stdev,avg(cast(inv_quantity_on_hand as bigint)),n,count,mean,sum,stddev_samp(cast(inv_quantity_on_hand as double))] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 + WholeStageCodegen + HashAggregate [avg,n,w_warehouse_sk,d_moy,count,w_warehouse_name,m2,sum,i_item_sk,inv_quantity_on_hand,m2,n,avg,count,sum] [avg,n,count,m2,sum,m2,n,avg,count,sum] + Project [d_moy,inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name,inv_date_sk] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk] [i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [w_warehouse_sk,w_warehouse_name] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk,d_moy] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [mean,stdev,d_moy,w_warehouse_sk,i_item_sk] + Filter [mean,stdev] + HashAggregate [avg,m2,avg(cast(inv_quantity_on_hand as bigint)),d_moy,w_warehouse_sk,n,i_item_sk,count,w_warehouse_name,sum,stddev_samp(cast(inv_quantity_on_hand as double))] [avg,stdev,m2,avg(cast(inv_quantity_on_hand as bigint)),n,count,sum,mean,stddev_samp(cast(inv_quantity_on_hand as double))] + InputAdapter + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 + WholeStageCodegen + HashAggregate [avg,n,count,m2,sum,d_moy,w_warehouse_sk,inv_quantity_on_hand,m2,n,i_item_sk,avg,count,w_warehouse_name,sum] [avg,n,count,m2,sum,m2,n,avg,count,sum] + Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,d_moy,w_warehouse_name] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + InputAdapter + ReusedExchange [i_item_sk] [i_item_sk] #3 + InputAdapter + ReusedExchange [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [d_date_sk,d_moy] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt new file mode 100644 index 000000000..7284720b7 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt @@ -0,0 +1,124 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer_first_name#2 ASC NULLS FIRST,customer_last_name#3 ASC NULLS FIRST,customer_preferred_cust_flag#4 ASC NULLS FIRST,customer_birth_country#5 ASC NULLS FIRST,customer_login#6 ASC NULLS FIRST,customer_email_address#7 ASC NULLS FIRST], output=[customer_id#1,customer_first_name#2,customer_last_name#3,customer_preferred_cust_flag#4,customer_birth_country#5,customer_login#6,customer_email_address#7]) ++- *(25) Project [customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7] + +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#9], Inner, BuildRight, (CASE WHEN (year_total#10 > 0.000000) THEN CheckOverflow((promote_precision(year_total#11) / promote_precision(year_total#10)), DecimalType(38,14)) ELSE null END > CASE WHEN (year_total#12 > 0.000000) THEN CheckOverflow((promote_precision(year_total#13) / promote_precision(year_total#12)), DecimalType(38,14)) ELSE null END) + :- *(25) Project [customer_id#8, customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7, year_total#10, year_total#11, year_total#12] + : +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#14], Inner, BuildRight + : :- *(25) Project [customer_id#8, customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7, year_total#10, year_total#11] + : : +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#15], Inner, BuildRight, (CASE WHEN (year_total#10 > 0.000000) THEN CheckOverflow((promote_precision(year_total#11) / promote_precision(year_total#10)), DecimalType(38,14)) ELSE null END > CASE WHEN (year_total#16 > 0.000000) THEN CheckOverflow((promote_precision(year_total#17) / promote_precision(year_total#16)), DecimalType(38,14)) ELSE null END) + : : :- *(25) Project [customer_id#8, year_total#16, customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7, year_total#17, year_total#10] + : : : +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#18], Inner, BuildRight + : : : :- *(25) BroadcastHashJoin [customer_id#8], [customer_id#1], Inner, BuildRight + : : : : :- Union + : : : : : :- *(4) Filter (isnotnull(year_total#16) && (year_total#16 > 0.000000)) + : : : : : : +- *(4) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 200) + : : : : : : +- *(3) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : : : : +- *(3) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27, d_year#26] + : : : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight + : : : : : : :- *(3) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_sold_date_sk#31, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27] + : : : : : : : +- *(3) BroadcastHashJoin [c_customer_sk#33], [ss_customer_sk#34], Inner, BuildRight + : : : : : : : :- *(3) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + : : : : : : : : +- *(3) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) + : : : : : : : : +- *(3) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : : : :- LocalTableScan , [customer_id#35, year_total#36] + : : : : : +- LocalTableScan , [customer_id#37, year_total#38] + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : : : +- Union + : : : : :- *(8) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 200) + : : : : : +- *(7) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : : : +- *(7) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27, d_year#26] + : : : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight + : : : : : :- *(7) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_sold_date_sk#31, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27] + : : : : : : +- *(7) BroadcastHashJoin [c_customer_sk#33], [ss_customer_sk#34], Inner, BuildRight + : : : : : : :- *(7) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + : : : : : : : +- *(7) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) + : : : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : : :- LocalTableScan , [customer_id#35, customer_first_name#39, customer_last_name#40, customer_preferred_cust_flag#41, customer_birth_country#42, customer_login#43, customer_email_address#44, year_total#36] + : : : : +- LocalTableScan , [customer_id#37, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#38] + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : : +- Union + : : : :- LocalTableScan , [customer_id#18, year_total#10] + : : : :- *(12) Filter (isnotnull(year_total#36) && (year_total#36 > 0.000000)) + : : : : +- *(12) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 200) + : : : : +- *(11) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : : +- *(11) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51, d_year#26] + : : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#55], [d_date_sk#32], Inner, BuildRight + : : : : :- *(11) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_sold_date_sk#55, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51] + : : : : : +- *(11) BroadcastHashJoin [c_customer_sk#33], [cs_bill_customer_sk#56], Inner, BuildRight + : : : : : :- *(11) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + : : : : : : +- *(11) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) + : : : : : : +- *(11) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#37, year_total#38] + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : +- Union + : : :- LocalTableScan , [customer_id#15, year_total#11] + : : :- *(16) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 200) + : : : +- *(15) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : +- *(15) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51, d_year#26] + : : : +- *(15) BroadcastHashJoin [cs_sold_date_sk#55], [d_date_sk#32], Inner, BuildRight + : : : :- *(15) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_sold_date_sk#55, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51] + : : : : +- *(15) BroadcastHashJoin [c_customer_sk#33], [cs_bill_customer_sk#56], Inner, BuildRight + : : : : :- *(15) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + : : : : : +- *(15) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) + : : : : : +- *(15) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#37, year_total#38] + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- Union + : :- LocalTableScan , [customer_id#14, year_total#12] + : :- LocalTableScan , [customer_id#35, year_total#36] + : +- *(20) Filter (isnotnull(year_total#38) && (year_total#38 > 0.000000)) + : +- *(20) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 200) + : +- *(19) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : +- *(19) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57, d_year#26] + : +- *(19) BroadcastHashJoin [ws_sold_date_sk#61], [d_date_sk#32], Inner, BuildRight + : :- *(19) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_sold_date_sk#61, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57] + : : +- *(19) BroadcastHashJoin [c_customer_sk#33], [ws_bill_customer_sk#62], Inner, BuildRight + : : :- *(19) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + : : : +- *(19) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) + : : : +- *(19) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#9, year_total#13] + :- LocalTableScan , [customer_id#35, year_total#36] + +- *(24) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 200) + +- *(23) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + +- *(23) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57, d_year#26] + +- *(23) BroadcastHashJoin [ws_sold_date_sk#61], [d_date_sk#32], Inner, BuildRight + :- *(23) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_sold_date_sk#61, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57] + : +- *(23) BroadcastHashJoin [c_customer_sk#33], [ws_bill_customer_sk#62], Inner, BuildRight + : :- *(23) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + : : +- *(23) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) + : : +- *(23) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)]) + +- Exchange hashpartitioning(w_state#1, i_item_id#2, 200) + +- *(5) HashAggregate(keys=[w_state#1, i_item_id#2], functions=[partial_sum(CASE WHEN (d_date#5 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#5 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)]) + +- *(5) Project [cs_sales_price#6, cr_refunded_cash#7, w_state#1, i_item_id#2, d_date#5] + +- *(5) BroadcastHashJoin [cs_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + :- *(5) Project [cs_sold_date_sk#8, cs_sales_price#6, cr_refunded_cash#7, w_state#1, i_item_id#2] + : +- *(5) BroadcastHashJoin [cs_item_sk#10], [i_item_sk#11], Inner, BuildRight + : :- *(5) Project [cs_sold_date_sk#8, cs_item_sk#10, cs_sales_price#6, cr_refunded_cash#7, w_state#1] + : : +- *(5) BroadcastHashJoin [cs_warehouse_sk#12], [w_warehouse_sk#13], Inner, BuildRight + : : :- *(5) Project [cs_sold_date_sk#8, cs_warehouse_sk#12, cs_item_sk#10, cs_sales_price#6, cr_refunded_cash#7] + : : : +- *(5) BroadcastHashJoin [cs_order_number#14, cs_item_sk#10], [cr_order_number#15, cr_item_sk#16], LeftOuter, BuildRight + : : : :- *(5) Project [cs_sold_date_sk#8, cs_warehouse_sk#12, cs_item_sk#10, cs_order_number#14, cs_sales_price#6] + : : : : +- *(5) Filter ((isnotnull(cs_warehouse_sk#12) && isnotnull(cs_item_sk#10)) && isnotnull(cs_sold_date_sk#8)) + : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#8,cs_warehouse_sk#12,cs_item_sk#10,cs_order_number#14,cs_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [w_warehouse_sk#13, w_state#1] + : : +- *(2) Filter isnotnull(w_warehouse_sk#13) + : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#13,w_state#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [i_item_sk#11, i_item_id#2] + : +- *(3) Filter (((isnotnull(i_current_price#17) && (i_current_price#17 >= 0.99)) && (i_current_price#17 <= 1.49)) && isnotnull(i_item_sk#11)) + : +- *(3) FileScan parquet default.item[i_item_sk#11,i_item_id#2,i_current_price#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [d_date_sk#9, d_date#5] + +- *(4) Filter (((isnotnull(d_date#5) && (d_date#5 >= 10997)) && (d_date#5 <= 11057)) && isnotnull(d_date_sk#9)) + +- *(4) FileScan parquet default.date_dim[d_date_sk#9,d_date#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt new file mode 100644 index 000000000..2e480b9ea --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject [w_state,i_item_id,sales_before,sales_after] + WholeStageCodegen + HashAggregate [i_item_id,sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),w_state,sum] [sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sales_before,sales_after,sum] + InputAdapter + Exchange [w_state,i_item_id] #1 + WholeStageCodegen + HashAggregate [i_item_id,sum,sum,d_date,w_state,sum,cs_sales_price,cr_refunded_cash,sum] [sum,sum,sum,sum] + Project [d_date,cs_sales_price,w_state,cr_refunded_cash,i_item_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sales_price,w_state,cs_sold_date_sk,cr_refunded_cash,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_sales_price,w_state,cs_sold_date_sk,cs_item_sk,cr_refunded_cash] + BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] + Project [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cr_refunded_cash] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] + Filter [cs_warehouse_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [cr_item_sk,cr_order_number,cr_refunded_cash] + Filter [cr_order_number,cr_item_sk] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash] [cr_item_sk,cr_order_number,cr_refunded_cash] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [w_warehouse_sk,w_state] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_sk,w_state] [w_warehouse_sk,w_state] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_current_price,i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id,i_current_price] [i_item_sk,i_item_id,i_current_price] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk,d_date] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt new file mode 100644 index 000000000..886b4e209 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt @@ -0,0 +1,19 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[i_product_name#1 ASC NULLS FIRST], output=[i_product_name#1]) ++- *(4) HashAggregate(keys=[i_product_name#1], functions=[]) + +- Exchange hashpartitioning(i_product_name#1, 200) + +- *(3) HashAggregate(keys=[i_product_name#1], functions=[]) + +- *(3) Project [i_product_name#1] + +- *(3) BroadcastHashJoin [i_manufact#2], [i_manufact#2#3], Inner, BuildRight + :- *(3) Project [i_manufact#2, i_product_name#1] + : +- *(3) Filter (((isnotnull(i_manufact_id#4) && (i_manufact_id#4 >= 738)) && (i_manufact_id#4 <= 778)) && isnotnull(i_manufact#2)) + : +- *(3) FileScan parquet default.item[i_manufact_id#4,i_manufact#2,i_product_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), GreaterThanOrEqual(i_manufact_id,738), LessThanOrEqual(i_manufact_id,7..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- *(2) Project [i_manufact#2#3] + +- *(2) Filter (if (isnull(alwaysTrue#5)) 0 else item_cnt#6 > 0) + +- *(2) HashAggregate(keys=[i_manufact#2], functions=[count(1)]) + +- Exchange hashpartitioning(i_manufact#2, 200) + +- *(1) HashAggregate(keys=[i_manufact#2], functions=[partial_count(1)]) + +- *(1) Project [i_manufact#2] + +- *(1) Filter (((((i_category#7 = Women) && (((((i_color#8 = powder) || (i_color#8 = khaki)) && ((i_units#9 = Ounce) || (i_units#9 = Oz))) && ((i_size#10 = medium) || (i_size#10 = extra large))) || ((((i_color#8 = brown) || (i_color#8 = honeydew)) && ((i_units#9 = Bunch) || (i_units#9 = Ton))) && ((i_size#10 = N/A) || (i_size#10 = small))))) || ((i_category#7 = Men) && (((((i_color#8 = floral) || (i_color#8 = deep)) && ((i_units#9 = N/A) || (i_units#9 = Dozen))) && ((i_size#10 = petite) || (i_size#10 = large))) || ((((i_color#8 = light) || (i_color#8 = cornflower)) && ((i_units#9 = Box) || (i_units#9 = Pound))) && ((i_size#10 = medium) || (i_size#10 = extra large)))))) || (((i_category#7 = Women) && (((((i_color#8 = midnight) || (i_color#8 = snow)) && ((i_units#9 = Pallet) || (i_units#9 = Gross))) && ((i_size#10 = medium) || (i_size#10 = extra large))) || ((((i_color#8 = cyan) || (i_color#8 = papaya)) && ((i_units#9 = Cup) || (i_units#9 = Dram))) && ((i_size#10 = N/A) || (i_size#10 = small))))) || ((i_category#7 = Men) && (((((i_color#8 = orange) || (i_color#8 = frosted)) && ((i_units#9 = Each) || (i_units#9 = Tbl))) && ((i_size#10 = petite) || (i_size#10 = large))) || ((((i_color#8 = forest) || (i_color#8 = ghost)) && ((i_units#9 = Lb) || (i_units#9 = Bundle))) && ((i_size#10 = medium) || (i_size#10 = extra large))))))) && isnotnull(i_manufact#2)) + +- *(1) FileScan parquet default.item[i_category#7,i_manufact#2,i_size#10,i_color#8,i_units#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(Or(And(EqualTo(i_category,Women),Or(And(And(Or(EqualTo(i_color,powder),EqualTo(i_color,khaki)..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt new file mode 100644 index 000000000..caac67bf3 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt @@ -0,0 +1,25 @@ +TakeOrderedAndProject [i_product_name] + WholeStageCodegen + HashAggregate [i_product_name] + InputAdapter + Exchange [i_product_name] #1 + WholeStageCodegen + HashAggregate [i_product_name] + Project [i_product_name] + BroadcastHashJoin [i_manufact,i_manufact] + Project [i_manufact,i_product_name] + Filter [i_manufact_id,i_manufact] + Scan parquet default.item [i_manufact_id,i_manufact,i_product_name] [i_manufact_id,i_manufact,i_product_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [i_manufact] + Filter [alwaysTrue,item_cnt] + HashAggregate [i_manufact,count,count(1)] [i_manufact,alwaysTrue,item_cnt,count,count(1)] + InputAdapter + Exchange [i_manufact] #3 + WholeStageCodegen + HashAggregate [i_manufact,count,count] [count,count] + Project [i_manufact] + Filter [i_units,i_manufact,i_color,i_size,i_category] + Scan parquet default.item [i_manufact,i_units,i_size,i_category,i_color] [i_manufact,i_units,i_size,i_category,i_color] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt new file mode 100644 index 000000000..6fb079e4a --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt @@ -0,0 +1,20 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[sum(ss_ext_sales_price)#1 DESC NULLS LAST,d_year#2 ASC NULLS FIRST,i_category_id#3 ASC NULLS FIRST,i_category#4 ASC NULLS FIRST], output=[d_year#2,i_category_id#3,i_category#4,sum(ss_ext_sales_price)#1]) ++- *(4) HashAggregate(keys=[d_year#2, i_category_id#3, i_category#4], functions=[sum(UnscaledValue(ss_ext_sales_price#5))]) + +- Exchange hashpartitioning(d_year#2, i_category_id#3, i_category#4, 200) + +- *(3) HashAggregate(keys=[d_year#2, i_category_id#3, i_category#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#5))]) + +- *(3) Project [d_year#2, ss_ext_sales_price#5, i_category_id#3, i_category#4] + +- *(3) BroadcastHashJoin [ss_item_sk#6], [i_item_sk#7], Inner, BuildRight + :- *(3) Project [d_year#2, ss_item_sk#6, ss_ext_sales_price#5] + : +- *(3) BroadcastHashJoin [d_date_sk#8], [ss_sold_date_sk#9], Inner, BuildRight + : :- *(3) Project [d_date_sk#8, d_year#2] + : : +- *(3) Filter ((((isnotnull(d_moy#10) && isnotnull(d_year#2)) && (d_moy#10 = 11)) && (d_year#2 = 2000)) && isnotnull(d_date_sk#8)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_year#2,d_moy#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [ss_sold_date_sk#9, ss_item_sk#6, ss_ext_sales_price#5] + : +- *(1) Filter (isnotnull(ss_sold_date_sk#9) && isnotnull(ss_item_sk#6)) + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#6,ss_ext_sales_price#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [i_item_sk#7, i_category_id#3, i_category#4] + +- *(2) Filter ((isnotnull(i_manager_id#11) && (i_manager_id#11 = 1)) && isnotnull(i_item_sk#7)) + +- *(2) FileScan parquet default.item[i_item_sk#7,i_category_id#3,i_category#4,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt new file mode 100644 index 000000000..abcbca371 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [sum(ss_ext_sales_price),d_year,i_category_id,i_category] + WholeStageCodegen + HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),sum,d_year,i_category,i_category_id] [sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price),sum] + InputAdapter + Exchange [d_year,i_category_id,i_category] #1 + WholeStageCodegen + HashAggregate [sum,d_year,sum,i_category,i_category_id,ss_ext_sales_price] [sum,sum] + Project [d_year,ss_ext_sales_price,i_category_id,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [d_year,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_item_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_item_sk,i_category_id,i_category] + Filter [i_manager_id,i_item_sk] + Scan parquet default.item [i_item_sk,i_category_id,i_category,i_manager_id] [i_item_sk,i_category_id,i_category,i_manager_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt new file mode 100644 index 000000000..d2b16d01d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt @@ -0,0 +1,20 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,s_store_id#2 ASC NULLS FIRST,sun_sales#3 ASC NULLS FIRST,mon_sales#4 ASC NULLS FIRST,tue_sales#5 ASC NULLS FIRST,wed_sales#6 ASC NULLS FIRST,thu_sales#7 ASC NULLS FIRST,fri_sales#8 ASC NULLS FIRST,sat_sales#9 ASC NULLS FIRST], output=[s_store_name#1,s_store_id#2,sun_sales#3,mon_sales#4,tue_sales#5,wed_sales#6,thu_sales#7,fri_sales#8,sat_sales#9]) ++- *(4) HashAggregate(keys=[s_store_name#1, s_store_id#2], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#10 = Sunday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Monday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Tuesday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Wednesday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Thursday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Friday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Saturday) THEN ss_sales_price#11 ELSE null END))]) + +- Exchange hashpartitioning(s_store_name#1, s_store_id#2, 200) + +- *(3) HashAggregate(keys=[s_store_name#1, s_store_id#2], functions=[partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Sunday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Monday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Tuesday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Wednesday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Thursday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Friday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Saturday) THEN ss_sales_price#11 ELSE null END))]) + +- *(3) Project [d_day_name#10, ss_sales_price#11, s_store_id#2, s_store_name#1] + +- *(3) BroadcastHashJoin [ss_store_sk#12], [s_store_sk#13], Inner, BuildRight + :- *(3) Project [d_day_name#10, ss_store_sk#12, ss_sales_price#11] + : +- *(3) BroadcastHashJoin [d_date_sk#14], [ss_sold_date_sk#15], Inner, BuildRight + : :- *(3) Project [d_date_sk#14, d_day_name#10] + : : +- *(3) Filter ((isnotnull(d_year#16) && (d_year#16 = 2000)) && isnotnull(d_date_sk#14)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_day_name#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [ss_sold_date_sk#15, ss_store_sk#12, ss_sales_price#11] + : +- *(1) Filter (isnotnull(ss_sold_date_sk#15) && isnotnull(ss_store_sk#12)) + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#15,ss_store_sk#12,ss_sales_price#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [s_store_sk#13, s_store_id#2, s_store_name#1] + +- *(2) Filter ((isnotnull(s_gmt_offset#17) && (s_gmt_offset#17 = -5.00)) && isnotnull(s_store_sk#13)) + +- *(2) FileScan parquet default.store[s_store_sk#13,s_store_id#2,s_store_name#1,s_gmt_offset#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt new file mode 100644 index 000000000..a5e60a60c --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [sat_sales,fri_sales,sun_sales,s_store_id,tue_sales,mon_sales,s_store_name,thu_sales,wed_sales] + WholeStageCodegen + HashAggregate [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,sum,s_store_id,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),s_store_name,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END))] [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sat_sales,fri_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sun_sales,sum,sum,sum,sum,tue_sales,mon_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),thu_sales,wed_sales] + InputAdapter + Exchange [s_store_name,s_store_id] #1 + WholeStageCodegen + HashAggregate [sum,sum,sum,d_day_name,sum,sum,sum,sum,sum,sum,s_store_id,ss_sales_price,sum,sum,sum,sum,sum,s_store_name] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,ss_sales_price,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [d_day_name,ss_store_sk,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_day_name] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_day_name] [d_date_sk,d_year,d_day_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [ss_sold_date_sk,ss_store_sk,ss_sales_price] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_store_sk,ss_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [s_store_sk,s_store_id,s_store_name] + Filter [s_gmt_offset,s_store_sk] + Scan parquet default.store [s_store_sk,s_store_id,s_store_name,s_gmt_offset] [s_store_sk,s_store_id,s_store_name,s_gmt_offset] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt new file mode 100644 index 000000000..91c290e9f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt @@ -0,0 +1,50 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[rnk#1 ASC NULLS FIRST], output=[rnk#1,best_performing#2,worst_performing#3]) ++- *(10) Project [rnk#1, i_product_name#4 AS best_performing#2, i_product_name#5 AS worst_performing#3] + +- *(10) BroadcastHashJoin [item_sk#6], [i_item_sk#7], Inner, BuildRight + :- *(10) Project [rnk#1, item_sk#6, i_product_name#4] + : +- *(10) BroadcastHashJoin [item_sk#8], [i_item_sk#9], Inner, BuildRight + : :- *(10) Project [item_sk#8, rnk#1, item_sk#6] + : : +- *(10) BroadcastHashJoin [rnk#1], [rnk#10], Inner, BuildRight + : : :- *(10) Project [item_sk#8, rnk#1] + : : : +- *(10) Filter ((isnotnull(rnk#1) && (rnk#1 < 11)) && isnotnull(item_sk#8)) + : : : +- Window [rank(rank_col#11) windowspecdefinition(rank_col#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#1], [rank_col#11 ASC NULLS FIRST] + : : : +- *(3) Sort [rank_col#11 ASC NULLS FIRST], false, 0 + : : : +- Exchange SinglePartition + : : : +- *(2) Project [item_sk#8, rank_col#11] + : : : +- *(2) Filter (isnotnull(avg(ss_net_profit#12)#13) && (cast(avg(ss_net_profit#12)#13 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery7089)), DecimalType(13,7)))) + : : : : +- Subquery subquery7089 + : : : : +- *(2) HashAggregate(keys=[ss_store_sk#14], functions=[avg(UnscaledValue(ss_net_profit#12))]) + : : : : +- Exchange hashpartitioning(ss_store_sk#14, 200) + : : : : +- *(1) HashAggregate(keys=[ss_store_sk#14], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) + : : : : +- *(1) Project [ss_store_sk#14, ss_net_profit#12] + : : : : +- *(1) Filter ((isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) && isnull(ss_addr_sk#15)) + : : : : +- *(1) FileScan parquet default.store_sales[ss_addr_sk#15,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)], ReadSchema: struct + : : : +- *(2) HashAggregate(keys=[ss_item_sk#16], functions=[avg(UnscaledValue(ss_net_profit#12))]) + : : : +- Exchange hashpartitioning(ss_item_sk#16, 200) + : : : +- *(1) HashAggregate(keys=[ss_item_sk#16], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) + : : : +- *(1) Project [ss_item_sk#16, ss_net_profit#12] + : : : +- *(1) Filter (isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) + : : : +- *(1) FileScan parquet default.store_sales[ss_item_sk#16,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : +- *(7) Project [item_sk#6, rnk#10] + : : +- *(7) Filter ((isnotnull(rnk#10) && (rnk#10 < 11)) && isnotnull(item_sk#6)) + : : +- Window [rank(rank_col#17) windowspecdefinition(rank_col#17 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#10], [rank_col#17 DESC NULLS LAST] + : : +- *(6) Sort [rank_col#17 DESC NULLS LAST], false, 0 + : : +- Exchange SinglePartition + : : +- *(5) Project [item_sk#6, rank_col#17] + : : +- *(5) Filter (isnotnull(avg(ss_net_profit#12)#18) && (cast(avg(ss_net_profit#12)#18 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery7094)), DecimalType(13,7)))) + : : : +- Subquery subquery7094 + : : : +- *(2) HashAggregate(keys=[ss_store_sk#14], functions=[avg(UnscaledValue(ss_net_profit#12))]) + : : : +- Exchange hashpartitioning(ss_store_sk#14, 200) + : : : +- *(1) HashAggregate(keys=[ss_store_sk#14], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) + : : : +- *(1) Project [ss_store_sk#14, ss_net_profit#12] + : : : +- *(1) Filter ((isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) && isnull(ss_addr_sk#15)) + : : : +- *(1) FileScan parquet default.store_sales[ss_addr_sk#15,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)], ReadSchema: struct + : : +- *(5) HashAggregate(keys=[ss_item_sk#16], functions=[avg(UnscaledValue(ss_net_profit#12))]) + : : +- ReusedExchange [ss_item_sk#16, sum#19, count#20], Exchange hashpartitioning(ss_item_sk#16, 200) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(8) Project [i_item_sk#9, i_product_name#4] + : +- *(8) Filter isnotnull(i_item_sk#9) + : +- *(8) FileScan parquet default.item[i_item_sk#9,i_product_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- ReusedExchange [i_item_sk#7, i_product_name#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt new file mode 100644 index 000000000..55aab5cc2 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt @@ -0,0 +1,72 @@ +TakeOrderedAndProject [rnk,best_performing,worst_performing] + WholeStageCodegen + Project [rnk,i_product_name,i_product_name] + BroadcastHashJoin [item_sk,i_item_sk] + Project [rnk,item_sk,i_product_name] + BroadcastHashJoin [item_sk,i_item_sk] + Project [item_sk,rnk,item_sk] + BroadcastHashJoin [rnk,rnk] + Project [item_sk,rnk] + Filter [rnk,item_sk] + InputAdapter + Window [rank_col] + WholeStageCodegen + Sort [rank_col] + InputAdapter + Exchange #1 + WholeStageCodegen + Project [item_sk,rank_col] + Filter [avg(ss_net_profit)] + Subquery #1 + WholeStageCodegen + HashAggregate [ss_store_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [avg(UnscaledValue(ss_net_profit)),rank_col,sum,count] + InputAdapter + Exchange [ss_store_sk] #3 + WholeStageCodegen + HashAggregate [count,ss_store_sk,sum,sum,count,ss_net_profit] [sum,count,sum,count] + Project [ss_store_sk,ss_net_profit] + Filter [ss_store_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_store_sk,ss_net_profit] [ss_addr_sk,ss_store_sk,ss_net_profit] + HashAggregate [ss_item_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [avg(UnscaledValue(ss_net_profit)),rank_col,sum,avg(ss_net_profit),item_sk,count] + InputAdapter + Exchange [ss_item_sk] #2 + WholeStageCodegen + HashAggregate [ss_item_sk,sum,sum,count,count,ss_net_profit] [sum,count,sum,count] + Project [ss_item_sk,ss_net_profit] + Filter [ss_store_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_profit] [ss_item_sk,ss_store_sk,ss_net_profit] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [item_sk,rnk] + Filter [rnk,item_sk] + InputAdapter + Window [rank_col] + WholeStageCodegen + Sort [rank_col] + InputAdapter + Exchange #5 + WholeStageCodegen + Project [item_sk,rank_col] + Filter [avg(ss_net_profit)] + Subquery #2 + WholeStageCodegen + HashAggregate [ss_store_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [avg(UnscaledValue(ss_net_profit)),rank_col,sum,count] + InputAdapter + Exchange [ss_store_sk] #6 + WholeStageCodegen + HashAggregate [count,ss_store_sk,sum,count,sum,ss_net_profit] [sum,count,sum,count] + Project [ss_store_sk,ss_net_profit] + Filter [ss_store_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_store_sk,ss_net_profit] [ss_addr_sk,ss_store_sk,ss_net_profit] + HashAggregate [ss_item_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [avg(ss_net_profit),item_sk,count,avg(UnscaledValue(ss_net_profit)),rank_col,sum] + InputAdapter + ReusedExchange [ss_item_sk,sum,count] [ss_item_sk,sum,count] #2 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [i_item_sk,i_product_name] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_product_name] [i_item_sk,i_product_name] + InputAdapter + ReusedExchange [i_item_sk,i_product_name] [i_item_sk,i_product_name] #7 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt new file mode 100644 index 000000000..687085a12 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt @@ -0,0 +1,39 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[ca_zip#1 ASC NULLS FIRST,ca_city#2 ASC NULLS FIRST], output=[ca_zip#1,ca_city#2,sum(ws_sales_price)#3]) ++- *(7) HashAggregate(keys=[ca_zip#1, ca_city#2], functions=[sum(UnscaledValue(ws_sales_price#4))]) + +- Exchange hashpartitioning(ca_zip#1, ca_city#2, 200) + +- *(6) HashAggregate(keys=[ca_zip#1, ca_city#2], functions=[partial_sum(UnscaledValue(ws_sales_price#4))]) + +- *(6) Project [ws_sales_price#4, ca_city#2, ca_zip#1] + +- *(6) Filter (substring(ca_zip#1, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) || exists#5) + +- *(6) BroadcastHashJoin [i_item_id#6], [i_item_id#6#7], ExistenceJoin(exists#5), BuildRight + :- *(6) Project [ws_sales_price#4, ca_city#2, ca_zip#1, i_item_id#6] + : +- *(6) BroadcastHashJoin [ws_item_sk#8], [i_item_sk#9], Inner, BuildRight + : :- *(6) Project [ws_item_sk#8, ws_sales_price#4, ca_city#2, ca_zip#1] + : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#10], [d_date_sk#11], Inner, BuildRight + : : :- *(6) Project [ws_sold_date_sk#10, ws_item_sk#8, ws_sales_price#4, ca_city#2, ca_zip#1] + : : : +- *(6) BroadcastHashJoin [c_current_addr_sk#12], [ca_address_sk#13], Inner, BuildRight + : : : :- *(6) Project [ws_sold_date_sk#10, ws_item_sk#8, ws_sales_price#4, c_current_addr_sk#12] + : : : : +- *(6) BroadcastHashJoin [ws_bill_customer_sk#14], [c_customer_sk#15], Inner, BuildRight + : : : : :- *(6) Project [ws_sold_date_sk#10, ws_item_sk#8, ws_bill_customer_sk#14, ws_sales_price#4] + : : : : : +- *(6) Filter ((isnotnull(ws_bill_customer_sk#14) && isnotnull(ws_sold_date_sk#10)) && isnotnull(ws_item_sk#8)) + : : : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#10,ws_item_sk#8,ws_bill_customer_sk#14,ws_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [c_customer_sk#15, c_current_addr_sk#12] + : : : : +- *(1) Filter (isnotnull(c_customer_sk#15) && isnotnull(c_current_addr_sk#12)) + : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#15,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [ca_address_sk#13, ca_city#2, ca_zip#1] + : : : +- *(2) Filter isnotnull(ca_address_sk#13) + : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#13,ca_city#2,ca_zip#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [d_date_sk#11] + : : +- *(3) Filter ((((isnotnull(d_qoy#16) && isnotnull(d_year#17)) && (d_qoy#16 = 2)) && (d_year#17 = 2001)) && isnotnull(d_date_sk#11)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#11,d_year#17,d_qoy#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [i_item_sk#9, i_item_id#6] + : +- *(4) Filter isnotnull(i_item_sk#9) + : +- *(4) FileScan parquet default.item[i_item_sk#9,i_item_id#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- *(5) Project [i_item_id#6 AS i_item_id#6#7] + +- *(5) Filter i_item_sk#9 IN (2,3,5,7,11,13,17,19,23,29) + +- *(5) FileScan parquet default.item[i_item_sk#9,i_item_id#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_item_sk, [2,3,5,7,11,13,17,19,23,29])], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt new file mode 100644 index 000000000..f6d66999d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt @@ -0,0 +1,51 @@ +TakeOrderedAndProject [ca_zip,ca_city,sum(ws_sales_price)] + WholeStageCodegen + HashAggregate [ca_zip,ca_city,sum,sum(UnscaledValue(ws_sales_price))] [sum(UnscaledValue(ws_sales_price)),sum(ws_sales_price),sum] + InputAdapter + Exchange [ca_zip,ca_city] #1 + WholeStageCodegen + HashAggregate [ca_city,ca_zip,sum,ws_sales_price,sum] [sum,sum] + Project [ws_sales_price,ca_city,ca_zip] + Filter [ca_zip,exists] + BroadcastHashJoin [i_item_id,i_item_id] + Project [ws_sales_price,ca_city,ca_zip,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_sales_price,ca_city,ca_zip] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ca_zip,ca_city,ws_sold_date_sk,ws_sales_price,ws_item_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_sales_price,c_current_addr_sk] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_sales_price] + Filter [ws_bill_customer_sk,ws_sold_date_sk,ws_item_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_sales_price] [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_sales_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [c_customer_sk,c_current_addr_sk] + Filter [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ca_address_sk,ca_city,ca_zip] + Filter [ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_city,ca_zip] [ca_address_sk,ca_city,ca_zip] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_qoy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [i_item_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt new file mode 100644 index 000000000..0ae77a813 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt @@ -0,0 +1,41 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_name#2 ASC NULLS FIRST,ca_city#3 ASC NULLS FIRST,bought_city#4 ASC NULLS FIRST,ss_ticket_number#5 ASC NULLS FIRST], output=[c_last_name#1,c_first_name#2,ca_city#3,bought_city#4,ss_ticket_number#5,amt#6,profit#7]) ++- *(8) Project [c_last_name#1, c_first_name#2, ca_city#3, bought_city#4, ss_ticket_number#5, amt#6, profit#7] + +- *(8) BroadcastHashJoin [c_current_addr_sk#8], [ca_address_sk#9], Inner, BuildRight, NOT (ca_city#3 = bought_city#4) + :- *(8) Project [ss_ticket_number#5, bought_city#4, amt#6, profit#7, c_current_addr_sk#8, c_first_name#2, c_last_name#1] + : +- *(8) BroadcastHashJoin [ss_customer_sk#10], [c_customer_sk#11], Inner, BuildRight + : :- *(8) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#10, ss_addr_sk#12, ca_city#3], functions=[sum(UnscaledValue(ss_coupon_amt#13)), sum(UnscaledValue(ss_net_profit#14))]) + : : +- Exchange hashpartitioning(ss_ticket_number#5, ss_customer_sk#10, ss_addr_sk#12, ca_city#3, 200) + : : +- *(5) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#10, ss_addr_sk#12, ca_city#3], functions=[partial_sum(UnscaledValue(ss_coupon_amt#13)), partial_sum(UnscaledValue(ss_net_profit#14))]) + : : +- *(5) Project [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14, ca_city#3] + : : +- *(5) BroadcastHashJoin [ss_addr_sk#12], [ca_address_sk#9], Inner, BuildRight + : : :- *(5) Project [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] + : : : +- *(5) BroadcastHashJoin [ss_hdemo_sk#15], [hd_demo_sk#16], Inner, BuildRight + : : : :- *(5) Project [ss_customer_sk#10, ss_hdemo_sk#15, ss_addr_sk#12, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] + : : : : +- *(5) BroadcastHashJoin [ss_store_sk#17], [s_store_sk#18], Inner, BuildRight + : : : : :- *(5) Project [ss_customer_sk#10, ss_hdemo_sk#15, ss_addr_sk#12, ss_store_sk#17, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] + : : : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : : : : :- *(5) Project [ss_sold_date_sk#19, ss_customer_sk#10, ss_hdemo_sk#15, ss_addr_sk#12, ss_store_sk#17, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] + : : : : : : +- *(5) Filter ((((isnotnull(ss_sold_date_sk#19) && isnotnull(ss_store_sk#17)) && isnotnull(ss_hdemo_sk#15)) && isnotnull(ss_addr_sk#12)) && isnotnull(ss_customer_sk#10)) + : : : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_customer_sk#10,ss_hdemo_sk#15,ss_addr_sk#12,ss_store_sk#17,ss_ticket_number#5,ss_coupon_amt#13,ss_net_profit#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [s_store_sk#18] + : : : : +- *(2) Filter (s_city#23 IN (Fairview,Midway) && isnotnull(s_store_sk#18)) + : : : : +- *(2) FileScan parquet default.store[s_store_sk#18,s_city#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [In(s_city, [Fairview,Midway]), IsNotNull(s_store_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(3) Project [hd_demo_sk#16] + : : : +- *(3) Filter (((hd_dep_count#24 = 4) || (hd_vehicle_count#25 = 3)) && isnotnull(hd_demo_sk#16)) + : : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#16,hd_dep_count#24,hd_vehicle_count#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(4) Project [ca_address_sk#9, ca_city#3] + : : +- *(4) Filter (isnotnull(ca_address_sk#9) && isnotnull(ca_city#3)) + : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#9,ca_city#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [c_customer_sk#11, c_current_addr_sk#8, c_first_name#2, c_last_name#1] + : +- *(6) Filter (isnotnull(c_customer_sk#11) && isnotnull(c_current_addr_sk#8)) + : +- *(6) FileScan parquet default.customer[c_customer_sk#11,c_current_addr_sk#8,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + +- ReusedExchange [ca_address_sk#9, ca_city#3], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt new file mode 100644 index 000000000..3010603c0 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt @@ -0,0 +1,54 @@ +TakeOrderedAndProject [bought_city,ss_ticket_number,profit,c_last_name,c_first_name,amt,ca_city] + WholeStageCodegen + Project [bought_city,ss_ticket_number,profit,c_last_name,c_first_name,amt,ca_city] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] + Project [c_current_addr_sk,profit,amt,c_last_name,c_first_name,ss_ticket_number,bought_city] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_coupon_amt)),sum,ca_city,ss_customer_sk,sum,ss_ticket_number,ss_addr_sk] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_coupon_amt)),profit,sum,amt,sum,bought_city] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 + WholeStageCodegen + HashAggregate [sum,ca_city,ss_customer_sk,ss_coupon_amt,sum,sum,sum,ss_ticket_number,ss_addr_sk,ss_net_profit] [sum,sum,sum,sum] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ca_city,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + Filter [ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [d_date_sk] + Filter [d_dow,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_dow] [d_date_sk,d_year,d_dow] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [s_store_sk] + Filter [s_city,s_store_sk] + Scan parquet default.store [s_store_sk,s_city] [s_store_sk,s_city] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [hd_demo_sk] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [ca_address_sk,ca_city] + Filter [ca_address_sk,ca_city] + Scan parquet default.customer_address [ca_address_sk,ca_city] [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + Filter [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ca_address_sk,ca_city] [ca_address_sk,ca_city] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt new file mode 100644 index 000000000..914ebe9f5 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt @@ -0,0 +1,51 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,s_store_name#3 ASC NULLS FIRST], output=[i_category#4,i_brand#5,s_store_name#3,s_company_name#6,d_year#7,d_moy#8,avg_monthly_sales#2,sum_sales#1,psum#9,nsum#10]) ++- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, avg_monthly_sales#2, sum_sales#1, sum_sales#11 AS psum#9, sum_sales#12 AS nsum#10] + +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, (rn#18 - 1)], Inner, BuildRight + :- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13, sum_sales#11] + : +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, (rn#23 + 1)], Inner, BuildRight + : :- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13] + : : +- *(22) Filter (((isnotnull(avg_monthly_sales#2) && (avg_monthly_sales#2 > 0.000000)) && (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000)) && isnotnull(rn#13)) + : : +- Window [avg(_w0#24) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7] + : : +- *(7) Filter (isnotnull(d_year#7) && (d_year#7 = 1999)) + : : +- Window [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#13], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] + : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#6 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, 200) + : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#25))]) + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 200) + : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#25))]) + : : +- *(4) Project [i_brand#5, i_category#4, ss_sales_price#25, d_year#7, d_moy#8, s_store_name#3, s_company_name#6] + : : +- *(4) BroadcastHashJoin [ss_store_sk#26], [s_store_sk#27], Inner, BuildRight + : : :- *(4) Project [i_brand#5, i_category#4, ss_store_sk#26, ss_sales_price#25, d_year#7, d_moy#8] + : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#28], [d_date_sk#29], Inner, BuildRight + : : : :- *(4) Project [i_brand#5, i_category#4, ss_sold_date_sk#28, ss_store_sk#26, ss_sales_price#25] + : : : : +- *(4) BroadcastHashJoin [i_item_sk#30], [ss_item_sk#31], Inner, BuildRight + : : : : :- *(4) Project [i_item_sk#30, i_brand#5, i_category#4] + : : : : : +- *(4) Filter ((isnotnull(i_item_sk#30) && isnotnull(i_brand#5)) && isnotnull(i_category#4)) + : : : : : +- *(4) FileScan parquet default.item[i_item_sk#30,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : : +- *(1) Project [ss_sold_date_sk#28, ss_item_sk#31, ss_store_sk#26, ss_sales_price#25] + : : : : +- *(1) Filter ((isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#28)) && isnotnull(ss_store_sk#26)) + : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#28,ss_item_sk#31,ss_store_sk#26,ss_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [d_date_sk#29, d_year#7, d_moy#8] + : : : +- *(2) Filter ((((d_year#7 = 1999) || ((d_year#7 = 1998) && (d_moy#8 = 12))) || ((d_year#7 = 2000) && (d_moy#8 = 1))) && isnotnull(d_date_sk#29)) + : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#29,d_year#7,d_moy#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [s_store_sk#27, s_store_name#3, s_company_name#6] + : : +- *(3) Filter ((isnotnull(s_store_sk#27) && isnotnull(s_store_name#3)) && isnotnull(s_company_name#6)) + : : +- *(3) FileScan parquet default.store[s_store_sk#27,s_store_name#3,s_company_name#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] + 1))) + : +- *(14) Project [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#11, rn#23] + : +- *(14) Filter isnotnull(rn#23) + : +- Window [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#23], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] + : +- *(13) Sort [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, s_store_name#21 ASC NULLS FIRST, s_company_name#22 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 200) + : +- *(12) HashAggregate(keys=[i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33], functions=[sum(UnscaledValue(ss_sales_price#25))]) + : +- ReusedExchange [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33, sum#34], Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 200) + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] - 1))) + +- *(21) Project [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, sum_sales#12, rn#18] + +- *(21) Filter isnotnull(rn#18) + +- Window [rank(d_year#35, d_moy#36) windowspecdefinition(i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#18], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17], [d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST] + +- *(20) Sort [i_category#14 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#16 ASC NULLS FIRST, s_company_name#17 ASC NULLS FIRST, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST], false, 0 + +- ReusedExchange [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35, d_moy#36, sum_sales#12], Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 200) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt new file mode 100644 index 000000000..656b50091 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt @@ -0,0 +1,77 @@ +TakeOrderedAndProject [d_year,d_moy,s_store_name,i_category,psum,i_brand,sum_sales,s_company_name,avg_monthly_sales,nsum] + WholeStageCodegen + Project [s_store_name,d_year,avg_monthly_sales,d_moy,sum_sales,sum_sales,i_category,i_brand,s_company_name,sum_sales] + BroadcastHashJoin [s_store_name,s_company_name,i_brand,i_category,i_category,rn,i_brand,s_store_name,rn,s_company_name] + Project [d_year,s_company_name,rn,d_moy,sum_sales,i_brand,sum_sales,i_category,s_store_name,avg_monthly_sales] + BroadcastHashJoin [s_store_name,i_category,i_category,s_company_name,rn,i_brand,rn,s_company_name,s_store_name,i_brand] + Project [d_year,s_company_name,rn,d_moy,i_brand,sum_sales,i_category,s_store_name,avg_monthly_sales] + Filter [avg_monthly_sales,sum_sales,rn] + InputAdapter + Window [d_year,s_company_name,_w0,i_brand,i_category,s_store_name] + WholeStageCodegen + Filter [d_year] + InputAdapter + Window [d_year,s_company_name,d_moy,i_brand,i_category,s_store_name] + WholeStageCodegen + Sort [d_year,s_company_name,d_moy,i_brand,i_category,s_store_name] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #1 + WholeStageCodegen + HashAggregate [d_year,s_company_name,d_moy,sum,sum(UnscaledValue(ss_sales_price)),i_brand,i_category,s_store_name] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [d_year,s_company_name,d_moy,i_brand,i_category,s_store_name] #2 + WholeStageCodegen + HashAggregate [d_year,s_company_name,d_moy,sum,i_brand,ss_sales_price,i_category,sum,s_store_name] [sum,sum] + Project [s_store_name,s_company_name,d_moy,d_year,i_category,ss_sales_price,i_brand] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [d_moy,d_year,ss_store_sk,i_category,ss_sales_price,i_brand] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_store_sk,i_category,ss_sales_price,ss_sold_date_sk,i_brand] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_brand,i_category] + Filter [i_item_sk,i_brand,i_category] + Scan parquet default.item [i_item_sk,i_brand,i_category] [i_item_sk,i_brand,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk,d_year,d_moy] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [s_store_sk,s_store_name,s_company_name] + Filter [s_store_sk,s_store_name,s_company_name] + Scan parquet default.store [s_store_sk,s_store_name,s_company_name] [s_store_sk,s_store_name,s_company_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [i_category,rn,s_company_name,s_store_name,sum_sales,i_brand] + Filter [rn] + InputAdapter + Window [i_category,s_company_name,s_store_name,d_moy,d_year,i_brand] + WholeStageCodegen + Sort [i_category,s_company_name,s_store_name,d_moy,d_year,i_brand] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #7 + WholeStageCodegen + HashAggregate [i_category,s_company_name,s_store_name,sum(UnscaledValue(ss_sales_price)),d_moy,d_year,sum,i_brand] [sum(UnscaledValue(ss_sales_price)),sum_sales,sum] + InputAdapter + ReusedExchange [i_category,s_company_name,s_store_name,d_moy,d_year,sum,i_brand] [i_category,s_company_name,s_store_name,d_moy,d_year,sum,i_brand] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [rn,i_category,s_store_name,i_brand,s_company_name,sum_sales] + Filter [rn] + InputAdapter + Window [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year] + WholeStageCodegen + Sort [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year] + InputAdapter + ReusedExchange [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year,sum_sales] [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year,sum_sales] #7 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/explain.txt new file mode 100644 index 000000000..735a739a8 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/explain.txt @@ -0,0 +1,31 @@ +== Physical Plan == +*(6) HashAggregate(keys=[], functions=[sum(cast(ss_quantity#1 as bigint))]) ++- Exchange SinglePartition + +- *(5) HashAggregate(keys=[], functions=[partial_sum(cast(ss_quantity#1 as bigint))]) + +- *(5) Project [ss_quantity#1] + +- *(5) BroadcastHashJoin [ss_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight + :- *(5) Project [ss_sold_date_sk#2, ss_quantity#1] + : +- *(5) BroadcastHashJoin [ss_addr_sk#4], [ca_address_sk#5], Inner, BuildRight, ((((ca_state#6 IN (CO,OH,TX) && (ss_net_profit#7 >= 0.00)) && (ss_net_profit#7 <= 2000.00)) || ((ca_state#6 IN (OR,MN,KY) && (ss_net_profit#7 >= 150.00)) && (ss_net_profit#7 <= 3000.00))) || ((ca_state#6 IN (VA,CA,MS) && (ss_net_profit#7 >= 50.00)) && (ss_net_profit#7 <= 25000.00))) + : :- *(5) Project [ss_sold_date_sk#2, ss_addr_sk#4, ss_quantity#1, ss_net_profit#7] + : : +- *(5) BroadcastHashJoin [ss_cdemo_sk#8], [cd_demo_sk#9], Inner, BuildRight, ((((((cd_marital_status#10 = M) && (cd_education_status#11 = 4 yr Degree)) && (ss_sales_price#12 >= 100.00)) && (ss_sales_price#12 <= 150.00)) || ((((cd_marital_status#10 = D) && (cd_education_status#11 = 2 yr Degree)) && (ss_sales_price#12 >= 50.00)) && (ss_sales_price#12 <= 100.00))) || ((((cd_marital_status#10 = S) && (cd_education_status#11 = College)) && (ss_sales_price#12 >= 150.00)) && (ss_sales_price#12 <= 200.00))) + : : :- *(5) Project [ss_sold_date_sk#2, ss_cdemo_sk#8, ss_addr_sk#4, ss_quantity#1, ss_sales_price#12, ss_net_profit#7] + : : : +- *(5) BroadcastHashJoin [ss_store_sk#13], [s_store_sk#14], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#2, ss_cdemo_sk#8, ss_addr_sk#4, ss_store_sk#13, ss_quantity#1, ss_sales_price#12, ss_net_profit#7] + : : : : +- *(5) Filter (((isnotnull(ss_store_sk#13) && isnotnull(ss_cdemo_sk#8)) && isnotnull(ss_addr_sk#4)) && isnotnull(ss_sold_date_sk#2)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#2,ss_cdemo_sk#8,ss_addr_sk#4,ss_store_sk#13,ss_quantity#1,ss_sales_price#12,ss_net_profit#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] + : : +- *(2) Filter isnotnull(cd_demo_sk#9) + : : +- *(2) FileScan parquet default.customer_demographics[cd_demo_sk#9,cd_marital_status#10,cd_education_status#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [ca_address_sk#5, ca_state#6] + : +- *(3) Filter ((isnotnull(ca_country#15) && (ca_country#15 = United States)) && isnotnull(ca_address_sk#5)) + : +- *(3) FileScan parquet default.customer_address[ca_address_sk#5,ca_state#6,ca_country#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [d_date_sk#3] + +- *(4) Filter ((isnotnull(d_year#16) && (d_year#16 = 2001)) && isnotnull(d_date_sk#3)) + +- *(4) FileScan parquet default.date_dim[d_date_sk#3,d_year#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt new file mode 100644 index 000000000..78fb16b88 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt @@ -0,0 +1,41 @@ +WholeStageCodegen + HashAggregate [sum,sum(cast(ss_quantity as bigint))] [sum(cast(ss_quantity as bigint)),sum(ss_quantity),sum] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [ss_quantity,sum,sum] [sum,sum] + Project [ss_quantity] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity] + BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] + Project [ss_sold_date_sk,ss_addr_sk,ss_quantity,ss_net_profit] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk,ss_sales_price,cd_education_status,cd_marital_status] + Project [ss_addr_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_addr_sk,ss_quantity,ss_store_sk,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] + Filter [ss_store_sk,ss_cdemo_sk,ss_addr_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_quantity,ss_store_sk,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] [ss_addr_sk,ss_quantity,ss_store_sk,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk] [s_store_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [cd_demo_sk,cd_marital_status,cd_education_status] + Filter [cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [ca_address_sk,ca_state] + Filter [ca_country,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] [ca_address_sk,ca_state,ca_country] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt new file mode 100644 index 000000000..bf898b8ba --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt @@ -0,0 +1,75 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,return_rank#2 ASC NULLS FIRST,currency_rank#3 ASC NULLS FIRST], output=[channel#1,item#4,return_ratio#5,return_rank#2,currency_rank#3]) ++- *(23) HashAggregate(keys=[channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3], functions=[]) + +- Exchange hashpartitioning(channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3, 200) + +- *(22) HashAggregate(keys=[channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3], functions=[]) + +- Union + :- *(7) Project [web AS channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3] + : +- *(7) Filter ((return_rank#2 <= 10) || (currency_rank#3 <= 10)) + : +- Window [rank(currency_ratio#6) windowspecdefinition(currency_ratio#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#3], [currency_ratio#6 ASC NULLS FIRST] + : +- *(6) Sort [currency_ratio#6 ASC NULLS FIRST], false, 0 + : +- Window [rank(return_ratio#5) windowspecdefinition(return_ratio#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#2], [return_ratio#5 ASC NULLS FIRST] + : +- *(5) Sort [return_ratio#5 ASC NULLS FIRST], false, 0 + : +- Exchange SinglePartition + : +- *(4) HashAggregate(keys=[ws_item_sk#7], functions=[sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), sum(cast(coalesce(ws_quantity#9, 0) as bigint)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) + : +- Exchange hashpartitioning(ws_item_sk#7, 200) + : +- *(3) HashAggregate(keys=[ws_item_sk#7], functions=[partial_sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), partial_sum(cast(coalesce(ws_quantity#9, 0) as bigint)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) + : +- *(3) Project [ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] + : +- *(3) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] + : : +- *(3) BroadcastHashJoin [cast(ws_order_number#14 as bigint), cast(ws_item_sk#7 as bigint)], [wr_order_number#15, wr_item_sk#16], Inner, BuildRight + : : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_order_number#14, ws_quantity#9, ws_net_paid#11] + : : : +- *(3) Filter ((((((((isnotnull(ws_net_profit#17) && isnotnull(ws_quantity#9)) && isnotnull(ws_net_paid#11)) && (ws_net_profit#17 > 1.00)) && (ws_net_paid#11 > 0.00)) && (ws_quantity#9 > 0)) && isnotnull(ws_order_number#14)) && isnotnull(ws_item_sk#7)) && isnotnull(ws_sold_date_sk#12)) + : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#7,ws_order_number#14,ws_quantity#9,ws_net_paid#11,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_quantity), IsNotNull(ws_net_paid), GreaterThan(ws_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(wr_item_sk#16)) && isnotnull(wr_order_number#15)) + : : +- *(1) FileScan parquet default.web_returns[wr_item_sk#16,wr_order_number#15,wr_return_quantity#8,wr_return_amt#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_item_sk), IsNotNull(..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#13] + : +- *(2) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#19)) && (d_year#18 = 2001)) && (d_moy#19 = 12)) && isnotnull(d_date_sk#13)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_year#18,d_moy#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)], ReadSchema: struct + :- *(14) Project [catalog AS channel#20, item#21, return_ratio#22, return_rank#23, currency_rank#24] + : +- *(14) Filter ((return_rank#23 <= 10) || (currency_rank#24 <= 10)) + : +- Window [rank(currency_ratio#25) windowspecdefinition(currency_ratio#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#24], [currency_ratio#25 ASC NULLS FIRST] + : +- *(13) Sort [currency_ratio#25 ASC NULLS FIRST], false, 0 + : +- Window [rank(return_ratio#22) windowspecdefinition(return_ratio#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#23], [return_ratio#22 ASC NULLS FIRST] + : +- *(12) Sort [return_ratio#22 ASC NULLS FIRST], false, 0 + : +- Exchange SinglePartition + : +- *(11) HashAggregate(keys=[cs_item_sk#26], functions=[sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), sum(cast(coalesce(cs_quantity#28, 0) as bigint)), sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) + : +- Exchange hashpartitioning(cs_item_sk#26, 200) + : +- *(10) HashAggregate(keys=[cs_item_sk#26], functions=[partial_sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#28, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) + : +- *(10) Project [cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] + : +- *(10) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#13], Inner, BuildRight + : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] + : : +- *(10) BroadcastHashJoin [cs_order_number#32, cs_item_sk#26], [cr_order_number#33, cr_item_sk#34], Inner, BuildRight + : : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_order_number#32, cs_quantity#28, cs_net_paid#30] + : : : +- *(10) Filter ((((((((isnotnull(cs_net_profit#35) && isnotnull(cs_net_paid#30)) && isnotnull(cs_quantity#28)) && (cs_net_profit#35 > 1.00)) && (cs_net_paid#30 > 0.00)) && (cs_quantity#28 > 0)) && isnotnull(cs_item_sk#26)) && isnotnull(cs_order_number#32)) && isnotnull(cs_sold_date_sk#31)) + : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#26,cs_order_number#32,cs_quantity#28,cs_net_paid#30,cs_net_profit#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_net_profit), IsNotNull(cs_net_paid), IsNotNull(cs_quantity), GreaterThan(cs_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(cr_item_sk#34)) && isnotnull(cr_order_number#33)) + : : +- *(8) FileScan parquet default.catalog_returns[cr_item_sk#34,cr_order_number#33,cr_return_quantity#27,cr_return_amount#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_item_sk), IsNo..., ReadSchema: struct + : +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(21) Project [store AS channel#36, item#37, return_ratio#38, return_rank#39, currency_rank#40] + +- *(21) Filter ((return_rank#39 <= 10) || (currency_rank#40 <= 10)) + +- Window [rank(currency_ratio#41) windowspecdefinition(currency_ratio#41 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#40], [currency_ratio#41 ASC NULLS FIRST] + +- *(20) Sort [currency_ratio#41 ASC NULLS FIRST], false, 0 + +- Window [rank(return_ratio#38) windowspecdefinition(return_ratio#38 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#39], [return_ratio#38 ASC NULLS FIRST] + +- *(19) Sort [return_ratio#38 ASC NULLS FIRST], false, 0 + +- Exchange SinglePartition + +- *(18) HashAggregate(keys=[ss_item_sk#42], functions=[sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), sum(cast(coalesce(ss_quantity#44, 0) as bigint)), sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) + +- Exchange hashpartitioning(ss_item_sk#42, 200) + +- *(17) HashAggregate(keys=[ss_item_sk#42], functions=[partial_sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), partial_sum(cast(coalesce(ss_quantity#44, 0) as bigint)), partial_sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) + +- *(17) Project [ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] + +- *(17) BroadcastHashJoin [ss_sold_date_sk#47], [d_date_sk#13], Inner, BuildRight + :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] + : +- *(17) BroadcastHashJoin [cast(ss_ticket_number#48 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#49, sr_item_sk#50], Inner, BuildRight + : :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_ticket_number#48, ss_quantity#44, ss_net_paid#46] + : : +- *(17) Filter ((((((((isnotnull(ss_quantity#44) && isnotnull(ss_net_paid#46)) && isnotnull(ss_net_profit#51)) && (ss_net_profit#51 > 1.00)) && (ss_net_paid#46 > 0.00)) && (ss_quantity#44 > 0)) && isnotnull(ss_ticket_number#48)) && isnotnull(ss_item_sk#42)) && isnotnull(ss_sold_date_sk#47)) + : : +- *(17) FileScan parquet default.store_sales[ss_sold_date_sk#47,ss_item_sk#42,ss_ticket_number#48,ss_quantity#44,ss_net_paid#46,ss_net_profit#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), IsNotNull(ss_net_paid), IsNotNull(ss_net_profit), GreaterThan(ss_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(sr_ticket_number#49)) && isnotnull(sr_item_sk#50)) + : +- *(15) FileScan parquet default.store_returns[sr_item_sk#50,sr_ticket_number#49,sr_return_quantity#43,sr_return_amt#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNo..., ReadSchema: struct + +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt new file mode 100644 index 000000000..e0a243a63 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt @@ -0,0 +1,115 @@ +TakeOrderedAndProject [channel,currency_rank,return_rank,item,return_ratio] + WholeStageCodegen + HashAggregate [channel,currency_rank,return_rank,item,return_ratio] + InputAdapter + Exchange [channel,currency_rank,return_rank,item,return_ratio] #1 + WholeStageCodegen + HashAggregate [channel,currency_rank,return_rank,item,return_ratio] + InputAdapter + Union + WholeStageCodegen + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen + Sort [return_ratio] + InputAdapter + Exchange #2 + WholeStageCodegen + HashAggregate [sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),ws_item_sk,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum,sum(cast(coalesce(ws_quantity, 0) as bigint))] [currency_ratio,return_ratio,sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),item,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum,sum(cast(coalesce(ws_quantity, 0) as bigint))] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen + HashAggregate [sum,wr_return_amt,sum,ws_item_sk,sum,sum,sum,ws_net_paid,ws_quantity,wr_return_quantity,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [wr_return_amt,ws_net_paid,ws_quantity,wr_return_quantity,ws_item_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [wr_return_amt,ws_net_paid,ws_quantity,ws_sold_date_sk,wr_return_quantity,ws_item_sk] + BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + Project [ws_net_paid,ws_quantity,ws_order_number,ws_sold_date_sk,ws_item_sk] + Filter [ws_net_profit,ws_order_number,ws_item_sk,ws_net_paid,ws_sold_date_sk,ws_quantity] + Scan parquet default.web_sales [ws_net_paid,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_item_sk] [ws_net_paid,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + Filter [wr_return_amt,wr_item_sk,wr_order_number] + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + WholeStageCodegen + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen + Sort [return_ratio] + InputAdapter + Exchange #6 + WholeStageCodegen + HashAggregate [sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum,sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),sum,sum,cs_item_sk,sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(cast(coalesce(cs_quantity, 0) as bigint))] [sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum,item,return_ratio,sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),sum,sum,sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(cast(coalesce(cs_quantity, 0) as bigint)),currency_ratio] + InputAdapter + Exchange [cs_item_sk] #7 + WholeStageCodegen + HashAggregate [sum,cr_return_amount,cr_return_quantity,sum,cs_net_paid,cs_quantity,sum,sum,sum,sum,sum,cs_item_sk,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [cs_net_paid,cs_quantity,cr_return_quantity,cr_return_amount,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_net_paid,cs_quantity,cr_return_quantity,cs_sold_date_sk,cr_return_amount,cs_item_sk] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_net_paid,cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk] + Filter [cs_net_profit,cs_net_paid,cs_quantity,cs_item_sk,cs_sold_date_sk,cs_order_number] + Scan parquet default.catalog_sales [cs_net_paid,cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit] [cs_net_paid,cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + Filter [cr_return_amount,cr_item_sk,cr_order_number] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #5 + WholeStageCodegen + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen + Sort [return_ratio] + InputAdapter + Exchange #9 + WholeStageCodegen + HashAggregate [sum,ss_item_sk,sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum,sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum,sum] [sum,sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),item,sum(cast(coalesce(ss_quantity, 0) as bigint)),sum,sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(cast(coalesce(sr_return_quantity, 0) as bigint)),return_ratio,sum,sum,currency_ratio] + InputAdapter + Exchange [ss_item_sk] #10 + WholeStageCodegen + HashAggregate [ss_net_paid,sum,ss_item_sk,sum,sum,sr_return_amt,sum,sum,ss_quantity,sr_return_quantity,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [ss_quantity,ss_item_sk,ss_net_paid,sr_return_quantity,sr_return_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_item_sk,ss_net_paid,sr_return_quantity,ss_sold_date_sk,sr_return_amt] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_quantity,ss_item_sk,ss_net_paid,ss_sold_date_sk,ss_ticket_number] + Filter [ss_net_paid,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_net_profit,ss_ticket_number] + Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_net_paid,ss_net_profit,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_net_paid,ss_net_profit,ss_sold_date_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + Filter [sr_return_amt,sr_ticket_number,sr_item_sk] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt new file mode 100644 index 000000000..983bb86ee --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt @@ -0,0 +1,76 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NULLS FIRST], output=[channel#1,id#2,sales#3,returns#4,profit#5]) ++- *(21) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[sum(sales#7), sum(returns#8), sum(profit#9)]) + +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 200) + +- *(20) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[partial_sum(sales#7), partial_sum(returns#8), partial_sum(profit#9)]) + +- *(20) Expand [List(sales#7, returns#8, profit#9, channel#10, id#11, 0), List(sales#7, returns#8, profit#9, channel#10, null, 1), List(sales#7, returns#8, profit#9, null, null, 3)], [sales#7, returns#8, profit#9, channel#1, id#2, spark_grouping_id#6] + +- Union + :- *(6) HashAggregate(keys=[s_store_id#12], functions=[sum(UnscaledValue(sales_price#13)), sum(UnscaledValue(return_amt#14)), sum(UnscaledValue(profit#15)), sum(UnscaledValue(net_loss#16))]) + : +- Exchange hashpartitioning(s_store_id#12, 200) + : +- *(5) HashAggregate(keys=[s_store_id#12], functions=[partial_sum(UnscaledValue(sales_price#13)), partial_sum(UnscaledValue(return_amt#14)), partial_sum(UnscaledValue(profit#15)), partial_sum(UnscaledValue(net_loss#16))]) + : +- *(5) Project [sales_price#13, profit#15, return_amt#14, net_loss#16, s_store_id#12] + : +- *(5) BroadcastHashJoin [store_sk#17], [cast(s_store_sk#18 as bigint)], Inner, BuildRight + : :- *(5) Project [store_sk#17, sales_price#13, profit#15, return_amt#14, net_loss#16] + : : +- *(5) BroadcastHashJoin [date_sk#19], [cast(d_date_sk#20 as bigint)], Inner, BuildRight + : : :- Union + : : : :- *(1) Project [cast(ss_store_sk#21 as bigint) AS store_sk#17, cast(ss_sold_date_sk#22 as bigint) AS date_sk#19, ss_ext_sales_price#23 AS sales_price#13, ss_net_profit#24 AS profit#15, 0.00 AS return_amt#14, 0.00 AS net_loss#16] + : : : : +- *(1) Filter (isnotnull(cast(ss_sold_date_sk#22 as bigint)) && isnotnull(cast(ss_store_sk#21 as bigint))) + : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_store_sk#21,ss_ext_sales_price#23,ss_net_profit#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [], ReadSchema: struct= 11192)) && (d_date#35 <= 11206)) && isnotnull(d_date_sk#20)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_date#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), Is..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [s_store_sk#18, s_store_id#12] + : +- *(4) Filter isnotnull(s_store_sk#18) + : +- *(4) FileScan parquet default.store[s_store_sk#18,s_store_id#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + :- *(12) HashAggregate(keys=[cp_catalog_page_id#36], functions=[sum(UnscaledValue(sales_price#37)), sum(UnscaledValue(return_amt#38)), sum(UnscaledValue(profit#39)), sum(UnscaledValue(net_loss#40))]) + : +- Exchange hashpartitioning(cp_catalog_page_id#36, 200) + : +- *(11) HashAggregate(keys=[cp_catalog_page_id#36], functions=[partial_sum(UnscaledValue(sales_price#37)), partial_sum(UnscaledValue(return_amt#38)), partial_sum(UnscaledValue(profit#39)), partial_sum(UnscaledValue(net_loss#40))]) + : +- *(11) Project [sales_price#37, profit#39, return_amt#38, net_loss#40, cp_catalog_page_id#36] + : +- *(11) BroadcastHashJoin [page_sk#41], [cp_catalog_page_sk#42], Inner, BuildRight + : :- *(11) Project [page_sk#41, sales_price#37, profit#39, return_amt#38, net_loss#40] + : : +- *(11) BroadcastHashJoin [date_sk#43], [d_date_sk#20], Inner, BuildRight + : : :- Union + : : : :- *(7) Project [cs_catalog_page_sk#44 AS page_sk#41, cs_sold_date_sk#45 AS date_sk#43, cs_ext_sales_price#46 AS sales_price#37, cs_net_profit#47 AS profit#39, 0.00 AS return_amt#38, 0.00 AS net_loss#40] + : : : : +- *(7) Filter (isnotnull(cs_sold_date_sk#45) && isnotnull(cs_catalog_page_sk#44)) + : : : : +- *(7) FileScan parquet default.catalog_sales[cs_sold_date_sk#45,cs_catalog_page_sk#44,cs_ext_sales_price#46,cs_net_profit#47] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk)], ReadSchema: struct= 11192)) && (d_date#35 <= 11206)) && isnotnull(d_date_sk#20)) + : : +- *(9) FileScan parquet default.date_dim[d_date_sk#20,d_date#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), Is..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(10) Project [cp_catalog_page_sk#42, cp_catalog_page_id#36] + : +- *(10) Filter isnotnull(cp_catalog_page_sk#42) + : +- *(10) FileScan parquet default.catalog_page[cp_catalog_page_sk#42,cp_catalog_page_id#36] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_page], PartitionFilters: [], PushedFilters: [IsNotNull(cp_catalog_page_sk)], ReadSchema: struct + +- *(19) HashAggregate(keys=[web_site_id#58], functions=[sum(UnscaledValue(sales_price#59)), sum(UnscaledValue(return_amt#60)), sum(UnscaledValue(profit#61)), sum(UnscaledValue(net_loss#62))]) + +- Exchange hashpartitioning(web_site_id#58, 200) + +- *(18) HashAggregate(keys=[web_site_id#58], functions=[partial_sum(UnscaledValue(sales_price#59)), partial_sum(UnscaledValue(return_amt#60)), partial_sum(UnscaledValue(profit#61)), partial_sum(UnscaledValue(net_loss#62))]) + +- *(18) Project [sales_price#59, profit#61, return_amt#60, net_loss#62, web_site_id#58] + +- *(18) BroadcastHashJoin [wsr_web_site_sk#63], [web_site_sk#64], Inner, BuildRight + :- *(18) Project [wsr_web_site_sk#63, sales_price#59, profit#61, return_amt#60, net_loss#62] + : +- *(18) BroadcastHashJoin [date_sk#65], [cast(d_date_sk#20 as bigint)], Inner, BuildRight + : :- Union + : : :- *(13) Project [ws_web_site_sk#66 AS wsr_web_site_sk#63, cast(ws_sold_date_sk#67 as bigint) AS date_sk#65, ws_ext_sales_price#68 AS sales_price#59, ws_net_profit#69 AS profit#61, 0.00 AS return_amt#60, 0.00 AS net_loss#62] + : : : +- *(13) Filter (isnotnull(cast(ws_sold_date_sk#67 as bigint)) && isnotnull(ws_web_site_sk#66)) + : : : +- *(13) FileScan parquet default.web_sales[ws_sold_date_sk#67,ws_web_site_sk#66,ws_ext_sales_price#68,ws_net_profit#69] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_web_site_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(17) Project [web_site_sk#64, web_site_id#58] + +- *(17) Filter isnotnull(web_site_sk#64) + +- *(17) FileScan parquet default.web_site[web_site_sk#64,web_site_id#58] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt new file mode 100644 index 000000000..a3baee3e1 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt @@ -0,0 +1,110 @@ +TakeOrderedAndProject [profit,channel,sales,returns,id] + WholeStageCodegen + HashAggregate [sum(returns),channel,spark_grouping_id,sum,sum(profit),sum(sales),sum,sum,id] [sum(returns),profit,sum,sum(profit),sum(sales),sales,sum,sum,returns] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen + HashAggregate [sum,channel,spark_grouping_id,sum,sum,sum,profit,returns,sum,sum,id,sales] [sum,sum,sum,sum,sum,sum] + Expand [channel,id,profit,returns,sales] + InputAdapter + Union + WholeStageCodegen + HashAggregate [sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(net_loss)),sum,sum(UnscaledValue(profit)),s_store_id,sum(UnscaledValue(sales_price)),sum,sum] [sales,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(net_loss)),RETURNS,sum,sum(UnscaledValue(profit)),channel,profit,sum(UnscaledValue(sales_price)),id,sum,sum] + InputAdapter + Exchange [s_store_id] #2 + WholeStageCodegen + HashAggregate [sum,sum,return_amt,net_loss,sum,sum,s_store_id,sum,sales_price,sum,sum,sum,profit] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [s_store_id,sales_price,net_loss,return_amt,profit] + BroadcastHashJoin [store_sk,s_store_sk] + Project [store_sk,sales_price,net_loss,return_amt,profit] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen + Project [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + WholeStageCodegen + Project [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] + Filter [sr_returned_date_sk,sr_store_sk] + Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk,s_store_id] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_store_id] [s_store_sk,s_store_id] + WholeStageCodegen + HashAggregate [cp_catalog_page_id,sum(UnscaledValue(profit)),sum,sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum,sum,sum(UnscaledValue(net_loss)),sum] [sum(UnscaledValue(profit)),sum,sum(UnscaledValue(sales_price)),channel,sum(UnscaledValue(return_amt)),sales,sum,profit,sum,id,sum(UnscaledValue(net_loss)),sum,RETURNS] + InputAdapter + Exchange [cp_catalog_page_id] #5 + WholeStageCodegen + HashAggregate [cp_catalog_page_id,sum,sum,sum,sales_price,return_amt,net_loss,sum,profit,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,cp_catalog_page_id,profit,net_loss,return_amt] + BroadcastHashJoin [page_sk,cp_catalog_page_sk] + Project [sales_price,profit,page_sk,net_loss,return_amt] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen + Project [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] + Filter [cs_sold_date_sk,cs_catalog_page_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit] [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit] + WholeStageCodegen + Project [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] + Filter [cr_returned_date_sk,cr_catalog_page_sk] + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [cp_catalog_page_sk,cp_catalog_page_id] + Filter [cp_catalog_page_sk] + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] [cp_catalog_page_sk,cp_catalog_page_id] + WholeStageCodegen + HashAggregate [web_site_id,sum,sum,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price)),sum(UnscaledValue(profit)),sum,sum(UnscaledValue(net_loss))] [sales,profit,sum,sum,RETURNS,channel,id,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price)),sum(UnscaledValue(profit)),sum,sum(UnscaledValue(net_loss))] + InputAdapter + Exchange [web_site_id] #8 + WholeStageCodegen + HashAggregate [web_site_id,sum,profit,sum,sum,net_loss,sales_price,sum,sum,sum,sum,sum,return_amt] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [web_site_id,profit,sales_price,return_amt,net_loss] + BroadcastHashJoin [wsr_web_site_sk,web_site_sk] + Project [profit,wsr_web_site_sk,sales_price,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen + Project [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] + Filter [ws_sold_date_sk,ws_web_site_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit] [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit] + WholeStageCodegen + Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [wr_order_number,wr_return_amt,wr_returned_date_sk,wr_net_loss,wr_item_sk] + Filter [wr_returned_date_sk] + Scan parquet default.web_returns [wr_order_number,wr_return_amt,wr_returned_date_sk,wr_net_loss,wr_item_sk] [wr_order_number,wr_return_amt,wr_returned_date_sk,wr_net_loss,wr_item_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [ws_item_sk,ws_web_site_sk,ws_order_number] + Filter [ws_item_sk,ws_order_number,ws_web_site_sk] + Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] [ws_item_sk,ws_web_site_sk,ws_order_number] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [web_site_sk,web_site_id] + Filter [web_site_sk] + Scan parquet default.web_site [web_site_sk,web_site_id] [web_site_sk,web_site_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt new file mode 100644 index 000000000..f1df63c4f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt @@ -0,0 +1,32 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,s_company_id#2 ASC NULLS FIRST,s_street_number#3 ASC NULLS FIRST,s_street_name#4 ASC NULLS FIRST,s_street_type#5 ASC NULLS FIRST,s_suite_number#6 ASC NULLS FIRST,s_city#7 ASC NULLS FIRST,s_county#8 ASC NULLS FIRST,s_state#9 ASC NULLS FIRST,s_zip#10 ASC NULLS FIRST], output=[s_store_name#1,s_company_id#2,s_street_number#3,s_street_name#4,s_street_type#5,s_suite_number#6,s_city#7,s_county#8,s_state#9,s_zip#10,30 days #11,31 - 60 days #12,61 - 90 days #13,91 - 120 days #14,>120 days #15]) ++- *(6) HashAggregate(keys=[s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10], functions=[sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 30) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 60) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 90) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))]) + +- Exchange hashpartitioning(s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10, 200) + +- *(5) HashAggregate(keys=[s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10], functions=[partial_sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 30) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 60) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 90) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))]) + +- *(5) Project [ss_sold_date_sk#17, sr_returned_date_sk#16, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] + +- *(5) BroadcastHashJoin [sr_returned_date_sk#16], [cast(d_date_sk#18 as bigint)], Inner, BuildRight + :- *(5) Project [ss_sold_date_sk#17, sr_returned_date_sk#16, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] + : +- *(5) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#19], Inner, BuildRight + : :- *(5) Project [ss_sold_date_sk#17, sr_returned_date_sk#16, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] + : : +- *(5) BroadcastHashJoin [ss_store_sk#20], [s_store_sk#21], Inner, BuildRight + : : :- *(5) Project [ss_sold_date_sk#17, ss_store_sk#20, sr_returned_date_sk#16] + : : : +- *(5) BroadcastHashJoin [cast(ss_ticket_number#22 as bigint), cast(ss_item_sk#23 as bigint), cast(ss_customer_sk#24 as bigint)], [sr_ticket_number#25, sr_item_sk#26, sr_customer_sk#27], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#17, ss_item_sk#23, ss_customer_sk#24, ss_store_sk#20, ss_ticket_number#22] + : : : : +- *(5) Filter ((((isnotnull(ss_ticket_number#22) && isnotnull(ss_item_sk#23)) && isnotnull(ss_customer_sk#24)) && isnotnull(ss_store_sk#20)) && isnotnull(ss_sold_date_sk#17)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_item_sk#23,ss_customer_sk#24,ss_store_sk#20,ss_ticket_number#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_stor..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[3, bigint, true], input[1, bigint, true], input[2, bigint, true])) + : : : +- *(1) Project [sr_returned_date_sk#16, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#25] + : : : +- *(1) Filter (((isnotnull(sr_ticket_number#25) && isnotnull(sr_customer_sk#27)) && isnotnull(sr_item_sk#26)) && isnotnull(sr_returned_date_sk#16)) + : : : +- *(1) FileScan parquet default.store_returns[sr_returned_date_sk#16,sr_item_sk#26,sr_customer_sk#27,sr_ticket_number#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_retu..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [s_store_sk#21, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] + : : +- *(2) Filter isnotnull(s_store_sk#21) + : : +- *(2) FileScan parquet default.store[s_store_sk#21,s_store_name#1,s_company_id#2,s_street_number#3,s_street_name#4,s_street_type#5,s_suite_number#6,s_city#7,s_county#8,s_state#9,s_zip#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [d_date_sk#18] + +- *(4) Filter ((((isnotnull(d_year#28) && isnotnull(d_moy#29)) && (d_year#28 = 2001)) && (d_moy#29 = 8)) && isnotnull(d_date_sk#18)) + +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#28,d_moy#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt new file mode 100644 index 000000000..fcbf25c1d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject [s_street_number,s_street_type,s_company_id,31 - 60 days ,61 - 90 days ,s_county,>120 days ,30 days ,91 - 120 days ,s_state,s_street_name,s_city,s_suite_number,s_store_name,s_zip] + WholeStageCodegen + HashAggregate [s_street_number,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),s_street_type,sum,s_company_id,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),s_county,sum,s_state,s_street_name,sum,sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),sum,s_city,s_suite_number,s_store_name,s_zip,sum] [sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum,31 - 60 days ,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum,>120 days ,30 days ,91 - 120 days ,sum,sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),sum,sum] + InputAdapter + Exchange [s_street_number,s_street_type,s_company_id,s_county,s_state,s_street_name,s_city,s_suite_number,s_store_name,s_zip] #1 + WholeStageCodegen + HashAggregate [sum,sum,s_street_number,ss_sold_date_sk,s_street_type,sum,s_company_id,sum,s_county,sum,s_state,s_street_name,sum,sum,sr_returned_date_sk,sum,sum,s_city,s_suite_number,s_store_name,s_zip,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_zip,s_state,s_suite_number,ss_sold_date_sk,s_city,sr_returned_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_zip,s_state,s_suite_number,ss_sold_date_sk,s_city,sr_returned_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_zip,s_state,s_suite_number,ss_sold_date_sk,s_city,sr_returned_date_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_sold_date_sk,ss_store_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] + Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] + Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_store_sk,s_zip,s_state,s_suite_number,s_city] + Filter [s_store_sk] + Scan parquet default.store [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_store_sk,s_zip,s_state,s_suite_number,s_city] [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_store_sk,s_zip,s_state,s_suite_number,s_city] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk] [d_date_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt new file mode 100644 index 000000000..004057dbc --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt @@ -0,0 +1,41 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[item_sk#1 ASC NULLS FIRST,d_date#2 ASC NULLS FIRST], output=[item_sk#1,d_date#2,web_sales#3,store_sales#4,web_cumulative#5,store_cumulative#6]) ++- *(15) Filter ((isnotnull(web_cumulative#5) && isnotnull(store_cumulative#6)) && (web_cumulative#5 > store_cumulative#6)) + +- Window [max(web_sales#3) windowspecdefinition(item_sk#1, d_date#2 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#5, max(store_sales#4) windowspecdefinition(item_sk#1, d_date#2 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#6], [item_sk#1], [d_date#2 ASC NULLS FIRST] + +- *(14) Sort [item_sk#1 ASC NULLS FIRST, d_date#2 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(item_sk#1, 200) + +- *(13) Project [CASE WHEN isnotnull(item_sk#7) THEN item_sk#7 ELSE item_sk#8 END AS item_sk#1, CASE WHEN isnotnull(d_date#9) THEN d_date#9 ELSE d_date#10 END AS d_date#2, cume_sales#11 AS web_sales#3, cume_sales#12 AS store_sales#4] + +- SortMergeJoin [item_sk#7, d_date#9], [item_sk#8, d_date#10], FullOuter + :- *(6) Sort [item_sk#7 ASC NULLS FIRST, d_date#9 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(item_sk#7, d_date#9, 200) + : +- *(5) Project [item_sk#7, d_date#9, cume_sales#11] + : +- Window [sum(_w0#13) windowspecdefinition(ws_item_sk#14, d_date#9 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#11], [ws_item_sk#14], [d_date#9 ASC NULLS FIRST] + : +- *(4) Sort [ws_item_sk#14 ASC NULLS FIRST, d_date#9 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(ws_item_sk#14, 200) + : +- *(3) HashAggregate(keys=[ws_item_sk#14, d_date#9], functions=[sum(UnscaledValue(ws_sales_price#15))]) + : +- Exchange hashpartitioning(ws_item_sk#14, d_date#9, 200) + : +- *(2) HashAggregate(keys=[ws_item_sk#14, d_date#9], functions=[partial_sum(UnscaledValue(ws_sales_price#15))]) + : +- *(2) Project [ws_item_sk#14, ws_sales_price#15, d_date#9] + : +- *(2) BroadcastHashJoin [ws_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight + : :- *(2) Project [ws_sold_date_sk#16, ws_item_sk#14, ws_sales_price#15] + : : +- *(2) Filter (isnotnull(ws_item_sk#14) && isnotnull(ws_sold_date_sk#16)) + : : +- *(2) FileScan parquet default.web_sales[ws_sold_date_sk#16,ws_item_sk#14,ws_sales_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [d_date_sk#17, d_date#9] + : +- *(1) Filter (((isnotnull(d_month_seq#18) && (d_month_seq#18 >= 1200)) && (d_month_seq#18 <= 1211)) && isnotnull(d_date_sk#17)) + : +- *(1) FileScan parquet default.date_dim[d_date_sk#17,d_date#9,d_month_seq#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + +- *(12) Sort [item_sk#8 ASC NULLS FIRST, d_date#10 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(item_sk#8, d_date#10, 200) + +- *(11) Project [item_sk#8, d_date#10, cume_sales#12] + +- Window [sum(_w0#19) windowspecdefinition(ss_item_sk#20, d_date#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#12], [ss_item_sk#20], [d_date#10 ASC NULLS FIRST] + +- *(10) Sort [ss_item_sk#20 ASC NULLS FIRST, d_date#10 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(ss_item_sk#20, 200) + +- *(9) HashAggregate(keys=[ss_item_sk#20, d_date#10], functions=[sum(UnscaledValue(ss_sales_price#21))]) + +- Exchange hashpartitioning(ss_item_sk#20, d_date#10, 200) + +- *(8) HashAggregate(keys=[ss_item_sk#20, d_date#10], functions=[partial_sum(UnscaledValue(ss_sales_price#21))]) + +- *(8) Project [ss_item_sk#20, ss_sales_price#21, d_date#10] + +- *(8) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#23], Inner, BuildRight + :- *(8) Project [ss_sold_date_sk#22, ss_item_sk#20, ss_sales_price#21] + : +- *(8) Filter (isnotnull(ss_item_sk#20) && isnotnull(ss_sold_date_sk#22)) + : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_item_sk#20,ss_sales_price#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#23, d_date#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt new file mode 100644 index 000000000..af45acaf5 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt @@ -0,0 +1,67 @@ +TakeOrderedAndProject [item_sk,store_cumulative,store_sales,web_sales,d_date,web_cumulative] + WholeStageCodegen + Filter [web_cumulative,store_cumulative] + InputAdapter + Window [web_sales,item_sk,d_date,store_sales] + WholeStageCodegen + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk] #1 + WholeStageCodegen + Project [d_date,d_date,item_sk,item_sk,cume_sales,cume_sales] + InputAdapter + SortMergeJoin [item_sk,d_date,item_sk,d_date] + WholeStageCodegen + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #2 + WholeStageCodegen + Project [item_sk,d_date,cume_sales] + InputAdapter + Window [_w0,ws_item_sk,d_date] + WholeStageCodegen + Sort [ws_item_sk,d_date] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen + HashAggregate [ws_item_sk,d_date,sum,sum(UnscaledValue(ws_sales_price))] [sum(UnscaledValue(ws_sales_price)),item_sk,_w0,sum] + InputAdapter + Exchange [ws_item_sk,d_date] #4 + WholeStageCodegen + HashAggregate [sum,ws_item_sk,sum,d_date,ws_sales_price] [sum,sum] + Project [ws_item_sk,ws_sales_price,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_sales_price] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_sales_price] [ws_sold_date_sk,ws_item_sk,ws_sales_price] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] [d_date_sk,d_date,d_month_seq] + WholeStageCodegen + Sort [item_sk,d_date] + InputAdapter + Exchange [item_sk,d_date] #6 + WholeStageCodegen + Project [item_sk,d_date,cume_sales] + InputAdapter + Window [_w0,ss_item_sk,d_date] + WholeStageCodegen + Sort [ss_item_sk,d_date] + InputAdapter + Exchange [ss_item_sk] #7 + WholeStageCodegen + HashAggregate [ss_item_sk,d_date,sum,sum(UnscaledValue(ss_sales_price))] [sum(UnscaledValue(ss_sales_price)),item_sk,_w0,sum] + InputAdapter + Exchange [ss_item_sk,d_date] #8 + WholeStageCodegen + HashAggregate [d_date,sum,ss_sales_price,sum,ss_item_sk] [sum,sum] + Project [ss_item_sk,ss_sales_price,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_sales_price] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_sales_price] + InputAdapter + ReusedExchange [d_date_sk,d_date] [d_date_sk,d_date] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt new file mode 100644 index 000000000..aefc40ed5 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt @@ -0,0 +1,20 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[d_year#1 ASC NULLS FIRST,ext_price#2 DESC NULLS LAST,brand_id#3 ASC NULLS FIRST], output=[d_year#1,brand_id#3,brand#4,ext_price#2]) ++- *(4) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[sum(UnscaledValue(ss_ext_sales_price#7))]) + +- Exchange hashpartitioning(d_year#1, i_brand#5, i_brand_id#6, 200) + +- *(3) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#7))]) + +- *(3) Project [d_year#1, ss_ext_sales_price#7, i_brand_id#6, i_brand#5] + +- *(3) BroadcastHashJoin [ss_item_sk#8], [i_item_sk#9], Inner, BuildRight + :- *(3) Project [d_year#1, ss_item_sk#8, ss_ext_sales_price#7] + : +- *(3) BroadcastHashJoin [d_date_sk#10], [ss_sold_date_sk#11], Inner, BuildRight + : :- *(3) Project [d_date_sk#10, d_year#1] + : : +- *(3) Filter ((((isnotnull(d_moy#12) && isnotnull(d_year#1)) && (d_moy#12 = 11)) && (d_year#1 = 2000)) && isnotnull(d_date_sk#10)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#10,d_year#1,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [ss_sold_date_sk#11, ss_item_sk#8, ss_ext_sales_price#7] + : +- *(1) Filter (isnotnull(ss_sold_date_sk#11) && isnotnull(ss_item_sk#8)) + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#8,ss_ext_sales_price#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [i_item_sk#9, i_brand_id#6, i_brand#5] + +- *(2) Filter ((isnotnull(i_manager_id#13) && (i_manager_id#13 = 1)) && isnotnull(i_item_sk#9)) + +- *(2) FileScan parquet default.item[i_item_sk#9,i_brand_id#6,i_brand#5,i_manager_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt new file mode 100644 index 000000000..00c2d4b14 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [d_year,ext_price,brand_id,brand] + WholeStageCodegen + HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),d_year,i_brand,i_brand_id,sum] [brand_id,sum(UnscaledValue(ss_ext_sales_price)),brand,sum,ext_price] + InputAdapter + Exchange [d_year,i_brand,i_brand_id] #1 + WholeStageCodegen + HashAggregate [sum,d_year,i_brand,ss_ext_sales_price,i_brand_id,sum] [sum,sum] + Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [d_year,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_item_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt new file mode 100644 index 000000000..80e2ae182 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt @@ -0,0 +1,31 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[avg_quarterly_sales#1 ASC NULLS FIRST,sum_sales#2 ASC NULLS FIRST,i_manufact_id#3 ASC NULLS FIRST], output=[i_manufact_id#3,sum_sales#2,avg_quarterly_sales#1]) ++- *(7) Project [i_manufact_id#3, sum_sales#2, avg_quarterly_sales#1] + +- *(7) Filter (CASE WHEN (avg_quarterly_sales#1 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#2 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#1 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#1 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000) + +- Window [avg(_w0#4) windowspecdefinition(i_manufact_id#3, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#1], [i_manufact_id#3] + +- *(6) Sort [i_manufact_id#3 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(i_manufact_id#3, 200) + +- *(5) HashAggregate(keys=[i_manufact_id#3, d_qoy#5], functions=[sum(UnscaledValue(ss_sales_price#6))]) + +- Exchange hashpartitioning(i_manufact_id#3, d_qoy#5, 200) + +- *(4) HashAggregate(keys=[i_manufact_id#3, d_qoy#5], functions=[partial_sum(UnscaledValue(ss_sales_price#6))]) + +- *(4) Project [i_manufact_id#3, ss_sales_price#6, d_qoy#5] + +- *(4) BroadcastHashJoin [ss_store_sk#7], [s_store_sk#8], Inner, BuildRight + :- *(4) Project [i_manufact_id#3, ss_store_sk#7, ss_sales_price#6, d_qoy#5] + : +- *(4) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight + : :- *(4) Project [i_manufact_id#3, ss_sold_date_sk#9, ss_store_sk#7, ss_sales_price#6] + : : +- *(4) BroadcastHashJoin [i_item_sk#11], [ss_item_sk#12], Inner, BuildRight + : : :- *(4) Project [i_item_sk#11, i_manufact_id#3] + : : : +- *(4) Filter ((((i_category#13 IN (Books,Children,Electronics) && i_class#14 IN (personal,portable,reference,self-help)) && i_brand#15 IN (scholaramalgamalg #16,scholaramalgamalg #17,exportiunivamalg #18,scholaramalgamalg #18)) || ((i_category#13 IN (Women,Music,Men) && i_class#14 IN (accessories,classical,fragrances,pants)) && i_brand#15 IN (amalgimporto #19,edu packscholar #19,exportiimporto #19,importoamalg #19))) && isnotnull(i_item_sk#11)) + : : : +- *(4) FileScan parquet default.item[i_item_sk#11,i_brand#15,i_class#14,i_category#13,i_manufact_id#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,reference..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : +- *(1) Project [ss_sold_date_sk#9, ss_item_sk#12, ss_store_sk#7, ss_sales_price#6] + : : +- *(1) Filter ((isnotnull(ss_item_sk#12) && isnotnull(ss_sold_date_sk#9)) && isnotnull(ss_store_sk#7)) + : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#12,ss_store_sk#7,ss_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#10, d_qoy#5] + : +- *(2) Filter (d_month_seq#20 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) && isnotnull(d_date_sk#10)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#10,d_month_seq#20,d_qoy#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [s_store_sk#8] + +- *(3) Filter isnotnull(s_store_sk#8) + +- *(3) FileScan parquet default.store[s_store_sk#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt new file mode 100644 index 000000000..6a8804ae2 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt @@ -0,0 +1,43 @@ +TakeOrderedAndProject [avg_quarterly_sales,sum_sales,i_manufact_id] + WholeStageCodegen + Project [i_manufact_id,sum_sales,avg_quarterly_sales] + Filter [avg_quarterly_sales,sum_sales] + InputAdapter + Window [_w0,i_manufact_id] + WholeStageCodegen + Sort [i_manufact_id] + InputAdapter + Exchange [i_manufact_id] #1 + WholeStageCodegen + HashAggregate [i_manufact_id,d_qoy,sum,sum(UnscaledValue(ss_sales_price))] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_manufact_id,d_qoy] #2 + WholeStageCodegen + HashAggregate [d_qoy,sum,ss_sales_price,sum,i_manufact_id] [sum,sum] + Project [i_manufact_id,ss_sales_price,d_qoy] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_manufact_id,ss_store_sk,ss_sales_price,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_manufact_id,ss_sold_date_sk,ss_store_sk,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_manufact_id] + Filter [i_category,i_class,i_brand,i_item_sk] + Scan parquet default.item [i_class,i_manufact_id,i_item_sk,i_category,i_brand] [i_class,i_manufact_id,i_item_sk,i_category,i_brand] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk,d_qoy] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_qoy] [d_date_sk,d_month_seq,d_qoy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk] [s_store_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt new file mode 100644 index 000000000..142360af3 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt @@ -0,0 +1,88 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[segment#1 ASC NULLS FIRST,num_customers#2 ASC NULLS FIRST], output=[segment#1,num_customers#2,segment_base#3]) ++- *(13) HashAggregate(keys=[segment#1], functions=[count(1)]) + +- Exchange hashpartitioning(segment#1, 200) + +- *(12) HashAggregate(keys=[segment#1], functions=[partial_count(1)]) + +- *(12) HashAggregate(keys=[c_customer_sk#4], functions=[sum(UnscaledValue(ss_ext_sales_price#5))]) + +- Exchange hashpartitioning(c_customer_sk#4, 200) + +- *(11) HashAggregate(keys=[c_customer_sk#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#5))]) + +- *(11) Project [c_customer_sk#4, ss_ext_sales_price#5] + +- *(11) BroadcastHashJoin [ss_sold_date_sk#6], [d_date_sk#7], Inner, BuildRight + :- *(11) Project [c_customer_sk#4, ss_sold_date_sk#6, ss_ext_sales_price#5] + : +- *(11) BroadcastHashJoin [ca_county#8, ca_state#9], [s_county#10, s_state#11], Inner, BuildRight + : :- *(11) Project [c_customer_sk#4, ss_sold_date_sk#6, ss_ext_sales_price#5, ca_county#8, ca_state#9] + : : +- *(11) BroadcastHashJoin [c_current_addr_sk#12], [ca_address_sk#13], Inner, BuildRight + : : :- *(11) Project [c_customer_sk#4, c_current_addr_sk#12, ss_sold_date_sk#6, ss_ext_sales_price#5] + : : : +- *(11) BroadcastHashJoin [c_customer_sk#4], [ss_customer_sk#14], Inner, BuildRight + : : : :- *(11) HashAggregate(keys=[c_customer_sk#4, c_current_addr_sk#12], functions=[]) + : : : : +- Exchange hashpartitioning(c_customer_sk#4, c_current_addr_sk#12, 200) + : : : : +- *(6) HashAggregate(keys=[c_customer_sk#4, c_current_addr_sk#12], functions=[]) + : : : : +- *(6) Project [c_customer_sk#4, c_current_addr_sk#12] + : : : : +- *(6) BroadcastHashJoin [customer_sk#15], [c_customer_sk#4], Inner, BuildRight + : : : : :- *(6) Project [customer_sk#15] + : : : : : +- *(6) BroadcastHashJoin [sold_date_sk#16], [d_date_sk#7], Inner, BuildRight + : : : : : :- *(6) Project [sold_date_sk#16, customer_sk#15] + : : : : : : +- *(6) BroadcastHashJoin [item_sk#17], [i_item_sk#18], Inner, BuildRight + : : : : : : :- Union + : : : : : : : :- *(1) Project [cs_sold_date_sk#19 AS sold_date_sk#16, cs_bill_customer_sk#20 AS customer_sk#15, cs_item_sk#21 AS item_sk#17] + : : : : : : : : +- *(1) Filter ((isnotnull(cs_item_sk#21) && isnotnull(cs_sold_date_sk#19)) && isnotnull(cs_bill_customer_sk#20)) + : : : : : : : : +- *(1) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_customer_sk#20,cs_item_sk#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct + : : : : : : : +- *(2) Project [ws_sold_date_sk#22 AS sold_date_sk#23, ws_bill_customer_sk#24 AS customer_sk#25, ws_item_sk#26 AS item_sk#27] + : : : : : : : +- *(2) Filter ((isnotnull(ws_item_sk#26) && isnotnull(ws_sold_date_sk#22)) && isnotnull(ws_bill_customer_sk#24)) + : : : : : : : +- *(2) FileScan parquet default.web_sales[ws_sold_date_sk#22,ws_item_sk#26,ws_bill_customer_sk#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct + : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : +- *(3) Project [i_item_sk#18] + : : : : : : +- *(3) Filter ((((isnotnull(i_category#28) && isnotnull(i_class#29)) && (i_category#28 = Women)) && (i_class#29 = maternity)) && isnotnull(i_item_sk#18)) + : : : : : : +- *(3) FileScan parquet default.item[i_item_sk#18,i_class#29,i_category#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women), EqualTo(i_class,maternity)..., ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(4) Project [d_date_sk#7] + : : : : : +- *(4) Filter ((((isnotnull(d_moy#30) && isnotnull(d_year#31)) && (d_moy#30 = 12)) && (d_year#31 = 1998)) && isnotnull(d_date_sk#7)) + : : : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#7,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(5) Project [c_customer_sk#4, c_current_addr_sk#12] + : : : : +- *(5) Filter (isnotnull(c_customer_sk#4) && isnotnull(c_current_addr_sk#12)) + : : : : +- *(5) FileScan parquet default.customer[c_customer_sk#4,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : +- *(7) Project [ss_sold_date_sk#6, ss_customer_sk#14, ss_ext_sales_price#5] + : : : +- *(7) Filter (isnotnull(ss_customer_sk#14) && isnotnull(ss_sold_date_sk#6)) + : : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#6,ss_customer_sk#14,ss_ext_sales_price#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(8) Project [ca_address_sk#13, ca_county#8, ca_state#9] + : : +- *(8) Filter ((isnotnull(ca_address_sk#13) && isnotnull(ca_state#9)) && isnotnull(ca_county#8)) + : : +- *(8) FileScan parquet default.customer_address[ca_address_sk#13,ca_county#8,ca_state#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state), IsNotNull(ca_county)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true])) + : +- *(9) Project [s_county#10, s_state#11] + : +- *(9) Filter (isnotnull(s_state#11) && isnotnull(s_county#10)) + : +- *(9) FileScan parquet default.store[s_county#10,s_state#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), IsNotNull(s_county)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(10) Project [d_date_sk#7] + +- *(10) Filter (((isnotnull(d_month_seq#32) && (d_month_seq#32 >= Subquery subquery10089)) && (d_month_seq#32 <= Subquery subquery10090)) && isnotnull(d_date_sk#7)) + : :- Subquery subquery10089 + : : +- *(2) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) + : : +- Exchange hashpartitioning((d_month_seq + 1)#33, 200) + : : +- *(1) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) + : : +- *(1) Project [(d_month_seq#32 + 1) AS (d_month_seq + 1)#33] + : : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) + : : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct + : +- Subquery subquery10090 + : +- *(2) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) + : +- Exchange hashpartitioning((d_month_seq + 3)#34, 200) + : +- *(1) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) + : +- *(1) Project [(d_month_seq#32 + 3) AS (d_month_seq + 3)#34] + : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) + : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct + +- *(10) FileScan parquet default.date_dim[d_date_sk#7,d_month_seq#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct + :- Subquery subquery10089 + : +- *(2) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) + : +- Exchange hashpartitioning((d_month_seq + 1)#33, 200) + : +- *(1) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) + : +- *(1) Project [(d_month_seq#32 + 1) AS (d_month_seq + 1)#33] + : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) + : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct + +- Subquery subquery10090 + +- *(2) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) + +- Exchange hashpartitioning((d_month_seq + 3)#34, 200) + +- *(1) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) + +- *(1) Project [(d_month_seq#32 + 3) AS (d_month_seq + 3)#34] + +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) + +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt new file mode 100644 index 000000000..3707aa2f8 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt @@ -0,0 +1,123 @@ +TakeOrderedAndProject [segment,num_customers,segment_base] + WholeStageCodegen + HashAggregate [segment,count,count(1)] [count(1),num_customers,segment_base,count] + InputAdapter + Exchange [segment] #1 + WholeStageCodegen + HashAggregate [segment,count,count] [count,count] + HashAggregate [c_customer_sk,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),segment,sum] + InputAdapter + Exchange [c_customer_sk] #2 + WholeStageCodegen + HashAggregate [c_customer_sk,ss_ext_sales_price,sum,sum] [sum,sum] + Project [c_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_sk,ss_sold_date_sk,ss_ext_sales_price] + BroadcastHashJoin [ca_county,ca_state,s_county,s_state] + Project [ca_state,c_customer_sk,ss_ext_sales_price,ca_county,ss_sold_date_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_customer_sk,c_current_addr_sk,ss_sold_date_sk,ss_ext_sales_price] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + HashAggregate [c_customer_sk,c_current_addr_sk] + InputAdapter + Exchange [c_customer_sk,c_current_addr_sk] #3 + WholeStageCodegen + HashAggregate [c_customer_sk,c_current_addr_sk] + Project [c_customer_sk,c_current_addr_sk] + BroadcastHashJoin [customer_sk,c_customer_sk] + Project [customer_sk] + BroadcastHashJoin [sold_date_sk,d_date_sk] + Project [sold_date_sk,customer_sk] + BroadcastHashJoin [item_sk,i_item_sk] + InputAdapter + Union + WholeStageCodegen + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + Filter [cs_item_sk,cs_sold_date_sk,cs_bill_customer_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + WholeStageCodegen + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [i_item_sk] + Filter [i_category,i_class,i_item_sk] + Scan parquet default.item [i_item_sk,i_class,i_category] [i_item_sk,i_class,i_category] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [c_customer_sk,c_current_addr_sk] + Filter [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [ss_sold_date_sk,ss_customer_sk,ss_ext_sales_price] + Filter [ss_customer_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_customer_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [ca_address_sk,ca_county,ca_state] + Filter [ca_address_sk,ca_state,ca_county] + Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state] [ca_address_sk,ca_county,ca_state] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [s_county,s_state] + Filter [s_state,s_county] + Scan parquet default.store [s_county,s_state] [s_county,s_state] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Subquery #1 + WholeStageCodegen + HashAggregate [(d_month_seq + 1)] + InputAdapter + Exchange [(d_month_seq + 1)] #11 + WholeStageCodegen + HashAggregate [(d_month_seq + 1)] + Project [d_month_seq] + Filter [d_year,d_moy] + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] [d_month_seq,d_year,d_moy] + Subquery #2 + WholeStageCodegen + HashAggregate [(d_month_seq + 3)] + InputAdapter + Exchange [(d_month_seq + 3)] #12 + WholeStageCodegen + HashAggregate [(d_month_seq + 3)] + Project [d_month_seq] + Filter [d_year,d_moy] + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] [d_month_seq,d_year,d_moy] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + Subquery #1 + WholeStageCodegen + HashAggregate [(d_month_seq + 1)] + InputAdapter + Exchange [(d_month_seq + 1)] #11 + WholeStageCodegen + HashAggregate [(d_month_seq + 1)] + Project [d_month_seq] + Filter [d_year,d_moy] + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] [d_month_seq,d_year,d_moy] + Subquery #2 + WholeStageCodegen + HashAggregate [(d_month_seq + 3)] + InputAdapter + Exchange [(d_month_seq + 3)] #12 + WholeStageCodegen + HashAggregate [(d_month_seq + 3)] + Project [d_month_seq] + Filter [d_year,d_moy] + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] [d_month_seq,d_year,d_moy] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt new file mode 100644 index 000000000..eccd937f1 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt @@ -0,0 +1,20 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[ext_price#1 DESC NULLS LAST,brand_id#2 ASC NULLS FIRST], output=[brand_id#2,brand#3,ext_price#1]) ++- *(4) HashAggregate(keys=[i_brand#4, i_brand_id#5], functions=[sum(UnscaledValue(ss_ext_sales_price#6))]) + +- Exchange hashpartitioning(i_brand#4, i_brand_id#5, 200) + +- *(3) HashAggregate(keys=[i_brand#4, i_brand_id#5], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#6))]) + +- *(3) Project [ss_ext_sales_price#6, i_brand_id#5, i_brand#4] + +- *(3) BroadcastHashJoin [ss_item_sk#7], [i_item_sk#8], Inner, BuildRight + :- *(3) Project [ss_item_sk#7, ss_ext_sales_price#6] + : +- *(3) BroadcastHashJoin [d_date_sk#9], [ss_sold_date_sk#10], Inner, BuildRight + : :- *(3) Project [d_date_sk#9] + : : +- *(3) Filter ((((isnotnull(d_moy#11) && isnotnull(d_year#12)) && (d_moy#11 = 11)) && (d_year#12 = 1999)) && isnotnull(d_date_sk#9)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#9,d_year#12,d_moy#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [ss_sold_date_sk#10, ss_item_sk#7, ss_ext_sales_price#6] + : +- *(1) Filter (isnotnull(ss_sold_date_sk#10) && isnotnull(ss_item_sk#7)) + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#10,ss_item_sk#7,ss_ext_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [i_item_sk#8, i_brand_id#5, i_brand#4] + +- *(2) Filter ((isnotnull(i_manager_id#13) && (i_manager_id#13 = 28)) && isnotnull(i_item_sk#8)) + +- *(2) FileScan parquet default.item[i_item_sk#8,i_brand_id#5,i_brand#4,i_manager_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt new file mode 100644 index 000000000..e5ff08959 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt @@ -0,0 +1,26 @@ +TakeOrderedAndProject [ext_price,brand_id,brand] + WholeStageCodegen + HashAggregate [i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,ext_price,sum(UnscaledValue(ss_ext_sales_price)),sum,brand_id] + InputAdapter + Exchange [i_brand,i_brand_id] #1 + WholeStageCodegen + HashAggregate [i_brand,sum,sum,ss_ext_sales_price,i_brand_id] [sum,sum] + Project [ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_item_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt new file mode 100644 index 000000000..5678609bc --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt @@ -0,0 +1,65 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output=[i_item_id#2,total_sales#1]) ++- *(20) HashAggregate(keys=[i_item_id#2], functions=[sum(total_sales#3)]) + +- Exchange hashpartitioning(i_item_id#2, 200) + +- *(19) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(total_sales#3)]) + +- Union + :- *(6) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- Exchange hashpartitioning(i_item_id#2, 200) + : +- *(5) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- *(5) Project [ss_ext_sales_price#4, i_item_id#2] + : +- *(5) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight + : :- *(5) Project [ss_item_sk#5, ss_ext_sales_price#4] + : : +- *(5) BroadcastHashJoin [ss_addr_sk#7], [ca_address_sk#8], Inner, BuildRight + : : :- *(5) Project [ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] + : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#9, ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] + : : : : +- *(5) Filter ((isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#7)) && isnotnull(ss_item_sk#5)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#10] + : : : +- *(1) Filter ((((isnotnull(d_year#11) && isnotnull(d_moy#12)) && (d_year#11 = 2001)) && (d_moy#12 = 2)) && isnotnull(d_date_sk#10)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [ca_address_sk#8] + : : +- *(2) Filter ((isnotnull(ca_gmt_offset#13) && (ca_gmt_offset#13 = -5.00)) && isnotnull(ca_address_sk#8)) + : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) BroadcastHashJoin [i_item_id#2], [i_item_id#2#14], LeftSemi, BuildRight + : :- *(4) Project [i_item_sk#6, i_item_id#2] + : : +- *(4) Filter isnotnull(i_item_sk#6) + : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_item_id#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- *(3) Project [i_item_id#2 AS i_item_id#2#14] + : +- *(3) Filter i_color#15 IN (slate,blanched,burnished) + : +- *(3) FileScan parquet default.item[i_item_id#2,i_color#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_color, [slate,blanched,burnished])], ReadSchema: struct + :- *(12) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) + : +- Exchange hashpartitioning(i_item_id#2, 200) + : +- *(11) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) + : +- *(11) Project [cs_ext_sales_price#16, i_item_id#2] + : +- *(11) BroadcastHashJoin [cs_item_sk#17], [i_item_sk#6], Inner, BuildRight + : :- *(11) Project [cs_item_sk#17, cs_ext_sales_price#16] + : : +- *(11) BroadcastHashJoin [cs_bill_addr_sk#18], [ca_address_sk#8], Inner, BuildRight + : : :- *(11) Project [cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] + : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#19], [d_date_sk#10], Inner, BuildRight + : : : :- *(11) Project [cs_sold_date_sk#19, cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] + : : : : +- *(11) Filter ((isnotnull(cs_sold_date_sk#19) && isnotnull(cs_bill_addr_sk#18)) && isnotnull(cs_item_sk#17)) + : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [i_item_sk#6, i_item_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(18) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(ws_ext_sales_price#20))]) + +- Exchange hashpartitioning(i_item_id#2, 200) + +- *(17) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#20))]) + +- *(17) Project [ws_ext_sales_price#20, i_item_id#2] + +- *(17) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#6], Inner, BuildRight + :- *(17) Project [ws_item_sk#21, ws_ext_sales_price#20] + : +- *(17) BroadcastHashJoin [ws_bill_addr_sk#22], [ca_address_sk#8], Inner, BuildRight + : :- *(17) Project [ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] + : : +- *(17) BroadcastHashJoin [ws_sold_date_sk#23], [d_date_sk#10], Inner, BuildRight + : : :- *(17) Project [ws_sold_date_sk#23, ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] + : : : +- *(17) Filter ((isnotnull(ws_sold_date_sk#23) && isnotnull(ws_bill_addr_sk#22)) && isnotnull(ws_item_sk#21)) + : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [i_item_sk#6, i_item_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/simplified.txt new file mode 100644 index 000000000..693fb71e6 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/simplified.txt @@ -0,0 +1,91 @@ +TakeOrderedAndProject [total_sales,i_item_id] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(total_sales)] [sum(total_sales),total_sales,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen + HashAggregate [i_item_id,total_sales,sum,sum] [sum,sum] + InputAdapter + Union + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #2 + WholeStageCodegen + HashAggregate [i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [ss_ext_sales_price,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_addr_sk,ss_item_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + BroadcastHashJoin [i_item_id,i_item_id] + Project [i_item_sk,i_item_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [i_item_id] + Filter [i_color] + Scan parquet default.item [i_item_id,i_color] [i_item_id,i_color] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen + HashAggregate [i_item_id,cs_ext_sales_price,sum,sum] [sum,sum] + Project [cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] + Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + Filter [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #5 + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #8 + WholeStageCodegen + HashAggregate [i_item_id,ws_ext_sales_price,sum,sum] [sum,sum] + Project [ws_ext_sales_price,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk,ws_bill_addr_sk,ws_item_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt new file mode 100644 index 000000000..458642360 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt @@ -0,0 +1,51 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,cc_name#3 ASC NULLS FIRST], output=[i_category#4,i_brand#5,cc_name#3,d_year#6,d_moy#7,avg_monthly_sales#2,sum_sales#1,psum#8,nsum#9]) ++- *(22) Project [i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, avg_monthly_sales#2, sum_sales#1, sum_sales#10 AS psum#8, sum_sales#11 AS nsum#9] + +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, cc_name#3, rn#12], [i_category#13, i_brand#14, cc_name#15, (rn#16 - 1)], Inner, BuildRight + :- *(22) Project [i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, sum_sales#1, avg_monthly_sales#2, rn#12, sum_sales#10] + : +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, cc_name#3, rn#12], [i_category#17, i_brand#18, cc_name#19, (rn#20 + 1)], Inner, BuildRight + : :- *(22) Project [i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, sum_sales#1, avg_monthly_sales#2, rn#12] + : : +- *(22) Filter (((isnotnull(avg_monthly_sales#2) && (avg_monthly_sales#2 > 0.000000)) && (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000)) && isnotnull(rn#12)) + : : +- Window [avg(_w0#21) windowspecdefinition(i_category#4, i_brand#5, cc_name#3, d_year#6, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#5, cc_name#3, d_year#6] + : : +- *(7) Filter (isnotnull(d_year#6) && (d_year#6 = 1999)) + : : +- Window [rank(d_year#6, d_moy#7) windowspecdefinition(i_category#4, i_brand#5, cc_name#3, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#12], [i_category#4, i_brand#5, cc_name#3], [d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST] + : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, cc_name#3 ASC NULLS FIRST, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST], false, 0 + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, 200) + : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7], functions=[sum(UnscaledValue(cs_sales_price#22))]) + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, 200) + : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7], functions=[partial_sum(UnscaledValue(cs_sales_price#22))]) + : : +- *(4) Project [i_brand#5, i_category#4, cs_sales_price#22, d_year#6, d_moy#7, cc_name#3] + : : +- *(4) BroadcastHashJoin [cs_call_center_sk#23], [cc_call_center_sk#24], Inner, BuildRight + : : :- *(4) Project [i_brand#5, i_category#4, cs_call_center_sk#23, cs_sales_price#22, d_year#6, d_moy#7] + : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#26], Inner, BuildRight + : : : :- *(4) Project [i_brand#5, i_category#4, cs_sold_date_sk#25, cs_call_center_sk#23, cs_sales_price#22] + : : : : +- *(4) BroadcastHashJoin [i_item_sk#27], [cs_item_sk#28], Inner, BuildRight + : : : : :- *(4) Project [i_item_sk#27, i_brand#5, i_category#4] + : : : : : +- *(4) Filter ((isnotnull(i_item_sk#27) && isnotnull(i_brand#5)) && isnotnull(i_category#4)) + : : : : : +- *(4) FileScan parquet default.item[i_item_sk#27,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) + : : : : +- *(1) Project [cs_sold_date_sk#25, cs_call_center_sk#23, cs_item_sk#28, cs_sales_price#22] + : : : : +- *(1) Filter ((isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#25)) && isnotnull(cs_call_center_sk#23)) + : : : : +- *(1) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_call_center_sk#23,cs_item_sk#28,cs_sales_price#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_call_center_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [d_date_sk#26, d_year#6, d_moy#7] + : : : +- *(2) Filter ((((d_year#6 = 1999) || ((d_year#6 = 1998) && (d_moy#7 = 12))) || ((d_year#6 = 2000) && (d_moy#7 = 1))) && isnotnull(d_date_sk#26)) + : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#26,d_year#6,d_moy#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [cc_call_center_sk#24, cc_name#3] + : : +- *(3) Filter (isnotnull(cc_call_center_sk#24) && isnotnull(cc_name#3)) + : : +- *(3) FileScan parquet default.call_center[cc_call_center_sk#24,cc_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] + 1))) + : +- *(14) Project [i_category#17, i_brand#18, cc_name#19, sum_sales#10, rn#20] + : +- *(14) Filter isnotnull(rn#20) + : +- Window [rank(d_year#29, d_moy#30) windowspecdefinition(i_category#17, i_brand#18, cc_name#19, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#20], [i_category#17, i_brand#18, cc_name#19], [d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] + : +- *(13) Sort [i_category#17 ASC NULLS FIRST, i_brand#18 ASC NULLS FIRST, cc_name#19 ASC NULLS FIRST, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(i_category#17, i_brand#18, cc_name#19, 200) + : +- *(12) HashAggregate(keys=[i_category#17, i_brand#18, cc_name#19, d_year#29, d_moy#30], functions=[sum(UnscaledValue(cs_sales_price#22))]) + : +- ReusedExchange [i_category#17, i_brand#18, cc_name#19, d_year#29, d_moy#30, sum#31], Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, 200) + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] - 1))) + +- *(21) Project [i_category#13, i_brand#14, cc_name#15, sum_sales#11, rn#16] + +- *(21) Filter isnotnull(rn#16) + +- Window [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#13, i_brand#14, cc_name#15, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#16], [i_category#13, i_brand#14, cc_name#15], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] + +- *(20) Sort [i_category#13 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, cc_name#15 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 + +- ReusedExchange [i_category#13, i_brand#14, cc_name#15, d_year#32, d_moy#33, sum_sales#11], Exchange hashpartitioning(i_category#17, i_brand#18, cc_name#19, 200) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt new file mode 100644 index 000000000..d41aaf1e2 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt @@ -0,0 +1,77 @@ +TakeOrderedAndProject [d_year,avg_monthly_sales,nsum,d_moy,sum_sales,i_category,i_brand,cc_name,psum] + WholeStageCodegen + Project [sum_sales,d_year,sum_sales,d_moy,sum_sales,i_category,i_brand,cc_name,avg_monthly_sales] + BroadcastHashJoin [cc_name,i_brand,rn,rn,i_category,i_category,i_brand,cc_name] + Project [d_year,d_moy,sum_sales,rn,i_brand,cc_name,sum_sales,i_category,avg_monthly_sales] + BroadcastHashJoin [i_category,cc_name,rn,i_category,rn,i_brand,cc_name,i_brand] + Project [d_year,d_moy,sum_sales,rn,i_brand,cc_name,i_category,avg_monthly_sales] + Filter [avg_monthly_sales,sum_sales,rn] + InputAdapter + Window [d_year,i_brand,cc_name,i_category,_w0] + WholeStageCodegen + Filter [d_year] + InputAdapter + Window [d_year,d_moy,i_brand,cc_name,i_category] + WholeStageCodegen + Sort [d_year,d_moy,i_brand,cc_name,i_category] + InputAdapter + Exchange [i_category,i_brand,cc_name] #1 + WholeStageCodegen + HashAggregate [d_year,d_moy,sum(UnscaledValue(cs_sales_price)),i_brand,sum,cc_name,i_category] [sum(UnscaledValue(cs_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [d_year,d_moy,i_brand,cc_name,i_category] #2 + WholeStageCodegen + HashAggregate [d_year,d_moy,i_brand,sum,sum,cc_name,i_category,cs_sales_price] [sum,sum] + Project [d_moy,d_year,cs_sales_price,i_category,cc_name,i_brand] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [d_moy,d_year,cs_sales_price,i_category,i_brand,cs_call_center_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sales_price,cs_sold_date_sk,i_category,i_brand,cs_call_center_sk] + BroadcastHashJoin [i_item_sk,cs_item_sk] + Project [i_item_sk,i_brand,i_category] + Filter [i_item_sk,i_brand,i_category] + Scan parquet default.item [i_item_sk,i_brand,i_category] [i_item_sk,i_brand,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [cs_sold_date_sk,cs_call_center_sk,cs_item_sk,cs_sales_price] + Filter [cs_item_sk,cs_sold_date_sk,cs_call_center_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_call_center_sk,cs_item_sk,cs_sales_price] [cs_sold_date_sk,cs_call_center_sk,cs_item_sk,cs_sales_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk,d_year,d_moy] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [cc_call_center_sk,cc_name] + Filter [cc_call_center_sk,cc_name] + Scan parquet default.call_center [cc_call_center_sk,cc_name] [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [cc_name,sum_sales,i_brand,rn,i_category] + Filter [rn] + InputAdapter + Window [cc_name,i_brand,d_moy,d_year,i_category] + WholeStageCodegen + Sort [cc_name,i_brand,d_moy,d_year,i_category] + InputAdapter + Exchange [i_category,i_brand,cc_name] #7 + WholeStageCodegen + HashAggregate [sum(UnscaledValue(cs_sales_price)),sum,cc_name,i_brand,d_moy,d_year,i_category] [sum(UnscaledValue(cs_sales_price)),sum_sales,sum] + InputAdapter + ReusedExchange [cc_name,i_brand,d_moy,sum,d_year,i_category] [cc_name,i_brand,d_moy,sum,d_year,i_category] #2 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [cc_name,rn,i_brand,i_category,sum_sales] + Filter [rn] + InputAdapter + Window [cc_name,d_year,i_brand,i_category,d_moy] + WholeStageCodegen + Sort [cc_name,d_year,i_brand,i_category,d_moy] + InputAdapter + ReusedExchange [cc_name,d_year,i_brand,i_category,d_moy,sum_sales] [cc_name,d_year,i_brand,i_category,d_moy,sum_sales] #7 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt new file mode 100644 index 000000000..5b1f75cde --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt @@ -0,0 +1,101 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,ss_item_rev#2 ASC NULLS FIRST], output=[item_id#1,ss_item_rev#2,ss_dev#3,cs_item_rev#4,cs_dev#5,ws_item_rev#6,ws_dev#7,average#8]) ++- *(15) Project [item_id#1, ss_item_rev#2, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ss_dev#3, cs_item_rev#4, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(cs_item_rev#4 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS cs_dev#5, ws_item_rev#6, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ws_item_rev#6 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ws_dev#7, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2))) / 3.00), DecimalType(23,6)) AS average#8] + +- *(15) BroadcastHashJoin [item_id#1], [item_id#9], Inner, BuildRight, ((((((((cast(ss_item_rev#2 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#6)), DecimalType(19,3))) && (cast(ss_item_rev#2 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#6)), DecimalType(20,3)))) && (cast(cs_item_rev#4 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#6)), DecimalType(19,3)))) && (cast(cs_item_rev#4 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#6)), DecimalType(20,3)))) && (cast(ws_item_rev#6 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#2)), DecimalType(19,3)))) && (cast(ws_item_rev#6 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#2)), DecimalType(20,3)))) && (cast(ws_item_rev#6 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#4)), DecimalType(19,3)))) && (cast(ws_item_rev#6 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#4)), DecimalType(20,3)))) + :- *(15) Project [item_id#1, ss_item_rev#2, cs_item_rev#4] + : +- *(15) BroadcastHashJoin [item_id#1], [item_id#10], Inner, BuildRight, ((((cast(ss_item_rev#2 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#4)), DecimalType(19,3))) && (cast(ss_item_rev#2 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#4)), DecimalType(20,3)))) && (cast(cs_item_rev#4 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#2)), DecimalType(19,3)))) && (cast(cs_item_rev#4 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#2)), DecimalType(20,3)))) + : :- *(15) Filter isnotnull(ss_item_rev#2) + : : +- *(15) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(ss_ext_sales_price#12))]) + : : +- Exchange hashpartitioning(i_item_id#11, 200) + : : +- *(4) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#12))]) + : : +- *(4) Project [ss_ext_sales_price#12, i_item_id#11] + : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + : : :- *(4) Project [ss_sold_date_sk#13, ss_ext_sales_price#12, i_item_id#11] + : : : +- *(4) BroadcastHashJoin [ss_item_sk#15], [i_item_sk#16], Inner, BuildRight + : : : :- *(4) Project [ss_sold_date_sk#13, ss_item_sk#15, ss_ext_sales_price#12] + : : : : +- *(4) Filter (isnotnull(ss_item_sk#15) && isnotnull(ss_sold_date_sk#13)) + : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_item_sk#15,ss_ext_sales_price#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [i_item_sk#16, i_item_id#11] + : : : +- *(1) Filter (isnotnull(i_item_sk#16) && isnotnull(i_item_id#11)) + : : : +- *(1) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [d_date_sk#14] + : : +- *(3) BroadcastHashJoin [d_date#17], [d_date#17#18], LeftSemi, BuildRight + : : :- *(3) Project [d_date_sk#14, d_date#17] + : : : +- *(3) Filter isnotnull(d_date_sk#14) + : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) + : : +- *(2) Project [d_date#17 AS d_date#17#18] + : : +- *(2) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12416)) + : : : +- Subquery subquery12416 + : : : +- *(1) Project [d_week_seq#19] + : : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) + : : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + : : +- *(2) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct + : : +- Subquery subquery12416 + : : +- *(1) Project [d_week_seq#19] + : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) + : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- *(9) Filter isnotnull(cs_item_rev#4) + : +- *(9) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(cs_ext_sales_price#20))]) + : +- Exchange hashpartitioning(i_item_id#11, 200) + : +- *(8) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#20))]) + : +- *(8) Project [cs_ext_sales_price#20, i_item_id#11] + : +- *(8) BroadcastHashJoin [cs_sold_date_sk#21], [d_date_sk#14], Inner, BuildRight + : :- *(8) Project [cs_sold_date_sk#21, cs_ext_sales_price#20, i_item_id#11] + : : +- *(8) BroadcastHashJoin [cs_item_sk#22], [i_item_sk#16], Inner, BuildRight + : : :- *(8) Project [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#20] + : : : +- *(8) Filter (isnotnull(cs_item_sk#22) && isnotnull(cs_sold_date_sk#21)) + : : : +- *(8) FileScan parquet default.catalog_sales[cs_sold_date_sk#21,cs_item_sk#22,cs_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [i_item_sk#16, i_item_id#11], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [d_date_sk#14] + : +- *(7) BroadcastHashJoin [d_date#17], [d_date#17#23], LeftSemi, BuildRight + : :- *(7) Project [d_date_sk#14, d_date#17] + : : +- *(7) Filter isnotnull(d_date_sk#14) + : : +- *(7) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) + : +- *(6) Project [d_date#17 AS d_date#17#23] + : +- *(6) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12420)) + : : +- Subquery subquery12420 + : : +- *(1) Project [d_week_seq#19] + : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) + : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + : +- *(6) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct + : +- Subquery subquery12420 + : +- *(1) Project [d_week_seq#19] + : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) + : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- *(14) Filter isnotnull(ws_item_rev#6) + +- *(14) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(ws_ext_sales_price#24))]) + +- Exchange hashpartitioning(i_item_id#11, 200) + +- *(13) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#24))]) + +- *(13) Project [ws_ext_sales_price#24, i_item_id#11] + +- *(13) BroadcastHashJoin [ws_sold_date_sk#25], [d_date_sk#14], Inner, BuildRight + :- *(13) Project [ws_sold_date_sk#25, ws_ext_sales_price#24, i_item_id#11] + : +- *(13) BroadcastHashJoin [ws_item_sk#26], [i_item_sk#16], Inner, BuildRight + : :- *(13) Project [ws_sold_date_sk#25, ws_item_sk#26, ws_ext_sales_price#24] + : : +- *(13) Filter (isnotnull(ws_item_sk#26) && isnotnull(ws_sold_date_sk#25)) + : : +- *(13) FileScan parquet default.web_sales[ws_sold_date_sk#25,ws_item_sk#26,ws_ext_sales_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : +- ReusedExchange [i_item_sk#16, i_item_id#11], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(12) Project [d_date_sk#14] + +- *(12) BroadcastHashJoin [d_date#17], [d_date#17#27], LeftSemi, BuildRight + :- *(12) Project [d_date_sk#14, d_date#17] + : +- *(12) Filter isnotnull(d_date_sk#14) + : +- *(12) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) + +- *(11) Project [d_date#17 AS d_date#17#27] + +- *(11) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12424)) + : +- Subquery subquery12424 + : +- *(1) Project [d_week_seq#19] + : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) + : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + +- *(11) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct + +- Subquery subquery12424 + +- *(1) Project [d_week_seq#19] + +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) + +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt new file mode 100644 index 000000000..fd4503647 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt @@ -0,0 +1,133 @@ +TakeOrderedAndProject [cs_item_rev,item_id,ws_dev,ws_item_rev,average,cs_dev,ss_item_rev,ss_dev] + WholeStageCodegen + Project [item_id,ss_item_rev,cs_item_rev,ws_item_rev] + BroadcastHashJoin [cs_item_rev,item_id,ws_item_rev,item_id,ss_item_rev] + Project [item_id,ss_item_rev,cs_item_rev] + BroadcastHashJoin [item_id,item_id,ss_item_rev,cs_item_rev] + Filter [ss_item_rev] + HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),item_id,ss_item_rev,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen + HashAggregate [i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [ss_ext_sales_price,i_item_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_ext_sales_price,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_item_sk,i_item_id] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Project [d_date_sk,d_date] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date] + Filter [d_week_seq] + Subquery #1 + WholeStageCodegen + Project [d_week_seq] + Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + Subquery #1 + WholeStageCodegen + Project [d_week_seq] + Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Filter [cs_item_rev] + HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum(UnscaledValue(cs_ext_sales_price)),item_id,cs_item_rev,sum] + InputAdapter + Exchange [i_item_id] #6 + WholeStageCodegen + HashAggregate [i_item_id,cs_ext_sales_price,sum,sum] [sum,sum] + Project [cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] + Filter [cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #2 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Project [d_date_sk,d_date] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [d_date] + Filter [d_week_seq] + Subquery #2 + WholeStageCodegen + Project [d_week_seq] + Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + Subquery #2 + WholeStageCodegen + Project [d_week_seq] + Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Filter [ws_item_rev] + HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),item_id,ws_item_rev,sum] + InputAdapter + Exchange [i_item_id] #10 + WholeStageCodegen + HashAggregate [i_item_id,ws_ext_sales_price,sum,sum] [sum,sum] + Project [ws_ext_sales_price,i_item_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_ext_sales_price,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #2 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Project [d_date_sk,d_date] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen + Project [d_date] + Filter [d_week_seq] + Subquery #3 + WholeStageCodegen + Project [d_week_seq] + Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + Subquery #3 + WholeStageCodegen + Project [d_week_seq] + Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt new file mode 100644 index 000000000..56e7c1173 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt @@ -0,0 +1,43 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[s_store_name1#1 ASC NULLS FIRST,s_store_id1#2 ASC NULLS FIRST,d_week_seq1#3 ASC NULLS FIRST], output=[s_store_name1#1,s_store_id1#2,d_week_seq1#3,(sun_sales1 / sun_sales2)#4,(mon_sales1 / mon_sales2)#5,(tue_sales1 / tue_sales2)#6,(wed_sales1 / wed_sales2)#7,(thu_sales1 / thu_sales2)#8,(fri_sales1 / fri_sales2)#9,(sat_sales1 / sat_sales2)#10]) ++- *(10) Project [s_store_name1#1, s_store_id1#2, d_week_seq1#3, CheckOverflow((promote_precision(sun_sales1#11) / promote_precision(sun_sales2#12)), DecimalType(37,20)) AS (sun_sales1 / sun_sales2)#4, CheckOverflow((promote_precision(mon_sales1#13) / promote_precision(mon_sales2#14)), DecimalType(37,20)) AS (mon_sales1 / mon_sales2)#5, CheckOverflow((promote_precision(tue_sales1#15) / promote_precision(tue_sales2#16)), DecimalType(37,20)) AS (tue_sales1 / tue_sales2)#6, CheckOverflow((promote_precision(wed_sales1#17) / promote_precision(wed_sales2#18)), DecimalType(37,20)) AS (wed_sales1 / wed_sales2)#7, CheckOverflow((promote_precision(thu_sales1#19) / promote_precision(thu_sales2#20)), DecimalType(37,20)) AS (thu_sales1 / thu_sales2)#8, CheckOverflow((promote_precision(fri_sales1#21) / promote_precision(fri_sales2#22)), DecimalType(37,20)) AS (fri_sales1 / fri_sales2)#9, CheckOverflow((promote_precision(sat_sales1#23) / promote_precision(sat_sales2#24)), DecimalType(37,20)) AS (sat_sales1 / sat_sales2)#10] + +- *(10) BroadcastHashJoin [s_store_id1#2, d_week_seq1#3], [s_store_id2#25, (d_week_seq2#26 - 52)], Inner, BuildRight + :- *(10) Project [s_store_name#27 AS s_store_name1#1, d_week_seq#28 AS d_week_seq1#3, s_store_id#29 AS s_store_id1#2, sun_sales#30 AS sun_sales1#11, mon_sales#31 AS mon_sales1#13, tue_sales#32 AS tue_sales1#15, wed_sales#33 AS wed_sales1#17, thu_sales#34 AS thu_sales1#19, fri_sales#35 AS fri_sales1#21, sat_sales#36 AS sat_sales1#23] + : +- *(10) BroadcastHashJoin [d_week_seq#28], [d_week_seq#37], Inner, BuildRight + : :- *(10) Project [d_week_seq#28, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#29, s_store_name#27] + : : +- *(10) BroadcastHashJoin [ss_store_sk#38], [s_store_sk#39], Inner, BuildRight + : : :- *(10) HashAggregate(keys=[d_week_seq#28, ss_store_sk#38], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#40 = Sunday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Monday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Tuesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Wednesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Thursday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Friday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Saturday) THEN ss_sales_price#41 ELSE null END))]) + : : : +- Exchange hashpartitioning(d_week_seq#28, ss_store_sk#38, 200) + : : : +- *(2) HashAggregate(keys=[d_week_seq#28, ss_store_sk#38], functions=[partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Sunday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Monday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Tuesday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Wednesday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Thursday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Friday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Saturday) THEN ss_sales_price#41 ELSE null END))]) + : : : +- *(2) Project [ss_store_sk#38, ss_sales_price#41, d_week_seq#28, d_day_name#40] + : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#42], [d_date_sk#43], Inner, BuildRight + : : : :- *(2) Project [ss_sold_date_sk#42, ss_store_sk#38, ss_sales_price#41] + : : : : +- *(2) Filter (isnotnull(ss_sold_date_sk#42) && isnotnull(ss_store_sk#38)) + : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#42,ss_store_sk#38,ss_sales_price#41] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#43, d_week_seq#28, d_day_name#40] + : : : +- *(1) Filter (isnotnull(d_date_sk#43) && isnotnull(d_week_seq#28)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#43,d_week_seq#28,d_day_name#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [s_store_sk#39, s_store_id#29, s_store_name#27] + : : +- *(3) Filter (isnotnull(s_store_sk#39) && isnotnull(s_store_id#29)) + : : +- *(3) FileScan parquet default.store[s_store_sk#39,s_store_id#29,s_store_name#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [d_week_seq#37] + : +- *(4) Filter (((isnotnull(d_month_seq#44) && (d_month_seq#44 >= 1212)) && (d_month_seq#44 <= 1223)) && isnotnull(d_week_seq#37)) + : +- *(4) FileScan parquet default.date_dim[d_month_seq#44,d_week_seq#37] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223),..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52))) + +- *(9) Project [d_week_seq#28 AS d_week_seq2#26, s_store_id#29 AS s_store_id2#25, sun_sales#30 AS sun_sales2#12, mon_sales#31 AS mon_sales2#14, tue_sales#32 AS tue_sales2#16, wed_sales#33 AS wed_sales2#18, thu_sales#34 AS thu_sales2#20, fri_sales#35 AS fri_sales2#22, sat_sales#36 AS sat_sales2#24] + +- *(9) BroadcastHashJoin [d_week_seq#28], [d_week_seq#45], Inner, BuildRight + :- *(9) Project [d_week_seq#28, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#29] + : +- *(9) BroadcastHashJoin [ss_store_sk#38], [s_store_sk#39], Inner, BuildRight + : :- *(9) HashAggregate(keys=[d_week_seq#28, ss_store_sk#38], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#40 = Sunday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Monday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Tuesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Wednesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Thursday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Friday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Saturday) THEN ss_sales_price#41 ELSE null END))]) + : : +- ReusedExchange [d_week_seq#28, ss_store_sk#38, sum#46, sum#47, sum#48, sum#49, sum#50, sum#51, sum#52], Exchange hashpartitioning(d_week_seq#28, ss_store_sk#38, 200) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [s_store_sk#39, s_store_id#29] + : +- *(7) Filter (isnotnull(s_store_sk#39) && isnotnull(s_store_id#29)) + : +- *(7) FileScan parquet default.store[s_store_sk#39,s_store_id#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [d_week_seq#45] + +- *(8) Filter (((isnotnull(d_month_seq#53) && (d_month_seq#53 >= 1224)) && (d_month_seq#53 <= 1235)) && isnotnull(d_week_seq#45)) + +- *(8) FileScan parquet default.date_dim[d_month_seq#53,d_week_seq#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1224), LessThanOrEqual(d_month_seq,1235),..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt new file mode 100644 index 000000000..0ffe4eb4e --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt @@ -0,0 +1,58 @@ +TakeOrderedAndProject [(wed_sales1 / wed_sales2),d_week_seq1,(mon_sales1 / mon_sales2),(thu_sales1 / thu_sales2),(sat_sales1 / sat_sales2),s_store_name1,(sun_sales1 / sun_sales2),s_store_id1,(fri_sales1 / fri_sales2),(tue_sales1 / tue_sales2)] + WholeStageCodegen + Project [sat_sales1,sat_sales2,sun_sales2,tue_sales1,d_week_seq1,mon_sales1,fri_sales1,wed_sales1,thu_sales1,mon_sales2,s_store_name1,s_store_id1,wed_sales2,thu_sales2,fri_sales2,tue_sales2,sun_sales1] + BroadcastHashJoin [s_store_id1,d_week_seq1,s_store_id2,d_week_seq2] + Project [fri_sales,tue_sales,sat_sales,s_store_id,sun_sales,d_week_seq,wed_sales,mon_sales,s_store_name,thu_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + Project [s_store_id,s_store_name,thu_sales,sun_sales,fri_sales,tue_sales,d_week_seq,wed_sales,sat_sales,mon_sales] + BroadcastHashJoin [ss_store_sk,s_store_sk] + HashAggregate [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum,sum,ss_store_sk,d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),thu_sales,sun_sales,sum,fri_sales,tue_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,wed_sales,sat_sales,sum,mon_sales,sum] + InputAdapter + Exchange [d_week_seq,ss_store_sk] #1 + WholeStageCodegen + HashAggregate [sum,sum,d_day_name,sum,sum,sum,sum,sum,sum,ss_store_sk,d_week_seq,ss_sales_price,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ss_store_sk,ss_sales_price,d_week_seq,d_day_name] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_sales_price] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_store_sk,ss_sales_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [d_date_sk,d_week_seq,d_day_name] + Filter [d_date_sk,d_week_seq] + Scan parquet default.date_dim [d_date_sk,d_week_seq,d_day_name] [d_date_sk,d_week_seq,d_day_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [s_store_sk,s_store_id,s_store_name] + Filter [s_store_sk,s_store_id] + Scan parquet default.store [s_store_sk,s_store_id,s_store_name] [s_store_sk,s_store_id,s_store_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_week_seq] + Filter [d_month_seq,d_week_seq] + Scan parquet default.date_dim [d_month_seq,d_week_seq] [d_month_seq,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [fri_sales,tue_sales,sat_sales,s_store_id,sun_sales,d_week_seq,wed_sales,mon_sales,thu_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + Project [s_store_id,thu_sales,sun_sales,fri_sales,tue_sales,d_week_seq,wed_sales,sat_sales,mon_sales] + BroadcastHashJoin [ss_store_sk,s_store_sk] + HashAggregate [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum,ss_store_sk,d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),thu_sales,sum,sun_sales,fri_sales,tue_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,wed_sales,sat_sales,sum,mon_sales] + InputAdapter + ReusedExchange [sum,sum,sum,ss_store_sk,d_week_seq,sum,sum,sum,sum] [sum,sum,sum,ss_store_sk,d_week_seq,sum,sum,sum,sum] #1 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [s_store_sk,s_store_id] + Filter [s_store_sk,s_store_id] + Scan parquet default.store [s_store_sk,s_store_id] [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [d_week_seq] + Filter [d_month_seq,d_week_seq] + Scan parquet default.date_dim [d_month_seq,d_week_seq] [d_month_seq,d_week_seq] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt new file mode 100644 index 000000000..410992cf5 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt @@ -0,0 +1,58 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[cnt#1 ASC NULLS FIRST], output=[state#2,cnt#1]) ++- *(8) Project [state#2, cnt#1] + +- *(8) Filter (count(1)#3 >= 10) + +- *(8) HashAggregate(keys=[ca_state#4], functions=[count(1)]) + +- Exchange hashpartitioning(ca_state#4, 200) + +- *(7) HashAggregate(keys=[ca_state#4], functions=[partial_count(1)]) + +- *(7) Project [ca_state#4] + +- *(7) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight + :- *(7) Project [ca_state#4, ss_item_sk#5] + : +- *(7) BroadcastHashJoin [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + : :- *(7) Project [ca_state#4, ss_sold_date_sk#7, ss_item_sk#5] + : : +- *(7) BroadcastHashJoin [c_customer_sk#9], [ss_customer_sk#10], Inner, BuildRight + : : :- *(7) Project [ca_state#4, c_customer_sk#9] + : : : +- *(7) BroadcastHashJoin [ca_address_sk#11], [c_current_addr_sk#12], Inner, BuildRight + : : : :- *(7) Project [ca_address_sk#11, ca_state#4] + : : : : +- *(7) Filter isnotnull(ca_address_sk#11) + : : : : +- *(7) FileScan parquet default.customer_address[ca_address_sk#11,ca_state#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : +- *(1) Project [c_customer_sk#9, c_current_addr_sk#12] + : : : +- *(1) Filter (isnotnull(c_current_addr_sk#12) && isnotnull(c_customer_sk#9)) + : : : +- *(1) FileScan parquet default.customer[c_customer_sk#9,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) + : : +- *(2) Project [ss_sold_date_sk#7, ss_item_sk#5, ss_customer_sk#10] + : : +- *(2) Filter ((isnotnull(ss_customer_sk#10) && isnotnull(ss_sold_date_sk#7)) && isnotnull(ss_item_sk#5)) + : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#7,ss_item_sk#5,ss_customer_sk#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [d_date_sk#8] + : +- *(3) Filter ((isnotnull(d_month_seq#13) && (d_month_seq#13 = Subquery subquery982)) && isnotnull(d_date_sk#8)) + : : +- Subquery subquery982 + : : +- *(2) HashAggregate(keys=[d_month_seq#13], functions=[]) + : : +- Exchange hashpartitioning(d_month_seq#13, 200) + : : +- *(1) HashAggregate(keys=[d_month_seq#13], functions=[]) + : : +- *(1) Project [d_month_seq#13] + : : +- *(1) Filter (((isnotnull(d_year#14) && isnotnull(d_moy#15)) && (d_year#14 = 2000)) && (d_moy#15 = 1)) + : : +- *(1) FileScan parquet default.date_dim[d_month_seq#13,d_year#14,d_moy#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)], ReadSchema: struct + : +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_month_seq#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct + : +- Subquery subquery982 + : +- *(2) HashAggregate(keys=[d_month_seq#13], functions=[]) + : +- Exchange hashpartitioning(d_month_seq#13, 200) + : +- *(1) HashAggregate(keys=[d_month_seq#13], functions=[]) + : +- *(1) Project [d_month_seq#13] + : +- *(1) Filter (((isnotnull(d_year#14) && isnotnull(d_moy#15)) && (d_year#14 = 2000)) && (d_moy#15 = 1)) + : +- *(1) FileScan parquet default.date_dim[d_month_seq#13,d_year#14,d_moy#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(6) Project [i_item_sk#6] + +- *(6) Filter (cast(i_current_price#16 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#17)), DecimalType(14,7))) + +- *(6) BroadcastHashJoin [i_category#18], [i_category#18#19], LeftOuter, BuildRight + :- *(6) Project [i_item_sk#6, i_current_price#16, i_category#18] + : +- *(6) Filter (isnotnull(i_current_price#16) && isnotnull(i_item_sk#6)) + : +- *(6) FileScan parquet default.item[i_item_sk#6,i_current_price#16,i_category#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) + +- *(5) HashAggregate(keys=[i_category#18], functions=[avg(UnscaledValue(i_current_price#16))]) + +- Exchange hashpartitioning(i_category#18, 200) + +- *(4) HashAggregate(keys=[i_category#18], functions=[partial_avg(UnscaledValue(i_current_price#16))]) + +- *(4) Project [i_current_price#16, i_category#18] + +- *(4) Filter isnotnull(i_category#18) + +- *(4) FileScan parquet default.item[i_current_price#16,i_category#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt new file mode 100644 index 000000000..59ea8c0d9 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt @@ -0,0 +1,78 @@ +TakeOrderedAndProject [cnt,state] + WholeStageCodegen + Project [state,cnt] + Filter [count(1)] + HashAggregate [ca_state,count,count(1)] [state,cnt,count(1),count,count(1)] + InputAdapter + Exchange [ca_state] #1 + WholeStageCodegen + HashAggregate [ca_state,count,count] [count,count] + Project [ca_state] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ca_state,ss_item_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ca_state,ss_sold_date_sk,ss_item_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [ca_state,c_customer_sk] + BroadcastHashJoin [ca_address_sk,c_current_addr_sk] + Project [ca_address_sk,ca_state] + Filter [ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [c_customer_sk,c_current_addr_sk] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + Filter [ss_customer_sk,ss_sold_date_sk,ss_item_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Subquery #1 + WholeStageCodegen + HashAggregate [d_month_seq] + InputAdapter + Exchange [d_month_seq] #5 + WholeStageCodegen + HashAggregate [d_month_seq] + Project [d_month_seq] + Filter [d_year,d_moy] + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] [d_month_seq,d_year,d_moy] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + Subquery #1 + WholeStageCodegen + HashAggregate [d_month_seq] + InputAdapter + Exchange [d_month_seq] #5 + WholeStageCodegen + HashAggregate [d_month_seq] + Project [d_month_seq] + Filter [d_year,d_moy] + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] [d_month_seq,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [i_item_sk] + Filter [i_current_price,avg(i_current_price)] + BroadcastHashJoin [i_category,i_category] + Project [i_item_sk,i_current_price,i_category] + Filter [i_current_price,i_item_sk] + Scan parquet default.item [i_item_sk,i_current_price,i_category] [i_item_sk,i_current_price,i_category] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + HashAggregate [i_category,sum,count,avg(UnscaledValue(i_current_price))] [avg(i_current_price),sum,i_category,count,avg(UnscaledValue(i_current_price))] + InputAdapter + Exchange [i_category] #8 + WholeStageCodegen + HashAggregate [sum,sum,count,i_category,i_current_price,count] [sum,count,sum,count] + Project [i_current_price,i_category] + Filter [i_category] + Scan parquet default.item [i_current_price,i_category] [i_current_price,i_category] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt new file mode 100644 index 000000000..c5587f4e4 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt @@ -0,0 +1,65 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,total_sales#2 ASC NULLS FIRST], output=[i_item_id#1,total_sales#2]) ++- *(20) HashAggregate(keys=[i_item_id#1], functions=[sum(total_sales#3)]) + +- Exchange hashpartitioning(i_item_id#1, 200) + +- *(19) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(total_sales#3)]) + +- Union + :- *(6) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- Exchange hashpartitioning(i_item_id#1, 200) + : +- *(5) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- *(5) Project [ss_ext_sales_price#4, i_item_id#1] + : +- *(5) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight + : :- *(5) Project [ss_item_sk#5, ss_ext_sales_price#4] + : : +- *(5) BroadcastHashJoin [ss_addr_sk#7], [ca_address_sk#8], Inner, BuildRight + : : :- *(5) Project [ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] + : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#9, ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] + : : : : +- *(5) Filter ((isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#7)) && isnotnull(ss_item_sk#5)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#10] + : : : +- *(1) Filter ((((isnotnull(d_year#11) && isnotnull(d_moy#12)) && (d_year#11 = 1998)) && (d_moy#12 = 9)) && isnotnull(d_date_sk#10)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,9), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [ca_address_sk#8] + : : +- *(2) Filter ((isnotnull(ca_gmt_offset#13) && (ca_gmt_offset#13 = -5.00)) && isnotnull(ca_address_sk#8)) + : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) BroadcastHashJoin [i_item_id#1], [i_item_id#1#14], LeftSemi, BuildRight + : :- *(4) Project [i_item_sk#6, i_item_id#1] + : : +- *(4) Filter isnotnull(i_item_sk#6) + : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_item_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- *(3) Project [i_item_id#1 AS i_item_id#1#14] + : +- *(3) Filter (isnotnull(i_category#15) && (i_category#15 = Music)) + : +- *(3) FileScan parquet default.item[i_item_id#1,i_category#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Music)], ReadSchema: struct + :- *(12) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) + : +- Exchange hashpartitioning(i_item_id#1, 200) + : +- *(11) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) + : +- *(11) Project [cs_ext_sales_price#16, i_item_id#1] + : +- *(11) BroadcastHashJoin [cs_item_sk#17], [i_item_sk#6], Inner, BuildRight + : :- *(11) Project [cs_item_sk#17, cs_ext_sales_price#16] + : : +- *(11) BroadcastHashJoin [cs_bill_addr_sk#18], [ca_address_sk#8], Inner, BuildRight + : : :- *(11) Project [cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] + : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#19], [d_date_sk#10], Inner, BuildRight + : : : :- *(11) Project [cs_sold_date_sk#19, cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] + : : : : +- *(11) Filter ((isnotnull(cs_sold_date_sk#19) && isnotnull(cs_bill_addr_sk#18)) && isnotnull(cs_item_sk#17)) + : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [i_item_sk#6, i_item_id#1], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(18) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(ws_ext_sales_price#20))]) + +- Exchange hashpartitioning(i_item_id#1, 200) + +- *(17) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#20))]) + +- *(17) Project [ws_ext_sales_price#20, i_item_id#1] + +- *(17) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#6], Inner, BuildRight + :- *(17) Project [ws_item_sk#21, ws_ext_sales_price#20] + : +- *(17) BroadcastHashJoin [ws_bill_addr_sk#22], [ca_address_sk#8], Inner, BuildRight + : :- *(17) Project [ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] + : : +- *(17) BroadcastHashJoin [ws_sold_date_sk#23], [d_date_sk#10], Inner, BuildRight + : : :- *(17) Project [ws_sold_date_sk#23, ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] + : : : +- *(17) Filter ((isnotnull(ws_sold_date_sk#23) && isnotnull(ws_bill_addr_sk#22)) && isnotnull(ws_item_sk#21)) + : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [i_item_sk#6, i_item_id#1], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/simplified.txt new file mode 100644 index 000000000..e37083df9 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/simplified.txt @@ -0,0 +1,91 @@ +TakeOrderedAndProject [i_item_id,total_sales] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(total_sales)] [sum(total_sales),total_sales,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen + HashAggregate [i_item_id,total_sales,sum,sum] [sum,sum] + InputAdapter + Union + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #2 + WholeStageCodegen + HashAggregate [i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [ss_ext_sales_price,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_addr_sk,ss_item_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + BroadcastHashJoin [i_item_id,i_item_id] + Project [i_item_sk,i_item_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [i_item_id] + Filter [i_category] + Scan parquet default.item [i_item_id,i_category] [i_item_id,i_category] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen + HashAggregate [i_item_id,cs_ext_sales_price,sum,sum] [sum,sum] + Project [cs_ext_sales_price,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] + Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + Filter [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #5 + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + InputAdapter + Exchange [i_item_id] #8 + WholeStageCodegen + HashAggregate [i_item_id,ws_ext_sales_price,sum,sum] [sum,sum] + Project [ws_ext_sales_price,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk,ws_bill_addr_sk,ws_item_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + InputAdapter + ReusedExchange [ca_address_sk] [ca_address_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt new file mode 100644 index 000000000..49bd37330 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt @@ -0,0 +1,68 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[promotions#1 ASC NULLS FIRST,total#2 ASC NULLS FIRST], output=[promotions#1,total#2,(CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#3]) ++- *(16) Project [promotions#1, total#2, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#1 as decimal(15,4))) / promote_precision(cast(total#2 as decimal(15,4)))), DecimalType(35,20))) * 100.00000000000000000000), DecimalType(38,19)) AS (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#3] + +- BroadcastNestedLoopJoin BuildRight, Inner + :- *(8) HashAggregate(keys=[], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- Exchange SinglePartition + : +- *(7) HashAggregate(keys=[], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- *(7) Project [ss_ext_sales_price#4] + : +- *(7) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight + : :- *(7) Project [ss_item_sk#5, ss_ext_sales_price#4] + : : +- *(7) BroadcastHashJoin [c_current_addr_sk#7], [ca_address_sk#8], Inner, BuildRight + : : :- *(7) Project [ss_item_sk#5, ss_ext_sales_price#4, c_current_addr_sk#7] + : : : +- *(7) BroadcastHashJoin [ss_customer_sk#9], [c_customer_sk#10], Inner, BuildRight + : : : :- *(7) Project [ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] + : : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#11], [d_date_sk#12], Inner, BuildRight + : : : : :- *(7) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] + : : : : : +- *(7) BroadcastHashJoin [ss_promo_sk#13], [p_promo_sk#14], Inner, BuildRight + : : : : : :- *(7) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_promo_sk#13, ss_ext_sales_price#4] + : : : : : : +- *(7) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight + : : : : : : :- *(7) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_store_sk#15, ss_promo_sk#13, ss_ext_sales_price#4] + : : : : : : : +- *(7) Filter ((((isnotnull(ss_store_sk#15) && isnotnull(ss_promo_sk#13)) && isnotnull(ss_sold_date_sk#11)) && isnotnull(ss_customer_sk#9)) && isnotnull(ss_item_sk#5)) + : : : : : : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#5,ss_customer_sk#9,ss_store_sk#15,ss_promo_sk#13,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(2) Project [p_promo_sk#14] + : : : : : +- *(2) Filter ((((p_channel_dmail#18 = Y) || (p_channel_email#19 = Y)) || (p_channel_tv#20 = Y)) && isnotnull(p_promo_sk#14)) + : : : : : +- *(2) FileScan parquet default.promotion[p_promo_sk#14,p_channel_dmail#18,p_channel_email#19,p_channel_tv#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(p_channel_dmail,Y),EqualTo(p_channel_email,Y)),EqualTo(p_channel_tv,Y)), IsNotNull..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(3) Project [d_date_sk#12] + : : : : +- *(3) Filter ((((isnotnull(d_year#21) && isnotnull(d_moy#22)) && (d_year#21 = 1998)) && (d_moy#22 = 11)) && isnotnull(d_date_sk#12)) + : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_year#21,d_moy#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [c_customer_sk#10, c_current_addr_sk#7] + : : : +- *(4) Filter (isnotnull(c_customer_sk#10) && isnotnull(c_current_addr_sk#7)) + : : : +- *(4) FileScan parquet default.customer[c_customer_sk#10,c_current_addr_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(5) Project [ca_address_sk#8] + : : +- *(5) Filter ((isnotnull(ca_gmt_offset#23) && (ca_gmt_offset#23 = -5.00)) && isnotnull(ca_address_sk#8)) + : : +- *(5) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [i_item_sk#6] + : +- *(6) Filter ((isnotnull(i_category#24) && (i_category#24 = Jewelry)) && isnotnull(i_item_sk#6)) + : +- *(6) FileScan parquet default.item[i_item_sk#6,i_category#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry), IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange IdentityBroadcastMode + +- *(15) HashAggregate(keys=[], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) + +- Exchange SinglePartition + +- *(14) HashAggregate(keys=[], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) + +- *(14) Project [ss_ext_sales_price#4] + +- *(14) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight + :- *(14) Project [ss_item_sk#5, ss_ext_sales_price#4] + : +- *(14) BroadcastHashJoin [c_current_addr_sk#7], [ca_address_sk#8], Inner, BuildRight + : :- *(14) Project [ss_item_sk#5, ss_ext_sales_price#4, c_current_addr_sk#7] + : : +- *(14) BroadcastHashJoin [ss_customer_sk#9], [c_customer_sk#10], Inner, BuildRight + : : :- *(14) Project [ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] + : : : +- *(14) BroadcastHashJoin [ss_sold_date_sk#11], [d_date_sk#12], Inner, BuildRight + : : : :- *(14) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] + : : : : +- *(14) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight + : : : : :- *(14) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_store_sk#15, ss_ext_sales_price#4] + : : : : : +- *(14) Filter (((isnotnull(ss_store_sk#15) && isnotnull(ss_sold_date_sk#11)) && isnotnull(ss_customer_sk#9)) && isnotnull(ss_item_sk#5)) + : : : : : +- *(14) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#5,ss_customer_sk#9,ss_store_sk#15,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item..., ReadSchema: struct120 days #8]) ++- *(6) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3], functions=[sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 30) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 60) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 90) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) + +- Exchange hashpartitioning(substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3, 200) + +- *(5) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20) AS substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3], functions=[partial_sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 30) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 60) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 90) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) + +- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, w_warehouse_name#9, sm_type#2, web_name#3] + +- *(5) BroadcastHashJoin [ws_ship_date_sk#11], [d_date_sk#13], Inner, BuildRight + :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, w_warehouse_name#9, sm_type#2, web_name#3] + : +- *(5) BroadcastHashJoin [ws_web_site_sk#14], [web_site_sk#15], Inner, BuildRight + : :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, ws_web_site_sk#14, w_warehouse_name#9, sm_type#2] + : : +- *(5) BroadcastHashJoin [ws_ship_mode_sk#16], [sm_ship_mode_sk#17], Inner, BuildRight + : : :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, ws_web_site_sk#14, ws_ship_mode_sk#16, w_warehouse_name#9] + : : : +- *(5) BroadcastHashJoin [ws_warehouse_sk#18], [w_warehouse_sk#19], Inner, BuildRight + : : : :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, ws_web_site_sk#14, ws_ship_mode_sk#16, ws_warehouse_sk#18] + : : : : +- *(5) Filter (((isnotnull(ws_warehouse_sk#18) && isnotnull(ws_ship_mode_sk#16)) && isnotnull(ws_web_site_sk#14)) && isnotnull(ws_ship_date_sk#11)) + : : : : +- *(5) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_ship_date_sk#11,ws_web_site_sk#14,ws_ship_mode_sk#16,ws_warehouse_sk#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_ship_mode_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [sm_ship_mode_sk#17, sm_type#2] + : : +- *(2) Filter isnotnull(sm_ship_mode_sk#17) + : : +- *(2) FileScan parquet default.ship_mode[sm_ship_mode_sk#17,sm_type#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/ship_mode], PartitionFilters: [], PushedFilters: [IsNotNull(sm_ship_mode_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [web_site_sk#15, web_name#3] + : +- *(3) Filter isnotnull(web_site_sk#15) + : +- *(3) FileScan parquet default.web_site[web_site_sk#15,web_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [d_date_sk#13] + +- *(4) Filter (((isnotnull(d_month_seq#20) && (d_month_seq#20 >= 1200)) && (d_month_seq#20 <= 1211)) && isnotnull(d_date_sk#13)) + +- *(4) FileScan parquet default.date_dim[d_date_sk#13,d_month_seq#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt new file mode 100644 index 000000000..a6a12999a --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject [web_name,substring(w_warehouse_name, 1, 20),91 - 120 days ,31 - 60 days ,>120 days ,30 days ,sm_type,61 - 90 days ] + WholeStageCodegen + HashAggregate [web_name,sum,sum,sum,sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum,sm_type,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint))] [substring(w_warehouse_name, 1, 20),sum,sum,91 - 120 days ,31 - 60 days ,>120 days ,sum,30 days ,sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint))] + InputAdapter + Exchange [substring(w_warehouse_name, 1, 20),sm_type,web_name] #1 + WholeStageCodegen + HashAggregate [web_name,sum,sum,ws_sold_date_sk,w_warehouse_name,sum,sum,sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sm_type,sum,sum,ws_ship_date_sk] [sum,sum,sum,sum,sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sum,sum] + Project [web_name,ws_sold_date_sk,sm_type,ws_ship_date_sk,w_warehouse_name] + BroadcastHashJoin [ws_ship_date_sk,d_date_sk] + Project [web_name,ws_sold_date_sk,sm_type,ws_ship_date_sk,w_warehouse_name] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_web_site_sk,ws_sold_date_sk,sm_type,ws_ship_date_sk,w_warehouse_name] + BroadcastHashJoin [ws_ship_mode_sk,sm_ship_mode_sk] + Project [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,w_warehouse_name] + BroadcastHashJoin [ws_warehouse_sk,w_warehouse_sk] + Project [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,ws_warehouse_sk] + Filter [ws_warehouse_sk,ws_ship_mode_sk,ws_web_site_sk,ws_ship_date_sk] + Scan parquet default.web_sales [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,ws_warehouse_sk] [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,ws_warehouse_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [w_warehouse_sk,w_warehouse_name] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [sm_ship_mode_sk,sm_type] + Filter [sm_ship_mode_sk] + Scan parquet default.ship_mode [sm_ship_mode_sk,sm_type] [sm_ship_mode_sk,sm_type] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [web_site_sk,web_name] + Filter [web_site_sk] + Scan parquet default.web_site [web_site_sk,web_name] [web_site_sk,web_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt new file mode 100644 index 000000000..c1e90579f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt @@ -0,0 +1,31 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[i_manager_id#1 ASC NULLS FIRST,avg_monthly_sales#2 ASC NULLS FIRST,sum_sales#3 ASC NULLS FIRST], output=[i_manager_id#1,sum_sales#3,avg_monthly_sales#2]) ++- *(7) Project [i_manager_id#1, sum_sales#3, avg_monthly_sales#2] + +- *(7) Filter (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#3 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000) + +- Window [avg(_w0#4) windowspecdefinition(i_manager_id#1, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_manager_id#1] + +- *(6) Sort [i_manager_id#1 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(i_manager_id#1, 200) + +- *(5) HashAggregate(keys=[i_manager_id#1, d_moy#5], functions=[sum(UnscaledValue(ss_sales_price#6))]) + +- Exchange hashpartitioning(i_manager_id#1, d_moy#5, 200) + +- *(4) HashAggregate(keys=[i_manager_id#1, d_moy#5], functions=[partial_sum(UnscaledValue(ss_sales_price#6))]) + +- *(4) Project [i_manager_id#1, ss_sales_price#6, d_moy#5] + +- *(4) BroadcastHashJoin [ss_store_sk#7], [s_store_sk#8], Inner, BuildRight + :- *(4) Project [i_manager_id#1, ss_store_sk#7, ss_sales_price#6, d_moy#5] + : +- *(4) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight + : :- *(4) Project [i_manager_id#1, ss_sold_date_sk#9, ss_store_sk#7, ss_sales_price#6] + : : +- *(4) BroadcastHashJoin [i_item_sk#11], [ss_item_sk#12], Inner, BuildRight + : : :- *(4) Project [i_item_sk#11, i_manager_id#1] + : : : +- *(4) Filter ((((i_category#13 IN (Books,Children,Electronics) && i_class#14 IN (personal,portable,refernece,self-help)) && i_brand#15 IN (scholaramalgamalg #16,scholaramalgamalg #17,exportiunivamalg #18,scholaramalgamalg #18)) || ((i_category#13 IN (Women,Music,Men) && i_class#14 IN (accessories,classical,fragrances,pants)) && i_brand#15 IN (amalgimporto #19,edu packscholar #19,exportiimporto #19,importoamalg #19))) && isnotnull(i_item_sk#11)) + : : : +- *(4) FileScan parquet default.item[i_item_sk#11,i_brand#15,i_class#14,i_category#13,i_manager_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,refernece..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : +- *(1) Project [ss_sold_date_sk#9, ss_item_sk#12, ss_store_sk#7, ss_sales_price#6] + : : +- *(1) Filter ((isnotnull(ss_item_sk#12) && isnotnull(ss_sold_date_sk#9)) && isnotnull(ss_store_sk#7)) + : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#12,ss_store_sk#7,ss_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#10, d_moy#5] + : +- *(2) Filter (d_month_seq#20 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) && isnotnull(d_date_sk#10)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#10,d_month_seq#20,d_moy#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [s_store_sk#8] + +- *(3) Filter isnotnull(s_store_sk#8) + +- *(3) FileScan parquet default.store[s_store_sk#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt new file mode 100644 index 000000000..941d3fe37 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt @@ -0,0 +1,43 @@ +TakeOrderedAndProject [i_manager_id,avg_monthly_sales,sum_sales] + WholeStageCodegen + Project [i_manager_id,sum_sales,avg_monthly_sales] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [_w0,i_manager_id] + WholeStageCodegen + Sort [i_manager_id] + InputAdapter + Exchange [i_manager_id] #1 + WholeStageCodegen + HashAggregate [i_manager_id,d_moy,sum,sum(UnscaledValue(ss_sales_price))] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_manager_id,d_moy] #2 + WholeStageCodegen + HashAggregate [d_moy,sum,ss_sales_price,i_manager_id,sum] [sum,sum] + Project [i_manager_id,ss_sales_price,d_moy] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_manager_id,ss_store_sk,ss_sales_price,d_moy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_manager_id,ss_sold_date_sk,ss_store_sk,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_manager_id] + Filter [i_category,i_class,i_brand,i_item_sk] + Scan parquet default.item [i_class,i_item_sk,i_manager_id,i_category,i_brand] [i_class,i_item_sk,i_manager_id,i_category,i_brand] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk,d_moy] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy] [d_date_sk,d_month_seq,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk] [s_store_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/explain.txt new file mode 100644 index 000000000..fde217dca --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/explain.txt @@ -0,0 +1,170 @@ +== Physical Plan == +*(43) Sort [product_name#1 ASC NULLS FIRST, store_name#2 ASC NULLS FIRST, cnt#3 ASC NULLS FIRST], true, 0 ++- Exchange rangepartitioning(product_name#1 ASC NULLS FIRST, store_name#2 ASC NULLS FIRST, cnt#3 ASC NULLS FIRST, 200) + +- *(42) Project [product_name#1, store_name#2, store_zip#4, b_street_number#5, b_streen_name#6, b_city#7, b_zip#8, c_street_number#9, c_street_name#10, c_city#11, c_zip#12, syear#13, cnt#14, s1#15, s2#16, s3#17, s1#18, s2#19, s3#20, syear#21, cnt#3] + +- *(42) BroadcastHashJoin [item_sk#22, store_name#2, store_zip#4], [item_sk#23, store_name#24, store_zip#25], Inner, BuildRight, (cnt#3 <= cnt#14) + :- *(42) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[count(1), sum(UnscaledValue(ss_wholesale_cost#41)), sum(UnscaledValue(ss_list_price#42)), sum(UnscaledValue(ss_coupon_amt#43))]) + : +- Exchange hashpartitioning(i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40, 200) + : +- *(20) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#41)), partial_sum(UnscaledValue(ss_list_price#42)), partial_sum(UnscaledValue(ss_coupon_amt#43))]) + : +- *(20) Project [ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, d_year#39, d_year#40, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, i_item_sk#27, i_product_name#26] + : +- *(20) BroadcastHashJoin [ss_item_sk#44], [i_item_sk#27], Inner, BuildRight + : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] + : : +- *(20) BroadcastHashJoin [hd_income_band_sk#45], [ib_income_band_sk#46], Inner, BuildRight + : : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] + : : : +- *(20) BroadcastHashJoin [hd_income_band_sk#47], [ib_income_band_sk#48], Inner, BuildRight + : : : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] + : : : : +- *(20) BroadcastHashJoin [c_current_addr_sk#49], [ca_address_sk#50], Inner, BuildRight + : : : : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33] + : : : : : +- *(20) BroadcastHashJoin [ss_addr_sk#51], [ca_address_sk#52], Inner, BuildRight + : : : : : :- *(20) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45] + : : : : : : +- *(20) BroadcastHashJoin [c_current_hdemo_sk#53], [hd_demo_sk#54], Inner, BuildRight + : : : : : : :- *(20) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47] + : : : : : : : +- *(20) BroadcastHashJoin [ss_hdemo_sk#55], [hd_demo_sk#56], Inner, BuildRight + : : : : : : : :- *(20) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] + : : : : : : : : +- *(20) BroadcastHashJoin [ss_promo_sk#57], [p_promo_sk#58], Inner, BuildRight + : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] + : : : : : : : : : +- *(20) BroadcastHashJoin [c_current_cdemo_sk#59], [cd_demo_sk#60], Inner, BuildRight, NOT (cd_marital_status#61 = cd_marital_status#62) + : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, cd_marital_status#61] + : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_cdemo_sk#63], [cd_demo_sk#64], Inner, BuildRight + : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] + : : : : : : : : : : : +- *(20) BroadcastHashJoin [c_first_shipto_date_sk#65], [d_date_sk#66], Inner, BuildRight + : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, d_year#39] + : : : : : : : : : : : : +- *(20) BroadcastHashJoin [c_first_sales_date_sk#67], [d_date_sk#68], Inner, BuildRight + : : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67] + : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_customer_sk#69], [c_customer_sk#70], Inner, BuildRight + : : : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29] + : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_store_sk#71], [s_store_sk#72], Inner, BuildRight + : : : : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38] + : : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_sold_date_sk#73], [d_date_sk#74], Inner, BuildRight + : : : : : : : : : : : : : : : :- *(20) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] + : : : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_item_sk#44], [cs_item_sk#75], Inner, BuildRight + : : : : : : : : : : : : : : : : :- *(20) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] + : : : : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [cast(ss_item_sk#44 as bigint), cast(ss_ticket_number#76 as bigint)], [sr_item_sk#77, sr_ticket_number#78], Inner, BuildRight + : : : : : : : : : : : : : : : : : :- *(20) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_ticket_number#76, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] + : : : : : : : : : : : : : : : : : : +- *(20) Filter ((((((((isnotnull(ss_item_sk#44) && isnotnull(ss_ticket_number#76)) && isnotnull(ss_sold_date_sk#73)) && isnotnull(ss_store_sk#71)) && isnotnull(ss_customer_sk#69)) && isnotnull(ss_cdemo_sk#63)) && isnotnull(ss_promo_sk#57)) && isnotnull(ss_hdemo_sk#55)) && isnotnull(ss_addr_sk#51)) + : : : : : : : : : : : : : : : : : : +- *(20) FileScan parquet default.store_sales[ss_sold_date_sk#73,ss_item_sk#44,ss_customer_sk#69,ss_cdemo_sk#63,ss_hdemo_sk#55,ss_addr_sk#51,ss_store_sk#71,ss_promo_sk#57,ss_ticket_number#76,ss_wholesale_cost#41,ss_list_price#42,ss_coupon_amt#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_sto..., ReadSchema: struct + : : : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : : : : : : : +- *(4) Project [cs_item_sk#75] + : : : : : : : : : : : : : : : : +- *(4) Filter (isnotnull(sum(cs_ext_list_price#79)#80) && (cast(sum(cs_ext_list_price#79)#80 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))#84)), DecimalType(21,2)))) + : : : : : : : : : : : : : : : : +- *(4) HashAggregate(keys=[cs_item_sk#75], functions=[sum(UnscaledValue(cs_ext_list_price#79)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))]) + : : : : : : : : : : : : : : : : +- Exchange hashpartitioning(cs_item_sk#75, 200) + : : : : : : : : : : : : : : : : +- *(3) HashAggregate(keys=[cs_item_sk#75], functions=[partial_sum(UnscaledValue(cs_ext_list_price#79)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))]) + : : : : : : : : : : : : : : : : +- *(3) Project [cs_item_sk#75, cs_ext_list_price#79, cr_refunded_cash#81, cr_reversed_charge#82, cr_store_credit#83] + : : : : : : : : : : : : : : : : +- *(3) BroadcastHashJoin [cs_item_sk#75, cs_order_number#85], [cr_item_sk#86, cr_order_number#87], Inner, BuildRight + : : : : : : : : : : : : : : : : :- *(3) Project [cs_item_sk#75, cs_order_number#85, cs_ext_list_price#79] + : : : : : : : : : : : : : : : : : +- *(3) Filter (isnotnull(cs_item_sk#75) && isnotnull(cs_order_number#85)) + : : : : : : : : : : : : : : : : : +- *(3) FileScan parquet default.catalog_sales[cs_item_sk#75,cs_order_number#85,cs_ext_list_price#79] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)], ReadSchema: struct + : : : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295)))) + : : : : : : : : : : : : : : : : +- *(2) Project [cr_item_sk#86, cr_order_number#87, cr_refunded_cash#81, cr_reversed_charge#82, cr_store_credit#83] + : : : : : : : : : : : : : : : : +- *(2) Filter (isnotnull(cr_item_sk#86) && isnotnull(cr_order_number#87)) + : : : : : : : : : : : : : : : : +- *(2) FileScan parquet default.catalog_returns[cr_item_sk#86,cr_order_number#87,cr_refunded_cash#81,cr_reversed_charge#82,cr_store_credit#83] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct + : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : : : : : +- *(6) Project [s_store_sk#72, s_store_name#28, s_zip#29] + : : : : : : : : : : : : : : +- *(6) Filter ((isnotnull(s_store_sk#72) && isnotnull(s_zip#29)) && isnotnull(s_store_name#28)) + : : : : : : : : : : : : : : +- *(6) FileScan parquet default.store[s_store_sk#72,s_store_name#28,s_zip#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip), IsNotNull(s_store_name)], ReadSchema: struct + : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : : : : +- *(7) Project [c_customer_sk#70, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67] + : : : : : : : : : : : : : +- *(7) Filter (((((isnotnull(c_customer_sk#70) && isnotnull(c_first_sales_date_sk#67)) && isnotnull(c_first_shipto_date_sk#65)) && isnotnull(c_current_cdemo_sk#59)) && isnotnull(c_current_hdemo_sk#53)) && isnotnull(c_current_addr_sk#49)) + : : : : : : : : : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#70,c_current_cdemo_sk#59,c_current_hdemo_sk#53,c_current_addr_sk#49,c_first_shipto_date_sk#65,c_first_sales_date_sk#67] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), I..., ReadSchema: struct + : : : : : : : : : : : +- ReusedExchange [d_date_sk#66, d_year#40], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : +- *(10) Project [cd_demo_sk#64, cd_marital_status#61] + : : : : : : : : : : +- *(10) Filter (isnotnull(cd_demo_sk#64) && isnotnull(cd_marital_status#61)) + : : : : : : : : : : +- *(10) FileScan parquet default.customer_demographics[cd_demo_sk#64,cd_marital_status#61] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)], ReadSchema: struct + : : : : : : : : : +- ReusedExchange [cd_demo_sk#60, cd_marital_status#62], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : +- *(12) Project [p_promo_sk#58] + : : : : : : : : +- *(12) Filter isnotnull(p_promo_sk#58) + : : : : : : : : +- *(12) FileScan parquet default.promotion[p_promo_sk#58] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_promo_sk)], ReadSchema: struct + : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : +- *(13) Project [hd_demo_sk#56, hd_income_band_sk#47] + : : : : : : : +- *(13) Filter (isnotnull(hd_demo_sk#56) && isnotnull(hd_income_band_sk#47)) + : : : : : : : +- *(13) FileScan parquet default.household_demographics[hd_demo_sk#56,hd_income_band_sk#47] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)], ReadSchema: struct + : : : : : : +- ReusedExchange [hd_demo_sk#54, hd_income_band_sk#45], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(15) Project [ca_address_sk#52, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33] + : : : : : +- *(15) Filter isnotnull(ca_address_sk#52) + : : : : : +- *(15) FileScan parquet default.customer_address[ca_address_sk#52,ca_street_number#30,ca_street_name#31,ca_city#32,ca_zip#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + : : : : +- ReusedExchange [ca_address_sk#50, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(17) Project [ib_income_band_sk#48] + : : : +- *(17) Filter isnotnull(ib_income_band_sk#48) + : : : +- *(17) FileScan parquet default.income_band[ib_income_band_sk#48] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/income_band], PartitionFilters: [], PushedFilters: [IsNotNull(ib_income_band_sk)], ReadSchema: struct + : : +- ReusedExchange [ib_income_band_sk#46], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(19) Project [i_item_sk#27, i_product_name#26] + : +- *(19) Filter ((((((isnotnull(i_current_price#88) && i_color#89 IN (purple,burlywood,indian,spring,floral,medium)) && (i_current_price#88 >= 64.00)) && (cast(i_current_price#88 as decimal(12,2)) <= 74.00)) && (cast(i_current_price#88 as decimal(12,2)) >= 65.00)) && (cast(i_current_price#88 as decimal(12,2)) <= 79.00)) && isnotnull(i_item_sk#27)) + : +- *(19) FileScan parquet default.item[i_item_sk#27,i_current_price#88,i_color#89,i_product_name#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), In(i_color, [purple,burlywood,indian,spring,floral,medium]), Greater..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, string, true], input[2, string, true])) + +- *(41) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[count(1), sum(UnscaledValue(ss_wholesale_cost#41)), sum(UnscaledValue(ss_list_price#42)), sum(UnscaledValue(ss_coupon_amt#43))]) + +- Exchange hashpartitioning(i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40, 200) + +- *(40) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#41)), partial_sum(UnscaledValue(ss_list_price#42)), partial_sum(UnscaledValue(ss_coupon_amt#43))]) + +- *(40) Project [ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, d_year#39, d_year#40, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, i_item_sk#27, i_product_name#26] + +- *(40) BroadcastHashJoin [ss_item_sk#44], [i_item_sk#27], Inner, BuildRight + :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] + : +- *(40) BroadcastHashJoin [hd_income_band_sk#45], [ib_income_band_sk#46], Inner, BuildRight + : :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] + : : +- *(40) BroadcastHashJoin [hd_income_band_sk#47], [ib_income_band_sk#48], Inner, BuildRight + : : :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] + : : : +- *(40) BroadcastHashJoin [c_current_addr_sk#49], [ca_address_sk#50], Inner, BuildRight + : : : :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33] + : : : : +- *(40) BroadcastHashJoin [ss_addr_sk#51], [ca_address_sk#52], Inner, BuildRight + : : : : :- *(40) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45] + : : : : : +- *(40) BroadcastHashJoin [c_current_hdemo_sk#53], [hd_demo_sk#54], Inner, BuildRight + : : : : : :- *(40) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47] + : : : : : : +- *(40) BroadcastHashJoin [ss_hdemo_sk#55], [hd_demo_sk#56], Inner, BuildRight + : : : : : : :- *(40) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] + : : : : : : : +- *(40) BroadcastHashJoin [ss_promo_sk#57], [p_promo_sk#58], Inner, BuildRight + : : : : : : : :- *(40) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] + : : : : : : : : +- *(40) BroadcastHashJoin [c_current_cdemo_sk#59], [cd_demo_sk#60], Inner, BuildRight, NOT (cd_marital_status#61 = cd_marital_status#62) + : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, cd_marital_status#61] + : : : : : : : : : +- *(40) BroadcastHashJoin [ss_cdemo_sk#63], [cd_demo_sk#64], Inner, BuildRight + : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] + : : : : : : : : : : +- *(40) BroadcastHashJoin [c_first_shipto_date_sk#65], [d_date_sk#66], Inner, BuildRight + : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, d_year#39] + : : : : : : : : : : : +- *(40) BroadcastHashJoin [c_first_sales_date_sk#67], [d_date_sk#68], Inner, BuildRight + : : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67] + : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_customer_sk#69], [c_customer_sk#70], Inner, BuildRight + : : : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29] + : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_store_sk#71], [s_store_sk#72], Inner, BuildRight + : : : : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38] + : : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_sold_date_sk#73], [d_date_sk#74], Inner, BuildRight + : : : : : : : : : : : : : : :- *(40) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] + : : : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_item_sk#44], [cs_item_sk#75], Inner, BuildRight + : : : : : : : : : : : : : : : :- *(40) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] + : : : : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [cast(ss_item_sk#44 as bigint), cast(ss_ticket_number#76 as bigint)], [sr_item_sk#77, sr_ticket_number#78], Inner, BuildRight + : : : : : : : : : : : : : : : : :- *(40) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_ticket_number#76, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] + : : : : : : : : : : : : : : : : : +- *(40) Filter ((((((((isnotnull(ss_item_sk#44) && isnotnull(ss_ticket_number#76)) && isnotnull(ss_sold_date_sk#73)) && isnotnull(ss_store_sk#71)) && isnotnull(ss_customer_sk#69)) && isnotnull(ss_cdemo_sk#63)) && isnotnull(ss_promo_sk#57)) && isnotnull(ss_hdemo_sk#55)) && isnotnull(ss_addr_sk#51)) + : : : : : : : : : : : : : : : : : +- *(40) FileScan parquet default.store_sales[ss_sold_date_sk#73,ss_item_sk#44,ss_customer_sk#69,ss_cdemo_sk#63,ss_hdemo_sk#55,ss_addr_sk#51,ss_store_sk#71,ss_promo_sk#57,ss_ticket_number#76,ss_wholesale_cost#41,ss_list_price#42,ss_coupon_amt#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_sto..., ReadSchema: struct + : : : : : : : : : : : : : +- ReusedExchange [s_store_sk#72, s_store_name#28, s_zip#29], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : : : +- ReusedExchange [c_customer_sk#70, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : : +- ReusedExchange [d_date_sk#68, d_year#39], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : +- ReusedExchange [d_date_sk#66, d_year#40], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : +- ReusedExchange [cd_demo_sk#64, cd_marital_status#61], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : +- ReusedExchange [cd_demo_sk#60, cd_marital_status#62], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : +- ReusedExchange [p_promo_sk#58], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : +- ReusedExchange [hd_demo_sk#56, hd_income_band_sk#47], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- ReusedExchange [hd_demo_sk#54, hd_income_band_sk#45], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- ReusedExchange [ca_address_sk#52, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- ReusedExchange [ca_address_sk#50, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- ReusedExchange [ib_income_band_sk#48], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [ib_income_band_sk#46], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [i_item_sk#27, i_product_name#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt new file mode 100644 index 000000000..7336416f6 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt @@ -0,0 +1,229 @@ +WholeStageCodegen + Sort [product_name,store_name,cnt] + InputAdapter + Exchange [product_name,store_name,cnt] #1 + WholeStageCodegen + Project [cnt,b_streen_name,product_name,b_city,b_zip,syear,c_street_name,store_zip,c_street_number,s1,s3,c_zip,s3,s1,b_street_number,s2,store_name,cnt,s2,syear,c_city] + BroadcastHashJoin [cnt,store_zip,store_name,store_zip,store_name,item_sk,cnt,item_sk] + HashAggregate [d_year,ca_street_name,count,sum(UnscaledValue(ss_coupon_amt)),sum,ca_street_number,ca_street_name,sum(UnscaledValue(ss_list_price)),ca_zip,count(1),d_year,ca_city,sum,i_item_sk,sum,ca_street_number,d_year,ca_city,s_store_name,s_zip,sum(UnscaledValue(ss_wholesale_cost)),ca_zip,i_product_name] [c_city,b_streen_name,count,b_zip,sum(UnscaledValue(ss_coupon_amt)),sum,b_city,c_street_name,s3,sum(UnscaledValue(ss_list_price)),cnt,b_street_number,c_street_number,item_sk,count(1),store_zip,s2,product_name,sum,sum,store_name,c_zip,sum(UnscaledValue(ss_wholesale_cost)),s1,syear] + InputAdapter + Exchange [d_year,ca_street_name,ca_street_number,ca_street_name,ca_zip,d_year,ca_city,i_item_sk,ca_street_number,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] #2 + WholeStageCodegen + HashAggregate [d_year,ca_street_name,count,sum,sum,ca_street_number,ss_wholesale_cost,ca_street_name,sum,count,ca_zip,ss_coupon_amt,d_year,sum,ca_city,ss_list_price,sum,i_item_sk,sum,ca_street_number,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] [count,sum,sum,sum,count,sum,sum,sum] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,d_year,ca_zip,ca_street_number,ca_zip,d_year,i_item_sk,ca_street_name,ca_city,i_product_name,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,hd_income_band_sk,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,hd_income_band_sk,d_year,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,hd_income_band_sk,d_year,s_zip,hd_income_band_sk,d_year,ss_wholesale_cost] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,hd_income_band_sk,d_year,s_zip,d_year,ss_wholesale_cost] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,cd_marital_status,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,c_first_sales_date_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ss_customer_sk,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,d_year,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_item_sk,cs_item_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] + Filter [ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_promo_sk,ss_store_sk,ss_ticket_number,ss_addr_sk,ss_item_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number] + Filter [sr_item_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [cs_item_sk] + Filter [sum(cs_ext_list_price),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2)))] + HashAggregate [sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),cs_item_sk,sum,sum(UnscaledValue(cs_ext_list_price)),sum] [sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(cs_ext_list_price),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum,sum(UnscaledValue(cs_ext_list_price)),sum] + InputAdapter + Exchange [cs_item_sk] #5 + WholeStageCodegen + HashAggregate [sum,sum,cs_item_sk,sum,cr_refunded_cash,cr_reversed_charge,cr_store_credit,sum,cs_ext_list_price] [sum,sum,sum,sum] + Project [cr_store_credit,cs_item_sk,cr_refunded_cash,cs_ext_list_price,cr_reversed_charge] + BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + Project [cs_item_sk,cs_order_number,cs_ext_list_price] + Filter [cs_item_sk,cs_order_number] + Scan parquet default.catalog_sales [cs_item_sk,cs_order_number,cs_ext_list_price] [cs_item_sk,cs_order_number,cs_ext_list_price] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [cr_order_number,cr_store_credit,cr_refunded_cash,cr_reversed_charge,cr_item_sk] + Filter [cr_item_sk,cr_order_number] + Scan parquet default.catalog_returns [cr_order_number,cr_store_credit,cr_refunded_cash,cr_reversed_charge,cr_item_sk] [cr_order_number,cr_store_credit,cr_refunded_cash,cr_reversed_charge,cr_item_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [s_store_sk,s_store_name,s_zip] + Filter [s_store_sk,s_zip,s_store_name] + Scan parquet default.store [s_store_sk,s_store_name,s_zip] [s_store_sk,s_store_name,s_zip] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] + Filter [c_current_cdemo_sk,c_customer_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,c_current_addr_sk] + Scan parquet default.customer [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #10 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen + Project [cd_demo_sk,cd_marital_status] + Filter [cd_demo_sk,cd_marital_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] #11 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen + Project [p_promo_sk] + Filter [p_promo_sk] + Scan parquet default.promotion [p_promo_sk] [p_promo_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen + Project [hd_demo_sk,hd_income_band_sk] + Filter [hd_demo_sk,hd_income_band_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] #13 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen + Project [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] + Filter [ca_address_sk] + Scan parquet default.customer_address [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] + InputAdapter + ReusedExchange [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] #14 + InputAdapter + BroadcastExchange #15 + WholeStageCodegen + Project [ib_income_band_sk] + Filter [ib_income_band_sk] + Scan parquet default.income_band [ib_income_band_sk] [ib_income_band_sk] + InputAdapter + ReusedExchange [ib_income_band_sk] [ib_income_band_sk] #15 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen + Project [i_item_sk,i_product_name] + Filter [i_current_price,i_color,i_item_sk] + Scan parquet default.item [i_item_sk,i_current_price,i_color,i_product_name] [i_item_sk,i_current_price,i_color,i_product_name] + InputAdapter + BroadcastExchange #17 + WholeStageCodegen + HashAggregate [d_year,ca_street_name,count,sum(UnscaledValue(ss_coupon_amt)),ca_street_number,sum,ca_street_name,sum(UnscaledValue(ss_list_price)),ca_zip,count(1),d_year,ca_city,sum,i_item_sk,ca_street_number,sum,d_year,ca_city,s_store_name,s_zip,sum(UnscaledValue(ss_wholesale_cost)),ca_zip,i_product_name] [count,sum(UnscaledValue(ss_coupon_amt)),syear,s3,sum,sum(UnscaledValue(ss_list_price)),count(1),store_name,cnt,sum,s2,item_sk,s1,sum,store_zip,sum(UnscaledValue(ss_wholesale_cost))] + InputAdapter + Exchange [d_year,ca_street_name,ca_street_number,ca_street_name,ca_zip,d_year,ca_city,i_item_sk,ca_street_number,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] #18 + WholeStageCodegen + HashAggregate [d_year,ca_street_name,count,ca_street_number,ss_wholesale_cost,sum,sum,ca_street_name,count,ca_zip,ss_coupon_amt,d_year,ca_city,ss_list_price,sum,sum,i_item_sk,sum,ca_street_number,sum,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] [count,sum,sum,count,sum,sum,sum,sum] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,d_year,ca_zip,ca_street_number,ca_zip,d_year,i_item_sk,ca_street_name,ca_city,i_product_name,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,hd_income_band_sk,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,hd_income_band_sk,d_year,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,hd_income_band_sk,d_year,s_zip,hd_income_band_sk,d_year,ss_wholesale_cost] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,hd_income_band_sk,d_year,s_zip,d_year,ss_wholesale_cost] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] + Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,cd_marital_status,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] + Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] + Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,c_first_sales_date_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ss_customer_sk,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,d_year,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_item_sk,cs_item_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] + Filter [ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_promo_sk,ss_store_sk,ss_ticket_number,ss_addr_sk,ss_item_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] #3 + InputAdapter + ReusedExchange [cs_item_sk] [cs_item_sk] #4 + InputAdapter + BroadcastExchange #19 + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + ReusedExchange [s_store_sk,s_store_name,s_zip] [s_store_sk,s_store_name,s_zip] #8 + InputAdapter + ReusedExchange [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #10 + InputAdapter + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #10 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] #11 + InputAdapter + ReusedExchange [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] #11 + InputAdapter + ReusedExchange [p_promo_sk] [p_promo_sk] #12 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] #13 + InputAdapter + ReusedExchange [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] #13 + InputAdapter + ReusedExchange [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] #14 + InputAdapter + ReusedExchange [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] #14 + InputAdapter + ReusedExchange [ib_income_band_sk] [ib_income_band_sk] #15 + InputAdapter + ReusedExchange [ib_income_band_sk] [ib_income_band_sk] #15 + InputAdapter + ReusedExchange [i_item_sk,i_product_name] [i_item_sk,i_product_name] #16 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt new file mode 100644 index 000000000..b06bc7ded --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt @@ -0,0 +1,42 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST], output=[s_store_name#1,i_item_desc#2,revenue#3,i_current_price#4,i_wholesale_cost#5,i_brand#6]) ++- *(9) Project [s_store_name#1, i_item_desc#2, revenue#3, i_current_price#4, i_wholesale_cost#5, i_brand#6] + +- *(9) BroadcastHashJoin [ss_store_sk#7], [ss_store_sk#8], Inner, BuildRight, (cast(revenue#3 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#9)), DecimalType(23,7))) + :- *(9) Project [s_store_name#1, ss_store_sk#7, revenue#3, i_item_desc#2, i_current_price#4, i_wholesale_cost#5, i_brand#6] + : +- *(9) BroadcastHashJoin [ss_item_sk#10], [i_item_sk#11], Inner, BuildRight + : :- *(9) Project [s_store_name#1, ss_store_sk#7, ss_item_sk#10, revenue#3] + : : +- *(9) BroadcastHashJoin [s_store_sk#12], [ss_store_sk#7], Inner, BuildRight + : : :- *(9) Project [s_store_sk#12, s_store_name#1] + : : : +- *(9) Filter isnotnull(s_store_sk#12) + : : : +- *(9) FileScan parquet default.store[s_store_sk#12,s_store_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Filter isnotnull(revenue#3) + : : +- *(3) HashAggregate(keys=[ss_store_sk#7, ss_item_sk#10], functions=[sum(UnscaledValue(ss_sales_price#13))]) + : : +- Exchange hashpartitioning(ss_store_sk#7, ss_item_sk#10, 200) + : : +- *(2) HashAggregate(keys=[ss_store_sk#7, ss_item_sk#10], functions=[partial_sum(UnscaledValue(ss_sales_price#13))]) + : : +- *(2) Project [ss_item_sk#10, ss_store_sk#7, ss_sales_price#13] + : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight + : : :- *(2) Project [ss_sold_date_sk#14, ss_item_sk#10, ss_store_sk#7, ss_sales_price#13] + : : : +- *(2) Filter ((isnotnull(ss_sold_date_sk#14) && isnotnull(ss_store_sk#7)) && isnotnull(ss_item_sk#10)) + : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#14,ss_item_sk#10,ss_store_sk#7,ss_sales_price#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [d_date_sk#15] + : : +- *(1) Filter (((isnotnull(d_month_seq#16) && (d_month_seq#16 >= 1176)) && (d_month_seq#16 <= 1187)) && isnotnull(d_date_sk#15)) + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#15,d_month_seq#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1176), LessThanOrEqual(d_month_seq,1187),..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [i_item_sk#11, i_item_desc#2, i_current_price#4, i_wholesale_cost#5, i_brand#6] + : +- *(4) Filter isnotnull(i_item_sk#11) + : +- *(4) FileScan parquet default.item[i_item_sk#11,i_item_desc#2,i_current_price#4,i_wholesale_cost#5,i_brand#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt new file mode 100644 index 000000000..a83319f19 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt @@ -0,0 +1,57 @@ +TakeOrderedAndProject [i_item_desc,i_brand,revenue,i_wholesale_cost,i_current_price,s_store_name] + WholeStageCodegen + Project [i_item_desc,i_brand,revenue,i_wholesale_cost,i_current_price,s_store_name] + BroadcastHashJoin [ss_store_sk,ss_store_sk,revenue,ave] + Project [s_store_name,i_current_price,ss_store_sk,revenue,i_brand,i_wholesale_cost,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_name,ss_store_sk,ss_item_sk,revenue] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [s_store_sk,s_store_name] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_store_name] [s_store_sk,s_store_name] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen + Filter [revenue] + HashAggregate [ss_store_sk,ss_item_sk,sum,sum(UnscaledValue(ss_sales_price))] [sum(UnscaledValue(ss_sales_price)),revenue,sum] + InputAdapter + Exchange [ss_store_sk,ss_item_sk] #2 + WholeStageCodegen + HashAggregate [ss_item_sk,sum,sum,ss_store_sk,ss_sales_price] [sum,sum] + Project [ss_item_sk,ss_store_sk,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [i_current_price,i_item_sk,i_brand,i_wholesale_cost,i_item_desc] + Filter [i_item_sk] + Scan parquet default.item [i_current_price,i_item_sk,i_brand,i_wholesale_cost,i_item_desc] [i_current_price,i_item_sk,i_brand,i_wholesale_cost,i_item_desc] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + HashAggregate [ss_store_sk,sum,count,avg(revenue)] [avg(revenue),ave,sum,count] + InputAdapter + Exchange [ss_store_sk] #6 + WholeStageCodegen + HashAggregate [sum,sum,count,revenue,ss_store_sk,count] [sum,count,sum,count] + HashAggregate [ss_store_sk,ss_item_sk,sum,sum(UnscaledValue(ss_sales_price))] [sum(UnscaledValue(ss_sales_price)),revenue,sum] + InputAdapter + Exchange [ss_store_sk,ss_item_sk] #7 + WholeStageCodegen + HashAggregate [sum,ss_store_sk,sum,ss_sales_price,ss_item_sk] [sum,sum] + Project [ss_item_sk,ss_store_sk,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt new file mode 100644 index 000000000..14d9e4ec9 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt @@ -0,0 +1,54 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[w_warehouse_name#1 ASC NULLS FIRST], output=[w_warehouse_name#1,w_warehouse_sq_ft#2,w_city#3,w_county#4,w_state#5,w_country#6,ship_carriers#7,year#8,jan_sales#9,feb_sales#10,mar_sales#11,apr_sales#12,may_sales#13,jun_sales#14,jul_sales#15,aug_sales#16,sep_sales#17,oct_sales#18,nov_sales#19,dec_sales#20,jan_sales_per_sq_foot#21,feb_sales_per_sq_foot#22,mar_sales_per_sq_foot#23,apr_sales_per_sq_foot#24,... 20 more fields]) ++- *(14) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, ship_carriers#7, year#8], functions=[sum(jan_sales#25), sum(feb_sales#26), sum(mar_sales#27), sum(apr_sales#28), sum(may_sales#29), sum(jun_sales#30), sum(jul_sales#31), sum(aug_sales#32), sum(sep_sales#33), sum(oct_sales#34), sum(nov_sales#35), sum(dec_sales#36), sum(CheckOverflow((promote_precision(jan_sales#25) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(feb_sales#26) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(mar_sales#27) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(apr_sales#28) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(may_sales#29) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jun_sales#30) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jul_sales#31) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(aug_sales#32) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(sep_sales#33) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(oct_sales#34) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(nov_sales#35) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(dec_sales#36) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), ... 12 more fields]) + +- Exchange hashpartitioning(w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, ship_carriers#7, year#8, 200) + +- *(13) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, ship_carriers#7, year#8], functions=[partial_sum(jan_sales#25), partial_sum(feb_sales#26), partial_sum(mar_sales#27), partial_sum(apr_sales#28), partial_sum(may_sales#29), partial_sum(jun_sales#30), partial_sum(jul_sales#31), partial_sum(aug_sales#32), partial_sum(sep_sales#33), partial_sum(oct_sales#34), partial_sum(nov_sales#35), partial_sum(dec_sales#36), partial_sum(CheckOverflow((promote_precision(jan_sales#25) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(feb_sales#26) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(mar_sales#27) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(apr_sales#28) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(may_sales#29) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jun_sales#30) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jul_sales#31) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(aug_sales#32) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(sep_sales#33) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(oct_sales#34) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(nov_sales#35) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(dec_sales#36) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), ... 12 more fields]) + +- Union + :- *(6) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) + : +- Exchange hashpartitioning(w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, 200) + : +- *(5) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) + : +- *(5) Project [ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] + : +- *(5) BroadcastHashJoin [ws_ship_mode_sk#42], [sm_ship_mode_sk#43], Inner, BuildRight + : :- *(5) Project [ws_ship_mode_sk#42, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] + : : +- *(5) BroadcastHashJoin [ws_sold_time_sk#44], [t_time_sk#45], Inner, BuildRight + : : :- *(5) Project [ws_sold_time_sk#44, ws_ship_mode_sk#42, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] + : : : +- *(5) BroadcastHashJoin [ws_sold_date_sk#46], [d_date_sk#47], Inner, BuildRight + : : : :- *(5) Project [ws_sold_date_sk#46, ws_sold_time_sk#44, ws_ship_mode_sk#42, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6] + : : : : +- *(5) BroadcastHashJoin [ws_warehouse_sk#48], [w_warehouse_sk#49], Inner, BuildRight + : : : : :- *(5) Project [ws_sold_date_sk#46, ws_sold_time_sk#44, ws_ship_mode_sk#42, ws_warehouse_sk#48, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41] + : : : : : +- *(5) Filter (((isnotnull(ws_warehouse_sk#48) && isnotnull(ws_sold_date_sk#46)) && isnotnull(ws_sold_time_sk#44)) && isnotnull(ws_ship_mode_sk#42)) + : : : : : +- *(5) FileScan parquet default.web_sales[ws_sold_date_sk#46,ws_sold_time_sk#44,ws_ship_mode_sk#42,ws_warehouse_sk#48,ws_quantity#40,ws_ext_sales_price#39,ws_net_paid#41] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [t_time_sk#45] + : : +- *(3) Filter (((isnotnull(t_time#50) && (t_time#50 >= 30838)) && (t_time#50 <= 59638)) && isnotnull(t_time_sk#45)) + : : +- *(3) FileScan parquet default.time_dim[t_time_sk#45,t_time#50] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_time), GreaterThanOrEqual(t_time,30838), LessThanOrEqual(t_time,59638), IsNotNull(t_..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [sm_ship_mode_sk#43] + : +- *(4) Filter (sm_carrier#51 IN (DHL,BARIAN) && isnotnull(sm_ship_mode_sk#43)) + : +- *(4) FileScan parquet default.ship_mode[sm_ship_mode_sk#43,sm_carrier#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/ship_mode], PartitionFilters: [], PushedFilters: [In(sm_carrier, [DHL,BARIAN]), IsNotNull(sm_ship_mode_sk)], ReadSchema: struct + +- *(12) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) + +- Exchange hashpartitioning(w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, 200) + +- *(11) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) + +- *(11) Project [cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] + +- *(11) BroadcastHashJoin [cs_ship_mode_sk#55], [sm_ship_mode_sk#43], Inner, BuildRight + :- *(11) Project [cs_ship_mode_sk#55, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] + : +- *(11) BroadcastHashJoin [cs_sold_time_sk#56], [t_time_sk#45], Inner, BuildRight + : :- *(11) Project [cs_sold_time_sk#56, cs_ship_mode_sk#55, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] + : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#57], [d_date_sk#47], Inner, BuildRight + : : :- *(11) Project [cs_sold_date_sk#57, cs_sold_time_sk#56, cs_ship_mode_sk#55, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6] + : : : +- *(11) BroadcastHashJoin [cs_warehouse_sk#58], [w_warehouse_sk#49], Inner, BuildRight + : : : :- *(11) Project [cs_sold_date_sk#57, cs_sold_time_sk#56, cs_ship_mode_sk#55, cs_warehouse_sk#58, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54] + : : : : +- *(11) Filter (((isnotnull(cs_warehouse_sk#58) && isnotnull(cs_sold_date_sk#57)) && isnotnull(cs_sold_time_sk#56)) && isnotnull(cs_ship_mode_sk#55)) + : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#57,cs_sold_time_sk#56,cs_ship_mode_sk#55,cs_warehouse_sk#58,cs_quantity#53,cs_sales_price#52,cs_net_paid_inc_tax#54] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_sold_time_sk), IsNotNull(cs..., ReadSchema: struct= 1200)) && (d_month_seq#33 <= 1211)) && isnotnull(d_date_sk#32)) + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#32,d_month_seq#33,d_year#18,d_moy#20,d_qoy#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [s_store_sk#30, s_store_id#26] + : +- *(2) Filter isnotnull(s_store_sk#30) + : +- *(2) FileScan parquet default.store[s_store_sk#30,s_store_id#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [i_item_sk#28, i_brand#24, i_class#23, i_category#22, i_product_name#25] + +- *(3) Filter isnotnull(i_item_sk#28) + +- *(3) FileScan parquet default.item[i_item_sk#28,i_brand#24,i_class#23,i_category#22,i_product_name#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt new file mode 100644 index 000000000..9381a8892 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt @@ -0,0 +1,43 @@ +TakeOrderedAndProject [d_qoy,i_category,i_brand,i_class,d_year,d_moy,rk,i_product_name,s_store_id,sumsales] + WholeStageCodegen + Filter [rk] + InputAdapter + Window [sumsales,i_category] + WholeStageCodegen + Sort [i_category,sumsales] + InputAdapter + Exchange [i_category] #1 + WholeStageCodegen + HashAggregate [s_store_id,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00)),i_category,sum,i_brand,i_product_name,spark_grouping_id,d_moy,i_class,d_year,d_qoy] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum] + InputAdapter + Exchange [s_store_id,i_category,i_brand,i_product_name,spark_grouping_id,d_moy,i_class,d_year,d_qoy] #2 + WholeStageCodegen + HashAggregate [s_store_id,i_category,sum,i_brand,ss_sales_price,i_product_name,spark_grouping_id,d_moy,i_class,ss_quantity,d_year,d_qoy,sum] [sum,sum] + Expand [d_year,i_class,d_qoy,d_moy,i_brand,i_category,s_store_id,i_product_name,ss_sales_price,ss_quantity] + Project [d_year,d_qoy,d_moy,i_brand,s_store_id,ss_sales_price,i_category,ss_quantity,i_class,i_product_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_id,d_qoy,ss_quantity,ss_item_sk,d_moy,d_year,ss_sales_price] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [d_qoy,ss_quantity,ss_item_sk,d_moy,d_year,ss_store_sk,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk] + Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] [ss_quantity,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk,d_year,d_moy,d_qoy] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_qoy,d_moy,d_year,d_month_seq,d_date_sk] [d_qoy,d_moy,d_year,d_month_seq,d_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk,s_store_id] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_store_id] [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [i_class,i_item_sk,i_product_name,i_category,i_brand] + Filter [i_item_sk] + Scan parquet default.item [i_class,i_item_sk,i_product_name,i_category,i_brand] [i_class,i_item_sk,i_product_name,i_category,i_brand] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt new file mode 100644 index 000000000..c0d696ed3 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt @@ -0,0 +1,41 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,ss_ticket_number#2 ASC NULLS FIRST], output=[c_last_name#1,c_first_name#3,ca_city#4,bought_city#5,ss_ticket_number#2,extended_price#6,extended_tax#7,list_price#8]) ++- *(8) Project [c_last_name#1, c_first_name#3, ca_city#4, bought_city#5, ss_ticket_number#2, extended_price#6, extended_tax#7, list_price#8] + +- *(8) BroadcastHashJoin [c_current_addr_sk#9], [ca_address_sk#10], Inner, BuildRight, NOT (ca_city#4 = bought_city#5) + :- *(8) Project [ss_ticket_number#2, bought_city#5, extended_price#6, list_price#8, extended_tax#7, c_current_addr_sk#9, c_first_name#3, c_last_name#1] + : +- *(8) BroadcastHashJoin [ss_customer_sk#11], [c_customer_sk#12], Inner, BuildRight + : :- *(8) HashAggregate(keys=[ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4], functions=[sum(UnscaledValue(ss_ext_sales_price#14)), sum(UnscaledValue(ss_ext_list_price#15)), sum(UnscaledValue(ss_ext_tax#16))]) + : : +- Exchange hashpartitioning(ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4, 200) + : : +- *(5) HashAggregate(keys=[ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#14)), partial_sum(UnscaledValue(ss_ext_list_price#15)), partial_sum(UnscaledValue(ss_ext_tax#16))]) + : : +- *(5) Project [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16, ca_city#4] + : : +- *(5) BroadcastHashJoin [ss_addr_sk#13], [ca_address_sk#10], Inner, BuildRight + : : :- *(5) Project [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] + : : : +- *(5) BroadcastHashJoin [ss_hdemo_sk#17], [hd_demo_sk#18], Inner, BuildRight + : : : :- *(5) Project [ss_customer_sk#11, ss_hdemo_sk#17, ss_addr_sk#13, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] + : : : : +- *(5) BroadcastHashJoin [ss_store_sk#19], [s_store_sk#20], Inner, BuildRight + : : : : :- *(5) Project [ss_customer_sk#11, ss_hdemo_sk#17, ss_addr_sk#13, ss_store_sk#19, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] + : : : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight + : : : : : :- *(5) Project [ss_sold_date_sk#21, ss_customer_sk#11, ss_hdemo_sk#17, ss_addr_sk#13, ss_store_sk#19, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] + : : : : : : +- *(5) Filter ((((isnotnull(ss_sold_date_sk#21) && isnotnull(ss_store_sk#19)) && isnotnull(ss_hdemo_sk#17)) && isnotnull(ss_addr_sk#13)) && isnotnull(ss_customer_sk#11)) + : : : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#21,ss_customer_sk#11,ss_hdemo_sk#17,ss_addr_sk#13,ss_store_sk#19,ss_ticket_number#2,ss_ext_sales_price#14,ss_ext_list_price#15,ss_ext_tax#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk..., ReadSchema: struct= 1)) && (d_dom#23 <= 2)) && d_year#24 IN (1999,2000,2001)) && isnotnull(d_date_sk#22)) + : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#22,d_year#24,d_dom#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [s_store_sk#20] + : : : : +- *(2) Filter (s_city#25 IN (Midway,Fairview) && isnotnull(s_store_sk#20)) + : : : : +- *(2) FileScan parquet default.store[s_store_sk#20,s_city#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [In(s_city, [Midway,Fairview]), IsNotNull(s_store_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(3) Project [hd_demo_sk#18] + : : : +- *(3) Filter (((hd_dep_count#26 = 4) || (hd_vehicle_count#27 = 3)) && isnotnull(hd_demo_sk#18)) + : : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#18,hd_dep_count#26,hd_vehicle_count#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(4) Project [ca_address_sk#10, ca_city#4] + : : +- *(4) Filter (isnotnull(ca_address_sk#10) && isnotnull(ca_city#4)) + : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_city#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [c_customer_sk#12, c_current_addr_sk#9, c_first_name#3, c_last_name#1] + : +- *(6) Filter (isnotnull(c_customer_sk#12) && isnotnull(c_current_addr_sk#9)) + : +- *(6) FileScan parquet default.customer[c_customer_sk#12,c_current_addr_sk#9,c_first_name#3,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + +- ReusedExchange [ca_address_sk#10, ca_city#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt new file mode 100644 index 000000000..1e9ce42b5 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt @@ -0,0 +1,54 @@ +TakeOrderedAndProject [bought_city,ss_ticket_number,extended_price,list_price,c_last_name,c_first_name,extended_tax,ca_city] + WholeStageCodegen + Project [bought_city,ss_ticket_number,extended_price,list_price,c_last_name,c_first_name,extended_tax,ca_city] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] + Project [c_current_addr_sk,extended_price,c_last_name,list_price,c_first_name,ss_ticket_number,extended_tax,bought_city] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [sum,sum,ca_city,ss_customer_sk,sum(UnscaledValue(ss_ext_tax)),sum(UnscaledValue(ss_ext_sales_price)),ss_ticket_number,ss_addr_sk,sum(UnscaledValue(ss_ext_list_price)),sum] [sum,extended_price,sum,list_price,sum(UnscaledValue(ss_ext_tax)),sum(UnscaledValue(ss_ext_sales_price)),extended_tax,sum(UnscaledValue(ss_ext_list_price)),sum,bought_city] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 + WholeStageCodegen + HashAggregate [sum,ss_ext_tax,sum,ca_city,ss_customer_sk,sum,sum,sum,ss_ext_sales_price,ss_ext_list_price,ss_ticket_number,ss_addr_sk,sum] [sum,sum,sum,sum,sum,sum] + Project [ss_addr_sk,ss_ext_tax,ss_customer_sk,ca_city,ss_ext_sales_price,ss_ext_list_price,ss_ticket_number] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_ext_tax,ss_customer_sk,ss_ext_sales_price,ss_ext_list_price,ss_ticket_number] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_addr_sk,ss_ext_tax,ss_customer_sk,ss_ext_sales_price,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] + Filter [ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [d_date_sk] + Filter [d_dom,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_dom] [d_date_sk,d_year,d_dom] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [s_store_sk] + Filter [s_city,s_store_sk] + Scan parquet default.store [s_store_sk,s_city] [s_store_sk,s_city] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [hd_demo_sk] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [ca_address_sk,ca_city] + Filter [ca_address_sk,ca_city] + Scan parquet default.customer_address [ca_address_sk,ca_city] [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + Filter [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ca_address_sk,ca_city] [ca_address_sk,ca_city] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt new file mode 100644 index 000000000..76eb92a0d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt @@ -0,0 +1,48 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[cd_gender#1 ASC NULLS FIRST,cd_marital_status#2 ASC NULLS FIRST,cd_education_status#3 ASC NULLS FIRST,cd_purchase_estimate#4 ASC NULLS FIRST,cd_credit_rating#5 ASC NULLS FIRST], output=[cd_gender#1,cd_marital_status#2,cd_education_status#3,cnt1#6,cd_purchase_estimate#4,cnt2#7,cd_credit_rating#5,cnt3#8]) ++- *(10) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5], functions=[count(1)]) + +- Exchange hashpartitioning(cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, 200) + +- *(9) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5], functions=[partial_count(1)]) + +- *(9) Project [cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5] + +- *(9) BroadcastHashJoin [c_current_cdemo_sk#9], [cd_demo_sk#10], Inner, BuildRight + :- *(9) Project [c_current_cdemo_sk#9] + : +- *(9) BroadcastHashJoin [c_current_addr_sk#11], [ca_address_sk#12], Inner, BuildRight + : :- *(9) Project [c_current_cdemo_sk#9, c_current_addr_sk#11] + : : +- *(9) BroadcastHashJoin [c_customer_sk#13], [cs_ship_customer_sk#14], LeftAnti, BuildRight + : : :- *(9) BroadcastHashJoin [c_customer_sk#13], [ws_bill_customer_sk#15], LeftAnti, BuildRight + : : : :- *(9) BroadcastHashJoin [c_customer_sk#13], [ss_customer_sk#16], LeftSemi, BuildRight + : : : : :- *(9) Project [c_customer_sk#13, c_current_cdemo_sk#9, c_current_addr_sk#11] + : : : : : +- *(9) Filter (isnotnull(c_current_addr_sk#11) && isnotnull(c_current_cdemo_sk#9)) + : : : : : +- *(9) FileScan parquet default.customer[c_customer_sk#13,c_current_cdemo_sk#9,c_current_addr_sk#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [ss_customer_sk#16] + : : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + : : : : :- *(2) Project [ss_sold_date_sk#17, ss_customer_sk#16] + : : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#17) + : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_customer_sk#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [d_date_sk#18] + : : : : +- *(1) Filter (((((isnotnull(d_year#19) && isnotnull(d_moy#20)) && (d_year#19 = 2001)) && (d_moy#20 >= 4)) && (d_moy#20 <= 6)) && isnotnull(d_date_sk#18)) + : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_year#19,d_moy#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), GreaterThanOrEqual(d_moy,4), LessThan..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [ws_bill_customer_sk#15] + : : : +- *(4) BroadcastHashJoin [ws_sold_date_sk#21], [d_date_sk#18], Inner, BuildRight + : : : :- *(4) Project [ws_sold_date_sk#21, ws_bill_customer_sk#15] + : : : : +- *(4) Filter isnotnull(ws_sold_date_sk#21) + : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [cs_ship_customer_sk#14] + : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#22], [d_date_sk#18], Inner, BuildRight + : : :- *(6) Project [cs_sold_date_sk#22, cs_ship_customer_sk#14] + : : : +- *(6) Filter isnotnull(cs_sold_date_sk#22) + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#22,cs_ship_customer_sk#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [ca_address_sk#12] + : +- *(7) Filter (ca_state#23 IN (KY,GA,NM) && isnotnull(ca_address_sk#12)) + : +- *(7) FileScan parquet default.customer_address[ca_address_sk#12,ca_state#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [In(ca_state, [KY,GA,NM]), IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [cd_demo_sk#10, cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5] + +- *(8) Filter isnotnull(cd_demo_sk#10) + +- *(8) FileScan parquet default.customer_demographics[cd_demo_sk#10,cd_gender#1,cd_marital_status#2,cd_education_status#3,cd_purchase_estimate#4,cd_credit_rating#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [d_date_sk#15] + : : +- *(2) Filter ((isnotnull(d_year#21) && (d_year#21 = 2000)) && isnotnull(d_date_sk#15)) + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#15,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [i_item_sk#13, i_item_id#1] + : +- *(3) Filter isnotnull(i_item_sk#13) + : +- *(3) FileScan parquet default.item[i_item_sk#13,i_item_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [p_promo_sk#11] + +- *(4) Filter (((p_channel_email#22 = N) || (p_channel_event#23 = N)) && isnotnull(p_promo_sk#11)) + +- *(4) FileScan parquet default.promotion[p_promo_sk#11,p_channel_email#22,p_channel_event#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt new file mode 100644 index 000000000..b72978a57 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject [agg2,i_item_id,agg1,agg4,agg3] + WholeStageCodegen + HashAggregate [i_item_id,count,sum,count,avg(UnscaledValue(ss_sales_price)),sum,avg(cast(ss_quantity as bigint)),sum,sum,avg(UnscaledValue(ss_coupon_amt)),count,count,avg(UnscaledValue(ss_list_price))] [agg2,agg1,count,sum,count,avg(UnscaledValue(ss_sales_price)),sum,agg4,avg(cast(ss_quantity as bigint)),sum,sum,avg(UnscaledValue(ss_coupon_amt)),count,agg3,count,avg(UnscaledValue(ss_list_price))] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen + HashAggregate [count,i_item_id,count,count,sum,count,sum,count,sum,sum,sum,ss_coupon_amt,count,ss_sales_price,sum,sum,ss_list_price,sum,count,ss_quantity,count] [count,count,count,sum,count,sum,count,sum,sum,sum,count,sum,sum,sum,count,count] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_sales_price,i_item_id] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_sales_price,ss_promo_sk,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_promo_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_promo_sk] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk] + Filter [ss_cdemo_sk,ss_sold_date_sk,ss_item_sk,ss_promo_sk] + Scan parquet default.store_sales [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk] [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [cd_demo_sk] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [p_promo_sk] + Filter [p_channel_email,p_channel_event,p_promo_sk] + Scan parquet default.promotion [p_promo_sk,p_channel_email,p_channel_event] [p_promo_sk,p_channel_email,p_channel_event] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt new file mode 100644 index 000000000..030bc811b --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt @@ -0,0 +1,46 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WHEN (cast(lochierarchy#1 as int) = 0) THEN s_state#2 END ASC NULLS FIRST,rank_within_parent#3 ASC NULLS FIRST], output=[total_sum#4,s_state#2,s_county#5,lochierarchy#1,rank_within_parent#3]) ++- *(11) Project [total_sum#4, s_state#2, s_county#5, lochierarchy#1, rank_within_parent#3] + +- Window [rank(_w3#6) windowspecdefinition(_w1#7, _w2#8, _w3#6 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#3], [_w1#7, _w2#8], [_w3#6 DESC NULLS LAST] + +- *(10) Sort [_w1#7 ASC NULLS FIRST, _w2#8 ASC NULLS FIRST, _w3#6 DESC NULLS LAST], false, 0 + +- Exchange hashpartitioning(_w1#7, _w2#8, 200) + +- *(9) HashAggregate(keys=[s_state#2, s_county#5, spark_grouping_id#9], functions=[sum(UnscaledValue(ss_net_profit#10))]) + +- Exchange hashpartitioning(s_state#2, s_county#5, spark_grouping_id#9, 200) + +- *(8) HashAggregate(keys=[s_state#2, s_county#5, spark_grouping_id#9], functions=[partial_sum(UnscaledValue(ss_net_profit#10))]) + +- *(8) Expand [List(ss_net_profit#10, s_state#11, s_county#12, 0), List(ss_net_profit#10, s_state#11, null, 1), List(ss_net_profit#10, null, null, 3)], [ss_net_profit#10, s_state#2, s_county#5, spark_grouping_id#9] + +- *(8) Project [ss_net_profit#10, s_state#13 AS s_state#11, s_county#14 AS s_county#12] + +- *(8) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight + :- *(8) Project [ss_store_sk#15, ss_net_profit#10] + : +- *(8) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + : :- *(8) Project [ss_sold_date_sk#17, ss_store_sk#15, ss_net_profit#10] + : : +- *(8) Filter (isnotnull(ss_sold_date_sk#17) && isnotnull(ss_store_sk#15)) + : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_store_sk#15,ss_net_profit#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [d_date_sk#18] + : +- *(1) Filter (((isnotnull(d_month_seq#19) && (d_month_seq#19 >= 1200)) && (d_month_seq#19 <= 1211)) && isnotnull(d_date_sk#18)) + : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_month_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) BroadcastHashJoin [s_state#13], [s_state#20], LeftSemi, BuildRight + :- *(7) Project [s_store_sk#16, s_county#14, s_state#13] + : +- *(7) Filter isnotnull(s_store_sk#16) + : +- *(7) FileScan parquet default.store[s_store_sk#16,s_county#14,s_state#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- *(6) Project [s_state#20] + +- *(6) Filter (isnotnull(ranking#21) && (ranking#21 <= 5)) + +- Window [rank(_w2#22) windowspecdefinition(s_state#13, _w2#22 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#21], [s_state#13], [_w2#22 DESC NULLS LAST] + +- *(5) Sort [s_state#13 ASC NULLS FIRST, _w2#22 DESC NULLS LAST], false, 0 + +- *(5) HashAggregate(keys=[s_state#13], functions=[sum(UnscaledValue(ss_net_profit#10))]) + +- Exchange hashpartitioning(s_state#13, 200) + +- *(4) HashAggregate(keys=[s_state#13], functions=[partial_sum(UnscaledValue(ss_net_profit#10))]) + +- *(4) Project [ss_net_profit#10, s_state#13] + +- *(4) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + :- *(4) Project [ss_sold_date_sk#17, ss_net_profit#10, s_state#13] + : +- *(4) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight + : :- *(4) Project [ss_sold_date_sk#17, ss_store_sk#15, ss_net_profit#10] + : : +- *(4) Filter (isnotnull(ss_store_sk#15) && isnotnull(ss_sold_date_sk#17)) + : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_store_sk#15,ss_net_profit#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [s_store_sk#16, s_state#13] + : +- *(2) Filter isnotnull(s_store_sk#16) + : +- *(2) FileScan parquet default.store[s_store_sk#16,s_state#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt new file mode 100644 index 000000000..2ef724de4 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt @@ -0,0 +1,65 @@ +TakeOrderedAndProject [total_sum,lochierarchy,s_state,s_county,rank_within_parent] + WholeStageCodegen + Project [total_sum,lochierarchy,s_state,s_county,rank_within_parent] + InputAdapter + Window [_w3,_w1,_w2] + WholeStageCodegen + Sort [_w1,_w2,_w3] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen + HashAggregate [sum,s_state,s_county,spark_grouping_id,sum(UnscaledValue(ss_net_profit))] [total_sum,sum,lochierarchy,_w3,sum(UnscaledValue(ss_net_profit)),_w2,_w1] + InputAdapter + Exchange [s_state,s_county,spark_grouping_id] #2 + WholeStageCodegen + HashAggregate [sum,s_state,s_county,spark_grouping_id,sum,ss_net_profit] [sum,sum] + Expand [ss_net_profit,s_state,s_county] + Project [ss_net_profit,s_state,s_county] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_net_profit] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] [ss_sold_date_sk,ss_store_sk,ss_net_profit] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + BroadcastHashJoin [s_state,s_state] + Project [s_store_sk,s_county,s_state] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_county,s_state] [s_store_sk,s_county,s_state] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w2,s_state] + WholeStageCodegen + Sort [s_state,_w2] + HashAggregate [s_state,sum,sum(UnscaledValue(ss_net_profit))] [sum(UnscaledValue(ss_net_profit)),s_state,_w2,sum] + InputAdapter + Exchange [s_state] #6 + WholeStageCodegen + HashAggregate [s_state,ss_net_profit,sum,sum] [sum,sum] + Project [ss_net_profit,s_state] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_net_profit,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_net_profit] + Filter [ss_store_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] [ss_sold_date_sk,ss_store_sk,ss_net_profit] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [s_store_sk,s_state] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_state] [s_store_sk,s_state] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt new file mode 100644 index 000000000..32f2c64c4 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt @@ -0,0 +1,40 @@ +== Physical Plan == +*(11) Sort [ext_price#1 DESC NULLS LAST, brand_id#2 ASC NULLS FIRST], true, 0 ++- Exchange rangepartitioning(ext_price#1 DESC NULLS LAST, brand_id#2 ASC NULLS FIRST, 200) + +- *(10) HashAggregate(keys=[i_brand#3, i_brand_id#4, t_hour#5, t_minute#6], functions=[sum(UnscaledValue(ext_price#7))]) + +- Exchange hashpartitioning(i_brand#3, i_brand_id#4, t_hour#5, t_minute#6, 200) + +- *(9) HashAggregate(keys=[i_brand#3, i_brand_id#4, t_hour#5, t_minute#6], functions=[partial_sum(UnscaledValue(ext_price#7))]) + +- *(9) Project [i_brand_id#4, i_brand#3, ext_price#7, t_hour#5, t_minute#6] + +- *(9) BroadcastHashJoin [time_sk#8], [t_time_sk#9], Inner, BuildRight + :- *(9) Project [i_brand_id#4, i_brand#3, ext_price#7, time_sk#8] + : +- *(9) BroadcastHashJoin [i_item_sk#10], [sold_item_sk#11], Inner, BuildLeft + : :- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [i_item_sk#10, i_brand_id#4, i_brand#3] + : : +- *(1) Filter ((isnotnull(i_manager_id#12) && (i_manager_id#12 = 1)) && isnotnull(i_item_sk#10)) + : : +- *(1) FileScan parquet default.item[i_item_sk#10,i_brand_id#4,i_brand#3,i_manager_id#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct + : +- Union + : :- *(3) Project [ws_ext_sales_price#13 AS ext_price#7, ws_item_sk#14 AS sold_item_sk#11, ws_sold_time_sk#15 AS time_sk#8] + : : +- *(3) BroadcastHashJoin [ws_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight + : : :- *(3) Project [ws_sold_date_sk#16, ws_sold_time_sk#15, ws_item_sk#14, ws_ext_sales_price#13] + : : : +- *(3) Filter ((isnotnull(ws_sold_date_sk#16) && isnotnull(ws_item_sk#14)) && isnotnull(ws_sold_time_sk#15)) + : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#16,ws_sold_time_sk#15,ws_item_sk#14,ws_ext_sales_price#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_time_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [d_date_sk#17] + : : +- *(2) Filter ((((isnotnull(d_moy#18) && isnotnull(d_year#19)) && (d_moy#18 = 11)) && (d_year#19 = 1999)) && isnotnull(d_date_sk#17)) + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#17,d_year#19,d_moy#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct + : :- *(5) Project [cs_ext_sales_price#20 AS ext_price#21, cs_item_sk#22 AS sold_item_sk#23, cs_sold_time_sk#24 AS time_sk#25] + : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#17], Inner, BuildRight + : : :- *(5) Project [cs_sold_date_sk#26, cs_sold_time_sk#24, cs_item_sk#22, cs_ext_sales_price#20] + : : : +- *(5) Filter ((isnotnull(cs_sold_date_sk#26) && isnotnull(cs_item_sk#22)) && isnotnull(cs_sold_time_sk#24)) + : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_sold_time_sk#24,cs_item_sk#22,cs_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_time_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [ss_ext_sales_price#27 AS ext_price#28, ss_item_sk#29 AS sold_item_sk#30, ss_sold_time_sk#31 AS time_sk#32] + : +- *(7) BroadcastHashJoin [ss_sold_date_sk#33], [d_date_sk#17], Inner, BuildRight + : :- *(7) Project [ss_sold_date_sk#33, ss_sold_time_sk#31, ss_item_sk#29, ss_ext_sales_price#27] + : : +- *(7) Filter ((isnotnull(ss_sold_date_sk#33) && isnotnull(ss_item_sk#29)) && isnotnull(ss_sold_time_sk#31)) + : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#33,ss_sold_time_sk#31,ss_item_sk#29,ss_ext_sales_price#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_sold_time_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [t_time_sk#9, t_hour#5, t_minute#6] + +- *(8) Filter (((t_meal_time#34 = breakfast) || (t_meal_time#34 = dinner)) && isnotnull(t_time_sk#9)) + +- *(8) FileScan parquet default.time_dim[t_time_sk#9,t_hour#5,t_minute#6,t_meal_time#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [Or(EqualTo(t_meal_time,breakfast),EqualTo(t_meal_time,dinner)), IsNotNull(t_time_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt new file mode 100644 index 000000000..0153205a5 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt @@ -0,0 +1,56 @@ +WholeStageCodegen + Sort [ext_price,brand_id] + InputAdapter + Exchange [ext_price,brand_id] #1 + WholeStageCodegen + HashAggregate [sum(UnscaledValue(ext_price)),sum,t_minute,t_hour,i_brand,i_brand_id] [sum(UnscaledValue(ext_price)),sum,brand,ext_price,brand_id] + InputAdapter + Exchange [i_brand,i_brand_id,t_hour,t_minute] #2 + WholeStageCodegen + HashAggregate [ext_price,sum,t_minute,t_hour,i_brand,i_brand_id,sum] [sum,sum] + Project [ext_price,t_minute,i_brand_id,i_brand,t_hour] + BroadcastHashJoin [time_sk,t_time_sk] + Project [i_brand_id,i_brand,ext_price,time_sk] + BroadcastHashJoin [i_item_sk,sold_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] [i_item_sk,i_brand_id,i_brand,i_manager_id] + InputAdapter + Union + WholeStageCodegen + Project [ws_ext_sales_price,ws_item_sk,ws_sold_time_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_sold_time_sk,ws_item_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk,ws_item_sk,ws_sold_time_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_sold_time_sk,ws_item_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_sold_time_sk,ws_item_sk,ws_ext_sales_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + WholeStageCodegen + Project [cs_ext_sales_price,cs_item_sk,cs_sold_time_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_sold_time_sk,cs_item_sk,cs_ext_sales_price] + Filter [cs_sold_date_sk,cs_item_sk,cs_sold_time_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_sold_time_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_sold_time_sk,cs_item_sk,cs_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #4 + WholeStageCodegen + Project [ss_ext_sales_price,ss_item_sk,ss_sold_time_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_sold_time_sk,ss_item_sk,ss_ext_sales_price] + Filter [ss_sold_date_sk,ss_item_sk,ss_sold_time_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_sold_time_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_sold_time_sk,ss_item_sk,ss_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #4 + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [t_time_sk,t_hour,t_minute] + Filter [t_meal_time,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute,t_meal_time] [t_time_sk,t_hour,t_minute,t_meal_time] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt new file mode 100644 index 000000000..5ad7c14ac --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt @@ -0,0 +1,68 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[total_cnt#1 DESC NULLS LAST,i_item_desc#2 ASC NULLS FIRST,w_warehouse_name#3 ASC NULLS FIRST,d_week_seq#4 ASC NULLS FIRST], output=[i_item_desc#2,w_warehouse_name#3,d_week_seq#4,no_promo#5,promo#6,total_cnt#1]) ++- *(12) HashAggregate(keys=[i_item_desc#2, w_warehouse_name#3, d_week_seq#4], functions=[count(1)]) + +- Exchange hashpartitioning(i_item_desc#2, w_warehouse_name#3, d_week_seq#4, 200) + +- *(11) HashAggregate(keys=[i_item_desc#2, w_warehouse_name#3, d_week_seq#4], functions=[partial_count(1)]) + +- *(11) Project [w_warehouse_name#3, i_item_desc#2, d_week_seq#4] + +- *(11) BroadcastHashJoin [cs_item_sk#7, cs_order_number#8], [cr_item_sk#9, cr_order_number#10], LeftOuter, BuildRight + :- *(11) Project [cs_item_sk#7, cs_order_number#8, w_warehouse_name#3, i_item_desc#2, d_week_seq#4] + : +- *(11) BroadcastHashJoin [cs_promo_sk#11], [p_promo_sk#12], LeftOuter, BuildRight + : :- *(11) Project [cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, w_warehouse_name#3, i_item_desc#2, d_week_seq#4] + : : +- *(11) BroadcastHashJoin [cs_ship_date_sk#13], [d_date_sk#14], Inner, BuildRight, (d_date#15 > cast(cast(d_date#16 as timestamp) + interval 5 days as date)) + : : :- *(11) Project [cs_ship_date_sk#13, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, w_warehouse_name#3, i_item_desc#2, d_date#16, d_week_seq#4] + : : : +- *(11) BroadcastHashJoin [d_week_seq#4, inv_date_sk#17], [d_week_seq#18, d_date_sk#19], Inner, BuildRight + : : : :- *(11) Project [cs_ship_date_sk#13, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2, d_date#16, d_week_seq#4] + : : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight + : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2] + : : : : : +- *(11) BroadcastHashJoin [cs_bill_hdemo_sk#22], [hd_demo_sk#23], Inner, BuildRight + : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2] + : : : : : : +- *(11) BroadcastHashJoin [cs_bill_cdemo_sk#24], [cd_demo_sk#25], Inner, BuildRight + : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2] + : : : : : : : +- *(11) BroadcastHashJoin [cs_item_sk#7], [i_item_sk#26], Inner, BuildRight + : : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3] + : : : : : : : : +- *(11) BroadcastHashJoin [inv_warehouse_sk#27], [w_warehouse_sk#28], Inner, BuildRight + : : : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, inv_warehouse_sk#27] + : : : : : : : : : +- *(11) BroadcastHashJoin [cs_item_sk#7], [inv_item_sk#29], Inner, BuildRight, (inv_quantity_on_hand#30 < cs_quantity#31) + : : : : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, cs_quantity#31] + : : : : : : : : : : +- *(11) Filter (((((isnotnull(cs_quantity#31) && isnotnull(cs_item_sk#7)) && isnotnull(cs_bill_cdemo_sk#24)) && isnotnull(cs_bill_hdemo_sk#22)) && isnotnull(cs_sold_date_sk#20)) && isnotnull(cs_ship_date_sk#13)) + : : : : : : : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#20,cs_ship_date_sk#13,cs_bill_cdemo_sk#24,cs_bill_hdemo_sk#22,cs_item_sk#7,cs_promo_sk#11,cs_order_number#8,cs_quantity#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hd..., ReadSchema: struct + : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : +- *(2) Project [w_warehouse_sk#28, w_warehouse_name#3] + : : : : : : : : +- *(2) Filter isnotnull(w_warehouse_sk#28) + : : : : : : : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#28,w_warehouse_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : +- *(3) Project [i_item_sk#26, i_item_desc#2] + : : : : : : : +- *(3) Filter isnotnull(i_item_sk#26) + : : : : : : : +- *(3) FileScan parquet default.item[i_item_sk#26,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : +- *(4) Project [cd_demo_sk#25] + : : : : : : +- *(4) Filter ((isnotnull(cd_marital_status#32) && (cd_marital_status#32 = D)) && isnotnull(cd_demo_sk#25)) + : : : : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#25,cd_marital_status#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,D), IsNotNull(cd_demo_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(5) Project [hd_demo_sk#23] + : : : : : +- *(5) Filter ((isnotnull(hd_buy_potential#33) && (hd_buy_potential#33 = >10000)) && isnotnull(hd_demo_sk#23)) + : : : : : +- *(5) FileScan parquet default.household_demographics[hd_demo_sk#23,hd_buy_potential#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000), IsNotNull(hd_demo_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(6) Project [d_date_sk#21, d_date#16, d_week_seq#4] + : : : : +- *(6) Filter (((isnotnull(d_year#34) && (d_year#34 = 1999)) && isnotnull(d_date_sk#21)) && isnotnull(d_week_seq#4)) + : : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#21,d_date#16,d_week_seq#4,d_year#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) + : : : +- *(7) Project [d_date_sk#19, d_week_seq#18] + : : : +- *(7) Filter (isnotnull(d_week_seq#18) && isnotnull(d_date_sk#19)) + : : : +- *(7) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(8) Project [d_date_sk#14, d_date#15] + : : +- *(8) Filter (isnotnull(d_date#15) && isnotnull(d_date_sk#14)) + : : +- *(8) FileScan parquet default.date_dim[d_date_sk#14,d_date#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(9) Project [p_promo_sk#12] + : +- *(9) Filter isnotnull(p_promo_sk#12) + : +- *(9) FileScan parquet default.promotion[p_promo_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_promo_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295)))) + +- *(10) Project [cr_item_sk#9, cr_order_number#10] + +- *(10) Filter (isnotnull(cr_item_sk#9) && isnotnull(cr_order_number#10)) + +- *(10) FileScan parquet default.catalog_returns[cr_item_sk#9,cr_order_number#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt new file mode 100644 index 000000000..ee1925612 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt @@ -0,0 +1,90 @@ +TakeOrderedAndProject [i_item_desc,w_warehouse_name,promo,no_promo,d_week_seq,total_cnt] + WholeStageCodegen + HashAggregate [i_item_desc,count(1),w_warehouse_name,d_week_seq,count] [count(1),promo,no_promo,count,total_cnt] + InputAdapter + Exchange [i_item_desc,w_warehouse_name,d_week_seq] #1 + WholeStageCodegen + HashAggregate [i_item_desc,w_warehouse_name,d_week_seq,count,count] [count,count] + Project [w_warehouse_name,i_item_desc,d_week_seq] + BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + Project [d_week_seq,cs_order_number,cs_item_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [d_week_seq,cs_promo_sk,cs_order_number,cs_item_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk,d_date,d_date] + Project [d_date,d_week_seq,cs_promo_sk,cs_ship_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin [d_week_seq,inv_date_sk,d_week_seq,d_date_sk] + Project [d_date,d_week_seq,cs_promo_sk,cs_ship_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + Project [cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc,cs_bill_hdemo_sk] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Project [cs_promo_sk,cs_bill_cdemo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc,cs_bill_hdemo_sk] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_promo_sk,cs_bill_cdemo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,cs_bill_hdemo_sk] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Project [cs_promo_sk,cs_bill_cdemo_sk,cs_ship_date_sk,inv_warehouse_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,inv_date_sk,cs_bill_hdemo_sk] + BroadcastHashJoin [cs_item_sk,inv_item_sk,inv_quantity_on_hand,cs_quantity] + Project [cs_promo_sk,cs_bill_cdemo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_bill_hdemo_sk] + Filter [cs_bill_cdemo_sk,cs_sold_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_quantity,cs_ship_date_sk] + Scan parquet default.catalog_sales [cs_promo_sk,cs_bill_cdemo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_bill_hdemo_sk] [cs_promo_sk,cs_bill_cdemo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_bill_hdemo_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [w_warehouse_sk,w_warehouse_name] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [i_item_sk,i_item_desc] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_item_desc] [i_item_sk,i_item_desc] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [cd_demo_sk] + Filter [cd_marital_status,cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] [hd_demo_sk,hd_buy_potential] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [d_date_sk,d_date,d_week_seq] + Filter [d_year,d_date_sk,d_week_seq] + Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] [d_date_sk,d_date,d_week_seq,d_year] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [d_date_sk,d_week_seq] + Filter [d_date_sk,d_week_seq] + Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [d_date_sk,d_date] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [p_promo_sk] + Filter [p_promo_sk] + Scan parquet default.promotion [p_promo_sk] [p_promo_sk] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen + Project [cr_item_sk,cr_order_number] + Filter [cr_item_sk,cr_order_number] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] [cr_item_sk,cr_order_number] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt new file mode 100644 index 000000000..1f917c14b --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt @@ -0,0 +1,34 @@ +== Physical Plan == +*(7) Sort [cnt#1 DESC NULLS LAST], true, 0 ++- Exchange rangepartitioning(cnt#1 DESC NULLS LAST, 200) + +- *(6) Project [c_last_name#2, c_first_name#3, c_salutation#4, c_preferred_cust_flag#5, ss_ticket_number#6, cnt#1] + +- *(6) BroadcastHashJoin [ss_customer_sk#7], [c_customer_sk#8], Inner, BuildRight + :- *(6) Filter ((cnt#1 >= 1) && (cnt#1 <= 5)) + : +- *(6) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#7], functions=[count(1)]) + : +- Exchange hashpartitioning(ss_ticket_number#6, ss_customer_sk#7, 200) + : +- *(4) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#7], functions=[partial_count(1)]) + : +- *(4) Project [ss_customer_sk#7, ss_ticket_number#6] + : +- *(4) BroadcastHashJoin [ss_hdemo_sk#9], [hd_demo_sk#10], Inner, BuildRight + : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_ticket_number#6] + : : +- *(4) BroadcastHashJoin [ss_store_sk#11], [s_store_sk#12], Inner, BuildRight + : : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#6] + : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + : : : :- *(4) Project [ss_sold_date_sk#13, ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#6] + : : : : +- *(4) Filter (((isnotnull(ss_sold_date_sk#13) && isnotnull(ss_store_sk#11)) && isnotnull(ss_hdemo_sk#9)) && isnotnull(ss_customer_sk#7)) + : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_customer_sk#7,ss_hdemo_sk#9,ss_store_sk#11,ss_ticket_number#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#14] + : : : +- *(1) Filter ((((isnotnull(d_dom#15) && (d_dom#15 >= 1)) && (d_dom#15 <= 2)) && d_year#16 IN (1999,2000,2001)) && isnotnull(d_date_sk#14)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_dom#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [s_store_sk#12] + : : +- *(2) Filter (s_county#17 IN (Williamson County,Franklin Parish,Bronx County,Orange County) && isnotnull(s_store_sk#12)) + : : +- *(2) FileScan parquet default.store[s_store_sk#12,s_county#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [In(s_county, [Williamson County,Franklin Parish,Bronx County,Orange County]), IsNotNull(s_store_..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [hd_demo_sk#10] + : +- *(3) Filter ((((isnotnull(hd_vehicle_count#18) && ((hd_buy_potential#19 = >10000) || (hd_buy_potential#19 = unknown))) && (hd_vehicle_count#18 > 0)) && (CASE WHEN (hd_vehicle_count#18 > 0) THEN (cast(hd_dep_count#20 as double) / cast(hd_vehicle_count#18 as double)) ELSE null END > 1.0)) && isnotnull(hd_demo_sk#10)) + : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#10,hd_buy_potential#19,hd_dep_count#20,hd_vehicle_count#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknow..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [c_customer_sk#8, c_salutation#4, c_first_name#3, c_last_name#2, c_preferred_cust_flag#5] + +- *(5) Filter isnotnull(c_customer_sk#8) + +- *(5) FileScan parquet default.customer[c_customer_sk#8,c_salutation#4,c_first_name#3,c_last_name#2,c_preferred_cust_flag#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct 0.00) THEN CheckOverflow((promote_precision(year_total#7) / promote_precision(year_total#6)), DecimalType(37,20)) ELSE null END > CASE WHEN (year_total#8 > 0.00) THEN CheckOverflow((promote_precision(year_total#9) / promote_precision(year_total#8)), DecimalType(37,20)) ELSE null END) + :- *(17) Project [customer_id#4, year_total#8, customer_id#1, customer_first_name#2, customer_last_name#3, year_total#9, year_total#6] + : +- *(17) BroadcastHashJoin [customer_id#4], [customer_id#10], Inner, BuildRight + : :- *(17) BroadcastHashJoin [customer_id#4], [customer_id#1], Inner, BuildRight + : : :- Union + : : : :- *(4) Filter (isnotnull(year_total#8) && (year_total#8 > 0.00)) + : : : : +- *(4) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ss_net_paid#15))]) + : : : : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 200) + : : : : +- *(3) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ss_net_paid#15))]) + : : : : +- *(3) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_net_paid#15, d_year#14] + : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight + : : : : :- *(3) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_sold_date_sk#16, ss_net_paid#15] + : : : : : +- *(3) BroadcastHashJoin [c_customer_sk#18], [ss_customer_sk#19], Inner, BuildRight + : : : : : :- *(3) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] + : : : : : : +- *(3) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) + : : : : : : +- *(3) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : : : +- *(1) Project [ss_sold_date_sk#16, ss_customer_sk#19, ss_net_paid#15] + : : : : : +- *(1) Filter (isnotnull(ss_customer_sk#19) && isnotnull(ss_sold_date_sk#16)) + : : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_customer_sk#19,ss_net_paid#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [d_date_sk#17, d_year#14] + : : : : +- *(2) Filter (((isnotnull(d_year#14) && d_year#14 IN (2001,2002)) && (d_year#14 = 2001)) && isnotnull(d_date_sk#17)) + : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#17,d_year#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), In(d_year, [2001,2002]), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- LocalTableScan , [customer_id#20, year_total#21] + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : +- Union + : : :- *(8) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ss_net_paid#15))]) + : : : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 200) + : : : +- *(7) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ss_net_paid#15))]) + : : : +- *(7) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_net_paid#15, d_year#14] + : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight + : : : :- *(7) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_sold_date_sk#16, ss_net_paid#15] + : : : : +- *(7) BroadcastHashJoin [c_customer_sk#18], [ss_customer_sk#19], Inner, BuildRight + : : : : :- *(7) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] + : : : : : +- *(7) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) + : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : : +- ReusedExchange [ss_sold_date_sk#16, ss_customer_sk#19, ss_net_paid#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(6) Project [d_date_sk#17, d_year#14] + : : : +- *(6) Filter (((isnotnull(d_year#14) && d_year#14 IN (2001,2002)) && (d_year#14 = 2002)) && isnotnull(d_date_sk#17)) + : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#17,d_year#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), In(d_year, [2001,2002]), EqualTo(d_year,2002), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- LocalTableScan , [customer_id#20, customer_first_name#22, customer_last_name#23, year_total#21] + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- Union + : :- LocalTableScan , [customer_id#10, year_total#6] + : +- *(12) Filter (isnotnull(year_total#21) && (year_total#21 > 0.00)) + : +- *(12) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ws_net_paid#24))]) + : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 200) + : +- *(11) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ws_net_paid#24))]) + : +- *(11) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_net_paid#24, d_year#14] + : +- *(11) BroadcastHashJoin [ws_sold_date_sk#25], [d_date_sk#17], Inner, BuildRight + : :- *(11) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_sold_date_sk#25, ws_net_paid#24] + : : +- *(11) BroadcastHashJoin [c_customer_sk#18], [ws_bill_customer_sk#26], Inner, BuildRight + : : :- *(11) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] + : : : +- *(11) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) + : : : +- *(11) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : +- *(9) Project [ws_sold_date_sk#25, ws_bill_customer_sk#26, ws_net_paid#24] + : : +- *(9) Filter (isnotnull(ws_bill_customer_sk#26) && isnotnull(ws_sold_date_sk#25)) + : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#25,ws_bill_customer_sk#26,ws_net_paid#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#17, d_year#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- Union + :- LocalTableScan , [customer_id#5, year_total#7] + +- *(16) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ws_net_paid#24))]) + +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 200) + +- *(15) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ws_net_paid#24))]) + +- *(15) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_net_paid#24, d_year#14] + +- *(15) BroadcastHashJoin [ws_sold_date_sk#25], [d_date_sk#17], Inner, BuildRight + :- *(15) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_sold_date_sk#25, ws_net_paid#24] + : +- *(15) BroadcastHashJoin [c_customer_sk#18], [ws_bill_customer_sk#26], Inner, BuildRight + : :- *(15) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] + : : +- *(15) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) + : : +- *(15) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : +- ReusedExchange [ws_sold_date_sk#25, ws_bill_customer_sk#26, ws_net_paid#24], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + +- ReusedExchange [d_date_sk#17, d_year#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt new file mode 100644 index 000000000..c3144e1d9 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt @@ -0,0 +1,108 @@ +TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] + WholeStageCodegen + Project [customer_id,customer_first_name,customer_last_name] + BroadcastHashJoin [year_total,year_total,year_total,year_total,customer_id,customer_id] + Project [customer_first_name,year_total,year_total,customer_last_name,year_total,customer_id,customer_id] + BroadcastHashJoin [customer_id,customer_id] + BroadcastHashJoin [customer_id,customer_id] + InputAdapter + Union + WholeStageCodegen + Filter [year_total] + HashAggregate [d_year,sum(UnscaledValue(ss_net_paid)),c_customer_id,c_last_name,sum,c_first_name] [sum(UnscaledValue(ss_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #1 + WholeStageCodegen + HashAggregate [d_year,sum,ss_net_paid,c_customer_id,c_last_name,sum,c_first_name] [sum,sum] + Project [c_customer_id,d_year,ss_net_paid,c_last_name,c_first_name] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,ss_net_paid,c_last_name,c_first_name,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] + Filter [c_customer_sk,c_customer_id] + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [ss_sold_date_sk,ss_customer_sk,ss_net_paid] + Filter [ss_customer_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_net_paid] [ss_sold_date_sk,ss_customer_sk,ss_net_paid] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + LocalTableScan [customer_id,year_total] [customer_id,year_total] + InputAdapter + BroadcastExchange #4 + Union + WholeStageCodegen + HashAggregate [d_year,sum(UnscaledValue(ss_net_paid)),sum,c_customer_id,c_last_name,c_first_name] [sum(UnscaledValue(ss_net_paid)),customer_first_name,sum,customer_last_name,year_total,customer_id] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #5 + WholeStageCodegen + HashAggregate [d_year,ss_net_paid,sum,c_customer_id,c_last_name,c_first_name,sum] [sum,sum] + Project [c_customer_id,d_year,ss_net_paid,c_last_name,c_first_name] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,ss_net_paid,c_last_name,c_first_name,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] + Filter [c_customer_sk,c_customer_id] + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ss_sold_date_sk,ss_customer_sk,ss_net_paid] [ss_sold_date_sk,ss_customer_sk,ss_net_paid] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + LocalTableScan [customer_id,customer_first_name,customer_last_name,year_total] [customer_id,customer_first_name,customer_last_name,year_total] + InputAdapter + BroadcastExchange #7 + Union + LocalTableScan [customer_id,year_total] [customer_id,year_total] + WholeStageCodegen + Filter [year_total] + HashAggregate [d_year,sum,sum(UnscaledValue(ws_net_paid)),c_customer_id,c_last_name,c_first_name] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #8 + WholeStageCodegen + HashAggregate [d_year,sum,sum,ws_net_paid,c_customer_id,c_last_name,c_first_name] [sum,sum] + Project [c_customer_id,ws_net_paid,d_year,c_last_name,c_first_name] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,ws_net_paid,c_last_name,ws_sold_date_sk,c_first_name] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] + Filter [c_customer_sk,c_customer_id] + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] + InputAdapter + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #10 + Union + LocalTableScan [customer_id,year_total] [customer_id,year_total] + WholeStageCodegen + HashAggregate [d_year,sum(UnscaledValue(ws_net_paid)),c_customer_id,c_last_name,c_first_name,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #11 + WholeStageCodegen + HashAggregate [d_year,ws_net_paid,c_customer_id,sum,c_last_name,c_first_name,sum] [sum,sum] + Project [c_customer_id,ws_net_paid,d_year,c_last_name,c_first_name] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,ws_net_paid,c_last_name,ws_sold_date_sk,c_first_name] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] + Filter [c_customer_sk,c_customer_id] + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #6 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt new file mode 100644 index 000000000..80af7fa5e --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt @@ -0,0 +1,117 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[sales_cnt_diff#1 ASC NULLS FIRST], output=[prev_year#2,year#3,i_brand_id#4,i_class_id#5,i_category_id#6,i_manufact_id#7,prev_yr_cnt#8,curr_yr_cnt#9,sales_cnt_diff#1,sales_amt_diff#10]) ++- *(34) Project [d_year#11 AS prev_year#2, d_year#12 AS year#3, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#13 AS prev_yr_cnt#8, sales_cnt#14 AS curr_yr_cnt#9, (sales_cnt#14 - sales_cnt#13) AS sales_cnt_diff#1, CheckOverflow((promote_precision(cast(sales_amt#15 as decimal(19,2))) - promote_precision(cast(sales_amt#16 as decimal(19,2)))), DecimalType(19,2)) AS sales_amt_diff#10] + +- *(34) BroadcastHashJoin [i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7], [i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], Inner, BuildRight, (CheckOverflow((promote_precision(cast(sales_cnt#14 as decimal(17,2))) / promote_precision(cast(sales_cnt#13 as decimal(17,2)))), DecimalType(37,20)) < 0.90000000000000000000) + :- *(34) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7], functions=[sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))]) + : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, 200) + : +- *(16) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7], functions=[partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))]) + : +- *(16) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) + : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22, 200) + : +- *(15) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) + : +- Union + : :- *(10) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) + : : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22, 200) + : : +- *(9) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) + : : +- Union + : : :- *(4) Project [d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, (cs_quantity#23 - coalesce(cr_return_quantity#24, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#25 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#26, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#22] + : : : +- *(4) BroadcastHashJoin [cs_order_number#27, cs_item_sk#28], [cr_order_number#29, cr_item_sk#30], LeftOuter, BuildRight + : : : :- *(4) Project [cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, d_year#12] + : : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight + : : : : :- *(4) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7] + : : : : : +- *(4) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#33], Inner, BuildRight + : : : : : :- *(4) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25] + : : : : : : +- *(4) Filter (isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#31)) + : : : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#28,cs_order_number#27,cs_quantity#23,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) + : : : +- *(3) Project [cr_item_sk#30, cr_order_number#29, cr_return_quantity#24, cr_return_amount#26] + : : : +- *(3) Filter (isnotnull(cr_item_sk#30) && isnotnull(cr_order_number#29)) + : : : +- *(3) FileScan parquet default.catalog_returns[cr_item_sk#30,cr_order_number#29,cr_return_quantity#24,cr_return_amount#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct + : : +- *(8) Project [d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, (ss_quantity#35 - coalesce(sr_return_quantity#36, 0)) AS sales_cnt#37, CheckOverflow((promote_precision(cast(ss_ext_sales_price#38 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#39, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#40] + : : +- *(8) BroadcastHashJoin [cast(ss_ticket_number#41 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#43, sr_item_sk#44], LeftOuter, BuildRight + : : :- *(8) Project [ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, d_year#12] + : : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#45], [d_date_sk#32], Inner, BuildRight + : : : :- *(8) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7] + : : : : +- *(8) BroadcastHashJoin [ss_item_sk#42], [i_item_sk#33], Inner, BuildRight + : : : : :- *(8) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38] + : : : : : +- *(8) Filter (isnotnull(ss_item_sk#42) && isnotnull(ss_sold_date_sk#45)) + : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#45,ss_item_sk#42,ss_ticket_number#41,ss_quantity#35,ss_ext_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : +- *(14) Project [d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, (ws_quantity#46 - coalesce(wr_return_quantity#47, 0)) AS sales_cnt#48, CheckOverflow((promote_precision(cast(ws_ext_sales_price#49 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#50, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#51] + : +- *(14) BroadcastHashJoin [cast(ws_order_number#52 as bigint), cast(ws_item_sk#53 as bigint)], [wr_order_number#54, wr_item_sk#55], LeftOuter, BuildRight + : :- *(14) Project [ws_item_sk#53, ws_order_number#52, ws_quantity#46, ws_ext_sales_price#49, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, d_year#12] + : : +- *(14) BroadcastHashJoin [ws_sold_date_sk#56], [d_date_sk#32], Inner, BuildRight + : : :- *(14) Project [ws_sold_date_sk#56, ws_item_sk#53, ws_order_number#52, ws_quantity#46, ws_ext_sales_price#49, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7] + : : : +- *(14) BroadcastHashJoin [ws_item_sk#53], [i_item_sk#33], Inner, BuildRight + : : : :- *(14) Project [ws_sold_date_sk#56, ws_item_sk#53, ws_order_number#52, ws_quantity#46, ws_ext_sales_price#49] + : : : : +- *(14) Filter (isnotnull(ws_item_sk#53) && isnotnull(ws_sold_date_sk#56)) + : : : : +- *(14) FileScan parquet default.web_sales[ws_sold_date_sk#56,ws_item_sk#53,ws_order_number#52,ws_quantity#46,ws_ext_sales_price#49] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true], input[4, int, true])) + +- *(33) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], functions=[sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))]) + +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, 200) + +- *(32) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], functions=[partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))]) + +- *(32) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) + +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22, 200) + +- *(31) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) + +- Union + :- *(26) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) + : +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22, 200) + : +- *(25) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) + : +- Union + : :- *(20) Project [d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, (cs_quantity#23 - coalesce(cr_return_quantity#24, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#25 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#26, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#22] + : : +- *(20) BroadcastHashJoin [cs_order_number#27, cs_item_sk#28], [cr_order_number#29, cr_item_sk#30], LeftOuter, BuildRight + : : :- *(20) Project [cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, d_year#11] + : : : +- *(20) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#57], Inner, BuildRight + : : : :- *(20) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20] + : : : : +- *(20) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#58], Inner, BuildRight + : : : : :- *(20) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25] + : : : : : +- *(20) Filter (isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#31)) + : : : : : +- *(20) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#28,cs_order_number#27,cs_quantity#23,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [cr_item_sk#30, cr_order_number#29, cr_return_quantity#24, cr_return_amount#26], BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) + : +- *(24) Project [d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, (ss_quantity#35 - coalesce(sr_return_quantity#36, 0)) AS sales_cnt#37, CheckOverflow((promote_precision(cast(ss_ext_sales_price#38 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#39, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#40] + : +- *(24) BroadcastHashJoin [cast(ss_ticket_number#41 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#43, sr_item_sk#44], LeftOuter, BuildRight + : :- *(24) Project [ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, d_year#11] + : : +- *(24) BroadcastHashJoin [ss_sold_date_sk#45], [d_date_sk#57], Inner, BuildRight + : : :- *(24) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20] + : : : +- *(24) BroadcastHashJoin [ss_item_sk#42], [i_item_sk#58], Inner, BuildRight + : : : :- *(24) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38] + : : : : +- *(24) Filter (isnotnull(ss_item_sk#42) && isnotnull(ss_sold_date_sk#45)) + : : : : +- *(24) FileScan parquet default.store_sales[ss_sold_date_sk#45,ss_item_sk#42,ss_ticket_number#41,ss_quantity#35,ss_ext_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [i_item_sk#14, i_category#5] + : : +- *(1) Filter isnotnull(i_item_sk#14) + : : +- *(1) FileScan parquet default.item[i_item_sk#14,i_category#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#12, d_year#3, d_qoy#4] + : +- *(2) Filter isnotnull(d_date_sk#12) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#12,d_year#3,d_qoy#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + :- *(6) Project [web AS channel#15, ws_ship_customer_sk#16 AS col_name#17, d_year#3, d_qoy#4, i_category#5, ws_ext_sales_price#18 AS ext_sales_price#19] + : +- *(6) BroadcastHashJoin [ws_sold_date_sk#20], [d_date_sk#12], Inner, BuildRight + : :- *(6) Project [ws_sold_date_sk#20, ws_ship_customer_sk#16, ws_ext_sales_price#18, i_category#5] + : : +- *(6) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#14], Inner, BuildRight + : : :- *(6) Project [ws_sold_date_sk#20, ws_item_sk#21, ws_ship_customer_sk#16, ws_ext_sales_price#18] + : : : +- *(6) Filter ((isnull(ws_ship_customer_sk#16) && isnotnull(ws_item_sk#21)) && isnotnull(ws_sold_date_sk#20)) + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#20,ws_item_sk#21,ws_ship_customer_sk#16,ws_ext_sales_price#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNull(ws_ship_customer_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [i_item_sk#14, i_category#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [d_date_sk#12, d_year#3, d_qoy#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(9) Project [catalog AS channel#22, cs_ship_addr_sk#23 AS col_name#24, d_year#3, d_qoy#4, i_category#5, cs_ext_sales_price#25 AS ext_sales_price#26] + +- *(9) BroadcastHashJoin [cs_sold_date_sk#27], [d_date_sk#12], Inner, BuildRight + :- *(9) Project [cs_sold_date_sk#27, cs_ship_addr_sk#23, cs_ext_sales_price#25, i_category#5] + : +- *(9) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#14], Inner, BuildRight + : :- *(9) Project [cs_sold_date_sk#27, cs_ship_addr_sk#23, cs_item_sk#28, cs_ext_sales_price#25] + : : +- *(9) Filter ((isnull(cs_ship_addr_sk#23) && isnotnull(cs_item_sk#28)) && isnotnull(cs_sold_date_sk#27)) + : : +- *(9) FileScan parquet default.catalog_sales[cs_sold_date_sk#27,cs_ship_addr_sk#23,cs_item_sk#28,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNull(cs_ship_addr_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : +- ReusedExchange [i_item_sk#14, i_category#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [d_date_sk#12, d_year#3, d_qoy#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt new file mode 100644 index 000000000..e40976f52 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt @@ -0,0 +1,53 @@ +TakeOrderedAndProject [d_year,channel,sales_cnt,col_name,d_qoy,i_category,sales_amt] + WholeStageCodegen + HashAggregate [d_year,channel,sum,sum(UnscaledValue(ext_sales_price)),count,count(1),col_name,d_qoy,i_category] [sum,sum(UnscaledValue(ext_sales_price)),sales_cnt,count,count(1),sales_amt] + InputAdapter + Exchange [d_year,channel,col_name,d_qoy,i_category] #1 + WholeStageCodegen + HashAggregate [d_year,count,channel,sum,ext_sales_price,count,col_name,d_qoy,i_category,sum] [count,sum,count,sum] + InputAdapter + Union + WholeStageCodegen + Project [d_year,d_qoy,ss_store_sk,i_category,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price] + Filter [ss_store_sk,ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [i_item_sk,i_category] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_category] [i_item_sk,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk,d_year,d_qoy] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + WholeStageCodegen + Project [d_year,d_qoy,ws_ship_customer_sk,ws_ext_sales_price,i_category] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_ship_customer_sk,ws_ext_sales_price,i_category] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price] + Filter [ws_ship_customer_sk,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_category] [i_item_sk,i_category] #2 + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] #3 + WholeStageCodegen + Project [d_year,d_qoy,i_category,cs_ship_addr_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_ship_addr_sk,cs_ext_sales_price,i_category] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_sold_date_sk,cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price] + Filter [cs_ship_addr_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_category] [i_item_sk,i_category] #2 + InputAdapter + ReusedExchange [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt new file mode 100644 index 000000000..0199d72c5 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt @@ -0,0 +1,100 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NULLS FIRST], output=[channel#1,id#2,sales#3,returns#4,profit#5]) ++- *(25) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[sum(sales#7), sum(returns#8), sum(profit#9)]) + +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 200) + +- *(24) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[partial_sum(sales#7), partial_sum(returns#8), partial_sum(profit#9)]) + +- *(24) Expand [List(sales#7, returns#8, profit#9, channel#10, id#11, 0), List(sales#7, returns#8, profit#9, channel#10, null, 1), List(sales#7, returns#8, profit#9, null, null, 3)], [sales#7, returns#8, profit#9, channel#1, id#2, spark_grouping_id#6] + +- Union + :- *(8) Project [sales#7, coalesce(returns#12, 0.00) AS returns#8, CheckOverflow((promote_precision(cast(profit#13 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#14, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#9, store channel AS channel#10, s_store_sk#15 AS id#11] + : +- *(8) BroadcastHashJoin [s_store_sk#15], [s_store_sk#16], LeftOuter, BuildRight + : :- *(8) HashAggregate(keys=[s_store_sk#15], functions=[sum(UnscaledValue(ss_ext_sales_price#17)), sum(UnscaledValue(ss_net_profit#18))]) + : : +- Exchange hashpartitioning(s_store_sk#15, 200) + : : +- *(3) HashAggregate(keys=[s_store_sk#15], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#17)), partial_sum(UnscaledValue(ss_net_profit#18))]) + : : +- *(3) Project [ss_ext_sales_price#17, ss_net_profit#18, s_store_sk#15] + : : +- *(3) BroadcastHashJoin [ss_store_sk#19], [s_store_sk#15], Inner, BuildRight + : : :- *(3) Project [ss_store_sk#19, ss_ext_sales_price#17, ss_net_profit#18] + : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight + : : : :- *(3) Project [ss_sold_date_sk#20, ss_store_sk#19, ss_ext_sales_price#17, ss_net_profit#18] + : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#20) && isnotnull(ss_store_sk#19)) + : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#20,ss_store_sk#19,ss_ext_sales_price#17,ss_net_profit#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct= 11172)) && (d_date#22 <= 11202)) && isnotnull(d_date_sk#21)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#21,d_date#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), Is..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [s_store_sk#15] + : : +- *(2) Filter isnotnull(s_store_sk#15) + : : +- *(2) FileScan parquet default.store[s_store_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) HashAggregate(keys=[s_store_sk#16], functions=[sum(UnscaledValue(sr_return_amt#23)), sum(UnscaledValue(sr_net_loss#24))]) + : +- Exchange hashpartitioning(s_store_sk#16, 200) + : +- *(6) HashAggregate(keys=[s_store_sk#16], functions=[partial_sum(UnscaledValue(sr_return_amt#23)), partial_sum(UnscaledValue(sr_net_loss#24))]) + : +- *(6) Project [sr_return_amt#23, sr_net_loss#24, s_store_sk#16] + : +- *(6) BroadcastHashJoin [sr_store_sk#25], [cast(s_store_sk#16 as bigint)], Inner, BuildRight + : :- *(6) Project [sr_store_sk#25, sr_return_amt#23, sr_net_loss#24] + : : +- *(6) BroadcastHashJoin [sr_returned_date_sk#26], [cast(d_date_sk#21 as bigint)], Inner, BuildRight + : : :- *(6) Project [sr_returned_date_sk#26, sr_store_sk#25, sr_return_amt#23, sr_net_loss#24] + : : : +- *(6) Filter (isnotnull(sr_returned_date_sk#26) && isnotnull(sr_store_sk#25)) + : : : +- *(6) FileScan parquet default.store_returns[sr_returned_date_sk#26,sr_store_sk#25,sr_return_amt#23,sr_net_loss#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)], ReadSchema: struct= 11172)) && (d_date#22 <= 11202)) && isnotnull(d_date_sk#21)) + : : +- *(4) FileScan parquet default.date_dim[d_date_sk#21,d_date#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), Is..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(5) Project [s_store_sk#16] + : +- *(5) Filter isnotnull(s_store_sk#16) + : +- *(5) FileScan parquet default.store[s_store_sk#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + :- *(15) Project [sales#27, returns#28, CheckOverflow((promote_precision(cast(profit#29 as decimal(18,2))) - promote_precision(cast(profit_loss#30 as decimal(18,2)))), DecimalType(18,2)) AS profit#31, catalog channel AS channel#32, cs_call_center_sk#33 AS id#34] + : +- BroadcastNestedLoopJoin BuildLeft, Inner + : :- BroadcastExchange IdentityBroadcastMode + : : +- *(11) HashAggregate(keys=[cs_call_center_sk#33], functions=[sum(UnscaledValue(cs_ext_sales_price#35)), sum(UnscaledValue(cs_net_profit#36))]) + : : +- Exchange hashpartitioning(cs_call_center_sk#33, 200) + : : +- *(10) HashAggregate(keys=[cs_call_center_sk#33], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#35)), partial_sum(UnscaledValue(cs_net_profit#36))]) + : : +- *(10) Project [cs_call_center_sk#33, cs_ext_sales_price#35, cs_net_profit#36] + : : +- *(10) BroadcastHashJoin [cs_sold_date_sk#37], [d_date_sk#21], Inner, BuildRight + : : :- *(10) Project [cs_sold_date_sk#37, cs_call_center_sk#33, cs_ext_sales_price#35, cs_net_profit#36] + : : : +- *(10) Filter isnotnull(cs_sold_date_sk#37) + : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#37,cs_call_center_sk#33,cs_ext_sales_price#35,cs_net_profit#36] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#21], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(23) Project [sales#41, coalesce(returns#42, 0.00) AS returns#43, CheckOverflow((promote_precision(cast(profit#44 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#45, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#46, web channel AS channel#47, wp_web_page_sk#48 AS id#49] + +- *(23) BroadcastHashJoin [wp_web_page_sk#48], [wp_web_page_sk#50], LeftOuter, BuildRight + :- *(23) HashAggregate(keys=[wp_web_page_sk#48], functions=[sum(UnscaledValue(ws_ext_sales_price#51)), sum(UnscaledValue(ws_net_profit#52))]) + : +- Exchange hashpartitioning(wp_web_page_sk#48, 200) + : +- *(18) HashAggregate(keys=[wp_web_page_sk#48], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#51)), partial_sum(UnscaledValue(ws_net_profit#52))]) + : +- *(18) Project [ws_ext_sales_price#51, ws_net_profit#52, wp_web_page_sk#48] + : +- *(18) BroadcastHashJoin [ws_web_page_sk#53], [wp_web_page_sk#48], Inner, BuildRight + : :- *(18) Project [ws_web_page_sk#53, ws_ext_sales_price#51, ws_net_profit#52] + : : +- *(18) BroadcastHashJoin [ws_sold_date_sk#54], [d_date_sk#21], Inner, BuildRight + : : :- *(18) Project [ws_sold_date_sk#54, ws_web_page_sk#53, ws_ext_sales_price#51, ws_net_profit#52] + : : : +- *(18) Filter (isnotnull(ws_sold_date_sk#54) && isnotnull(ws_web_page_sk#53)) + : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#54,ws_web_page_sk#53,ws_ext_sales_price#51,ws_net_profit#52] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(22) HashAggregate(keys=[wp_web_page_sk#50], functions=[sum(UnscaledValue(wr_return_amt#55)), sum(UnscaledValue(wr_net_loss#56))]) + +- Exchange hashpartitioning(wp_web_page_sk#50, 200) + +- *(21) HashAggregate(keys=[wp_web_page_sk#50], functions=[partial_sum(UnscaledValue(wr_return_amt#55)), partial_sum(UnscaledValue(wr_net_loss#56))]) + +- *(21) Project [wr_return_amt#55, wr_net_loss#56, wp_web_page_sk#50] + +- *(21) BroadcastHashJoin [wr_web_page_sk#57], [cast(wp_web_page_sk#50 as bigint)], Inner, BuildRight + :- *(21) Project [wr_web_page_sk#57, wr_return_amt#55, wr_net_loss#56] + : +- *(21) BroadcastHashJoin [wr_returned_date_sk#58], [cast(d_date_sk#21 as bigint)], Inner, BuildRight + : :- *(21) Project [wr_returned_date_sk#58, wr_web_page_sk#57, wr_return_amt#55, wr_net_loss#56] + : : +- *(21) Filter (isnotnull(wr_returned_date_sk#58) && isnotnull(wr_web_page_sk#57)) + : : +- *(21) FileScan parquet default.web_returns[wr_returned_date_sk#58,wr_web_page_sk#57,wr_return_amt#55,wr_net_loss#56] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_web_page_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt new file mode 100644 index 000000000..1c29af5ef --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt @@ -0,0 +1,141 @@ +TakeOrderedAndProject [channel,profit,id,sales,returns] + WholeStageCodegen + HashAggregate [channel,sum,sum,id,sum(profit),spark_grouping_id,sum(sales),sum,sum(returns)] [sum,profit,sum,sum(profit),sales,sum(sales),sum,returns,sum(returns)] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen + HashAggregate [profit,channel,sum,sum,sales,sum,sum,id,sum,spark_grouping_id,returns,sum] [sum,sum,sum,sum,sum,sum] + Expand [profit,channel,sales,id,returns] + InputAdapter + Union + WholeStageCodegen + Project [profit,s_store_sk,sales,returns,profit_loss] + BroadcastHashJoin [s_store_sk,s_store_sk] + HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),sum,sum,s_store_sk,sum(UnscaledValue(ss_net_profit))] [sales,sum(UnscaledValue(ss_ext_sales_price)),sum,sum,sum(UnscaledValue(ss_net_profit)),profit] + InputAdapter + Exchange [s_store_sk] #2 + WholeStageCodegen + HashAggregate [sum,sum,sum,sum,s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_net_profit,s_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk] [s_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + HashAggregate [sum,sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),s_store_sk,sum] [sum,sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),profit_loss,returns,sum] + InputAdapter + Exchange [s_store_sk] #6 + WholeStageCodegen + HashAggregate [sum,sum,sr_return_amt,s_store_sk,sum,sum,sr_net_loss] [sum,sum,sum,sum] + Project [sr_return_amt,sr_net_loss,s_store_sk] + BroadcastHashJoin [sr_store_sk,s_store_sk] + Project [sr_store_sk,sr_return_amt,sr_net_loss] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] + Filter [sr_returned_date_sk,sr_store_sk] + Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk] [s_store_sk] + WholeStageCodegen + Project [sales,returns,profit,cs_call_center_sk,profit_loss] + InputAdapter + BroadcastNestedLoopJoin + BroadcastExchange #9 + WholeStageCodegen + HashAggregate [sum,sum(UnscaledValue(cs_net_profit)),sum,sum(UnscaledValue(cs_ext_sales_price)),cs_call_center_sk] [sales,sum,profit,sum(UnscaledValue(cs_net_profit)),sum,sum(UnscaledValue(cs_ext_sales_price))] + InputAdapter + Exchange [cs_call_center_sk] #10 + WholeStageCodegen + HashAggregate [sum,sum,cs_net_profit,sum,cs_ext_sales_price,cs_call_center_sk,sum] [sum,sum,sum,sum] + Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + WholeStageCodegen + HashAggregate [sum,sum,sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss))] [profit_loss,sum,returns,sum(UnscaledValue(cr_return_amount)),sum,sum(UnscaledValue(cr_net_loss))] + InputAdapter + Exchange #11 + WholeStageCodegen + HashAggregate [cr_return_amount,sum,sum,sum,cr_net_loss,sum] [sum,sum,sum,sum] + Project [cr_return_amount,cr_net_loss] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Project [cr_returned_date_sk,cr_return_amount,cr_net_loss] + Filter [cr_returned_date_sk] + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_return_amount,cr_net_loss] [cr_returned_date_sk,cr_return_amount,cr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + WholeStageCodegen + Project [profit,returns,sales,wp_web_page_sk,profit_loss] + BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] + HashAggregate [sum(UnscaledValue(ws_ext_sales_price)),sum,sum(UnscaledValue(ws_net_profit)),sum,wp_web_page_sk] [sum(UnscaledValue(ws_ext_sales_price)),profit,sales,sum,sum(UnscaledValue(ws_net_profit)),sum] + InputAdapter + Exchange [wp_web_page_sk] #12 + WholeStageCodegen + HashAggregate [sum,sum,ws_ext_sales_price,ws_net_profit,sum,sum,wp_web_page_sk] [sum,sum,sum,sum] + Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + Filter [ws_sold_date_sk,ws_web_page_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen + Project [wp_web_page_sk] + Filter [wp_web_page_sk] + Scan parquet default.web_page [wp_web_page_sk] [wp_web_page_sk] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen + HashAggregate [sum,wp_web_page_sk,sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),sum] [sum,returns,sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),profit_loss,sum] + InputAdapter + Exchange [wp_web_page_sk] #15 + WholeStageCodegen + HashAggregate [sum,wp_web_page_sk,wr_return_amt,sum,wr_net_loss,sum,sum] [sum,sum,sum,sum] + Project [wr_return_amt,wr_net_loss,wp_web_page_sk] + BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk] + Project [wr_web_page_sk,wr_return_amt,wr_net_loss] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Project [wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] + Filter [wr_returned_date_sk,wr_web_page_sk] + Scan parquet default.web_returns [wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] [wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #7 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen + Project [wp_web_page_sk] + Filter [wp_web_page_sk] + Scan parquet default.web_page [wp_web_page_sk] [wp_web_page_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/explain.txt new file mode 100644 index 000000000..70080e59a --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/explain.txt @@ -0,0 +1,61 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[ratio#1 ASC NULLS FIRST,ss_qty#2 DESC NULLS LAST,ss_wc#3 DESC NULLS LAST,ss_sp#4 DESC NULLS LAST,other_chan_qty#5 ASC NULLS FIRST,other_chan_wholesale_cost#6 ASC NULLS FIRST,other_chan_sales_price#7 ASC NULLS FIRST,round((cast(ss_qty#2 as double) / cast(coalesce((ws_qty#8 + cs_qty#9), 1) as double)), 2) ASC NULLS FIRST], output=[ratio#1,store_qty#10,store_wholesale_cost#11,store_sales_price#12,other_chan_qty#5,other_chan_wholesale_cost#6,other_chan_sales_price#7]) ++- *(12) Project [round((cast(ss_qty#2 as double) / cast(coalesce((ws_qty#8 + cs_qty#9), 1) as double)), 2) AS ratio#1, ss_qty#2 AS store_qty#10, ss_wc#3 AS store_wholesale_cost#11, ss_sp#4 AS store_sales_price#12, (coalesce(ws_qty#8, 0) + coalesce(cs_qty#9, 0)) AS other_chan_qty#5, CheckOverflow((promote_precision(cast(coalesce(ws_wc#13, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#14, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_wholesale_cost#6, CheckOverflow((promote_precision(cast(coalesce(ws_sp#15, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#16, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_sales_price#7, cs_qty#9, ws_qty#8, ss_qty#2, ss_sp#4, ss_wc#3] + +- *(12) BroadcastHashJoin [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19], [cs_sold_year#20, cs_item_sk#21, cs_customer_sk#22], Inner, BuildRight + :- *(12) Project [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19, ss_qty#2, ss_wc#3, ss_sp#4, ws_qty#8, ws_wc#13, ws_sp#15] + : +- *(12) BroadcastHashJoin [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19], [ws_sold_year#23, ws_item_sk#24, ws_customer_sk#25], Inner, BuildRight + : :- *(12) HashAggregate(keys=[d_year#26, ss_item_sk#18, ss_customer_sk#19], functions=[sum(cast(ss_quantity#27 as bigint)), sum(UnscaledValue(ss_wholesale_cost#28)), sum(UnscaledValue(ss_sales_price#29))]) + : : +- Exchange hashpartitioning(d_year#26, ss_item_sk#18, ss_customer_sk#19, 200) + : : +- *(3) HashAggregate(keys=[d_year#26, ss_item_sk#18, ss_customer_sk#19], functions=[partial_sum(cast(ss_quantity#27 as bigint)), partial_sum(UnscaledValue(ss_wholesale_cost#28)), partial_sum(UnscaledValue(ss_sales_price#29))]) + : : +- *(3) Project [ss_item_sk#18, ss_customer_sk#19, ss_quantity#27, ss_wholesale_cost#28, ss_sales_price#29, d_year#26] + : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#30], [d_date_sk#31], Inner, BuildRight + : : :- *(3) Project [ss_sold_date_sk#30, ss_item_sk#18, ss_customer_sk#19, ss_quantity#27, ss_wholesale_cost#28, ss_sales_price#29] + : : : +- *(3) Filter isnull(sr_ticket_number#32) + : : : +- *(3) BroadcastHashJoin [cast(ss_ticket_number#33 as bigint), cast(ss_item_sk#18 as bigint)], [sr_ticket_number#32, sr_item_sk#34], LeftOuter, BuildRight + : : : :- *(3) Project [ss_sold_date_sk#30, ss_item_sk#18, ss_customer_sk#19, ss_ticket_number#33, ss_quantity#27, ss_wholesale_cost#28, ss_sales_price#29] + : : : : +- *(3) Filter ((isnotnull(ss_sold_date_sk#30) && isnotnull(ss_item_sk#18)) && isnotnull(ss_customer_sk#19)) + : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#30,ss_item_sk#18,ss_customer_sk#19,ss_ticket_number#33,ss_quantity#27,ss_wholesale_cost#28,ss_sales_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [d_date_sk#31, d_year#26] + : : +- *(2) Filter ((isnotnull(d_year#26) && (d_year#26 = 2000)) && isnotnull(d_date_sk#31)) + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#31,d_year#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) + : +- *(7) Filter (coalesce(ws_qty#8, 0) > 0) + : +- *(7) HashAggregate(keys=[d_year#26, ws_item_sk#24, ws_bill_customer_sk#35], functions=[sum(cast(ws_quantity#36 as bigint)), sum(UnscaledValue(ws_wholesale_cost#37)), sum(UnscaledValue(ws_sales_price#38))]) + : +- Exchange hashpartitioning(d_year#26, ws_item_sk#24, ws_bill_customer_sk#35, 200) + : +- *(6) HashAggregate(keys=[d_year#26, ws_item_sk#24, ws_bill_customer_sk#35], functions=[partial_sum(cast(ws_quantity#36 as bigint)), partial_sum(UnscaledValue(ws_wholesale_cost#37)), partial_sum(UnscaledValue(ws_sales_price#38))]) + : +- *(6) Project [ws_item_sk#24, ws_bill_customer_sk#35, ws_quantity#36, ws_wholesale_cost#37, ws_sales_price#38, d_year#26] + : +- *(6) BroadcastHashJoin [ws_sold_date_sk#39], [d_date_sk#31], Inner, BuildRight + : :- *(6) Project [ws_sold_date_sk#39, ws_item_sk#24, ws_bill_customer_sk#35, ws_quantity#36, ws_wholesale_cost#37, ws_sales_price#38] + : : +- *(6) Filter isnull(wr_order_number#40) + : : +- *(6) BroadcastHashJoin [cast(ws_order_number#41 as bigint), cast(ws_item_sk#24 as bigint)], [wr_order_number#40, wr_item_sk#42], LeftOuter, BuildRight + : : :- *(6) Project [ws_sold_date_sk#39, ws_item_sk#24, ws_bill_customer_sk#35, ws_order_number#41, ws_quantity#36, ws_wholesale_cost#37, ws_sales_price#38] + : : : +- *(6) Filter ((isnotnull(ws_sold_date_sk#39) && isnotnull(ws_item_sk#24)) && isnotnull(ws_bill_customer_sk#35)) + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#39,ws_item_sk#24,ws_bill_customer_sk#35,ws_order_number#41,ws_quantity#36,ws_wholesale_cost#37,ws_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#31, d_year#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) + +- *(11) Filter (coalesce(cs_qty#9, 0) > 0) + +- *(11) HashAggregate(keys=[d_year#26, cs_item_sk#21, cs_bill_customer_sk#43], functions=[sum(cast(cs_quantity#44 as bigint)), sum(UnscaledValue(cs_wholesale_cost#45)), sum(UnscaledValue(cs_sales_price#46))]) + +- Exchange hashpartitioning(d_year#26, cs_item_sk#21, cs_bill_customer_sk#43, 200) + +- *(10) HashAggregate(keys=[d_year#26, cs_item_sk#21, cs_bill_customer_sk#43], functions=[partial_sum(cast(cs_quantity#44 as bigint)), partial_sum(UnscaledValue(cs_wholesale_cost#45)), partial_sum(UnscaledValue(cs_sales_price#46))]) + +- *(10) Project [cs_bill_customer_sk#43, cs_item_sk#21, cs_quantity#44, cs_wholesale_cost#45, cs_sales_price#46, d_year#26] + +- *(10) BroadcastHashJoin [cs_sold_date_sk#47], [d_date_sk#31], Inner, BuildRight + :- *(10) Project [cs_sold_date_sk#47, cs_bill_customer_sk#43, cs_item_sk#21, cs_quantity#44, cs_wholesale_cost#45, cs_sales_price#46] + : +- *(10) Filter isnull(cr_order_number#48) + : +- *(10) BroadcastHashJoin [cs_order_number#49, cs_item_sk#21], [cr_order_number#48, cr_item_sk#50], LeftOuter, BuildRight + : :- *(10) Project [cs_sold_date_sk#47, cs_bill_customer_sk#43, cs_item_sk#21, cs_order_number#49, cs_quantity#44, cs_wholesale_cost#45, cs_sales_price#46] + : : +- *(10) Filter ((isnotnull(cs_sold_date_sk#47) && isnotnull(cs_item_sk#21)) && isnotnull(cs_bill_customer_sk#43)) + : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#47,cs_bill_customer_sk#43,cs_item_sk#21,cs_order_number#49,cs_quantity#44,cs_wholesale_cost#45,cs_sales_price#46] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#31, d_year#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt new file mode 100644 index 000000000..64b375073 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt @@ -0,0 +1,81 @@ +TakeOrderedAndProject [cs_qty,store_wholesale_cost,store_sales_price,ws_qty,ss_qty,ss_sp,other_chan_qty,store_qty,ratio,other_chan_sales_price,other_chan_wholesale_cost,ss_wc] + WholeStageCodegen + Project [ws_wc,cs_qty,ws_qty,cs_sp,ss_qty,ss_sp,ws_sp,cs_wc,ss_wc] + BroadcastHashJoin [ss_item_sk,ss_customer_sk,cs_customer_sk,cs_item_sk,cs_sold_year,ss_sold_year] + Project [ss_customer_sk,ws_sp,ws_wc,ss_wc,ss_sold_year,ss_sp,ws_qty,ss_qty,ss_item_sk] + BroadcastHashJoin [ss_item_sk,ss_customer_sk,ws_sold_year,ws_item_sk,ws_customer_sk,ss_sold_year] + HashAggregate [d_year,ss_customer_sk,sum(UnscaledValue(ss_sales_price)),sum,sum,sum(cast(ss_quantity as bigint)),sum,sum(UnscaledValue(ss_wholesale_cost)),ss_item_sk] [sum(UnscaledValue(ss_sales_price)),sum,ss_wc,sum,sum(cast(ss_quantity as bigint)),ss_sold_year,ss_sp,ss_qty,sum,sum(UnscaledValue(ss_wholesale_cost))] + InputAdapter + Exchange [d_year,ss_item_sk,ss_customer_sk] #1 + WholeStageCodegen + HashAggregate [d_year,sum,ss_wholesale_cost,ss_customer_sk,sum,sum,ss_sales_price,sum,sum,ss_quantity,sum,ss_item_sk] [sum,sum,sum,sum,sum,sum] + Project [ss_quantity,ss_item_sk,d_year,ss_customer_sk,ss_sales_price,ss_wholesale_cost] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost] + Filter [sr_ticket_number] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost,ss_ticket_number] + Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost,ss_ticket_number] [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number] + Filter [sr_ticket_number,sr_item_sk] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Filter [ws_qty] + HashAggregate [d_year,sum(cast(ws_quantity as bigint)),ws_item_sk,sum(UnscaledValue(ws_sales_price)),ws_bill_customer_sk,sum,sum,sum(UnscaledValue(ws_wholesale_cost)),sum] [sum(cast(ws_quantity as bigint)),sum(UnscaledValue(ws_sales_price)),ws_sp,ws_wc,sum,ws_sold_year,sum,sum(UnscaledValue(ws_wholesale_cost)),sum,ws_customer_sk,ws_qty] + InputAdapter + Exchange [d_year,ws_item_sk,ws_bill_customer_sk] #5 + WholeStageCodegen + HashAggregate [d_year,ws_wholesale_cost,ws_item_sk,sum,ws_bill_customer_sk,sum,sum,sum,sum,sum,ws_sales_price,ws_quantity] [sum,sum,sum,sum,sum,sum] + Project [d_year,ws_quantity,ws_wholesale_cost,ws_bill_customer_sk,ws_sales_price,ws_item_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_quantity,ws_wholesale_cost,ws_sold_date_sk,ws_bill_customer_sk,ws_sales_price,ws_item_sk] + Filter [wr_order_number] + BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + Project [ws_quantity,ws_wholesale_cost,ws_order_number,ws_sold_date_sk,ws_bill_customer_sk,ws_sales_price,ws_item_sk] + Filter [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] + Scan parquet default.web_sales [ws_quantity,ws_wholesale_cost,ws_order_number,ws_sold_date_sk,ws_bill_customer_sk,ws_sales_price,ws_item_sk] [ws_quantity,ws_wholesale_cost,ws_order_number,ws_sold_date_sk,ws_bill_customer_sk,ws_sales_price,ws_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [wr_item_sk,wr_order_number] + Filter [wr_order_number,wr_item_sk] + Scan parquet default.web_returns [wr_item_sk,wr_order_number] [wr_item_sk,wr_order_number] + InputAdapter + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Filter [cs_qty] + HashAggregate [d_year,sum,sum(UnscaledValue(cs_sales_price)),sum(cast(cs_quantity as bigint)),sum(UnscaledValue(cs_wholesale_cost)),cs_item_sk,cs_bill_customer_sk,sum,sum] [cs_qty,cs_wc,sum,sum(UnscaledValue(cs_sales_price)),sum(cast(cs_quantity as bigint)),cs_sold_year,cs_sp,sum(UnscaledValue(cs_wholesale_cost)),sum,cs_customer_sk,sum] + InputAdapter + Exchange [d_year,cs_item_sk,cs_bill_customer_sk] #8 + WholeStageCodegen + HashAggregate [d_year,sum,sum,sum,cs_item_sk,cs_bill_customer_sk,sum,sum,cs_sales_price,cs_quantity,sum,cs_wholesale_cost] [sum,sum,sum,sum,sum,sum] + Project [cs_wholesale_cost,cs_quantity,d_year,cs_bill_customer_sk,cs_sales_price,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_wholesale_cost,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk] + Filter [cr_order_number] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_wholesale_cost,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] + Filter [cs_sold_date_sk,cs_item_sk,cs_bill_customer_sk] + Scan parquet default.catalog_sales [cs_wholesale_cost,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] [cs_wholesale_cost,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [cr_item_sk,cr_order_number] + Filter [cr_order_number,cr_item_sk] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] [cr_item_sk,cr_order_number] + InputAdapter + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt new file mode 100644 index 000000000..c0fa08c66 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt @@ -0,0 +1,32 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_name#2 ASC NULLS FIRST,substring(s_city#3, 1, 30) ASC NULLS FIRST,profit#4 ASC NULLS FIRST], output=[c_last_name#1,c_first_name#2,substring(s_city, 1, 30)#5,ss_ticket_number#6,amt#7,profit#4]) ++- *(6) Project [c_last_name#1, c_first_name#2, substring(s_city#3, 1, 30) AS substring(s_city, 1, 30)#5, ss_ticket_number#6, amt#7, profit#4, s_city#3] + +- *(6) BroadcastHashJoin [ss_customer_sk#8], [c_customer_sk#9], Inner, BuildRight + :- *(6) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3], functions=[sum(UnscaledValue(ss_coupon_amt#11)), sum(UnscaledValue(ss_net_profit#12))]) + : +- Exchange hashpartitioning(ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3, 200) + : +- *(4) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3], functions=[partial_sum(UnscaledValue(ss_coupon_amt#11)), partial_sum(UnscaledValue(ss_net_profit#12))]) + : +- *(4) Project [ss_customer_sk#8, ss_addr_sk#10, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12, s_city#3] + : +- *(4) BroadcastHashJoin [ss_hdemo_sk#13], [hd_demo_sk#14], Inner, BuildRight + : :- *(4) Project [ss_customer_sk#8, ss_hdemo_sk#13, ss_addr_sk#10, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12, s_city#3] + : : +- *(4) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight + : : :- *(4) Project [ss_customer_sk#8, ss_hdemo_sk#13, ss_addr_sk#10, ss_store_sk#15, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12] + : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + : : : :- *(4) Project [ss_sold_date_sk#17, ss_customer_sk#8, ss_hdemo_sk#13, ss_addr_sk#10, ss_store_sk#15, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12] + : : : : +- *(4) Filter (((isnotnull(ss_sold_date_sk#17) && isnotnull(ss_store_sk#15)) && isnotnull(ss_hdemo_sk#13)) && isnotnull(ss_customer_sk#8)) + : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_customer_sk#8,ss_hdemo_sk#13,ss_addr_sk#10,ss_store_sk#15,ss_ticket_number#6,ss_coupon_amt#11,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [s_store_sk#16, s_city#3] + : : +- *(2) Filter (((isnotnull(s_number_employees#21) && (s_number_employees#21 >= 200)) && (s_number_employees#21 <= 295)) && isnotnull(s_store_sk#16)) + : : +- *(2) FileScan parquet default.store[s_store_sk#16,s_number_employees#21,s_city#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_number_employees), GreaterThanOrEqual(s_number_employees,200), LessThanOrEqual(s_num..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [hd_demo_sk#14] + : +- *(3) Filter (((hd_dep_count#22 = 6) || (hd_vehicle_count#23 > 2)) && isnotnull(hd_demo_sk#14)) + : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#14,hd_dep_count#22,hd_vehicle_count#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,6),GreaterThan(hd_vehicle_count,2)), IsNotNull(hd_demo_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [c_customer_sk#9, c_first_name#2, c_last_name#1] + +- *(5) Filter isnotnull(c_customer_sk#9) + +- *(5) FileScan parquet default.customer[c_customer_sk#9,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt new file mode 100644 index 000000000..cee3e4c79 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject [profit,amt,substring(s_city, 1, 30),s_city,c_last_name,ss_ticket_number,c_first_name] + WholeStageCodegen + Project [profit,amt,s_city,c_last_name,ss_ticket_number,c_first_name] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [sum(UnscaledValue(ss_net_profit)),ss_customer_sk,sum(UnscaledValue(ss_coupon_amt)),sum,sum,s_city,ss_ticket_number,ss_addr_sk] [amt,sum(UnscaledValue(ss_net_profit)),profit,sum(UnscaledValue(ss_coupon_amt)),sum,sum] + InputAdapter + Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city] #1 + WholeStageCodegen + HashAggregate [sum,ss_customer_sk,ss_coupon_amt,sum,sum,sum,s_city,ss_ticket_number,ss_addr_sk,ss_net_profit] [sum,sum,sum,sum] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,s_city,ss_ticket_number] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,s_city,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + Filter [ss_sold_date_sk,ss_store_sk,ss_hdemo_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [d_date_sk] + Filter [d_dow,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_dow] [d_date_sk,d_year,d_dow] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [s_store_sk,s_city] + Filter [s_number_employees,s_store_sk] + Scan parquet default.store [s_store_sk,s_number_employees,s_city] [s_store_sk,s_number_employees,s_city] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [hd_demo_sk] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_sk] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt new file mode 100644 index 000000000..8ae1eb34e --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt @@ -0,0 +1,45 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST], output=[s_store_name#1,sum(ss_net_profit)#2]) ++- *(9) HashAggregate(keys=[s_store_name#1], functions=[sum(UnscaledValue(ss_net_profit#3))]) + +- Exchange hashpartitioning(s_store_name#1, 200) + +- *(8) HashAggregate(keys=[s_store_name#1], functions=[partial_sum(UnscaledValue(ss_net_profit#3))]) + +- *(8) Project [ss_net_profit#3, s_store_name#1] + +- *(8) BroadcastHashJoin [substring(s_zip#4, 1, 2)], [substring(ca_zip#5, 1, 2)], Inner, BuildRight + :- *(8) Project [ss_net_profit#3, s_store_name#1, s_zip#4] + : +- *(8) BroadcastHashJoin [ss_store_sk#6], [s_store_sk#7], Inner, BuildRight + : :- *(8) Project [ss_store_sk#6, ss_net_profit#3] + : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + : : :- *(8) Project [ss_sold_date_sk#8, ss_store_sk#6, ss_net_profit#3] + : : : +- *(8) Filter (isnotnull(ss_sold_date_sk#8) && isnotnull(ss_store_sk#6)) + : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#8,ss_store_sk#6,ss_net_profit#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [d_date_sk#9] + : : +- *(1) Filter ((((isnotnull(d_qoy#10) && isnotnull(d_year#11)) && (d_qoy#10 = 2)) && (d_year#11 = 1998)) && isnotnull(d_date_sk#9)) + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#9,d_year#11,d_qoy#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [s_store_sk#7, s_store_name#1, s_zip#4] + : +- *(2) Filter (isnotnull(s_store_sk#7) && isnotnull(s_zip#4)) + : +- *(2) FileScan parquet default.store[s_store_sk#7,s_store_name#1,s_zip#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(substring(input[0, string, true], 1, 2))) + +- *(7) HashAggregate(keys=[ca_zip#5], functions=[]) + +- Exchange hashpartitioning(ca_zip#5, 200) + +- *(6) HashAggregate(keys=[ca_zip#5], functions=[]) + +- *(6) BroadcastHashJoin [coalesce(ca_zip#5, )], [coalesce(ca_zip#12, )], LeftSemi, BuildRight, (ca_zip#5 <=> ca_zip#12) + :- *(6) Project [substring(ca_zip#13, 1, 5) AS ca_zip#5] + : +- *(6) Filter (substring(ca_zip#13, 1, 5) INSET (56910,69952,63792,39371,74351,11101,25003,97189,57834,73134,62377,51200,32754,22752,86379,14171,91110,40162,98569,28709,13394,66162,25733,25782,26065,18383,51949,87343,50298,83849,33786,64528,23470,67030,46136,25280,46820,77721,99076,18426,31880,17871,98235,45748,49156,18652,72013,51622,43848,78567,41248,13695,44165,67853,54917,53179,64034,10567,71791,68908,55565,59402,64147,85816,57855,61547,27700,68100,28810,58263,15723,83933,51103,58058,90578,82276,81096,81426,96451,77556,38607,76638,18906,62971,57047,48425,35576,11928,30625,83444,73520,51650,57647,60099,30122,94983,24128,10445,41368,26233,26859,21756,24676,19849,36420,38193,58470,39127,13595,87501,24317,15455,69399,98025,81019,48033,11376,39516,67875,92712,14867,38122,29741,42961,30469,51211,56458,15559,16021,33123,33282,33515,72823,54601,76698,56240,72175,60279,20004,68806,72325,28488,43933,50412,45200,22246,78668,79777,96765,67301,73273,49448,82636,23932,47305,29839,39192,18799,61265,37125,58943,64457,88424,24610,84935,89360,68893,30431,28898,10336,90257,59166,46081,26105,96888,36634,86284,35258,39972,22927,73241,53268,24206,27385,99543,31671,14663,30903,39861,24996,63089,88086,83921,21076,67897,66708,45721,60576,25103,52867,30450,36233,30010,96576,73171,56571,56575,64544,13955,78451,43285,18119,16725,83041,76107,79994,54364,35942,56691,19769,63435,34102,18845,22744,13354,75691,45549,23968,31387,83144,13375,15765,28577,88190,19736,73650,37930,25989,83926,94898,51798,39736,22437,55253,38415,71256,18376,42029,25858,44438,19515,38935,51649,71954,15882,18767,63193,25486,49130,37126,40604,34425,17043,12305,11634,26653,94167,36446,10516,67473,66864,72425,63981,18842,22461,42666,47770,69035,70372,28587,45266,15371,15798,45375,90225,16807,31016,68014,21337,19505,50016,10144,84093,21286,19430,34322,91068,94945,72305,24671,58048,65084,28545,21195,20548,22245,77191,96976,48583,76231,15734,61810,11356,68621,68786,98359,41367,26689,69913,76614,68101,88885,50308,79077,18270,28915,29178,53672,62878,10390,14922,68341,56529,41766,68309,56616,15126,61860,97789,11489,45692,41918,72151,72550,27156,36495,70738,17879,53535,17920,68880,78890,35850,14089,58078,65164,27068,26231,13376,57665,32213,77610,87816,21309,15146,86198,91137,55307,67467,40558,94627,82136,22351,89091,20260,23006,91393,47537,62496,98294,18840,71286,81312,31029,70466,35458,14060,22685,28286,25631,19512,40081,63837,14328,35474,22152,76232,51061,86057,17183) && isnotnull(substring(ca_zip#13, 1, 5))) + : +- *(6) FileScan parquet default.customer_address[ca_zip#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ))) + +- *(5) Project [ca_zip#12] + +- *(5) Filter (count(1)#14 > 10) + +- *(5) HashAggregate(keys=[ca_zip#13], functions=[count(1)]) + +- Exchange hashpartitioning(ca_zip#13, 200) + +- *(4) HashAggregate(keys=[ca_zip#13], functions=[partial_count(1)]) + +- *(4) Project [ca_zip#13] + +- *(4) BroadcastHashJoin [ca_address_sk#15], [c_current_addr_sk#16], Inner, BuildRight + :- *(4) Project [ca_address_sk#15, ca_zip#13] + : +- *(4) Filter isnotnull(ca_address_sk#15) + : +- *(4) FileScan parquet default.customer_address[ca_address_sk#15,ca_zip#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [c_current_addr_sk#16] + +- *(3) Filter ((isnotnull(c_preferred_cust_flag#17) && (c_preferred_cust_flag#17 = Y)) && isnotnull(c_current_addr_sk#16)) + +- *(3) FileScan parquet default.customer[c_current_addr_sk#16,c_preferred_cust_flag#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_preferred_cust_flag), EqualTo(c_preferred_cust_flag,Y), IsNotNull(c_current_addr_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/simplified.txt new file mode 100644 index 000000000..855f6d67d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/simplified.txt @@ -0,0 +1,61 @@ +TakeOrderedAndProject [s_store_name,sum(ss_net_profit)] + WholeStageCodegen + HashAggregate [s_store_name,sum,sum(UnscaledValue(ss_net_profit))] [sum(UnscaledValue(ss_net_profit)),sum(ss_net_profit),sum] + InputAdapter + Exchange [s_store_name] #1 + WholeStageCodegen + HashAggregate [s_store_name,ss_net_profit,sum,sum] [sum,sum] + Project [ss_net_profit,s_store_name] + BroadcastHashJoin [s_zip,ca_zip] + Project [ss_net_profit,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_net_profit] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] [ss_sold_date_sk,ss_store_sk,ss_net_profit] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [d_date_sk] + Filter [d_qoy,d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [s_store_sk,s_store_name,s_zip] + Filter [s_store_sk,s_zip] + Scan parquet default.store [s_store_sk,s_store_name,s_zip] [s_store_sk,s_store_name,s_zip] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + HashAggregate [ca_zip] + InputAdapter + Exchange [ca_zip] #5 + WholeStageCodegen + HashAggregate [ca_zip] + BroadcastHashJoin [ca_zip,ca_zip] + Project [ca_zip] + Filter [ca_zip] + Scan parquet default.customer_address [ca_zip] [ca_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [ca_zip] + Filter [count(1)] + HashAggregate [ca_zip,count,count(1)] [count(1),ca_zip,count(1),count] + InputAdapter + Exchange [ca_zip] #7 + WholeStageCodegen + HashAggregate [ca_zip,count,count] [count,count] + Project [ca_zip] + BroadcastHashJoin [ca_address_sk,c_current_addr_sk] + Project [ca_address_sk,ca_zip] + Filter [ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_zip] [ca_address_sk,ca_zip] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [c_current_addr_sk] + Filter [c_preferred_cust_flag,c_current_addr_sk] + Scan parquet default.customer [c_current_addr_sk,c_preferred_cust_flag] [c_current_addr_sk,c_preferred_cust_flag] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt new file mode 100644 index 000000000..ec6c9ab88 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt @@ -0,0 +1,97 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NULLS FIRST], output=[channel#1,id#2,sales#3,returns#4,profit#5]) ++- *(23) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[sum(sales#7), sum(returns#8), sum(profit#9)]) + +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 200) + +- *(22) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[partial_sum(sales#7), partial_sum(returns#8), partial_sum(profit#9)]) + +- *(22) Expand [List(sales#7, returns#8, profit#9, channel#10, id#11, 0), List(sales#7, returns#8, profit#9, channel#10, null, 1), List(sales#7, returns#8, profit#9, null, null, 3)], [sales#7, returns#8, profit#9, channel#1, id#2, spark_grouping_id#6] + +- Union + :- *(7) HashAggregate(keys=[s_store_id#12], functions=[sum(UnscaledValue(ss_ext_sales_price#13)), sum(coalesce(cast(sr_return_amt#14 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#15 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#16 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) + : +- Exchange hashpartitioning(s_store_id#12, 200) + : +- *(6) HashAggregate(keys=[s_store_id#12], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#13)), partial_sum(coalesce(cast(sr_return_amt#14 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#15 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#16 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) + : +- *(6) Project [ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16, s_store_id#12] + : +- *(6) BroadcastHashJoin [ss_promo_sk#17], [p_promo_sk#18], Inner, BuildRight + : :- *(6) Project [ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16, s_store_id#12] + : : +- *(6) BroadcastHashJoin [ss_item_sk#19], [i_item_sk#20], Inner, BuildRight + : : :- *(6) Project [ss_item_sk#19, ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16, s_store_id#12] + : : : +- *(6) BroadcastHashJoin [ss_store_sk#21], [s_store_sk#22], Inner, BuildRight + : : : :- *(6) Project [ss_item_sk#19, ss_store_sk#21, ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16] + : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#23], [d_date_sk#24], Inner, BuildRight + : : : : :- *(6) Project [ss_sold_date_sk#23, ss_item_sk#19, ss_store_sk#21, ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16] + : : : : : +- *(6) BroadcastHashJoin [cast(ss_item_sk#19 as bigint), cast(ss_ticket_number#25 as bigint)], [sr_item_sk#26, sr_ticket_number#27], LeftOuter, BuildRight + : : : : : :- *(6) Project [ss_sold_date_sk#23, ss_item_sk#19, ss_store_sk#21, ss_promo_sk#17, ss_ticket_number#25, ss_ext_sales_price#13, ss_net_profit#15] + : : : : : : +- *(6) Filter (((isnotnull(ss_sold_date_sk#23) && isnotnull(ss_store_sk#21)) && isnotnull(ss_item_sk#19)) && isnotnull(ss_promo_sk#17)) + : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#23,ss_item_sk#19,ss_store_sk#21,ss_promo_sk#17,ss_ticket_number#25,ss_ext_sales_price#13,ss_net_profit#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)], ReadSchema: struct= 11192)) && (d_date#28 <= 11222)) && isnotnull(d_date_sk#24)) + : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#24,d_date#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), Is..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(3) Project [s_store_sk#22, s_store_id#12] + : : : +- *(3) Filter isnotnull(s_store_sk#22) + : : : +- *(3) FileScan parquet default.store[s_store_sk#22,s_store_id#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(4) Project [i_item_sk#20] + : : +- *(4) Filter ((isnotnull(i_current_price#29) && (i_current_price#29 > 50.00)) && isnotnull(i_item_sk#20)) + : : +- *(4) FileScan parquet default.item[i_item_sk#20,i_current_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(5) Project [p_promo_sk#18] + : +- *(5) Filter ((isnotnull(p_channel_tv#30) && (p_channel_tv#30 = N)) && isnotnull(p_promo_sk#18)) + : +- *(5) FileScan parquet default.promotion[p_promo_sk#18,p_channel_tv#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)], ReadSchema: struct + :- *(14) HashAggregate(keys=[cp_catalog_page_id#31], functions=[sum(UnscaledValue(cs_ext_sales_price#32)), sum(coalesce(cast(cr_return_amount#33 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#34 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#35 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) + : +- Exchange hashpartitioning(cp_catalog_page_id#31, 200) + : +- *(13) HashAggregate(keys=[cp_catalog_page_id#31], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#32)), partial_sum(coalesce(cast(cr_return_amount#33 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#34 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#35 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) + : +- *(13) Project [cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35, cp_catalog_page_id#31] + : +- *(13) BroadcastHashJoin [cs_promo_sk#36], [p_promo_sk#18], Inner, BuildRight + : :- *(13) Project [cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35, cp_catalog_page_id#31] + : : +- *(13) BroadcastHashJoin [cs_item_sk#37], [i_item_sk#20], Inner, BuildRight + : : :- *(13) Project [cs_item_sk#37, cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35, cp_catalog_page_id#31] + : : : +- *(13) BroadcastHashJoin [cs_catalog_page_sk#38], [cp_catalog_page_sk#39], Inner, BuildRight + : : : :- *(13) Project [cs_catalog_page_sk#38, cs_item_sk#37, cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35] + : : : : +- *(13) BroadcastHashJoin [cs_sold_date_sk#40], [d_date_sk#24], Inner, BuildRight + : : : : :- *(13) Project [cs_sold_date_sk#40, cs_catalog_page_sk#38, cs_item_sk#37, cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35] + : : : : : +- *(13) BroadcastHashJoin [cs_item_sk#37, cs_order_number#41], [cr_item_sk#42, cr_order_number#43], LeftOuter, BuildRight + : : : : : :- *(13) Project [cs_sold_date_sk#40, cs_catalog_page_sk#38, cs_item_sk#37, cs_promo_sk#36, cs_order_number#41, cs_ext_sales_price#32, cs_net_profit#34] + : : : : : : +- *(13) Filter (((isnotnull(cs_sold_date_sk#40) && isnotnull(cs_catalog_page_sk#38)) && isnotnull(cs_item_sk#37)) && isnotnull(cs_promo_sk#36)) + : : : : : : +- *(13) FileScan parquet default.catalog_sales[cs_sold_date_sk#40,cs_catalog_page_sk#38,cs_item_sk#37,cs_promo_sk#36,cs_order_number#41,cs_ext_sales_price#32,cs_net_profit#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_p..., ReadSchema: struct + : : : : +- ReusedExchange [d_date_sk#24], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(10) Project [cp_catalog_page_sk#39, cp_catalog_page_id#31] + : : : +- *(10) Filter isnotnull(cp_catalog_page_sk#39) + : : : +- *(10) FileScan parquet default.catalog_page[cp_catalog_page_sk#39,cp_catalog_page_id#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_page], PartitionFilters: [], PushedFilters: [IsNotNull(cp_catalog_page_sk)], ReadSchema: struct + : : +- ReusedExchange [i_item_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [p_promo_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(21) HashAggregate(keys=[web_site_id#44], functions=[sum(UnscaledValue(ws_ext_sales_price#45)), sum(coalesce(cast(wr_return_amt#46 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#47 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#48 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) + +- Exchange hashpartitioning(web_site_id#44, 200) + +- *(20) HashAggregate(keys=[web_site_id#44], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#45)), partial_sum(coalesce(cast(wr_return_amt#46 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#47 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#48 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) + +- *(20) Project [ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48, web_site_id#44] + +- *(20) BroadcastHashJoin [ws_promo_sk#49], [p_promo_sk#18], Inner, BuildRight + :- *(20) Project [ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48, web_site_id#44] + : +- *(20) BroadcastHashJoin [ws_item_sk#50], [i_item_sk#20], Inner, BuildRight + : :- *(20) Project [ws_item_sk#50, ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48, web_site_id#44] + : : +- *(20) BroadcastHashJoin [ws_web_site_sk#51], [web_site_sk#52], Inner, BuildRight + : : :- *(20) Project [ws_item_sk#50, ws_web_site_sk#51, ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48] + : : : +- *(20) BroadcastHashJoin [ws_sold_date_sk#53], [d_date_sk#24], Inner, BuildRight + : : : :- *(20) Project [ws_sold_date_sk#53, ws_item_sk#50, ws_web_site_sk#51, ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48] + : : : : +- *(20) BroadcastHashJoin [cast(ws_item_sk#50 as bigint), cast(ws_order_number#54 as bigint)], [wr_item_sk#55, wr_order_number#56], LeftOuter, BuildRight + : : : : :- *(20) Project [ws_sold_date_sk#53, ws_item_sk#50, ws_web_site_sk#51, ws_promo_sk#49, ws_order_number#54, ws_ext_sales_price#45, ws_net_profit#47] + : : : : : +- *(20) Filter (((isnotnull(ws_sold_date_sk#53) && isnotnull(ws_web_site_sk#51)) && isnotnull(ws_item_sk#50)) && isnotnull(ws_promo_sk#49)) + : : : : : +- *(20) FileScan parquet default.web_sales[ws_sold_date_sk#53,ws_item_sk#50,ws_web_site_sk#51,ws_promo_sk#49,ws_order_number#54,ws_ext_sales_price#45,ws_net_profit#47] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo..., ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#24], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(17) Project [web_site_sk#52, web_site_id#44] + : : +- *(17) Filter isnotnull(web_site_sk#52) + : : +- *(17) FileScan parquet default.web_site[web_site_sk#52,web_site_id#44] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct + : +- ReusedExchange [i_item_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [p_promo_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt new file mode 100644 index 000000000..e16daec1c --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt @@ -0,0 +1,133 @@ +TakeOrderedAndProject [profit,sales,id,channel,returns] + WholeStageCodegen + HashAggregate [sum,sum(sales),sum(returns),spark_grouping_id,id,channel,sum(profit),sum,sum] [profit,sum,sales,sum(sales),sum(returns),returns,sum(profit),sum,sum] + InputAdapter + Exchange [channel,id,spark_grouping_id] #1 + WholeStageCodegen + HashAggregate [profit,sum,sum,returns,spark_grouping_id,sum,id,channel,sales,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Expand [profit,id,returns,sales,channel] + InputAdapter + Union + WholeStageCodegen + HashAggregate [sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),s_store_id,sum,sum(UnscaledValue(ss_ext_sales_price)),sum] [id,sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sales,sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),profit,sum,returns,sum(UnscaledValue(ss_ext_sales_price)),channel,sum] + InputAdapter + Exchange [s_store_id] #2 + WholeStageCodegen + HashAggregate [sum,sum,sum,sum,sr_return_amt,s_store_id,ss_ext_sales_price,sum,sr_net_loss,ss_net_profit,sum] [sum,sum,sum,sum,sum,sum] + Project [s_store_id,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [s_store_id,ss_ext_sales_price,ss_net_profit,ss_promo_sk,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_id,ss_item_sk,ss_ext_sales_price,ss_net_profit,ss_promo_sk,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_promo_sk,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_promo_sk,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_promo_sk,ss_ticket_number] + Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk,ss_promo_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_promo_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_promo_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + Filter [sr_item_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [s_store_sk,s_store_id] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_store_id] [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [i_item_sk] + Filter [i_current_price,i_item_sk] + Scan parquet default.item [i_item_sk,i_current_price] [i_item_sk,i_current_price] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [p_promo_sk] + Filter [p_channel_tv,p_promo_sk] + Scan parquet default.promotion [p_promo_sk,p_channel_tv] [p_promo_sk,p_channel_tv] + WholeStageCodegen + HashAggregate [sum(UnscaledValue(cs_ext_sales_price)),cp_catalog_page_id,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00))] [sum(UnscaledValue(cs_ext_sales_price)),sales,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),channel,sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),returns,id,profit] + InputAdapter + Exchange [cp_catalog_page_id] #8 + WholeStageCodegen + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum,cr_return_amount,sum,cs_net_profit,sum,cr_net_loss,cs_ext_sales_price] [sum,sum,sum,sum,sum,sum] + Project [cr_net_loss,cp_catalog_page_id,cr_return_amount,cs_net_profit,cs_ext_sales_price] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_promo_sk,cr_net_loss,cp_catalog_page_id,cr_return_amount,cs_net_profit,cs_ext_sales_price] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_promo_sk,cr_net_loss,cp_catalog_page_id,cr_return_amount,cs_item_sk,cs_net_profit,cs_ext_sales_price] + BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] + Project [cs_promo_sk,cr_net_loss,cs_catalog_page_sk,cr_return_amount,cs_item_sk,cs_net_profit,cs_ext_sales_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_promo_sk,cr_net_loss,cs_catalog_page_sk,cs_sold_date_sk,cr_return_amount,cs_item_sk,cs_net_profit,cs_ext_sales_price] + BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + Project [cs_promo_sk,cs_catalog_page_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit,cs_ext_sales_price] + Filter [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk] + Scan parquet default.catalog_sales [cs_promo_sk,cs_catalog_page_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit,cs_ext_sales_price] [cs_promo_sk,cs_catalog_page_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit,cs_ext_sales_price] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + Filter [cr_item_sk,cr_order_number] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #4 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [cp_catalog_page_sk,cp_catalog_page_id] + Filter [cp_catalog_page_sk] + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] [cp_catalog_page_sk,cp_catalog_page_id] + InputAdapter + ReusedExchange [i_item_sk] [i_item_sk] #6 + InputAdapter + ReusedExchange [p_promo_sk] [p_promo_sk] #7 + WholeStageCodegen + HashAggregate [web_site_id,sum,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ws_ext_sales_price)),sum,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00))] [sum,sales,id,returns,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ws_ext_sales_price)),profit,sum,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),channel] + InputAdapter + Exchange [web_site_id] #11 + WholeStageCodegen + HashAggregate [web_site_id,sum,sum,sum,sum,ws_ext_sales_price,ws_net_profit,sum,wr_return_amt,wr_net_loss,sum] [sum,sum,sum,sum,sum,sum] + Project [web_site_id,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss] + BroadcastHashJoin [ws_promo_sk,p_promo_sk] + Project [web_site_id,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss,ws_promo_sk] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [web_site_id,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss,ws_promo_sk,ws_item_sk] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_web_site_sk,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss,ws_promo_sk,ws_item_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_web_site_sk,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss,ws_sold_date_sk,ws_promo_sk,ws_item_sk] + BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] + Project [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_promo_sk,ws_item_sk] + Filter [ws_sold_date_sk,ws_web_site_sk,ws_item_sk,ws_promo_sk] + Scan parquet default.web_sales [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_promo_sk,ws_item_sk] [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_promo_sk,ws_item_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen + Project [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + Filter [wr_item_sk,wr_order_number] + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #4 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen + Project [web_site_sk,web_site_id] + Filter [web_site_sk] + Scan parquet default.web_site [web_site_sk,web_site_id] [web_site_sk,web_site_id] + InputAdapter + ReusedExchange [i_item_sk] [i_item_sk] #6 + InputAdapter + ReusedExchange [p_promo_sk] [p_promo_sk] #7 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt new file mode 100644 index 000000000..92713c532 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt @@ -0,0 +1,52 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST,c_salutation#2 ASC NULLS FIRST,c_first_name#3 ASC NULLS FIRST,c_last_name#4 ASC NULLS FIRST,ca_street_number#5 ASC NULLS FIRST,ca_street_name#6 ASC NULLS FIRST,ca_street_type#7 ASC NULLS FIRST,ca_suite_number#8 ASC NULLS FIRST,ca_city#9 ASC NULLS FIRST,ca_county#10 ASC NULLS FIRST,ca_state#11 ASC NULLS FIRST,ca_zip#12 ASC NULLS FIRST,ca_country#13 ASC NULLS FIRST,ca_gmt_offset#14 ASC NULLS FIRST,ca_location_type#15 ASC NULLS FIRST,ctr_total_return#16 ASC NULLS FIRST], output=[c_customer_id#1,c_salutation#2,c_first_name#3,c_last_name#4,ca_street_number#5,ca_street_name#6,ca_street_type#7,ca_suite_number#8,ca_city#9,ca_county#10,ca_state#11,ca_zip#12,ca_country#13,ca_gmt_offset#14,ca_location_type#15,ctr_total_return#16]) ++- *(11) Project [c_customer_id#1, c_salutation#2, c_first_name#3, c_last_name#4, ca_street_number#5, ca_street_name#6, ca_street_type#7, ca_suite_number#8, ca_city#9, ca_county#10, ca_state#11, ca_zip#12, ca_country#13, ca_gmt_offset#14, ca_location_type#15, ctr_total_return#16] + +- *(11) BroadcastHashJoin [c_current_addr_sk#17], [ca_address_sk#18], Inner, BuildRight + :- *(11) Project [ctr_total_return#16, c_customer_id#1, c_current_addr_sk#17, c_salutation#2, c_first_name#3, c_last_name#4] + : +- *(11) BroadcastHashJoin [ctr_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + : :- *(11) Project [ctr_customer_sk#19, ctr_total_return#16] + : : +- *(11) BroadcastHashJoin [ctr_state#21], [ctr_state#21#22], Inner, BuildRight, (cast(ctr_total_return#16 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#23) + : : :- *(11) Filter isnotnull(ctr_total_return#16) + : : : +- *(11) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[sum(UnscaledValue(cr_return_amt_inc_tax#25))]) + : : : +- Exchange hashpartitioning(cr_returning_customer_sk#24, ca_state#11, 200) + : : : +- *(3) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[partial_sum(UnscaledValue(cr_return_amt_inc_tax#25))]) + : : : +- *(3) Project [cr_returning_customer_sk#24, cr_return_amt_inc_tax#25, ca_state#11] + : : : +- *(3) BroadcastHashJoin [cr_returning_addr_sk#26], [ca_address_sk#18], Inner, BuildRight + : : : :- *(3) Project [cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] + : : : : +- *(3) BroadcastHashJoin [cr_returned_date_sk#27], [d_date_sk#28], Inner, BuildRight + : : : : :- *(3) Project [cr_returned_date_sk#27, cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] + : : : : : +- *(3) Filter ((isnotnull(cr_returned_date_sk#27) && isnotnull(cr_returning_addr_sk#26)) && isnotnull(cr_returning_customer_sk#24)) + : : : : : +- *(3) FileScan parquet default.catalog_returns[cr_returned_date_sk#27,cr_returning_customer_sk#24,cr_returning_addr_sk#26,cr_return_amt_inc_tax#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk), IsNotNull(cr_returning_customer..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [ca_address_sk#18, ca_state#11] + : : : +- *(2) Filter (isnotnull(ca_address_sk#18) && isnotnull(ca_state#11)) + : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#18,ca_state#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) + : : +- *(8) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#23) + : : +- *(8) HashAggregate(keys=[ctr_state#21], functions=[avg(ctr_total_return#16)]) + : : +- Exchange hashpartitioning(ctr_state#21, 200) + : : +- *(7) HashAggregate(keys=[ctr_state#21], functions=[partial_avg(ctr_total_return#16)]) + : : +- *(7) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[sum(UnscaledValue(cr_return_amt_inc_tax#25))]) + : : +- Exchange hashpartitioning(cr_returning_customer_sk#24, ca_state#11, 200) + : : +- *(6) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[partial_sum(UnscaledValue(cr_return_amt_inc_tax#25))]) + : : +- *(6) Project [cr_returning_customer_sk#24, cr_return_amt_inc_tax#25, ca_state#11] + : : +- *(6) BroadcastHashJoin [cr_returning_addr_sk#26], [ca_address_sk#18], Inner, BuildRight + : : :- *(6) Project [cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] + : : : +- *(6) BroadcastHashJoin [cr_returned_date_sk#27], [d_date_sk#28], Inner, BuildRight + : : : :- *(6) Project [cr_returned_date_sk#27, cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] + : : : : +- *(6) Filter (isnotnull(cr_returned_date_sk#27) && isnotnull(cr_returning_addr_sk#26)) + : : : : +- *(6) FileScan parquet default.catalog_returns[cr_returned_date_sk#27,cr_returning_customer_sk#24,cr_returning_addr_sk#26,cr_return_amt_inc_tax#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk)], ReadSchema: struct= 62.00)) && (cast(i_current_price#3 as decimal(12,2)) <= 92.00)) && i_manufact_id#9 IN (129,270,821,423)) && isnotnull(i_item_sk#4)) + : : : +- *(4) FileScan parquet default.item[i_item_sk#4,i_item_id#1,i_item_desc#2,i_current_price#3,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,62.00), In(i_manufact_id, [129,27..., ReadSchema: struct= 100)) && (inv_quantity_on_hand#10 <= 500)) && isnotnull(inv_item_sk#8)) && isnotnull(inv_date_sk#6)) + : : +- *(1) FileScan parquet default.inventory[inv_date_sk#6,inv_item_sk#8,inv_quantity_on_hand#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(i..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#7] + : +- *(2) Filter (((isnotnull(d_date#11) && (d_date#11 >= 11102)) && (d_date#11 <= 11162)) && isnotnull(d_date_sk#7)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_date#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-05-25), LessThanOrEqual(d_date,2000-07-24), Is..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [ss_item_sk#5] + +- *(3) Filter isnotnull(ss_item_sk#5) + +- *(3) FileScan parquet default.store_sales[ss_item_sk#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt new file mode 100644 index 000000000..e7c48e44c --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt @@ -0,0 +1,34 @@ +TakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,i_current_price] + InputAdapter + Exchange [i_item_id,i_item_desc,i_current_price] #1 + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,i_current_price] + Project [i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [i_current_price,i_item_sk,inv_date_sk,i_item_desc,i_item_id] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + Filter [i_current_price,i_manufact_id,i_item_sk] + Scan parquet default.item [i_current_price,i_manufact_id,i_item_sk,i_item_desc,i_item_id] [i_current_price,i_manufact_id,i_item_sk,i_item_desc,i_item_id] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [inv_date_sk,inv_item_sk] + Filter [inv_quantity_on_hand,inv_item_sk,inv_date_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [ss_item_sk] + Filter [ss_item_sk] + Scan parquet default.store_sales [ss_item_sk] [ss_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt new file mode 100644 index 000000000..46662f99a --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt @@ -0,0 +1,69 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,sr_item_qty#2 ASC NULLS FIRST], output=[item_id#1,sr_item_qty#2,sr_dev#3,cr_item_qty#4,cr_dev#5,wr_item_qty#6,wr_dev#7,average#8]) ++- *(18) Project [item_id#1, sr_item_qty#2, (((cast(sr_item_qty#2 as double) / cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as double)) / 3.0) * 100.0) AS sr_dev#3, cr_item_qty#4, (((cast(cr_item_qty#4 as double) / cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as double)) / 3.0) * 100.0) AS cr_dev#5, wr_item_qty#6, (((cast(wr_item_qty#6 as double) / cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as double)) / 3.0) * 100.0) AS wr_dev#7, CheckOverflow((promote_precision(cast(cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as decimal(20,0)) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#8] + +- *(18) BroadcastHashJoin [item_id#1], [item_id#9], Inner, BuildRight + :- *(18) Project [item_id#1, sr_item_qty#2, cr_item_qty#4] + : +- *(18) BroadcastHashJoin [item_id#1], [item_id#10], Inner, BuildRight + : :- *(18) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(sr_return_quantity#12 as bigint))]) + : : +- Exchange hashpartitioning(i_item_id#11, 200) + : : +- *(5) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(cast(sr_return_quantity#12 as bigint))]) + : : +- *(5) Project [sr_return_quantity#12, i_item_id#11] + : : +- *(5) BroadcastHashJoin [sr_returned_date_sk#13], [cast(d_date_sk#14 as bigint)], Inner, BuildRight + : : :- *(5) Project [sr_returned_date_sk#13, sr_return_quantity#12, i_item_id#11] + : : : +- *(5) BroadcastHashJoin [sr_item_sk#15], [cast(i_item_sk#16 as bigint)], Inner, BuildRight + : : : :- *(5) Project [sr_returned_date_sk#13, sr_item_sk#15, sr_return_quantity#12] + : : : : +- *(5) Filter (isnotnull(sr_item_sk#15) && isnotnull(sr_returned_date_sk#13)) + : : : : +- *(5) FileScan parquet default.store_returns[sr_returned_date_sk#13,sr_item_sk#15,sr_return_quantity#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [i_item_sk#16, i_item_id#11] + : : : +- *(1) Filter (isnotnull(i_item_sk#16) && isnotnull(i_item_id#11)) + : : : +- *(1) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(4) Project [d_date_sk#14] + : : +- *(4) BroadcastHashJoin [d_date#17], [d_date#17#18], LeftSemi, BuildRight + : : :- *(4) Project [d_date_sk#14, d_date#17] + : : : +- *(4) Filter isnotnull(d_date_sk#14) + : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) + : : +- *(3) Project [d_date#17 AS d_date#17#18] + : : +- *(3) BroadcastHashJoin [d_week_seq#19], [d_week_seq#19#20], LeftSemi, BuildRight + : : :- *(3) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [d_week_seq#19 AS d_week_seq#19#20] + : : +- *(2) Filter cast(d_date#17 as string) IN (2000-06-30,2000-09-27,2000-11-17) + : : +- *(2) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- *(11) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(cr_return_quantity#21 as bigint))]) + : +- Exchange hashpartitioning(i_item_id#11, 200) + : +- *(10) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(cast(cr_return_quantity#21 as bigint))]) + : +- *(10) Project [cr_return_quantity#21, i_item_id#11] + : +- *(10) BroadcastHashJoin [cr_returned_date_sk#22], [d_date_sk#14], Inner, BuildRight + : :- *(10) Project [cr_returned_date_sk#22, cr_return_quantity#21, i_item_id#11] + : : +- *(10) BroadcastHashJoin [cr_item_sk#23], [i_item_sk#16], Inner, BuildRight + : : :- *(10) Project [cr_returned_date_sk#22, cr_item_sk#23, cr_return_quantity#21] + : : : +- *(10) Filter (isnotnull(cr_item_sk#23) && isnotnull(cr_returned_date_sk#22)) + : : : +- *(10) FileScan parquet default.catalog_returns[cr_returned_date_sk#22,cr_item_sk#23,cr_return_quantity#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_returned_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [i_item_sk#16, i_item_id#11] + : : +- *(6) Filter (isnotnull(i_item_sk#16) && isnotnull(i_item_id#11)) + : : +- *(6) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(9) Project [d_date_sk#14] + : +- *(9) BroadcastHashJoin [d_date#17], [d_date#17#24], LeftSemi, BuildRight + : :- *(9) Project [d_date_sk#14, d_date#17] + : : +- *(9) Filter isnotnull(d_date_sk#14) + : : +- *(9) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + : +- ReusedExchange [d_date#17#24], BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- *(17) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(wr_return_quantity#25 as bigint))]) + +- Exchange hashpartitioning(i_item_id#11, 200) + +- *(16) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(cast(wr_return_quantity#25 as bigint))]) + +- *(16) Project [wr_return_quantity#25, i_item_id#11] + +- *(16) BroadcastHashJoin [wr_returned_date_sk#26], [cast(d_date_sk#14 as bigint)], Inner, BuildRight + :- *(16) Project [wr_returned_date_sk#26, wr_return_quantity#25, i_item_id#11] + : +- *(16) BroadcastHashJoin [wr_item_sk#27], [cast(i_item_sk#16 as bigint)], Inner, BuildRight + : :- *(16) Project [wr_returned_date_sk#26, wr_item_sk#27, wr_return_quantity#25] + : : +- *(16) Filter (isnotnull(wr_item_sk#27) && isnotnull(wr_returned_date_sk#26)) + : : +- *(16) FileScan parquet default.web_returns[wr_returned_date_sk#26,wr_item_sk#27,wr_return_quantity#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_returned_date_sk)], ReadSchema: struct + : +- ReusedExchange [i_item_sk#16, i_item_id#11], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [d_date_sk#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt new file mode 100644 index 000000000..027dc649e --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt @@ -0,0 +1,94 @@ +TakeOrderedAndProject [wr_item_qty,cr_item_qty,wr_dev,sr_dev,sr_item_qty,average,item_id,cr_dev] + WholeStageCodegen + Project [item_id,sr_item_qty,cr_item_qty,wr_item_qty] + BroadcastHashJoin [item_id,item_id] + Project [item_id,sr_item_qty,cr_item_qty] + BroadcastHashJoin [item_id,item_id] + HashAggregate [i_item_id,sum,sum(cast(sr_return_quantity as bigint))] [sum(cast(sr_return_quantity as bigint)),item_id,sr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #1 + WholeStageCodegen + HashAggregate [i_item_id,sr_return_quantity,sum,sum] [sum,sum] + Project [sr_return_quantity,i_item_id] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [sr_returned_date_sk,sr_return_quantity,i_item_id] + BroadcastHashJoin [sr_item_sk,i_item_sk] + Project [sr_returned_date_sk,sr_item_sk,sr_return_quantity] + Filter [sr_item_sk,sr_returned_date_sk] + Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_return_quantity] [sr_returned_date_sk,sr_item_sk,sr_return_quantity] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_item_sk,i_item_id] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Project [d_date_sk,d_date] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date] + BroadcastHashJoin [d_week_seq,d_week_seq] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_week_seq] + Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(cast(cr_return_quantity as bigint))] [sum(cast(cr_return_quantity as bigint)),item_id,cr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #7 + WholeStageCodegen + HashAggregate [i_item_id,cr_return_quantity,sum,sum] [sum,sum] + Project [cr_return_quantity,i_item_id] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Project [cr_returned_date_sk,cr_return_quantity,i_item_id] + BroadcastHashJoin [cr_item_sk,i_item_sk] + Project [cr_returned_date_sk,cr_item_sk,cr_return_quantity] + Filter [cr_item_sk,cr_returned_date_sk] + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_item_sk,cr_return_quantity] [cr_returned_date_sk,cr_item_sk,cr_return_quantity] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [i_item_sk,i_item_id] + Filter [i_item_sk,i_item_id] + Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Project [d_date_sk,d_date] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + ReusedExchange [d_date] [d_date] #4 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(cast(wr_return_quantity as bigint))] [sum(cast(wr_return_quantity as bigint)),item_id,wr_item_qty,sum] + InputAdapter + Exchange [i_item_id] #11 + WholeStageCodegen + HashAggregate [i_item_id,wr_return_quantity,sum,sum] [sum,sum] + Project [wr_return_quantity,i_item_id] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Project [wr_returned_date_sk,wr_return_quantity,i_item_id] + BroadcastHashJoin [wr_item_sk,i_item_sk] + Project [wr_returned_date_sk,wr_item_sk,wr_return_quantity] + Filter [wr_item_sk,wr_returned_date_sk] + Scan parquet default.web_returns [wr_returned_date_sk,wr_item_sk,wr_return_quantity] [wr_returned_date_sk,wr_item_sk,wr_return_quantity] + InputAdapter + ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #2 + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/explain.txt new file mode 100644 index 000000000..c0cde7a5d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/explain.txt @@ -0,0 +1,35 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST], output=[customer_id#2,customername#3]) ++- *(6) Project [c_customer_id#1 AS customer_id#2, concat(c_last_name#4, , , c_first_name#5) AS customername#3, c_customer_id#1] + +- *(6) BroadcastHashJoin [cast(cd_demo_sk#6 as bigint)], [sr_cdemo_sk#7], Inner, BuildRight + :- *(6) Project [c_customer_id#1, c_first_name#5, c_last_name#4, cd_demo_sk#6] + : +- *(6) BroadcastHashJoin [hd_income_band_sk#8], [ib_income_band_sk#9], Inner, BuildRight + : :- *(6) Project [c_customer_id#1, c_first_name#5, c_last_name#4, cd_demo_sk#6, hd_income_band_sk#8] + : : +- *(6) BroadcastHashJoin [c_current_hdemo_sk#10], [hd_demo_sk#11], Inner, BuildRight + : : :- *(6) Project [c_customer_id#1, c_current_hdemo_sk#10, c_first_name#5, c_last_name#4, cd_demo_sk#6] + : : : +- *(6) BroadcastHashJoin [c_current_cdemo_sk#12], [cd_demo_sk#6], Inner, BuildRight + : : : :- *(6) Project [c_customer_id#1, c_current_cdemo_sk#12, c_current_hdemo_sk#10, c_first_name#5, c_last_name#4] + : : : : +- *(6) BroadcastHashJoin [c_current_addr_sk#13], [ca_address_sk#14], Inner, BuildRight + : : : : :- *(6) Project [c_customer_id#1, c_current_cdemo_sk#12, c_current_hdemo_sk#10, c_current_addr_sk#13, c_first_name#5, c_last_name#4] + : : : : : +- *(6) Filter ((isnotnull(c_current_addr_sk#13) && isnotnull(c_current_cdemo_sk#12)) && isnotnull(c_current_hdemo_sk#10)) + : : : : : +- *(6) FileScan parquet default.customer[c_customer_id#1,c_current_cdemo_sk#12,c_current_hdemo_sk#10,c_current_addr_sk#13,c_first_name#5,c_last_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [cd_demo_sk#6] + : : : +- *(2) Filter isnotnull(cd_demo_sk#6) + : : : +- *(2) FileScan parquet default.customer_demographics[cd_demo_sk#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [hd_demo_sk#11, hd_income_band_sk#8] + : : +- *(3) Filter (isnotnull(hd_demo_sk#11) && isnotnull(hd_income_band_sk#8)) + : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#11,hd_income_band_sk#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [ib_income_band_sk#9] + : +- *(4) Filter ((((isnotnull(ib_lower_bound#16) && isnotnull(ib_upper_bound#17)) && (ib_lower_bound#16 >= 38128)) && (ib_upper_bound#17 <= 88128)) && isnotnull(ib_income_band_sk#9)) + : +- *(4) FileScan parquet default.income_band[ib_income_band_sk#9,ib_lower_bound#16,ib_upper_bound#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/income_band], PartitionFilters: [], PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), ..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])) + +- *(5) Project [sr_cdemo_sk#7] + +- *(5) Filter isnotnull(sr_cdemo_sk#7) + +- *(5) FileScan parquet default.store_returns[sr_cdemo_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_cdemo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt new file mode 100644 index 000000000..95001b56d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt @@ -0,0 +1,45 @@ +TakeOrderedAndProject [c_customer_id,customer_id,customername] + WholeStageCodegen + Project [c_customer_id,c_last_name,c_first_name] + BroadcastHashJoin [cd_demo_sk,sr_cdemo_sk] + Project [c_customer_id,c_first_name,c_last_name,cd_demo_sk] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Project [cd_demo_sk,c_customer_id,hd_income_band_sk,c_last_name,c_first_name] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [cd_demo_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] + Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] + Scan parquet default.customer [c_current_cdemo_sk,c_current_addr_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] [c_current_cdemo_sk,c_current_addr_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen + Project [ca_address_sk] + Filter [ca_city,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_city] [ca_address_sk,ca_city] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [cd_demo_sk] + Filter [cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk] [cd_demo_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [hd_demo_sk,hd_income_band_sk] + Filter [hd_demo_sk,hd_income_band_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [ib_income_band_sk] + Filter [ib_lower_bound,ib_upper_bound,ib_income_band_sk] + Scan parquet default.income_band [ib_income_band_sk,ib_lower_bound,ib_upper_bound] [ib_income_band_sk,ib_lower_bound,ib_upper_bound] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [sr_cdemo_sk] + Filter [sr_cdemo_sk] + Scan parquet default.store_returns [sr_cdemo_sk] [sr_cdemo_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt new file mode 100644 index 000000000..560b1eba0 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt @@ -0,0 +1,50 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[substring(r_reason_desc, 1, 20)#1 ASC NULLS FIRST,aggOrder#2 ASC NULLS FIRST,avg(wr_refunded_cash)#3 ASC NULLS FIRST,avg(wr_fee)#4 ASC NULLS FIRST], output=[substring(r_reason_desc, 1, 20)#1,avg(ws_quantity)#5,avg(wr_refunded_cash)#3,avg(wr_fee)#4]) ++- *(9) HashAggregate(keys=[r_reason_desc#6], functions=[avg(cast(ws_quantity#7 as bigint)), avg(UnscaledValue(wr_refunded_cash#8)), avg(UnscaledValue(wr_fee#9))]) + +- Exchange hashpartitioning(r_reason_desc#6, 200) + +- *(8) HashAggregate(keys=[r_reason_desc#6], functions=[partial_avg(cast(ws_quantity#7 as bigint)), partial_avg(UnscaledValue(wr_refunded_cash#8)), partial_avg(UnscaledValue(wr_fee#9))]) + +- *(8) Project [ws_quantity#7, wr_fee#9, wr_refunded_cash#8, r_reason_desc#6] + +- *(8) BroadcastHashJoin [wr_reason_sk#10], [cast(r_reason_sk#11 as bigint)], Inner, BuildRight + :- *(8) Project [ws_quantity#7, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] + : +- *(8) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] + : : +- *(8) BroadcastHashJoin [wr_refunded_addr_sk#14], [cast(ca_address_sk#15 as bigint)], Inner, BuildRight, ((((ca_state#16 IN (IN,OH,NJ) && (ws_net_profit#17 >= 100.00)) && (ws_net_profit#17 <= 200.00)) || ((ca_state#16 IN (WI,CT,KY) && (ws_net_profit#17 >= 150.00)) && (ws_net_profit#17 <= 300.00))) || ((ca_state#16 IN (LA,IA,AR) && (ws_net_profit#17 >= 50.00)) && (ws_net_profit#17 <= 250.00))) + : : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, ws_net_profit#17, wr_refunded_addr_sk#14, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] + : : : +- *(8) BroadcastHashJoin [wr_returning_cdemo_sk#18, cd_marital_status#19, cd_education_status#20], [cast(cd_demo_sk#21 as bigint), cd_marital_status#22, cd_education_status#23], Inner, BuildRight + : : : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, ws_net_profit#17, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#18, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8, cd_marital_status#19, cd_education_status#20] + : : : : +- *(8) BroadcastHashJoin [wr_refunded_cdemo_sk#24], [cast(cd_demo_sk#25 as bigint)], Inner, BuildRight, ((((((cd_marital_status#19 = M) && (cd_education_status#20 = Advanced Degree)) && (ws_sales_price#26 >= 100.00)) && (ws_sales_price#26 <= 150.00)) || ((((cd_marital_status#19 = S) && (cd_education_status#20 = College)) && (ws_sales_price#26 >= 50.00)) && (ws_sales_price#26 <= 100.00))) || ((((cd_marital_status#19 = W) && (cd_education_status#20 = 2 yr Degree)) && (ws_sales_price#26 >= 150.00)) && (ws_sales_price#26 <= 200.00))) + : : : : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, ws_sales_price#26, ws_net_profit#17, wr_refunded_cdemo_sk#24, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#18, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] + : : : : : +- *(8) BroadcastHashJoin [ws_web_page_sk#27], [wp_web_page_sk#28], Inner, BuildRight + : : : : : :- *(8) Project [ws_sold_date_sk#12, ws_web_page_sk#27, ws_quantity#7, ws_sales_price#26, ws_net_profit#17, wr_refunded_cdemo_sk#24, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#18, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] + : : : : : : +- *(8) BroadcastHashJoin [cast(ws_item_sk#29 as bigint), cast(ws_order_number#30 as bigint)], [wr_item_sk#31, wr_order_number#32], Inner, BuildRight + : : : : : : :- *(8) Project [ws_sold_date_sk#12, ws_item_sk#29, ws_web_page_sk#27, ws_order_number#30, ws_quantity#7, ws_sales_price#26, ws_net_profit#17] + : : : : : : : +- *(8) Filter (((isnotnull(ws_item_sk#29) && isnotnull(ws_order_number#30)) && isnotnull(ws_web_page_sk#27)) && isnotnull(ws_sold_date_sk#12)) + : : : : : : : +- *(8) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#29,ws_web_page_sk#27,ws_order_number#30,ws_quantity#7,ws_sales_price#26,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), IsNotNull(ws_sold_..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(3) Project [cd_demo_sk#25, cd_marital_status#19, cd_education_status#20] + : : : : +- *(3) Filter ((isnotnull(cd_demo_sk#25) && isnotnull(cd_education_status#20)) && isnotnull(cd_marital_status#19)) + : : : : +- *(3) FileScan parquet default.customer_demographics[cd_demo_sk#25,cd_marital_status#19,cd_education_status#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_education_status), IsNotNull(cd_marital_status)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint), input[1, string, true], input[2, string, true])) + : : : +- *(4) Project [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] + : : : +- *(4) Filter ((isnotnull(cd_education_status#23) && isnotnull(cd_marital_status#22)) && isnotnull(cd_demo_sk#21)) + : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#21,cd_marital_status#22,cd_education_status#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_education_status), IsNotNull(cd_marital_status), IsNotNull(cd_demo_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(5) Project [ca_address_sk#15, ca_state#16] + : : +- *(5) Filter ((isnotnull(ca_country#33) && (ca_country#33 = United States)) && isnotnull(ca_address_sk#15)) + : : +- *(5) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#16,ca_country#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [d_date_sk#13] + : +- *(6) Filter ((isnotnull(d_year#34) && (d_year#34 = 2000)) && isnotnull(d_date_sk#13)) + : +- *(6) FileScan parquet default.date_dim[d_date_sk#13,d_year#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) Project [r_reason_sk#11, r_reason_desc#6] + +- *(7) Filter isnotnull(r_reason_sk#11) + +- *(7) FileScan parquet default.reason[r_reason_sk#11,r_reason_desc#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt new file mode 100644 index 000000000..b9fdfc26f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt @@ -0,0 +1,66 @@ +TakeOrderedAndProject [avg(wr_fee),aggOrder,avg(ws_quantity),substring(r_reason_desc, 1, 20),avg(wr_refunded_cash)] + WholeStageCodegen + HashAggregate [r_reason_desc,count,sum,avg(UnscaledValue(wr_fee)),avg(UnscaledValue(wr_refunded_cash)),sum,count,count,avg(cast(ws_quantity as bigint)),sum] [count,avg(wr_fee),aggOrder,avg(ws_quantity),substring(r_reason_desc, 1, 20),sum,avg(UnscaledValue(wr_fee)),avg(wr_refunded_cash),avg(UnscaledValue(wr_refunded_cash)),sum,count,count,avg(cast(ws_quantity as bigint)),sum] + InputAdapter + Exchange [r_reason_desc] #1 + WholeStageCodegen + HashAggregate [r_reason_desc,count,wr_refunded_cash,sum,count,wr_fee,sum,sum,sum,count,count,count,count,sum,ws_quantity,sum] [count,sum,count,sum,sum,sum,count,count,count,count,sum,sum] + Project [ws_quantity,wr_fee,wr_refunded_cash,r_reason_desc] + BroadcastHashJoin [wr_reason_sk,r_reason_sk] + Project [ws_quantity,wr_reason_sk,wr_fee,wr_refunded_cash] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [wr_reason_sk,ws_quantity,wr_refunded_cash,ws_sold_date_sk,wr_fee] + BroadcastHashJoin [wr_refunded_addr_sk,ca_address_sk,ca_state,ws_net_profit] + Project [wr_refunded_addr_sk,wr_reason_sk,ws_quantity,ws_net_profit,wr_refunded_cash,ws_sold_date_sk,wr_fee] + BroadcastHashJoin [cd_education_status,cd_demo_sk,wr_returning_cdemo_sk,cd_education_status,cd_marital_status,cd_marital_status] + Project [wr_refunded_addr_sk,wr_reason_sk,ws_quantity,ws_net_profit,cd_marital_status,wr_refunded_cash,ws_sold_date_sk,wr_fee,wr_returning_cdemo_sk,cd_education_status] + BroadcastHashJoin [cd_education_status,ws_sales_price,cd_demo_sk,cd_marital_status,wr_refunded_cdemo_sk] + Project [wr_refunded_addr_sk,wr_reason_sk,ws_quantity,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_cash,ws_sold_date_sk,wr_fee,wr_returning_cdemo_sk,ws_sales_price] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [wr_refunded_addr_sk,wr_reason_sk,ws_web_page_sk,ws_quantity,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_cash,ws_sold_date_sk,wr_fee,wr_returning_cdemo_sk,ws_sales_price] + BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] + Project [ws_web_page_sk,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_sales_price,ws_item_sk] + Filter [ws_item_sk,ws_order_number,ws_web_page_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_web_page_sk,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_sales_price,ws_item_sk] [ws_web_page_sk,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_sales_price,ws_item_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [wr_order_number,wr_refunded_addr_sk,wr_reason_sk,wr_refunded_cdemo_sk,wr_refunded_cash,wr_fee,wr_returning_cdemo_sk,wr_item_sk] + Filter [wr_item_sk,wr_reason_sk,wr_order_number,wr_returning_cdemo_sk,wr_refunded_addr_sk,wr_refunded_cdemo_sk] + Scan parquet default.web_returns [wr_order_number,wr_refunded_addr_sk,wr_reason_sk,wr_refunded_cdemo_sk,wr_refunded_cash,wr_fee,wr_returning_cdemo_sk,wr_item_sk] [wr_order_number,wr_refunded_addr_sk,wr_reason_sk,wr_refunded_cdemo_sk,wr_refunded_cash,wr_fee,wr_returning_cdemo_sk,wr_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [wp_web_page_sk] + Filter [wp_web_page_sk] + Scan parquet default.web_page [wp_web_page_sk] [wp_web_page_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [cd_demo_sk,cd_marital_status,cd_education_status] + Filter [cd_demo_sk,cd_education_status,cd_marital_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [cd_demo_sk,cd_marital_status,cd_education_status] + Filter [cd_education_status,cd_demo_sk,cd_marital_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [ca_address_sk,ca_state] + Filter [ca_country,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] [ca_address_sk,ca_state,ca_country] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [r_reason_sk,r_reason_desc] + Filter [r_reason_sk] + Scan parquet default.reason [r_reason_sk,r_reason_desc] [r_reason_sk,r_reason_desc] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt new file mode 100644 index 000000000..9a562efa3 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt @@ -0,0 +1,25 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WHEN (cast(lochierarchy#1 as int) = 0) THEN i_category#2 END ASC NULLS FIRST,rank_within_parent#3 ASC NULLS FIRST], output=[total_sum#4,i_category#2,i_class#5,lochierarchy#1,rank_within_parent#3]) ++- *(6) Project [total_sum#4, i_category#2, i_class#5, lochierarchy#1, rank_within_parent#3] + +- Window [rank(_w3#6) windowspecdefinition(_w1#7, _w2#8, _w3#6 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#3], [_w1#7, _w2#8], [_w3#6 DESC NULLS LAST] + +- *(5) Sort [_w1#7 ASC NULLS FIRST, _w2#8 ASC NULLS FIRST, _w3#6 DESC NULLS LAST], false, 0 + +- Exchange hashpartitioning(_w1#7, _w2#8, 200) + +- *(4) HashAggregate(keys=[i_category#2, i_class#5, spark_grouping_id#9], functions=[sum(UnscaledValue(ws_net_paid#10))]) + +- Exchange hashpartitioning(i_category#2, i_class#5, spark_grouping_id#9, 200) + +- *(3) HashAggregate(keys=[i_category#2, i_class#5, spark_grouping_id#9], functions=[partial_sum(UnscaledValue(ws_net_paid#10))]) + +- *(3) Expand [List(ws_net_paid#10, i_category#11, i_class#12, 0), List(ws_net_paid#10, i_category#11, null, 1), List(ws_net_paid#10, null, null, 3)], [ws_net_paid#10, i_category#2, i_class#5, spark_grouping_id#9] + +- *(3) Project [ws_net_paid#10, i_category#13 AS i_category#11, i_class#14 AS i_class#12] + +- *(3) BroadcastHashJoin [ws_item_sk#15], [i_item_sk#16], Inner, BuildRight + :- *(3) Project [ws_item_sk#15, ws_net_paid#10] + : +- *(3) BroadcastHashJoin [ws_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + : :- *(3) Project [ws_sold_date_sk#17, ws_item_sk#15, ws_net_paid#10] + : : +- *(3) Filter (isnotnull(ws_sold_date_sk#17) && isnotnull(ws_item_sk#15)) + : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#17,ws_item_sk#15,ws_net_paid#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [d_date_sk#18] + : +- *(1) Filter (((isnotnull(d_month_seq#19) && (d_month_seq#19 >= 1200)) && (d_month_seq#19 <= 1211)) && isnotnull(d_date_sk#18)) + : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_month_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [i_item_sk#16, i_class#14, i_category#13] + +- *(2) Filter isnotnull(i_item_sk#16) + +- *(2) FileScan parquet default.item[i_item_sk#16,i_class#14,i_category#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt new file mode 100644 index 000000000..71aae564f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt @@ -0,0 +1,35 @@ +TakeOrderedAndProject [i_category,total_sum,rank_within_parent,lochierarchy,i_class] + WholeStageCodegen + Project [i_category,total_sum,rank_within_parent,lochierarchy,i_class] + InputAdapter + Window [_w3,_w1,_w2] + WholeStageCodegen + Sort [_w1,_w2,_w3] + InputAdapter + Exchange [_w1,_w2] #1 + WholeStageCodegen + HashAggregate [i_category,sum(UnscaledValue(ws_net_paid)),sum,i_class,spark_grouping_id] [_w1,total_sum,sum(UnscaledValue(ws_net_paid)),sum,_w2,lochierarchy,_w3] + InputAdapter + Exchange [i_category,i_class,spark_grouping_id] #2 + WholeStageCodegen + HashAggregate [i_category,sum,ws_net_paid,i_class,sum,spark_grouping_id] [sum,sum] + Expand [ws_net_paid,i_category,i_class] + Project [ws_net_paid,i_category,i_class] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_net_paid] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_net_paid] + Filter [ws_sold_date_sk,ws_item_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_net_paid] [ws_sold_date_sk,ws_item_sk,ws_net_paid] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [i_item_sk,i_class,i_category] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_class,i_category] [i_item_sk,i_class,i_category] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt new file mode 100644 index 000000000..2868a7ac8 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt @@ -0,0 +1,54 @@ +== Physical Plan == +*(13) HashAggregate(keys=[], functions=[count(1)]) ++- Exchange SinglePartition + +- *(12) HashAggregate(keys=[], functions=[partial_count(1)]) + +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#4, ), coalesce(c_first_name#5, ), coalesce(d_date#6, 0)], LeftAnti, BuildRight, (((c_last_name#1 <=> c_last_name#4) && (c_first_name#2 <=> c_first_name#5)) && (d_date#3 <=> d_date#6)) + :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#7, ), coalesce(c_first_name#8, ), coalesce(d_date#9, 0)], LeftAnti, BuildRight, (((c_last_name#1 <=> c_last_name#7) && (c_first_name#2 <=> c_first_name#8)) && (d_date#3 <=> d_date#9)) + : :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, d_date#3, 200) + : : +- *(3) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : : +- *(3) Project [c_last_name#1, c_first_name#2, d_date#3] + : : +- *(3) BroadcastHashJoin [ss_customer_sk#10], [c_customer_sk#11], Inner, BuildRight + : : :- *(3) Project [ss_customer_sk#10, d_date#3] + : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + : : : :- *(3) Project [ss_sold_date_sk#12, ss_customer_sk#10] + : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#12) && isnotnull(ss_customer_sk#10)) + : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_customer_sk#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#13, d_date#3] + : : : +- *(1) Filter (((isnotnull(d_month_seq#14) && (d_month_seq#14 >= 1200)) && (d_month_seq#14 <= 1211)) && isnotnull(d_date_sk#13)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#13,d_date#3,d_month_seq#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [c_customer_sk#11, c_first_name#2, c_last_name#1] + : : +- *(2) Filter isnotnull(c_customer_sk#11) + : : +- *(2) FileScan parquet default.customer[c_customer_sk#11,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) + : +- *(7) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) + : +- Exchange hashpartitioning(c_last_name#7, c_first_name#8, d_date#9, 200) + : +- *(6) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) + : +- *(6) Project [c_last_name#7, c_first_name#8, d_date#9] + : +- *(6) BroadcastHashJoin [cs_bill_customer_sk#15], [c_customer_sk#16], Inner, BuildRight + : :- *(6) Project [cs_bill_customer_sk#15, d_date#9] + : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + : : :- *(6) Project [cs_sold_date_sk#17, cs_bill_customer_sk#15] + : : : +- *(6) Filter (isnotnull(cs_sold_date_sk#17) && isnotnull(cs_bill_customer_sk#15)) + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#17,cs_bill_customer_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#18, d_date#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [c_customer_sk#16, c_first_name#8, c_last_name#7], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) + +- *(11) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) + +- Exchange hashpartitioning(c_last_name#4, c_first_name#5, d_date#6, 200) + +- *(10) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) + +- *(10) Project [c_last_name#4, c_first_name#5, d_date#6] + +- *(10) BroadcastHashJoin [ws_bill_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + :- *(10) Project [ws_bill_customer_sk#19, d_date#6] + : +- *(10) BroadcastHashJoin [ws_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight + : :- *(10) Project [ws_sold_date_sk#21, ws_bill_customer_sk#19] + : : +- *(10) Filter (isnotnull(ws_sold_date_sk#21) && isnotnull(ws_bill_customer_sk#19)) + : : +- *(10) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#22, d_date#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [c_customer_sk#20, c_first_name#5, c_last_name#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt new file mode 100644 index 000000000..c5008bdd8 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt @@ -0,0 +1,74 @@ +WholeStageCodegen + HashAggregate [count,count(1)] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [count,count] [count,count] + HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_first_name,d_date,d_date,c_last_name,c_last_name,c_first_name] + HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + BroadcastHashJoin [d_date,c_last_name,c_first_name,c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_customer_sk] + Filter [ss_sold_date_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] [ss_sold_date_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] [d_date_sk,d_date,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_sk] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #6 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_bill_customer_sk] + Filter [cs_sold_date_sk,cs_bill_customer_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] [cs_sold_date_sk,cs_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_bill_customer_sk] + Filter [ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] [ws_sold_date_sk,ws_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/explain.txt new file mode 100644 index 000000000..d729f0b3f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/explain.txt @@ -0,0 +1,165 @@ +== Physical Plan == +BroadcastNestedLoopJoin BuildRight, Inner +:- BroadcastNestedLoopJoin BuildRight, Inner +: :- BroadcastNestedLoopJoin BuildRight, Inner +: : :- BroadcastNestedLoopJoin BuildRight, Inner +: : : :- BroadcastNestedLoopJoin BuildRight, Inner +: : : : :- BroadcastNestedLoopJoin BuildRight, Inner +: : : : : :- BroadcastNestedLoopJoin BuildRight, Inner +: : : : : : :- *(5) HashAggregate(keys=[], functions=[count(1)]) +: : : : : : : +- Exchange SinglePartition +: : : : : : : +- *(4) HashAggregate(keys=[], functions=[partial_count(1)]) +: : : : : : : +- *(4) Project +: : : : : : : +- *(4) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: : : : : : : :- *(4) Project [ss_store_sk#1] +: : : : : : : : +- *(4) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : : : : : : : :- *(4) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : : : : : : : +- *(4) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : : : : : : : :- *(4) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : : : : : : : +- *(4) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : : : : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : : : : +- *(1) Project [hd_demo_sk#6] +: : : : : : : : : +- *(1) Filter (((((hd_dep_count#7 = 4) && (hd_vehicle_count#8 <= 6)) || ((hd_dep_count#7 = 2) && (hd_vehicle_count#8 <= 4))) || ((hd_dep_count#7 = 0) && (hd_vehicle_count#8 <= 2))) && isnotnull(hd_demo_sk#6)) +: : : : : : : : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#6,hd_dep_count#7,hd_vehicle_count#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(Or(And(EqualTo(hd_dep_count,4),LessThanOrEqual(hd_vehicle_count,6)),And(EqualTo(hd_dep_count,..., ReadSchema: struct +: : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : : : +- *(2) Project [t_time_sk#4] +: : : : : : : : +- *(2) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 8)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) +: : : : : : : : +- *(2) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,8), GreaterThanOrEqual(t_minute,30), IsNo..., ReadSchema: struct +: : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : : +- *(3) Project [s_store_sk#2] +: : : : : : : +- *(3) Filter ((isnotnull(s_store_name#11) && (s_store_name#11 = ese)) && isnotnull(s_store_sk#2)) +: : : : : : : +- *(3) FileScan parquet default.store[s_store_sk#2,s_store_name#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)], ReadSchema: struct +: : : : : : +- BroadcastExchange IdentityBroadcastMode +: : : : : : +- *(10) HashAggregate(keys=[], functions=[count(1)]) +: : : : : : +- Exchange SinglePartition +: : : : : : +- *(9) HashAggregate(keys=[], functions=[partial_count(1)]) +: : : : : : +- *(9) Project +: : : : : : +- *(9) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: : : : : : :- *(9) Project [ss_store_sk#1] +: : : : : : : +- *(9) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : : : : : : :- *(9) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : : : : : : +- *(9) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : : : : : : :- *(9) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : : : : : : +- *(9) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : : : : : : +- *(9) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : : +- *(7) Project [t_time_sk#4] +: : : : : : : +- *(7) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 9)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) +: : : : : : : +- *(7) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), LessThan(t_minute,30), IsNotNull(t_ti..., ReadSchema: struct +: : : : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : +- BroadcastExchange IdentityBroadcastMode +: : : : : +- *(15) HashAggregate(keys=[], functions=[count(1)]) +: : : : : +- Exchange SinglePartition +: : : : : +- *(14) HashAggregate(keys=[], functions=[partial_count(1)]) +: : : : : +- *(14) Project +: : : : : +- *(14) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: : : : : :- *(14) Project [ss_store_sk#1] +: : : : : : +- *(14) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : : : : : :- *(14) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : : : : : +- *(14) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : : : : : :- *(14) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : : : : : +- *(14) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : : : : : +- *(14) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : +- *(12) Project [t_time_sk#4] +: : : : : : +- *(12) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 9)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) +: : : : : : +- *(12) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), GreaterThanOrEqual(t_minute,30), IsNo..., ReadSchema: struct +: : : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : +- BroadcastExchange IdentityBroadcastMode +: : : : +- *(20) HashAggregate(keys=[], functions=[count(1)]) +: : : : +- Exchange SinglePartition +: : : : +- *(19) HashAggregate(keys=[], functions=[partial_count(1)]) +: : : : +- *(19) Project +: : : : +- *(19) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: : : : :- *(19) Project [ss_store_sk#1] +: : : : : +- *(19) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : : : : :- *(19) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : : : : +- *(19) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : : : : :- *(19) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : : : : +- *(19) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : : : : +- *(19) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : +- *(17) Project [t_time_sk#4] +: : : : : +- *(17) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 10)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) +: : : : : +- *(17) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct +: : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : +- BroadcastExchange IdentityBroadcastMode +: : : +- *(25) HashAggregate(keys=[], functions=[count(1)]) +: : : +- Exchange SinglePartition +: : : +- *(24) HashAggregate(keys=[], functions=[partial_count(1)]) +: : : +- *(24) Project +: : : +- *(24) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: : : :- *(24) Project [ss_store_sk#1] +: : : : +- *(24) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : : : :- *(24) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : : : +- *(24) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : : : :- *(24) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : : : +- *(24) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : : : +- *(24) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : +- *(22) Project [t_time_sk#4] +: : : : +- *(22) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 10)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) +: : : : +- *(22) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct +: : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : +- BroadcastExchange IdentityBroadcastMode +: : +- *(30) HashAggregate(keys=[], functions=[count(1)]) +: : +- Exchange SinglePartition +: : +- *(29) HashAggregate(keys=[], functions=[partial_count(1)]) +: : +- *(29) Project +: : +- *(29) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: : :- *(29) Project [ss_store_sk#1] +: : : +- *(29) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : : :- *(29) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : : +- *(29) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : : :- *(29) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : : +- *(29) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : : +- *(29) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : +- *(27) Project [t_time_sk#4] +: : : +- *(27) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 11)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) +: : : +- *(27) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct +: : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: +- BroadcastExchange IdentityBroadcastMode +: +- *(35) HashAggregate(keys=[], functions=[count(1)]) +: +- Exchange SinglePartition +: +- *(34) HashAggregate(keys=[], functions=[partial_count(1)]) +: +- *(34) Project +: +- *(34) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: :- *(34) Project [ss_store_sk#1] +: : +- *(34) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : :- *(34) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : +- *(34) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : :- *(34) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : +- *(34) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : +- *(34) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : +- *(32) Project [t_time_sk#4] +: : +- *(32) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 11)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) +: : +- *(32) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct +: +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) ++- BroadcastExchange IdentityBroadcastMode + +- *(40) HashAggregate(keys=[], functions=[count(1)]) + +- Exchange SinglePartition + +- *(39) HashAggregate(keys=[], functions=[partial_count(1)]) + +- *(39) Project + +- *(39) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight + :- *(39) Project [ss_store_sk#1] + : +- *(39) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight + : :- *(39) Project [ss_sold_time_sk#3, ss_store_sk#1] + : : +- *(39) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight + : : :- *(39) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] + : : : +- *(39) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) + : : : +- *(39) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(37) Project [t_time_sk#4] + : +- *(37) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 12)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) + : +- *(37) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,12), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct + +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/simplified.txt new file mode 100644 index 000000000..0a4bce5c6 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/simplified.txt @@ -0,0 +1,222 @@ +BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + BroadcastNestedLoopJoin + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),h8_30_to_9,count] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [hd_demo_sk] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_name,s_store_sk] + Scan parquet default.store [s_store_sk,s_store_name] [s_store_sk,s_store_name] + BroadcastExchange #5 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),h9_to_9_30,count] + InputAdapter + Exchange #6 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] [s_store_sk] #4 + BroadcastExchange #8 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),h9_30_to_10,count] + InputAdapter + Exchange #9 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] [s_store_sk] #4 + BroadcastExchange #11 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),h10_to_10_30,count] + InputAdapter + Exchange #12 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] [s_store_sk] #4 + BroadcastExchange #14 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),h10_30_to_11,count] + InputAdapter + Exchange #15 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] [s_store_sk] #4 + BroadcastExchange #17 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),h11_to_11_30,count] + InputAdapter + Exchange #18 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #19 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] [s_store_sk] #4 + BroadcastExchange #20 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),h11_30_to_12,count] + InputAdapter + Exchange #21 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #22 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] [s_store_sk] #4 + BroadcastExchange #23 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),h12_to_12_30,count] + InputAdapter + Exchange #24 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + InputAdapter + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #25 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + InputAdapter + ReusedExchange [s_store_sk] [s_store_sk] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt new file mode 100644 index 000000000..4a69feebe --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt @@ -0,0 +1,31 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,s_store_name#3 ASC NULLS FIRST], output=[i_category#4,i_class#5,i_brand#6,s_store_name#3,s_company_name#7,d_moy#8,sum_sales#1,avg_monthly_sales#2]) ++- *(7) Project [i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8, sum_sales#1, avg_monthly_sales#2] + +- *(7) Filter (CASE WHEN NOT (avg_monthly_sales#2 = 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000) + +- Window [avg(_w0#9) windowspecdefinition(i_category#4, i_brand#6, s_store_name#3, s_company_name#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#6, s_store_name#3, s_company_name#7] + +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#6 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#7 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(i_category#4, i_brand#6, s_store_name#3, s_company_name#7, 200) + +- *(5) HashAggregate(keys=[i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#10))]) + +- Exchange hashpartitioning(i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8, 200) + +- *(4) HashAggregate(keys=[i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#10))]) + +- *(4) Project [i_brand#6, i_class#5, i_category#4, ss_sales_price#10, d_moy#8, s_store_name#3, s_company_name#7] + +- *(4) BroadcastHashJoin [ss_store_sk#11], [s_store_sk#12], Inner, BuildRight + :- *(4) Project [i_brand#6, i_class#5, i_category#4, ss_store_sk#11, ss_sales_price#10, d_moy#8] + : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + : :- *(4) Project [i_brand#6, i_class#5, i_category#4, ss_sold_date_sk#13, ss_store_sk#11, ss_sales_price#10] + : : +- *(4) BroadcastHashJoin [i_item_sk#15], [ss_item_sk#16], Inner, BuildRight + : : :- *(4) Project [i_item_sk#15, i_brand#6, i_class#5, i_category#4] + : : : +- *(4) Filter (((i_category#4 IN (Books,Electronics,Sports) && i_class#5 IN (computers,stereo,football)) || (i_category#4 IN (Men,Jewelry,Women) && i_class#5 IN (shirts,birdal,dresses))) && isnotnull(i_item_sk#15)) + : : : +- *(4) FileScan parquet default.item[i_item_sk#15,i_brand#6,i_class#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(And(In(i_category, [Books,Electronics,Sports]),In(i_class, [computers,stereo,football])),And(..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : +- *(1) Project [ss_sold_date_sk#13, ss_item_sk#16, ss_store_sk#11, ss_sales_price#10] + : : +- *(1) Filter ((isnotnull(ss_item_sk#16) && isnotnull(ss_sold_date_sk#13)) && isnotnull(ss_store_sk#11)) + : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_item_sk#16,ss_store_sk#11,ss_sales_price#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#14, d_moy#8] + : +- *(2) Filter ((isnotnull(d_year#17) && (d_year#17 = 1999)) && isnotnull(d_date_sk#14)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#14,d_year#17,d_moy#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [s_store_sk#12, s_store_name#3, s_company_name#7] + +- *(3) Filter isnotnull(s_store_sk#12) + +- *(3) FileScan parquet default.store[s_store_sk#12,s_store_name#3,s_company_name#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt new file mode 100644 index 000000000..31fd0675f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt @@ -0,0 +1,43 @@ +TakeOrderedAndProject [s_company_name,d_moy,sum_sales,i_brand,i_category,avg_monthly_sales,s_store_name,i_class] + WholeStageCodegen + Project [s_company_name,d_moy,sum_sales,i_brand,avg_monthly_sales,i_category,i_class,s_store_name] + Filter [avg_monthly_sales,sum_sales] + InputAdapter + Window [s_company_name,i_brand,i_category,s_store_name,_w0] + WholeStageCodegen + Sort [i_category,i_brand,s_store_name,s_company_name] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #1 + WholeStageCodegen + HashAggregate [s_company_name,d_moy,i_brand,sum(UnscaledValue(ss_sales_price)),i_category,sum,i_class,s_store_name] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [s_company_name,d_moy,i_brand,i_category,i_class,s_store_name] #2 + WholeStageCodegen + HashAggregate [s_company_name,d_moy,i_brand,ss_sales_price,i_category,sum,sum,i_class,s_store_name] [sum,sum] + Project [i_class,s_store_name,s_company_name,d_moy,i_category,ss_sales_price,i_brand] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_class,d_moy,ss_store_sk,i_category,ss_sales_price,i_brand] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_class,ss_store_sk,i_category,ss_sales_price,ss_sold_date_sk,i_brand] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_brand,i_class,i_category] + Filter [i_category,i_class,i_item_sk] + Scan parquet default.item [i_item_sk,i_brand,i_class,i_category] [i_item_sk,i_brand,i_class,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk,d_moy] + Filter [d_year,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [s_store_sk,s_store_name,s_company_name] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk,s_store_name,s_company_name] [s_store_sk,s_store_name,s_company_name] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt new file mode 100644 index 000000000..0ed6b1727 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt @@ -0,0 +1,109 @@ +== Physical Plan == +*(1) Project [CASE WHEN (Subquery subquery1150 > 62316685) THEN Subquery subquery1151 ELSE Subquery subquery1152 END AS bucket1#1, CASE WHEN (Subquery subquery1154 > 19045798) THEN Subquery subquery1155 ELSE Subquery subquery1156 END AS bucket2#2, CASE WHEN (Subquery subquery1158 > 365541424) THEN Subquery subquery1159 ELSE Subquery subquery1160 END AS bucket3#3, CASE WHEN (Subquery subquery1162 > 216357808) THEN Subquery subquery1163 ELSE Subquery subquery1164 END AS bucket4#4, CASE WHEN (Subquery subquery1166 > 184483884) THEN Subquery subquery1167 ELSE Subquery subquery1168 END AS bucket5#5] +: :- Subquery subquery1150 +: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) +: : +- *(1) Project +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct +: :- Subquery subquery1151 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- *(1) Project [ss_ext_discount_amt#7] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct +: :- Subquery subquery1152 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) +: : +- *(1) Project [ss_net_paid#8] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct +: :- Subquery subquery1154 +: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) +: : +- *(1) Project +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct +: :- Subquery subquery1155 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- *(1) Project [ss_ext_discount_amt#7] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct +: :- Subquery subquery1156 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) +: : +- *(1) Project [ss_net_paid#8] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct +: :- Subquery subquery1158 +: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) +: : +- *(1) Project +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct +: :- Subquery subquery1159 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- *(1) Project [ss_ext_discount_amt#7] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct +: :- Subquery subquery1160 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) +: : +- *(1) Project [ss_net_paid#8] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct +: :- Subquery subquery1162 +: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) +: : +- *(1) Project +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct +: :- Subquery subquery1163 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- *(1) Project [ss_ext_discount_amt#7] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct +: :- Subquery subquery1164 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) +: : +- *(1) Project [ss_net_paid#8] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct +: :- Subquery subquery1166 +: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) +: : +- *(1) Project +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct +: :- Subquery subquery1167 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- *(1) Project [ss_ext_discount_amt#7] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct +: +- Subquery subquery1168 +: +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) +: +- Exchange SinglePartition +: +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) +: +- *(1) Project [ss_net_paid#8] +: +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) +: +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct ++- *(1) Filter (isnotnull(r_reason_sk#9) && (r_reason_sk#9 = 1)) + +- *(1) FileScan parquet default.reason[r_reason_sk#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk), EqualTo(r_reason_sk,1)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt new file mode 100644 index 000000000..1851df752 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt @@ -0,0 +1,154 @@ +WholeStageCodegen + Project + Subquery #1 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity] [ss_quantity] + Subquery #2 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_ext_discount_amt))] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + InputAdapter + Exchange #2 + WholeStageCodegen + HashAggregate [sum,ss_ext_discount_amt,sum,count,count] [sum,count,sum,count] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] + Subquery #3 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_net_paid))] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + InputAdapter + Exchange #3 + WholeStageCodegen + HashAggregate [sum,count,ss_net_paid,sum,count] [sum,count,sum,count] + Project [ss_net_paid] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] + Subquery #4 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),count(1),count] + InputAdapter + Exchange #4 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity] [ss_quantity] + Subquery #5 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_ext_discount_amt))] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + InputAdapter + Exchange #5 + WholeStageCodegen + HashAggregate [count,sum,count,ss_ext_discount_amt,sum] [sum,count,sum,count] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] + Subquery #6 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_net_paid))] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + InputAdapter + Exchange #6 + WholeStageCodegen + HashAggregate [sum,sum,ss_net_paid,count,count] [sum,count,sum,count] + Project [ss_net_paid] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] + Subquery #7 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),count(1),count] + InputAdapter + Exchange #7 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity] [ss_quantity] + Subquery #8 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_ext_discount_amt))] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + InputAdapter + Exchange #8 + WholeStageCodegen + HashAggregate [sum,ss_ext_discount_amt,count,count,sum] [sum,count,sum,count] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] + Subquery #9 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_net_paid))] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + InputAdapter + Exchange #9 + WholeStageCodegen + HashAggregate [sum,count,count,sum,ss_net_paid] [sum,count,sum,count] + Project [ss_net_paid] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] + Subquery #10 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),count(1),count] + InputAdapter + Exchange #10 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity] [ss_quantity] + Subquery #11 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_ext_discount_amt))] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + InputAdapter + Exchange #11 + WholeStageCodegen + HashAggregate [ss_ext_discount_amt,sum,count,count,sum] [sum,count,sum,count] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] + Subquery #12 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_net_paid))] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + InputAdapter + Exchange #12 + WholeStageCodegen + HashAggregate [count,sum,ss_net_paid,count,sum] [sum,count,sum,count] + Project [ss_net_paid] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] + Subquery #13 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),count(1),count] + InputAdapter + Exchange #13 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity] [ss_quantity] + Subquery #14 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_ext_discount_amt))] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + InputAdapter + Exchange #14 + WholeStageCodegen + HashAggregate [sum,ss_ext_discount_amt,sum,count,count] [sum,count,sum,count] + Project [ss_ext_discount_amt] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] + Subquery #15 + WholeStageCodegen + HashAggregate [sum,count,avg(UnscaledValue(ss_net_paid))] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + InputAdapter + Exchange #15 + WholeStageCodegen + HashAggregate [count,ss_net_paid,sum,count,sum] [sum,count,sum,count] + Project [ss_net_paid] + Filter [ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] + Filter [r_reason_sk] + Scan parquet default.reason [r_reason_sk] [r_reason_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt new file mode 100644 index 000000000..0d0c65f33 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt @@ -0,0 +1,47 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[am_pm_ratio#1 ASC NULLS FIRST], output=[am_pm_ratio#1]) ++- *(11) Project [CheckOverflow((promote_precision(cast(amc#2 as decimal(15,4))) / promote_precision(cast(pmc#3 as decimal(15,4)))), DecimalType(35,20)) AS am_pm_ratio#1] + +- BroadcastNestedLoopJoin BuildRight, Inner + :- *(5) HashAggregate(keys=[], functions=[count(1)]) + : +- Exchange SinglePartition + : +- *(4) HashAggregate(keys=[], functions=[partial_count(1)]) + : +- *(4) Project + : +- *(4) BroadcastHashJoin [ws_web_page_sk#4], [wp_web_page_sk#5], Inner, BuildRight + : :- *(4) Project [ws_web_page_sk#4] + : : +- *(4) BroadcastHashJoin [ws_sold_time_sk#6], [t_time_sk#7], Inner, BuildRight + : : :- *(4) Project [ws_sold_time_sk#6, ws_web_page_sk#4] + : : : +- *(4) BroadcastHashJoin [ws_ship_hdemo_sk#8], [hd_demo_sk#9], Inner, BuildRight + : : : :- *(4) Project [ws_sold_time_sk#6, ws_ship_hdemo_sk#8, ws_web_page_sk#4] + : : : : +- *(4) Filter ((isnotnull(ws_ship_hdemo_sk#8) && isnotnull(ws_sold_time_sk#6)) && isnotnull(ws_web_page_sk#4)) + : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_time_sk#6,ws_ship_hdemo_sk#8,ws_web_page_sk#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [hd_demo_sk#9] + : : : +- *(1) Filter ((isnotnull(hd_dep_count#10) && (hd_dep_count#10 = 6)) && isnotnull(hd_demo_sk#9)) + : : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#9,hd_dep_count#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,6), IsNotNull(hd_demo_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [t_time_sk#7] + : : +- *(2) Filter (((isnotnull(t_hour#11) && (t_hour#11 >= 8)) && (t_hour#11 <= 9)) && isnotnull(t_time_sk#7)) + : : +- *(2) FileScan parquet default.time_dim[t_time_sk#7,t_hour#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,8), LessThanOrEqual(t_hour,9), IsNotNull(t_time_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [wp_web_page_sk#5] + : +- *(3) Filter (((isnotnull(wp_char_count#12) && (wp_char_count#12 >= 5000)) && (wp_char_count#12 <= 5200)) && isnotnull(wp_web_page_sk#5)) + : +- *(3) FileScan parquet default.web_page[wp_web_page_sk#5,wp_char_count#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_page], PartitionFilters: [], PushedFilters: [IsNotNull(wp_char_count), GreaterThanOrEqual(wp_char_count,5000), LessThanOrEqual(wp_char_count,..., ReadSchema: struct + +- BroadcastExchange IdentityBroadcastMode + +- *(10) HashAggregate(keys=[], functions=[count(1)]) + +- Exchange SinglePartition + +- *(9) HashAggregate(keys=[], functions=[partial_count(1)]) + +- *(9) Project + +- *(9) BroadcastHashJoin [ws_web_page_sk#4], [wp_web_page_sk#5], Inner, BuildRight + :- *(9) Project [ws_web_page_sk#4] + : +- *(9) BroadcastHashJoin [ws_sold_time_sk#6], [t_time_sk#7], Inner, BuildRight + : :- *(9) Project [ws_sold_time_sk#6, ws_web_page_sk#4] + : : +- *(9) BroadcastHashJoin [ws_ship_hdemo_sk#8], [hd_demo_sk#9], Inner, BuildRight + : : :- *(9) Project [ws_sold_time_sk#6, ws_ship_hdemo_sk#8, ws_web_page_sk#4] + : : : +- *(9) Filter ((isnotnull(ws_ship_hdemo_sk#8) && isnotnull(ws_sold_time_sk#6)) && isnotnull(ws_web_page_sk#4)) + : : : +- *(9) FileScan parquet default.web_sales[ws_sold_time_sk#6,ws_ship_hdemo_sk#8,ws_web_page_sk#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct + : : +- ReusedExchange [hd_demo_sk#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [t_time_sk#7] + : +- *(7) Filter (((isnotnull(t_hour#11) && (t_hour#11 >= 19)) && (t_hour#11 <= 20)) && isnotnull(t_time_sk#7)) + : +- *(7) FileScan parquet default.time_dim[t_time_sk#7,t_hour#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,19), LessThanOrEqual(t_hour,20), IsNotNull(t_time_sk)], ReadSchema: struct + +- ReusedExchange [wp_web_page_sk#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt new file mode 100644 index 000000000..c14580e27 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt @@ -0,0 +1,64 @@ +TakeOrderedAndProject [am_pm_ratio] + WholeStageCodegen + Project [amc,pmc] + InputAdapter + BroadcastNestedLoopJoin + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),amc,count] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk] + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk,ws_web_page_sk] + BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] + Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [hd_demo_sk] + Filter [hd_dep_count,hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] [hd_demo_sk,hd_dep_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour] [t_time_sk,t_hour] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [wp_web_page_sk] + Filter [wp_char_count,wp_web_page_sk] + Scan parquet default.web_page [wp_web_page_sk,wp_char_count] [wp_web_page_sk,wp_char_count] + BroadcastExchange #5 + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),pmc,count] + InputAdapter + Exchange #6 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk] + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk,ws_web_page_sk] + BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] + Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + InputAdapter + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour] [t_time_sk,t_hour] + InputAdapter + ReusedExchange [wp_web_page_sk] [wp_web_page_sk] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt new file mode 100644 index 000000000..90c7f4cff --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt @@ -0,0 +1,45 @@ +== Physical Plan == +*(9) Sort [Returns_Loss#1 DESC NULLS LAST], true, 0 ++- Exchange rangepartitioning(Returns_Loss#1 DESC NULLS LAST, 200) + +- *(8) HashAggregate(keys=[cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6], functions=[sum(UnscaledValue(cr_net_loss#7))]) + +- Exchange hashpartitioning(cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6, 200) + +- *(7) HashAggregate(keys=[cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6], functions=[partial_sum(UnscaledValue(cr_net_loss#7))]) + +- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#5, cd_education_status#6] + +- *(7) BroadcastHashJoin [c_current_hdemo_sk#8], [hd_demo_sk#9], Inner, BuildRight + :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#8, cd_marital_status#5, cd_education_status#6] + : +- *(7) BroadcastHashJoin [c_current_cdemo_sk#10], [cd_demo_sk#11], Inner, BuildRight + : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#10, c_current_hdemo_sk#8] + : : +- *(7) BroadcastHashJoin [c_current_addr_sk#12], [ca_address_sk#13], Inner, BuildRight + : : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#10, c_current_hdemo_sk#8, c_current_addr_sk#12] + : : : +- *(7) BroadcastHashJoin [cr_returning_customer_sk#14], [c_customer_sk#15], Inner, BuildRight + : : : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#14, cr_net_loss#7] + : : : : +- *(7) BroadcastHashJoin [cr_returned_date_sk#16], [d_date_sk#17], Inner, BuildRight + : : : : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returned_date_sk#16, cr_returning_customer_sk#14, cr_net_loss#7] + : : : : : +- *(7) BroadcastHashJoin [cc_call_center_sk#18], [cr_call_center_sk#19], Inner, BuildRight + : : : : : :- *(7) Project [cc_call_center_sk#18, cc_call_center_id#2, cc_name#3, cc_manager#4] + : : : : : : +- *(7) Filter isnotnull(cc_call_center_sk#18) + : : : : : : +- *(7) FileScan parquet default.call_center[cc_call_center_sk#18,cc_call_center_id#2,cc_name#3,cc_manager#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) + : : : : : +- *(1) Project [cr_returned_date_sk#16, cr_returning_customer_sk#14, cr_call_center_sk#19, cr_net_loss#7] + : : : : : +- *(1) Filter ((isnotnull(cr_call_center_sk#19) && isnotnull(cr_returned_date_sk#16)) && isnotnull(cr_returning_customer_sk#14)) + : : : : : +- *(1) FileScan parquet default.catalog_returns[cr_returned_date_sk#16,cr_returning_customer_sk#14,cr_call_center_sk#19,cr_net_loss#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_customer_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(3) Project [c_customer_sk#15, c_current_cdemo_sk#10, c_current_hdemo_sk#8, c_current_addr_sk#12] + : : : +- *(3) Filter (((isnotnull(c_customer_sk#15) && isnotnull(c_current_addr_sk#12)) && isnotnull(c_current_cdemo_sk#10)) && isnotnull(c_current_hdemo_sk#8)) + : : : +- *(3) FileScan parquet default.customer[c_customer_sk#15,c_current_cdemo_sk#10,c_current_hdemo_sk#8,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(4) Project [ca_address_sk#13] + : : +- *(4) Filter ((isnotnull(ca_gmt_offset#22) && (ca_gmt_offset#22 = -7.00)) && isnotnull(ca_address_sk#13)) + : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#13,ca_gmt_offset#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(5) Project [cd_demo_sk#11, cd_marital_status#5, cd_education_status#6] + : +- *(5) Filter ((((cd_marital_status#5 = M) && (cd_education_status#6 = Unknown)) || ((cd_marital_status#5 = W) && (cd_education_status#6 = Advanced Degree))) && isnotnull(cd_demo_sk#11)) + : +- *(5) FileScan parquet default.customer_demographics[cd_demo_sk#11,cd_marital_status#5,cd_education_status#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(6) Project [hd_demo_sk#9] + +- *(6) Filter ((isnotnull(hd_buy_potential#23) && StartsWith(hd_buy_potential#23, Unknown)) && isnotnull(hd_demo_sk#9)) + +- *(6) FileScan parquet default.household_demographics[hd_demo_sk#9,hd_buy_potential#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt new file mode 100644 index 000000000..4b2c97c9f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt @@ -0,0 +1,61 @@ +WholeStageCodegen + Sort [Returns_Loss] + InputAdapter + Exchange [Returns_Loss] #1 + WholeStageCodegen + HashAggregate [cc_call_center_id,sum(UnscaledValue(cr_net_loss)),sum,cc_name,cd_education_status,cc_manager,cd_marital_status] [sum(UnscaledValue(cr_net_loss)),sum,Manager,Call_Center_Name,Returns_Loss,Call_Center] + InputAdapter + Exchange [cc_call_center_id,cc_name,cd_education_status,cc_manager,cd_marital_status] #2 + WholeStageCodegen + HashAggregate [cc_call_center_id,sum,cc_name,cd_education_status,cc_manager,cr_net_loss,sum,cd_marital_status] [sum,sum] + Project [cr_net_loss,cd_marital_status,cc_call_center_id,cc_name,cc_manager,cd_education_status] + BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] + Project [cr_net_loss,c_current_hdemo_sk,cd_marital_status,cc_call_center_id,cc_name,cc_manager,cd_education_status] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [c_current_cdemo_sk,cr_net_loss,c_current_hdemo_sk,cc_call_center_id,cc_name,cc_manager] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_current_cdemo_sk,c_current_addr_sk,cr_net_loss,c_current_hdemo_sk,cc_call_center_id,cc_name,cc_manager] + BroadcastHashJoin [cr_returning_customer_sk,c_customer_sk] + Project [cr_returning_customer_sk,cr_net_loss,cc_call_center_id,cc_name,cc_manager] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Project [cr_returning_customer_sk,cr_net_loss,cc_call_center_id,cr_returned_date_sk,cc_name,cc_manager] + BroadcastHashJoin [cc_call_center_sk,cr_call_center_sk] + Project [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] + Filter [cc_call_center_sk] + Scan parquet default.call_center [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] + Filter [cr_call_center_sk,cr_returned_date_sk,cr_returning_customer_sk] + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] [cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + Filter [c_customer_sk,c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] + Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [cd_demo_sk,cd_marital_status,cd_education_status] + Filter [cd_marital_status,cd_education_status,cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] [hd_demo_sk,hd_buy_potential] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt new file mode 100644 index 000000000..4a18c2b2d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt @@ -0,0 +1,33 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[Excess Discount Amount #1 ASC NULLS FIRST], output=[Excess Discount Amount #1]) ++- *(7) HashAggregate(keys=[], functions=[sum(UnscaledValue(ws_ext_discount_amt#2))]) + +- Exchange SinglePartition + +- *(6) HashAggregate(keys=[], functions=[partial_sum(UnscaledValue(ws_ext_discount_amt#2))]) + +- *(6) Project [ws_ext_discount_amt#2] + +- *(6) BroadcastHashJoin [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + :- *(6) Project [ws_sold_date_sk#3, ws_ext_discount_amt#2] + : +- *(6) BroadcastHashJoin [i_item_sk#5], [ws_item_sk#6#7], Inner, BuildRight, (cast(ws_ext_discount_amt#2 as decimal(14,7)) > (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#8) + : :- *(6) Project [ws_sold_date_sk#3, ws_ext_discount_amt#2, i_item_sk#5] + : : +- *(6) BroadcastHashJoin [ws_item_sk#6], [i_item_sk#5], Inner, BuildRight + : : :- *(6) Project [ws_sold_date_sk#3, ws_item_sk#6, ws_ext_discount_amt#2] + : : : +- *(6) Filter ((isnotnull(ws_item_sk#6) && isnotnull(ws_ext_discount_amt#2)) && isnotnull(ws_sold_date_sk#3)) + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#3,ws_item_sk#6,ws_ext_discount_amt#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_ext_discount_amt), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [i_item_sk#5] + : : +- *(1) Filter ((isnotnull(i_manufact_id#9) && (i_manufact_id#9 = 350)) && isnotnull(i_item_sk#5)) + : : +- *(1) FileScan parquet default.item[i_item_sk#5,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,350), IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : +- *(4) Filter isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#8) + : +- *(4) HashAggregate(keys=[ws_item_sk#6], functions=[avg(UnscaledValue(ws_ext_discount_amt#2))]) + : +- Exchange hashpartitioning(ws_item_sk#6, 200) + : +- *(3) HashAggregate(keys=[ws_item_sk#6], functions=[partial_avg(UnscaledValue(ws_ext_discount_amt#2))]) + : +- *(3) Project [ws_item_sk#6, ws_ext_discount_amt#2] + : +- *(3) BroadcastHashJoin [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + : :- *(3) Project [ws_sold_date_sk#3, ws_item_sk#6, ws_ext_discount_amt#2] + : : +- *(3) Filter (isnotnull(ws_sold_date_sk#3) && isnotnull(ws_item_sk#6)) + : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#3,ws_item_sk#6,ws_ext_discount_amt#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#4] + : +- *(2) Filter (((isnotnull(d_date#10) && (cast(d_date#10 as string) >= 2000-01-27)) && (d_date#10 <= 11073)) && isnotnull(d_date_sk#4)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#4,d_date#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt new file mode 100644 index 000000000..6137607f6 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt @@ -0,0 +1,44 @@ +TakeOrderedAndProject [Excess Discount Amount ] + WholeStageCodegen + HashAggregate [sum,sum(UnscaledValue(ws_ext_discount_amt))] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [ws_ext_discount_amt,sum,sum] [sum,sum] + Project [ws_ext_discount_amt] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_ext_discount_amt] + BroadcastHashJoin [i_item_sk,ws_item_sk,ws_ext_discount_amt,(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))] + Project [ws_sold_date_sk,ws_ext_discount_amt,i_item_sk] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] + Filter [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [i_item_sk] + Filter [i_manufact_id,i_item_sk] + Scan parquet default.item [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))] + HashAggregate [ws_item_sk,sum,count,avg(UnscaledValue(ws_ext_discount_amt))] [sum,(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),avg(UnscaledValue(ws_ext_discount_amt)),count,ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #4 + WholeStageCodegen + HashAggregate [ws_ext_discount_amt,ws_item_sk,sum,sum,count,count] [sum,count,sum,count] + Project [ws_item_sk,ws_ext_discount_amt] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] + Filter [ws_sold_date_sk,ws_item_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt new file mode 100644 index 000000000..4788a00a3 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt @@ -0,0 +1,18 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[sumsales#1 ASC NULLS FIRST,ss_customer_sk#2 ASC NULLS FIRST], output=[ss_customer_sk#2,sumsales#1]) ++- *(4) HashAggregate(keys=[ss_customer_sk#2], functions=[sum(act_sales#3)]) + +- Exchange hashpartitioning(ss_customer_sk#2, 200) + +- *(3) HashAggregate(keys=[ss_customer_sk#2], functions=[partial_sum(act_sales#3)]) + +- *(3) Project [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#4) THEN CheckOverflow((promote_precision(cast(cast((ss_quantity#5 - sr_return_quantity#4) as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#6 as decimal(12,2)))), DecimalType(18,2)) ELSE CheckOverflow((promote_precision(cast(cast(ss_quantity#5 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#6 as decimal(12,2)))), DecimalType(18,2)) END AS act_sales#3] + +- *(3) BroadcastHashJoin [sr_reason_sk#7], [cast(r_reason_sk#8 as bigint)], Inner, BuildRight + :- *(3) Project [ss_customer_sk#2, ss_quantity#5, ss_sales_price#6, sr_reason_sk#7, sr_return_quantity#4] + : +- *(3) BroadcastHashJoin [cast(ss_item_sk#9 as bigint), cast(ss_ticket_number#10 as bigint)], [sr_item_sk#11, sr_ticket_number#12], Inner, BuildRight + : :- *(3) FileScan parquet default.store_sales[ss_item_sk#9,ss_customer_sk#2,ss_ticket_number#10,ss_quantity#5,ss_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [r_reason_sk#8] + +- *(2) Filter ((isnotnull(r_reason_desc#13) && (r_reason_desc#13 = reason 28)) && isnotnull(r_reason_sk#8)) + +- *(2) FileScan parquet default.reason[r_reason_sk#8,r_reason_desc#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_desc), EqualTo(r_reason_desc,reason 28), IsNotNull(r_reason_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt new file mode 100644 index 000000000..5c7f07a1b --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt @@ -0,0 +1,24 @@ +TakeOrderedAndProject [sumsales,ss_customer_sk] + WholeStageCodegen + HashAggregate [ss_customer_sk,sum,sum(act_sales)] [sum(act_sales),sumsales,sum] + InputAdapter + Exchange [ss_customer_sk] #1 + WholeStageCodegen + HashAggregate [ss_customer_sk,act_sales,sum,sum] [sum,sum] + Project [ss_customer_sk,sr_return_quantity,ss_quantity,ss_sales_price] + BroadcastHashJoin [sr_reason_sk,r_reason_sk] + Project [ss_quantity,sr_return_quantity,ss_customer_sk,sr_reason_sk,ss_sales_price] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_ticket_number] [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_ticket_number] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] + Filter [sr_item_sk,sr_ticket_number,sr_reason_sk] + Scan parquet default.store_returns [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [r_reason_sk] + Filter [r_reason_desc,r_reason_sk] + Scan parquet default.reason [r_reason_sk,r_reason_desc] [r_reason_sk,r_reason_desc] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt new file mode 100644 index 000000000..8034c8beb --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt @@ -0,0 +1,37 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], output=[order count #1,total shipping cost #2,total net profit #3]) ++- *(8) HashAggregate(keys=[], functions=[sum(UnscaledValue(ws_ext_ship_cost#4)), sum(UnscaledValue(ws_net_profit#5)), count(distinct ws_order_number#6)]) + +- Exchange SinglePartition + +- *(7) HashAggregate(keys=[], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5)), partial_count(distinct ws_order_number#6)]) + +- *(7) HashAggregate(keys=[ws_order_number#6], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5))]) + +- Exchange hashpartitioning(ws_order_number#6, 200) + +- *(6) HashAggregate(keys=[ws_order_number#6], functions=[partial_sum(UnscaledValue(ws_ext_ship_cost#4)), partial_sum(UnscaledValue(ws_net_profit#5))]) + +- *(6) Project [ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + +- *(6) BroadcastHashJoin [ws_web_site_sk#7], [web_site_sk#8], Inner, BuildRight + :- *(6) Project [ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : +- *(6) BroadcastHashJoin [ws_ship_addr_sk#9], [ca_address_sk#10], Inner, BuildRight + : :- *(6) Project [ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : : +- *(6) BroadcastHashJoin [ws_ship_date_sk#11], [d_date_sk#12], Inner, BuildRight + : : :- *(6) BroadcastHashJoin [cast(ws_order_number#6 as bigint)], [wr_order_number#13], LeftAnti, BuildRight + : : : :- *(6) Project [ws_ship_date_sk#11, ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : : : : +- *(6) BroadcastHashJoin [ws_order_number#6], [ws_order_number#6#14], LeftSemi, BuildRight, NOT (ws_warehouse_sk#15 = ws_warehouse_sk#15#16) + : : : : :- *(6) Project [ws_ship_date_sk#11, ws_ship_addr_sk#9, ws_web_site_sk#7, ws_warehouse_sk#15, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : : : : : +- *(6) Filter ((isnotnull(ws_ship_date_sk#11) && isnotnull(ws_ship_addr_sk#9)) && isnotnull(ws_web_site_sk#7)) + : : : : : +- *(6) FileScan parquet default.web_sales[ws_ship_date_sk#11,ws_ship_addr_sk#9,ws_web_site_sk#7,ws_warehouse_sk#15,ws_order_number#6,ws_ext_ship_cost#4,ws_net_profit#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])) + : : : +- *(2) FileScan parquet default.web_returns[wr_order_number#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [d_date_sk#12] + : : +- *(3) Filter (((isnotnull(d_date#17) && (cast(d_date#17 as string) >= 1999-02-01)) && (d_date#17 <= 10683)) && isnotnull(d_date_sk#12)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [ca_address_sk#10] + : +- *(4) Filter ((isnotnull(ca_state#18) && (ca_state#18 = IL)) && isnotnull(ca_address_sk#10)) + : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [web_site_sk#8] + +- *(5) Filter ((isnotnull(web_company_name#19) && (web_company_name#19 = pri)) && isnotnull(web_site_sk#8)) + +- *(5) FileScan parquet default.web_site[web_site_sk#8,web_company_name#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt new file mode 100644 index 000000000..cc709ac3c --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt @@ -0,0 +1,51 @@ +TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] + WholeStageCodegen + HashAggregate [sum,sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),count,sum] [sum,sum(UnscaledValue(ws_net_profit)),order count ,total net profit ,count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),count,sum,total shipping cost ] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [sum,count,sum,sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),ws_order_number,sum,sum] [sum,count,sum,sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),sum,count,sum] + HashAggregate [sum,sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),ws_order_number,sum,sum] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),sum,sum] + InputAdapter + Exchange [ws_order_number] #2 + WholeStageCodegen + HashAggregate [sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),ws_order_number,sum,ws_net_profit,ws_ext_ship_cost] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),sum,sum] + Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] + Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number] + BroadcastHashJoin [ws_ship_date_sk,d_date_sk] + BroadcastHashJoin [ws_order_number,wr_order_number] + Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] + BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk,ws_warehouse_sk] + Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] + Scan parquet default.web_sales [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk,ws_warehouse_sk] [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk,ws_warehouse_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ws_warehouse_sk,ws_order_number] + Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] [ws_warehouse_sk,ws_order_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Scan parquet default.web_returns [wr_order_number] [wr_order_number] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [web_site_sk] + Filter [web_company_name,web_site_sk] + Scan parquet default.web_site [web_site_sk,web_company_name] [web_site_sk,web_company_name] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt new file mode 100644 index 000000000..a3de90e6d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt @@ -0,0 +1,54 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], output=[order count #1,total shipping cost #2,total net profit #3]) ++- *(11) HashAggregate(keys=[], functions=[sum(UnscaledValue(ws_ext_ship_cost#4)), sum(UnscaledValue(ws_net_profit#5)), count(distinct ws_order_number#6)]) + +- Exchange SinglePartition + +- *(10) HashAggregate(keys=[], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5)), partial_count(distinct ws_order_number#6)]) + +- *(10) HashAggregate(keys=[ws_order_number#6], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5))]) + +- Exchange hashpartitioning(ws_order_number#6, 200) + +- *(9) HashAggregate(keys=[ws_order_number#6], functions=[partial_sum(UnscaledValue(ws_ext_ship_cost#4)), partial_sum(UnscaledValue(ws_net_profit#5))]) + +- *(9) Project [ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + +- *(9) BroadcastHashJoin [ws_web_site_sk#7], [web_site_sk#8], Inner, BuildRight + :- *(9) Project [ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : +- *(9) BroadcastHashJoin [ws_ship_addr_sk#9], [ca_address_sk#10], Inner, BuildRight + : :- *(9) Project [ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : : +- *(9) BroadcastHashJoin [ws_ship_date_sk#11], [d_date_sk#12], Inner, BuildRight + : : :- *(9) BroadcastHashJoin [cast(ws_order_number#6 as bigint)], [wr_order_number#13], LeftSemi, BuildRight + : : : :- *(9) BroadcastHashJoin [ws_order_number#6], [ws_order_number#6#14], LeftSemi, BuildRight + : : : : :- *(9) Project [ws_ship_date_sk#11, ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : : : : : +- *(9) Filter ((isnotnull(ws_ship_date_sk#11) && isnotnull(ws_ship_addr_sk#9)) && isnotnull(ws_web_site_sk#7)) + : : : : : +- *(9) FileScan parquet default.web_sales[ws_ship_date_sk#11,ws_ship_addr_sk#9,ws_web_site_sk#7,ws_order_number#6,ws_ext_ship_cost#4,ws_net_profit#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : : +- *(1) Project [ws_warehouse_sk#17, ws_order_number#15] + : : : : +- *(1) Filter (isnotnull(ws_order_number#15) && isnotnull(ws_warehouse_sk#17)) + : : : : +- *(1) FileScan parquet default.web_sales[ws_warehouse_sk#17,ws_order_number#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])) + : : : +- *(5) Project [wr_order_number#13] + : : : +- *(5) BroadcastHashJoin [wr_order_number#13], [cast(ws_order_number#6 as bigint)], Inner, BuildRight + : : : :- *(5) Project [wr_order_number#13] + : : : : +- *(5) Filter isnotnull(wr_order_number#13) + : : : : +- *(5) FileScan parquet default.web_returns[wr_order_number#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_order_number)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [ws_order_number#6] + : : : +- *(4) BroadcastHashJoin [ws_order_number#6], [ws_order_number#15], Inner, BuildRight, NOT (ws_warehouse_sk#16 = ws_warehouse_sk#17) + : : : :- *(4) Project [ws_warehouse_sk#16, ws_order_number#6] + : : : : +- *(4) Filter (isnotnull(ws_order_number#6) && isnotnull(ws_warehouse_sk#16)) + : : : : +- *(4) FileScan parquet default.web_sales[ws_warehouse_sk#16,ws_order_number#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)], ReadSchema: struct + : : : +- ReusedExchange [ws_warehouse_sk#17, ws_order_number#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [d_date_sk#12] + : : +- *(6) Filter (((isnotnull(d_date#18) && (cast(d_date#18 as string) >= 1999-02-01)) && (d_date#18 <= 10683)) && isnotnull(d_date_sk#12)) + : : +- *(6) FileScan parquet default.date_dim[d_date_sk#12,d_date#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [ca_address_sk#10] + : +- *(7) Filter ((isnotnull(ca_state#19) && (ca_state#19 = IL)) && isnotnull(ca_address_sk#10)) + : +- *(7) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [web_site_sk#8] + +- *(8) Filter ((isnotnull(web_company_name#20) && (web_company_name#20 = pri)) && isnotnull(web_site_sk#8)) + +- *(8) FileScan parquet default.web_site[web_site_sk#8,web_company_name#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt new file mode 100644 index 000000000..5b6f33ebd --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt @@ -0,0 +1,73 @@ +TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] + WholeStageCodegen + HashAggregate [count(ws_order_number),sum(UnscaledValue(ws_net_profit)),count,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] [order count ,total shipping cost ,count(ws_order_number),sum(UnscaledValue(ws_net_profit)),count,sum,total net profit ,sum(UnscaledValue(ws_ext_ship_cost)),sum] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [sum,sum,count,count(ws_order_number),sum(UnscaledValue(ws_net_profit)),sum,ws_order_number,sum(UnscaledValue(ws_ext_ship_cost)),sum] [sum,sum,count,count(ws_order_number),sum(UnscaledValue(ws_net_profit)),count,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] + HashAggregate [sum,sum,sum(UnscaledValue(ws_net_profit)),sum,ws_order_number,sum(UnscaledValue(ws_ext_ship_cost)),sum] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] + InputAdapter + Exchange [ws_order_number] #2 + WholeStageCodegen + HashAggregate [sum,sum,sum(UnscaledValue(ws_net_profit)),ws_order_number,sum(UnscaledValue(ws_ext_ship_cost)),ws_net_profit,ws_ext_ship_cost] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] + Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] + Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number] + BroadcastHashJoin [ws_ship_date_sk,d_date_sk] + BroadcastHashJoin [ws_order_number,wr_order_number] + BroadcastHashJoin [ws_order_number,ws_order_number] + Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] + Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] + Scan parquet default.web_sales [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ws_order_number] + BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + Project [ws_warehouse_sk,ws_order_number] + Filter [ws_order_number,ws_warehouse_sk] + Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] [ws_warehouse_sk,ws_order_number] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [ws_warehouse_sk,ws_order_number] + Filter [ws_order_number,ws_warehouse_sk] + Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] [ws_warehouse_sk,ws_order_number] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [wr_order_number] + BroadcastHashJoin [wr_order_number,ws_order_number] + Project [wr_order_number] + Filter [wr_order_number] + Scan parquet default.web_returns [wr_order_number] [wr_order_number] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [ws_order_number] + BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + Project [ws_warehouse_sk,ws_order_number] + Filter [ws_order_number,ws_warehouse_sk] + Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] [ws_warehouse_sk,ws_order_number] + InputAdapter + ReusedExchange [ws_warehouse_sk,ws_order_number] [ws_warehouse_sk,ws_order_number] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [ca_address_sk] + Filter [ca_state,ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [web_site_sk] + Filter [web_company_name,web_site_sk] + Scan parquet default.web_site [web_site_sk,web_company_name] [web_site_sk,web_company_name] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt new file mode 100644 index 000000000..c66e7e331 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt @@ -0,0 +1,26 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[count(1)#1 ASC NULLS FIRST], output=[count(1)#1]) ++- *(5) HashAggregate(keys=[], functions=[count(1)]) + +- Exchange SinglePartition + +- *(4) HashAggregate(keys=[], functions=[partial_count(1)]) + +- *(4) Project + +- *(4) BroadcastHashJoin [ss_store_sk#2], [s_store_sk#3], Inner, BuildRight + :- *(4) Project [ss_store_sk#2] + : +- *(4) BroadcastHashJoin [ss_sold_time_sk#4], [t_time_sk#5], Inner, BuildRight + : :- *(4) Project [ss_sold_time_sk#4, ss_store_sk#2] + : : +- *(4) BroadcastHashJoin [ss_hdemo_sk#6], [hd_demo_sk#7], Inner, BuildRight + : : :- *(4) Project [ss_sold_time_sk#4, ss_hdemo_sk#6, ss_store_sk#2] + : : : +- *(4) Filter ((isnotnull(ss_hdemo_sk#6) && isnotnull(ss_sold_time_sk#4)) && isnotnull(ss_store_sk#2)) + : : : +- *(4) FileScan parquet default.store_sales[ss_sold_time_sk#4,ss_hdemo_sk#6,ss_store_sk#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [hd_demo_sk#7] + : : +- *(1) Filter ((isnotnull(hd_dep_count#8) && (hd_dep_count#8 = 7)) && isnotnull(hd_demo_sk#7)) + : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#7,hd_dep_count#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,7), IsNotNull(hd_demo_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [t_time_sk#5] + : +- *(2) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 20)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#5)) + : +- *(2) FileScan parquet default.time_dim[t_time_sk#5,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,20), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [s_store_sk#3] + +- *(3) Filter ((isnotnull(s_store_name#11) && (s_store_name#11 = ese)) && isnotnull(s_store_sk#3)) + +- *(3) FileScan parquet default.store[s_store_sk#3,s_store_name#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt new file mode 100644 index 000000000..fada2566d --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt @@ -0,0 +1,34 @@ +TakeOrderedAndProject [count(1)] + WholeStageCodegen + HashAggregate [count,count(1)] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [count,count] [count,count] + Project + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk] + BroadcastHashJoin [ss_sold_time_sk,t_time_sk] + Project [ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [hd_demo_sk] + Filter [hd_dep_count,hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] [hd_demo_sk,hd_dep_count] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [t_time_sk] + Filter [t_hour,t_minute,t_time_sk] + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_name,s_store_sk] + Scan parquet default.store [s_store_sk,s_store_name] [s_store_sk,s_store_name] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt new file mode 100644 index 000000000..136c5a89f --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt @@ -0,0 +1,32 @@ +== Physical Plan == +CollectLimit 100 ++- *(10) HashAggregate(keys=[], functions=[sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint))]) + +- Exchange SinglePartition + +- *(9) HashAggregate(keys=[], functions=[partial_sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint))]) + +- *(9) Project [customer_sk#1, customer_sk#2] + +- SortMergeJoin [customer_sk#1, item_sk#3], [customer_sk#2, item_sk#4], FullOuter + :- *(4) Sort [customer_sk#1 ASC NULLS FIRST, item_sk#3 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(customer_sk#1, item_sk#3, 200) + : +- *(3) HashAggregate(keys=[ss_customer_sk#5, ss_item_sk#6], functions=[]) + : +- Exchange hashpartitioning(ss_customer_sk#5, ss_item_sk#6, 200) + : +- *(2) HashAggregate(keys=[ss_customer_sk#5, ss_item_sk#6], functions=[]) + : +- *(2) Project [ss_item_sk#6, ss_customer_sk#5] + : +- *(2) BroadcastHashJoin [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + : :- *(2) Project [ss_sold_date_sk#7, ss_item_sk#6, ss_customer_sk#5] + : : +- *(2) Filter isnotnull(ss_sold_date_sk#7) + : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#7,ss_item_sk#6,ss_customer_sk#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [d_date_sk#8] + : +- *(1) Filter (((isnotnull(d_month_seq#9) && (d_month_seq#9 >= 1200)) && (d_month_seq#9 <= 1211)) && isnotnull(d_date_sk#8)) + : +- *(1) FileScan parquet default.date_dim[d_date_sk#8,d_month_seq#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + +- *(8) Sort [customer_sk#2 ASC NULLS FIRST, item_sk#4 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(customer_sk#2, item_sk#4, 200) + +- *(7) HashAggregate(keys=[cs_bill_customer_sk#10, cs_item_sk#11], functions=[]) + +- Exchange hashpartitioning(cs_bill_customer_sk#10, cs_item_sk#11, 200) + +- *(6) HashAggregate(keys=[cs_bill_customer_sk#10, cs_item_sk#11], functions=[]) + +- *(6) Project [cs_bill_customer_sk#10, cs_item_sk#11] + +- *(6) BroadcastHashJoin [cs_sold_date_sk#12], [d_date_sk#8], Inner, BuildRight + :- *(6) Project [cs_sold_date_sk#12, cs_bill_customer_sk#10, cs_item_sk#11] + : +- *(6) Filter isnotnull(cs_sold_date_sk#12) + : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_bill_customer_sk#10,cs_item_sk#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt new file mode 100644 index 000000000..a3cc25d23 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt @@ -0,0 +1,48 @@ +CollectLimit + WholeStageCodegen + HashAggregate [sum,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] [store_only,sum,catalog_only,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_and_catalog] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [sum,sum,sum,customer_sk,sum,customer_sk,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [customer_sk,customer_sk] + InputAdapter + SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + WholeStageCodegen + Sort [customer_sk,item_sk] + InputAdapter + Exchange [customer_sk,item_sk] #2 + WholeStageCodegen + HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk] #3 + WholeStageCodegen + HashAggregate [ss_customer_sk,ss_item_sk] + Project [ss_item_sk,ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + WholeStageCodegen + Sort [customer_sk,item_sk] + InputAdapter + Exchange [customer_sk,item_sk] #5 + WholeStageCodegen + HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #6 + WholeStageCodegen + HashAggregate [cs_bill_customer_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt new file mode 100644 index 000000000..eee60f541 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt @@ -0,0 +1,26 @@ +== Physical Plan == +*(7) Project [i_item_desc#1, i_category#2, i_class#3, i_current_price#4, itemrevenue#5, revenueratio#6] ++- *(7) Sort [i_category#2 ASC NULLS FIRST, i_class#3 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#1 ASC NULLS FIRST, revenueratio#6 ASC NULLS FIRST], true, 0 + +- Exchange rangepartitioning(i_category#2 ASC NULLS FIRST, i_class#3 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#1 ASC NULLS FIRST, revenueratio#6 ASC NULLS FIRST, 200) + +- *(6) Project [i_item_desc#1, i_category#2, i_class#3, i_current_price#4, itemrevenue#5, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#8) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#9)), DecimalType(38,17)) AS revenueratio#6, i_item_id#7] + +- Window [sum(_w1#10) windowspecdefinition(i_class#3, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#9], [i_class#3] + +- *(5) Sort [i_class#3 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(i_class#3, 200) + +- *(4) HashAggregate(keys=[i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4], functions=[sum(UnscaledValue(ss_ext_sales_price#11))]) + +- Exchange hashpartitioning(i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4, 200) + +- *(3) HashAggregate(keys=[i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#11))]) + +- *(3) Project [ss_ext_sales_price#11, i_item_id#7, i_item_desc#1, i_current_price#4, i_class#3, i_category#2] + +- *(3) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + :- *(3) Project [ss_sold_date_sk#12, ss_ext_sales_price#11, i_item_id#7, i_item_desc#1, i_current_price#4, i_class#3, i_category#2] + : +- *(3) BroadcastHashJoin [ss_item_sk#14], [i_item_sk#15], Inner, BuildRight + : :- *(3) Project [ss_sold_date_sk#12, ss_item_sk#14, ss_ext_sales_price#11] + : : +- *(3) Filter (isnotnull(ss_item_sk#14) && isnotnull(ss_sold_date_sk#12)) + : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_item_sk#14,ss_ext_sales_price#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [i_item_sk#15, i_item_id#7, i_item_desc#1, i_current_price#4, i_class#3, i_category#2] + : +- *(1) Filter (i_category#2 IN (Sports,Books,Home) && isnotnull(i_item_sk#15)) + : +- *(1) FileScan parquet default.item[i_item_sk#15,i_item_id#7,i_item_desc#1,i_current_price#4,i_class#3,i_category#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)], ReadSchema: struct= 10644)) && (d_date#16 <= 10674)) && isnotnull(d_date_sk#13)) + +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt new file mode 100644 index 000000000..be26ab7ec --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt @@ -0,0 +1,38 @@ +WholeStageCodegen + Project [revenueratio,i_item_desc,itemrevenue,i_category,i_current_price,i_class] + Sort [revenueratio,i_item_id,i_item_desc,i_category,i_class] + InputAdapter + Exchange [revenueratio,i_item_id,i_item_desc,i_category,i_class] #1 + WholeStageCodegen + Project [i_item_id,i_item_desc,_w0,itemrevenue,i_category,_we0,i_current_price,i_class] + InputAdapter + Window [_w1,i_class] + WholeStageCodegen + Sort [i_class] + InputAdapter + Exchange [i_class] #2 + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,sum,sum(UnscaledValue(ss_ext_sales_price)),i_category,i_current_price,i_class] [sum,sum(UnscaledValue(ss_ext_sales_price)),_w0,itemrevenue,_w1] + InputAdapter + Exchange [i_item_id,i_item_desc,i_category,i_current_price,i_class] #3 + WholeStageCodegen + HashAggregate [i_item_id,i_item_desc,sum,i_category,ss_ext_sales_price,sum,i_current_price,i_class] [sum,sum] + Project [i_class,i_current_price,ss_ext_sales_price,i_category,i_item_desc,i_item_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_class,i_current_price,ss_ext_sales_price,i_category,ss_sold_date_sk,i_item_desc,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + Filter [i_category,i_item_sk] + Scan parquet default.item [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt new file mode 100644 index 000000000..fbd4c2dc5 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt @@ -0,0 +1,32 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[substring(w_warehouse_name, 1, 20)#1 ASC NULLS FIRST,sm_type#2 ASC NULLS FIRST,cc_name#3 ASC NULLS FIRST], output=[substring(w_warehouse_name, 1, 20)#1,sm_type#2,cc_name#3,30 days #4,31 - 60 days #5,61 - 90 days #6,91 - 120 days #7,>120 days #8]) ++- *(6) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3], functions=[sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 30) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 60) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 90) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) + +- Exchange hashpartitioning(substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3, 200) + +- *(5) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20) AS substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3], functions=[partial_sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 30) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 60) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 90) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) + +- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, w_warehouse_name#9, sm_type#2, cc_name#3] + +- *(5) BroadcastHashJoin [cs_ship_date_sk#11], [d_date_sk#13], Inner, BuildRight + :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, w_warehouse_name#9, sm_type#2, cc_name#3] + : +- *(5) BroadcastHashJoin [cs_call_center_sk#14], [cc_call_center_sk#15], Inner, BuildRight + : :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, cs_call_center_sk#14, w_warehouse_name#9, sm_type#2] + : : +- *(5) BroadcastHashJoin [cs_ship_mode_sk#16], [sm_ship_mode_sk#17], Inner, BuildRight + : : :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, cs_call_center_sk#14, cs_ship_mode_sk#16, w_warehouse_name#9] + : : : +- *(5) BroadcastHashJoin [cs_warehouse_sk#18], [w_warehouse_sk#19], Inner, BuildRight + : : : :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, cs_call_center_sk#14, cs_ship_mode_sk#16, cs_warehouse_sk#18] + : : : : +- *(5) Filter (((isnotnull(cs_warehouse_sk#18) && isnotnull(cs_ship_mode_sk#16)) && isnotnull(cs_call_center_sk#14)) && isnotnull(cs_ship_date_sk#11)) + : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_ship_date_sk#11,cs_call_center_sk#14,cs_ship_mode_sk#16,cs_warehouse_sk#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_ship_mode_sk), IsNotNull(cs_call_center_sk), IsNotNull(..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [sm_ship_mode_sk#17, sm_type#2] + : : +- *(2) Filter isnotnull(sm_ship_mode_sk#17) + : : +- *(2) FileScan parquet default.ship_mode[sm_ship_mode_sk#17,sm_type#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/ship_mode], PartitionFilters: [], PushedFilters: [IsNotNull(sm_ship_mode_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [cc_call_center_sk#15, cc_name#3] + : +- *(3) Filter isnotnull(cc_call_center_sk#15) + : +- *(3) FileScan parquet default.call_center[cc_call_center_sk#15,cc_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [d_date_sk#13] + +- *(4) Filter (((isnotnull(d_month_seq#20) && (d_month_seq#20 >= 1200)) && (d_month_seq#20 <= 1211)) && isnotnull(d_date_sk#13)) + +- *(4) FileScan parquet default.date_dim[d_date_sk#13,d_month_seq#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt new file mode 100644 index 000000000..934bf3e47 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt @@ -0,0 +1,42 @@ +TakeOrderedAndProject [substring(w_warehouse_name, 1, 20),61 - 90 days ,cc_name,>120 days ,91 - 120 days ,30 days ,sm_type,31 - 60 days ] + WholeStageCodegen + HashAggregate [sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum,cc_name,sum,sm_type,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum] [sum,substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,sum,>120 days ,sum,91 - 120 days ,30 days ,31 - 60 days ,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum] + InputAdapter + Exchange [substring(w_warehouse_name, 1, 20),sm_type,cc_name] #1 + WholeStageCodegen + HashAggregate [sum,sum,sum,substring(w_warehouse_name, 1, 20),w_warehouse_name,sum,sum,cs_sold_date_sk,sum,cc_name,sum,sum,sm_type,cs_ship_date_sk,sum,sum] [sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum] + Project [cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name,cc_name] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + Project [cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name,cc_name] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name,cs_call_center_sk] + BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] + Project [cs_ship_date_sk,cs_sold_date_sk,cs_ship_mode_sk,w_warehouse_name,cs_call_center_sk] + BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] + Project [cs_ship_date_sk,cs_warehouse_sk,cs_sold_date_sk,cs_ship_mode_sk,cs_call_center_sk] + Filter [cs_warehouse_sk,cs_ship_mode_sk,cs_call_center_sk,cs_ship_date_sk] + Scan parquet default.catalog_sales [cs_ship_date_sk,cs_warehouse_sk,cs_sold_date_sk,cs_ship_mode_sk,cs_call_center_sk] [cs_ship_date_sk,cs_warehouse_sk,cs_sold_date_sk,cs_ship_mode_sk,cs_call_center_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen + Project [w_warehouse_sk,w_warehouse_name] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [sm_ship_mode_sk,sm_type] + Filter [sm_ship_mode_sk] + Scan parquet default.ship_mode [sm_ship_mode_sk,sm_type] [sm_ship_mode_sk,sm_type] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [cc_call_center_sk,cc_name] + Filter [cc_call_center_sk] + Scan parquet default.call_center [cc_call_center_sk,cc_name] [cc_call_center_sk,cc_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] diff --git a/src/test/resources/tpcds/q1.sql b/src/test/resources/tpcds/q1.sql new file mode 100644 index 000000000..4d20faad8 --- /dev/null +++ b/src/test/resources/tpcds/q1.sql @@ -0,0 +1,19 @@ +WITH customer_total_return AS +( SELECT + sr_customer_sk AS ctr_customer_sk, + sr_store_sk AS ctr_store_sk, + sum(sr_return_amt) AS ctr_total_return + FROM store_returns, date_dim + WHERE sr_returned_date_sk = d_date_sk AND d_year = 2000 + GROUP BY sr_customer_sk, sr_store_sk) +SELECT c_customer_id +FROM customer_total_return ctr1, store, customer +WHERE ctr1.ctr_total_return > + (SELECT avg(ctr_total_return) * 1.2 + FROM customer_total_return ctr2 + WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk) + AND s_store_sk = ctr1.ctr_store_sk + AND s_state = 'TN' + AND ctr1.ctr_customer_sk = c_customer_sk +ORDER BY c_customer_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q10.sql b/src/test/resources/tpcds/q10.sql new file mode 100644 index 000000000..5500e1aea --- /dev/null +++ b/src/test/resources/tpcds/q10.sql @@ -0,0 +1,57 @@ +SELECT + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 +FROM + customer c, customer_address ca, customer_demographics +WHERE + c.c_current_addr_sk = ca.ca_address_sk AND + ca_county IN ('Rush County', 'Toole County', 'Jefferson County', + 'Dona Ana County', 'La Porte County') AND + cd_demo_sk = c.c_current_cdemo_sk AND + exists(SELECT * + FROM store_sales, date_dim + WHERE c.c_customer_sk = ss_customer_sk AND + ss_sold_date_sk = d_date_sk AND + d_year = 2002 AND + d_moy BETWEEN 1 AND 1 + 3) AND + (exists(SELECT * + FROM web_sales, date_dim + WHERE c.c_customer_sk = ws_bill_customer_sk AND + ws_sold_date_sk = d_date_sk AND + d_year = 2002 AND + d_moy BETWEEN 1 AND 1 + 3) OR + exists(SELECT * + FROM catalog_sales, date_dim + WHERE c.c_customer_sk = cs_ship_customer_sk AND + cs_sold_date_sk = d_date_sk AND + d_year = 2002 AND + d_moy BETWEEN 1 AND 1 + 3)) +GROUP BY cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +ORDER BY cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +LIMIT 100 diff --git a/src/test/resources/tpcds/q11.sql b/src/test/resources/tpcds/q11.sql new file mode 100644 index 000000000..3618fb14f --- /dev/null +++ b/src/test/resources/tpcds/q11.sql @@ -0,0 +1,68 @@ +WITH year_total AS ( + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + c_preferred_cust_flag customer_preferred_cust_flag, + c_birth_country customer_birth_country, + c_login customer_login, + c_email_address customer_email_address, + d_year dyear, + sum(ss_ext_list_price - ss_ext_discount_amt) year_total, + 's' sale_type + FROM customer, store_sales, date_dim + WHERE c_customer_sk = ss_customer_sk + AND ss_sold_date_sk = d_date_sk + GROUP BY c_customer_id + , c_first_name + , c_last_name + , d_year + , c_preferred_cust_flag + , c_birth_country + , c_login + , c_email_address + , d_year + UNION ALL + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + c_preferred_cust_flag customer_preferred_cust_flag, + c_birth_country customer_birth_country, + c_login customer_login, + c_email_address customer_email_address, + d_year dyear, + sum(ws_ext_list_price - ws_ext_discount_amt) year_total, + 'w' sale_type + FROM customer, web_sales, date_dim + WHERE c_customer_sk = ws_bill_customer_sk + AND ws_sold_date_sk = d_date_sk + GROUP BY + c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_country, + c_login, c_email_address, d_year) +SELECT t_s_secyear.customer_preferred_cust_flag +FROM year_total t_s_firstyear + , year_total t_s_secyear + , year_total t_w_firstyear + , year_total t_w_secyear +WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id + AND t_s_firstyear.customer_id = t_w_secyear.customer_id + AND t_s_firstyear.customer_id = t_w_firstyear.customer_id + AND t_s_firstyear.sale_type = 's' + AND t_w_firstyear.sale_type = 'w' + AND t_s_secyear.sale_type = 's' + AND t_w_secyear.sale_type = 'w' + AND t_s_firstyear.dyear = 2001 + AND t_s_secyear.dyear = 2001 + 1 + AND t_w_firstyear.dyear = 2001 + AND t_w_secyear.dyear = 2001 + 1 + AND t_s_firstyear.year_total > 0 + AND t_w_firstyear.year_total > 0 + AND CASE WHEN t_w_firstyear.year_total > 0 + THEN t_w_secyear.year_total / t_w_firstyear.year_total + ELSE NULL END + > CASE WHEN t_s_firstyear.year_total > 0 + THEN t_s_secyear.year_total / t_s_firstyear.year_total + ELSE NULL END +ORDER BY t_s_secyear.customer_preferred_cust_flag +LIMIT 100 diff --git a/src/test/resources/tpcds/q12.sql b/src/test/resources/tpcds/q12.sql new file mode 100644 index 000000000..0382737f5 --- /dev/null +++ b/src/test/resources/tpcds/q12.sql @@ -0,0 +1,22 @@ +SELECT + i_item_desc, + i_category, + i_class, + i_current_price, + sum(ws_ext_sales_price) AS itemrevenue, + sum(ws_ext_sales_price) * 100 / sum(sum(ws_ext_sales_price)) + OVER + (PARTITION BY i_class) AS revenueratio +FROM + web_sales, item, date_dim +WHERE + ws_item_sk = i_item_sk + AND i_category IN ('Sports', 'Books', 'Home') + AND ws_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('1999-02-22' AS DATE) + AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) +GROUP BY + i_item_id, i_item_desc, i_category, i_class, i_current_price +ORDER BY + i_category, i_class, i_item_id, i_item_desc, revenueratio +LIMIT 100 diff --git a/src/test/resources/tpcds/q13.sql b/src/test/resources/tpcds/q13.sql new file mode 100644 index 000000000..32dc9e260 --- /dev/null +++ b/src/test/resources/tpcds/q13.sql @@ -0,0 +1,49 @@ +SELECT + avg(ss_quantity), + avg(ss_ext_sales_price), + avg(ss_ext_wholesale_cost), + sum(ss_ext_wholesale_cost) +FROM store_sales + , store + , customer_demographics + , household_demographics + , customer_address + , date_dim +WHERE s_store_sk = ss_store_sk + AND ss_sold_date_sk = d_date_sk AND d_year = 2001 + AND ((ss_hdemo_sk = hd_demo_sk + AND cd_demo_sk = ss_cdemo_sk + AND cd_marital_status = 'M' + AND cd_education_status = 'Advanced Degree' + AND ss_sales_price BETWEEN 100.00 AND 150.00 + AND hd_dep_count = 3 +) OR + (ss_hdemo_sk = hd_demo_sk + AND cd_demo_sk = ss_cdemo_sk + AND cd_marital_status = 'S' + AND cd_education_status = 'College' + AND ss_sales_price BETWEEN 50.00 AND 100.00 + AND hd_dep_count = 1 + ) OR + (ss_hdemo_sk = hd_demo_sk + AND cd_demo_sk = ss_cdemo_sk + AND cd_marital_status = 'W' + AND cd_education_status = '2 yr Degree' + AND ss_sales_price BETWEEN 150.00 AND 200.00 + AND hd_dep_count = 1 + )) + AND ((ss_addr_sk = ca_address_sk + AND ca_country = 'United States' + AND ca_state IN ('TX', 'OH', 'TX') + AND ss_net_profit BETWEEN 100 AND 200 +) OR + (ss_addr_sk = ca_address_sk + AND ca_country = 'United States' + AND ca_state IN ('OR', 'NM', 'KY') + AND ss_net_profit BETWEEN 150 AND 300 + ) OR + (ss_addr_sk = ca_address_sk + AND ca_country = 'United States' + AND ca_state IN ('VA', 'TX', 'MS') + AND ss_net_profit BETWEEN 50 AND 250 + )) diff --git a/src/test/resources/tpcds/q14a.sql b/src/test/resources/tpcds/q14a.sql new file mode 100644 index 000000000..954ddd41b --- /dev/null +++ b/src/test/resources/tpcds/q14a.sql @@ -0,0 +1,120 @@ +WITH cross_items AS +(SELECT i_item_sk ss_item_sk + FROM item, + (SELECT + iss.i_brand_id brand_id, + iss.i_class_id class_id, + iss.i_category_id category_id + FROM store_sales, item iss, date_dim d1 + WHERE ss_item_sk = iss.i_item_sk + AND ss_sold_date_sk = d1.d_date_sk + AND d1.d_year BETWEEN 1999 AND 1999 + 2 + INTERSECT + SELECT + ics.i_brand_id, + ics.i_class_id, + ics.i_category_id + FROM catalog_sales, item ics, date_dim d2 + WHERE cs_item_sk = ics.i_item_sk + AND cs_sold_date_sk = d2.d_date_sk + AND d2.d_year BETWEEN 1999 AND 1999 + 2 + INTERSECT + SELECT + iws.i_brand_id, + iws.i_class_id, + iws.i_category_id + FROM web_sales, item iws, date_dim d3 + WHERE ws_item_sk = iws.i_item_sk + AND ws_sold_date_sk = d3.d_date_sk + AND d3.d_year BETWEEN 1999 AND 1999 + 2) x + WHERE i_brand_id = brand_id + AND i_class_id = class_id + AND i_category_id = category_id +), + avg_sales AS + (SELECT avg(quantity * list_price) average_sales + FROM ( + SELECT + ss_quantity quantity, + ss_list_price list_price + FROM store_sales, date_dim + WHERE ss_sold_date_sk = d_date_sk + AND d_year BETWEEN 1999 AND 2001 + UNION ALL + SELECT + cs_quantity quantity, + cs_list_price list_price + FROM catalog_sales, date_dim + WHERE cs_sold_date_sk = d_date_sk + AND d_year BETWEEN 1999 AND 1999 + 2 + UNION ALL + SELECT + ws_quantity quantity, + ws_list_price list_price + FROM web_sales, date_dim + WHERE ws_sold_date_sk = d_date_sk + AND d_year BETWEEN 1999 AND 1999 + 2) x) +SELECT + channel, + i_brand_id, + i_class_id, + i_category_id, + sum(sales), + sum(number_sales) +FROM ( + SELECT + 'store' channel, + i_brand_id, + i_class_id, + i_category_id, + sum(ss_quantity * ss_list_price) sales, + count(*) number_sales + FROM store_sales, item, date_dim + WHERE ss_item_sk IN (SELECT ss_item_sk + FROM cross_items) + AND ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND d_year = 1999 + 2 + AND d_moy = 11 + GROUP BY i_brand_id, i_class_id, i_category_id + HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales + FROM avg_sales) + UNION ALL + SELECT + 'catalog' channel, + i_brand_id, + i_class_id, + i_category_id, + sum(cs_quantity * cs_list_price) sales, + count(*) number_sales + FROM catalog_sales, item, date_dim + WHERE cs_item_sk IN (SELECT ss_item_sk + FROM cross_items) + AND cs_item_sk = i_item_sk + AND cs_sold_date_sk = d_date_sk + AND d_year = 1999 + 2 + AND d_moy = 11 + GROUP BY i_brand_id, i_class_id, i_category_id + HAVING sum(cs_quantity * cs_list_price) > (SELECT average_sales FROM avg_sales) + UNION ALL + SELECT + 'web' channel, + i_brand_id, + i_class_id, + i_category_id, + sum(ws_quantity * ws_list_price) sales, + count(*) number_sales + FROM web_sales, item, date_dim + WHERE ws_item_sk IN (SELECT ss_item_sk + FROM cross_items) + AND ws_item_sk = i_item_sk + AND ws_sold_date_sk = d_date_sk + AND d_year = 1999 + 2 + AND d_moy = 11 + GROUP BY i_brand_id, i_class_id, i_category_id + HAVING sum(ws_quantity * ws_list_price) > (SELECT average_sales + FROM avg_sales) + ) y +GROUP BY ROLLUP (channel, i_brand_id, i_class_id, i_category_id) +ORDER BY channel, i_brand_id, i_class_id, i_category_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q14b.sql b/src/test/resources/tpcds/q14b.sql new file mode 100644 index 000000000..929a8484b --- /dev/null +++ b/src/test/resources/tpcds/q14b.sql @@ -0,0 +1,95 @@ +WITH cross_items AS +(SELECT i_item_sk ss_item_sk + FROM item, + (SELECT + iss.i_brand_id brand_id, + iss.i_class_id class_id, + iss.i_category_id category_id + FROM store_sales, item iss, date_dim d1 + WHERE ss_item_sk = iss.i_item_sk + AND ss_sold_date_sk = d1.d_date_sk + AND d1.d_year BETWEEN 1999 AND 1999 + 2 + INTERSECT + SELECT + ics.i_brand_id, + ics.i_class_id, + ics.i_category_id + FROM catalog_sales, item ics, date_dim d2 + WHERE cs_item_sk = ics.i_item_sk + AND cs_sold_date_sk = d2.d_date_sk + AND d2.d_year BETWEEN 1999 AND 1999 + 2 + INTERSECT + SELECT + iws.i_brand_id, + iws.i_class_id, + iws.i_category_id + FROM web_sales, item iws, date_dim d3 + WHERE ws_item_sk = iws.i_item_sk + AND ws_sold_date_sk = d3.d_date_sk + AND d3.d_year BETWEEN 1999 AND 1999 + 2) x + WHERE i_brand_id = brand_id + AND i_class_id = class_id + AND i_category_id = category_id +), + avg_sales AS + (SELECT avg(quantity * list_price) average_sales + FROM (SELECT + ss_quantity quantity, + ss_list_price list_price + FROM store_sales, date_dim + WHERE ss_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2 + UNION ALL + SELECT + cs_quantity quantity, + cs_list_price list_price + FROM catalog_sales, date_dim + WHERE cs_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2 + UNION ALL + SELECT + ws_quantity quantity, + ws_list_price list_price + FROM web_sales, date_dim + WHERE ws_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2) x) +SELECT * +FROM + (SELECT + 'store' channel, + i_brand_id, + i_class_id, + i_category_id, + sum(ss_quantity * ss_list_price) sales, + count(*) number_sales + FROM store_sales, item, date_dim + WHERE ss_item_sk IN (SELECT ss_item_sk + FROM cross_items) + AND ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND d_week_seq = (SELECT d_week_seq + FROM date_dim + WHERE d_year = 1999 + 1 AND d_moy = 12 AND d_dom = 11) + GROUP BY i_brand_id, i_class_id, i_category_id + HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales + FROM avg_sales)) this_year, + (SELECT + 'store' channel, + i_brand_id, + i_class_id, + i_category_id, + sum(ss_quantity * ss_list_price) sales, + count(*) number_sales + FROM store_sales, item, date_dim + WHERE ss_item_sk IN (SELECT ss_item_sk + FROM cross_items) + AND ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND d_week_seq = (SELECT d_week_seq + FROM date_dim + WHERE d_year = 1999 AND d_moy = 12 AND d_dom = 11) + GROUP BY i_brand_id, i_class_id, i_category_id + HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales + FROM avg_sales)) last_year +WHERE this_year.i_brand_id = last_year.i_brand_id + AND this_year.i_class_id = last_year.i_class_id + AND this_year.i_category_id = last_year.i_category_id +ORDER BY this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q15.sql b/src/test/resources/tpcds/q15.sql new file mode 100644 index 000000000..b8182e23b --- /dev/null +++ b/src/test/resources/tpcds/q15.sql @@ -0,0 +1,15 @@ +SELECT + ca_zip, + sum(cs_sales_price) +FROM catalog_sales, customer, customer_address, date_dim +WHERE cs_bill_customer_sk = c_customer_sk + AND c_current_addr_sk = ca_address_sk + AND (substr(ca_zip, 1, 5) IN ('85669', '86197', '88274', '83405', '86475', + '85392', '85460', '80348', '81792') + OR ca_state IN ('CA', 'WA', 'GA') + OR cs_sales_price > 500) + AND cs_sold_date_sk = d_date_sk + AND d_qoy = 2 AND d_year = 2001 +GROUP BY ca_zip +ORDER BY ca_zip +LIMIT 100 diff --git a/src/test/resources/tpcds/q16.sql b/src/test/resources/tpcds/q16.sql new file mode 100644 index 000000000..732ad0d84 --- /dev/null +++ b/src/test/resources/tpcds/q16.sql @@ -0,0 +1,23 @@ +SELECT + count(DISTINCT cs_order_number) AS `order count `, + sum(cs_ext_ship_cost) AS `total shipping cost `, + sum(cs_net_profit) AS `total net profit ` +FROM + catalog_sales cs1, date_dim, customer_address, call_center +WHERE + d_date BETWEEN '2002-02-01' AND (CAST('2002-02-01' AS DATE) + INTERVAL 60 days) + AND cs1.cs_ship_date_sk = d_date_sk + AND cs1.cs_ship_addr_sk = ca_address_sk + AND ca_state = 'GA' + AND cs1.cs_call_center_sk = cc_call_center_sk + AND cc_county IN + ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County') + AND EXISTS(SELECT * + FROM catalog_sales cs2 + WHERE cs1.cs_order_number = cs2.cs_order_number + AND cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) + AND NOT EXISTS(SELECT * + FROM catalog_returns cr1 + WHERE cs1.cs_order_number = cr1.cr_order_number) +ORDER BY count(DISTINCT cs_order_number) +LIMIT 100 diff --git a/src/test/resources/tpcds/q17.sql b/src/test/resources/tpcds/q17.sql new file mode 100644 index 000000000..4d647f795 --- /dev/null +++ b/src/test/resources/tpcds/q17.sql @@ -0,0 +1,33 @@ +SELECT + i_item_id, + i_item_desc, + s_state, + count(ss_quantity) AS store_sales_quantitycount, + avg(ss_quantity) AS store_sales_quantityave, + stddev_samp(ss_quantity) AS store_sales_quantitystdev, + stddev_samp(ss_quantity) / avg(ss_quantity) AS store_sales_quantitycov, + count(sr_return_quantity) as_store_returns_quantitycount, + avg(sr_return_quantity) as_store_returns_quantityave, + stddev_samp(sr_return_quantity) as_store_returns_quantitystdev, + stddev_samp(sr_return_quantity) / avg(sr_return_quantity) AS store_returns_quantitycov, + count(cs_quantity) AS catalog_sales_quantitycount, + avg(cs_quantity) AS catalog_sales_quantityave, + stddev_samp(cs_quantity) / avg(cs_quantity) AS catalog_sales_quantitystdev, + stddev_samp(cs_quantity) / avg(cs_quantity) AS catalog_sales_quantitycov +FROM store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, store, item +WHERE d1.d_quarter_name = '2001Q1' + AND d1.d_date_sk = ss_sold_date_sk + AND i_item_sk = ss_item_sk + AND s_store_sk = ss_store_sk + AND ss_customer_sk = sr_customer_sk + AND ss_item_sk = sr_item_sk + AND ss_ticket_number = sr_ticket_number + AND sr_returned_date_sk = d2.d_date_sk + AND d2.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3') + AND sr_customer_sk = cs_bill_customer_sk + AND sr_item_sk = cs_item_sk + AND cs_sold_date_sk = d3.d_date_sk + AND d3.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3') +GROUP BY i_item_id, i_item_desc, s_state +ORDER BY i_item_id, i_item_desc, s_state +LIMIT 100 diff --git a/src/test/resources/tpcds/q18.sql b/src/test/resources/tpcds/q18.sql new file mode 100644 index 000000000..4055c80fd --- /dev/null +++ b/src/test/resources/tpcds/q18.sql @@ -0,0 +1,28 @@ +SELECT + i_item_id, + ca_country, + ca_state, + ca_county, + avg(cast(cs_quantity AS DECIMAL(12, 2))) agg1, + avg(cast(cs_list_price AS DECIMAL(12, 2))) agg2, + avg(cast(cs_coupon_amt AS DECIMAL(12, 2))) agg3, + avg(cast(cs_sales_price AS DECIMAL(12, 2))) agg4, + avg(cast(cs_net_profit AS DECIMAL(12, 2))) agg5, + avg(cast(c_birth_year AS DECIMAL(12, 2))) agg6, + avg(cast(cd1.cd_dep_count AS DECIMAL(12, 2))) agg7 +FROM catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item +WHERE cs_sold_date_sk = d_date_sk AND + cs_item_sk = i_item_sk AND + cs_bill_cdemo_sk = cd1.cd_demo_sk AND + cs_bill_customer_sk = c_customer_sk AND + cd1.cd_gender = 'F' AND + cd1.cd_education_status = 'Unknown' AND + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_addr_sk = ca_address_sk AND + c_birth_month IN (1, 6, 8, 9, 12, 2) AND + d_year = 1998 AND + ca_state IN ('MS', 'IN', 'ND', 'OK', 'NM', 'VA', 'MS') +GROUP BY ROLLUP (i_item_id, ca_country, ca_state, ca_county) +ORDER BY ca_country, ca_state, ca_county, i_item_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q19.sql b/src/test/resources/tpcds/q19.sql new file mode 100644 index 000000000..e38ab7f26 --- /dev/null +++ b/src/test/resources/tpcds/q19.sql @@ -0,0 +1,19 @@ +SELECT + i_brand_id brand_id, + i_brand brand, + i_manufact_id, + i_manufact, + sum(ss_ext_sales_price) ext_price +FROM date_dim, store_sales, item, customer, customer_address, store +WHERE d_date_sk = ss_sold_date_sk + AND ss_item_sk = i_item_sk + AND i_manager_id = 8 + AND d_moy = 11 + AND d_year = 1998 + AND ss_customer_sk = c_customer_sk + AND c_current_addr_sk = ca_address_sk + AND substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5) + AND ss_store_sk = s_store_sk +GROUP BY i_brand, i_brand_id, i_manufact_id, i_manufact +ORDER BY ext_price DESC, brand, brand_id, i_manufact_id, i_manufact +LIMIT 100 diff --git a/src/test/resources/tpcds/q2.sql b/src/test/resources/tpcds/q2.sql new file mode 100644 index 000000000..52c0e90c4 --- /dev/null +++ b/src/test/resources/tpcds/q2.sql @@ -0,0 +1,81 @@ +WITH wscs AS +( SELECT + sold_date_sk, + sales_price + FROM (SELECT + ws_sold_date_sk sold_date_sk, + ws_ext_sales_price sales_price + FROM web_sales) x + UNION ALL + (SELECT + cs_sold_date_sk sold_date_sk, + cs_ext_sales_price sales_price + FROM catalog_sales)), + wswscs AS + ( SELECT + d_week_seq, + sum(CASE WHEN (d_day_name = 'Sunday') + THEN sales_price + ELSE NULL END) + sun_sales, + sum(CASE WHEN (d_day_name = 'Monday') + THEN sales_price + ELSE NULL END) + mon_sales, + sum(CASE WHEN (d_day_name = 'Tuesday') + THEN sales_price + ELSE NULL END) + tue_sales, + sum(CASE WHEN (d_day_name = 'Wednesday') + THEN sales_price + ELSE NULL END) + wed_sales, + sum(CASE WHEN (d_day_name = 'Thursday') + THEN sales_price + ELSE NULL END) + thu_sales, + sum(CASE WHEN (d_day_name = 'Friday') + THEN sales_price + ELSE NULL END) + fri_sales, + sum(CASE WHEN (d_day_name = 'Saturday') + THEN sales_price + ELSE NULL END) + sat_sales + FROM wscs, date_dim + WHERE d_date_sk = sold_date_sk + GROUP BY d_week_seq) +SELECT + d_week_seq1, + round(sun_sales1 / sun_sales2, 2), + round(mon_sales1 / mon_sales2, 2), + round(tue_sales1 / tue_sales2, 2), + round(wed_sales1 / wed_sales2, 2), + round(thu_sales1 / thu_sales2, 2), + round(fri_sales1 / fri_sales2, 2), + round(sat_sales1 / sat_sales2, 2) +FROM + (SELECT + wswscs.d_week_seq d_week_seq1, + sun_sales sun_sales1, + mon_sales mon_sales1, + tue_sales tue_sales1, + wed_sales wed_sales1, + thu_sales thu_sales1, + fri_sales fri_sales1, + sat_sales sat_sales1 + FROM wswscs, date_dim + WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001) y, + (SELECT + wswscs.d_week_seq d_week_seq2, + sun_sales sun_sales2, + mon_sales mon_sales2, + tue_sales tue_sales2, + wed_sales wed_sales2, + thu_sales thu_sales2, + fri_sales fri_sales2, + sat_sales sat_sales2 + FROM wswscs, date_dim + WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001 + 1) z +WHERE d_week_seq1 = d_week_seq2 - 53 +ORDER BY d_week_seq1 diff --git a/src/test/resources/tpcds/q20.sql b/src/test/resources/tpcds/q20.sql new file mode 100644 index 000000000..7ac6c7a75 --- /dev/null +++ b/src/test/resources/tpcds/q20.sql @@ -0,0 +1,18 @@ +SELECT + i_item_desc, + i_category, + i_class, + i_current_price, + sum(cs_ext_sales_price) AS itemrevenue, + sum(cs_ext_sales_price) * 100 / sum(sum(cs_ext_sales_price)) + OVER + (PARTITION BY i_class) AS revenueratio +FROM catalog_sales, item, date_dim +WHERE cs_item_sk = i_item_sk + AND i_category IN ('Sports', 'Books', 'Home') + AND cs_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('1999-02-22' AS DATE) +AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) +GROUP BY i_item_id, i_item_desc, i_category, i_class, i_current_price +ORDER BY i_category, i_class, i_item_id, i_item_desc, revenueratio +LIMIT 100 diff --git a/src/test/resources/tpcds/q21.sql b/src/test/resources/tpcds/q21.sql new file mode 100644 index 000000000..550881143 --- /dev/null +++ b/src/test/resources/tpcds/q21.sql @@ -0,0 +1,25 @@ +SELECT * +FROM ( + SELECT + w_warehouse_name, + i_item_id, + sum(CASE WHEN (cast(d_date AS DATE) < cast('2000-03-11' AS DATE)) + THEN inv_quantity_on_hand + ELSE 0 END) AS inv_before, + sum(CASE WHEN (cast(d_date AS DATE) >= cast('2000-03-11' AS DATE)) + THEN inv_quantity_on_hand + ELSE 0 END) AS inv_after + FROM inventory, warehouse, item, date_dim + WHERE i_current_price BETWEEN 0.99 AND 1.49 + AND i_item_sk = inv_item_sk + AND inv_warehouse_sk = w_warehouse_sk + AND inv_date_sk = d_date_sk + AND d_date BETWEEN (cast('2000-03-11' AS DATE) - INTERVAL 30 days) + AND (cast('2000-03-11' AS DATE) + INTERVAL 30 days) + GROUP BY w_warehouse_name, i_item_id) x +WHERE (CASE WHEN inv_before > 0 + THEN inv_after / inv_before + ELSE NULL + END) BETWEEN 2.0 / 3.0 AND 3.0 / 2.0 +ORDER BY w_warehouse_name, i_item_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q22.sql b/src/test/resources/tpcds/q22.sql new file mode 100644 index 000000000..add3b41f7 --- /dev/null +++ b/src/test/resources/tpcds/q22.sql @@ -0,0 +1,14 @@ +SELECT + i_product_name, + i_brand, + i_class, + i_category, + avg(inv_quantity_on_hand) qoh +FROM inventory, date_dim, item, warehouse +WHERE inv_date_sk = d_date_sk + AND inv_item_sk = i_item_sk + AND inv_warehouse_sk = w_warehouse_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 +GROUP BY ROLLUP (i_product_name, i_brand, i_class, i_category) +ORDER BY qoh, i_product_name, i_brand, i_class, i_category +LIMIT 100 diff --git a/src/test/resources/tpcds/q23a.sql b/src/test/resources/tpcds/q23a.sql new file mode 100644 index 000000000..37791f643 --- /dev/null +++ b/src/test/resources/tpcds/q23a.sql @@ -0,0 +1,53 @@ +WITH frequent_ss_items AS +(SELECT + substr(i_item_desc, 1, 30) itemdesc, + i_item_sk item_sk, + d_date solddate, + count(*) cnt + FROM store_sales, date_dim, item + WHERE ss_sold_date_sk = d_date_sk + AND ss_item_sk = i_item_sk + AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) + GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date + HAVING count(*) > 4), + max_store_sales AS + (SELECT max(csales) tpcds_cmax + FROM (SELECT + c_customer_sk, + sum(ss_quantity * ss_sales_price) csales + FROM store_sales, customer, date_dim + WHERE ss_customer_sk = c_customer_sk + AND ss_sold_date_sk = d_date_sk + AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) + GROUP BY c_customer_sk) x), + best_ss_customer AS + (SELECT + c_customer_sk, + sum(ss_quantity * ss_sales_price) ssales + FROM store_sales, customer + WHERE ss_customer_sk = c_customer_sk + GROUP BY c_customer_sk + HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) * + (SELECT * + FROM max_store_sales)) +SELECT sum(sales) +FROM ((SELECT cs_quantity * cs_list_price sales +FROM catalog_sales, date_dim +WHERE d_year = 2000 + AND d_moy = 2 + AND cs_sold_date_sk = d_date_sk + AND cs_item_sk IN (SELECT item_sk +FROM frequent_ss_items) + AND cs_bill_customer_sk IN (SELECT c_customer_sk +FROM best_ss_customer)) + UNION ALL + (SELECT ws_quantity * ws_list_price sales + FROM web_sales, date_dim + WHERE d_year = 2000 + AND d_moy = 2 + AND ws_sold_date_sk = d_date_sk + AND ws_item_sk IN (SELECT item_sk + FROM frequent_ss_items) + AND ws_bill_customer_sk IN (SELECT c_customer_sk + FROM best_ss_customer))) y +LIMIT 100 diff --git a/src/test/resources/tpcds/q23b.sql b/src/test/resources/tpcds/q23b.sql new file mode 100644 index 000000000..01150197a --- /dev/null +++ b/src/test/resources/tpcds/q23b.sql @@ -0,0 +1,68 @@ +WITH frequent_ss_items AS +(SELECT + substr(i_item_desc, 1, 30) itemdesc, + i_item_sk item_sk, + d_date solddate, + count(*) cnt + FROM store_sales, date_dim, item + WHERE ss_sold_date_sk = d_date_sk + AND ss_item_sk = i_item_sk + AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) + GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date + HAVING count(*) > 4), + max_store_sales AS + (SELECT max(csales) tpcds_cmax + FROM (SELECT + c_customer_sk, + sum(ss_quantity * ss_sales_price) csales + FROM store_sales, customer, date_dim + WHERE ss_customer_sk = c_customer_sk + AND ss_sold_date_sk = d_date_sk + AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) + GROUP BY c_customer_sk) x), + best_ss_customer AS + (SELECT + c_customer_sk, + sum(ss_quantity * ss_sales_price) ssales + FROM store_sales + , customer + WHERE ss_customer_sk = c_customer_sk + GROUP BY c_customer_sk + HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) * + (SELECT * + FROM max_store_sales)) +SELECT + c_last_name, + c_first_name, + sales +FROM ((SELECT + c_last_name, + c_first_name, + sum(cs_quantity * cs_list_price) sales +FROM catalog_sales, customer, date_dim +WHERE d_year = 2000 + AND d_moy = 2 + AND cs_sold_date_sk = d_date_sk + AND cs_item_sk IN (SELECT item_sk +FROM frequent_ss_items) + AND cs_bill_customer_sk IN (SELECT c_customer_sk +FROM best_ss_customer) + AND cs_bill_customer_sk = c_customer_sk +GROUP BY c_last_name, c_first_name) + UNION ALL + (SELECT + c_last_name, + c_first_name, + sum(ws_quantity * ws_list_price) sales + FROM web_sales, customer, date_dim + WHERE d_year = 2000 + AND d_moy = 2 + AND ws_sold_date_sk = d_date_sk + AND ws_item_sk IN (SELECT item_sk + FROM frequent_ss_items) + AND ws_bill_customer_sk IN (SELECT c_customer_sk + FROM best_ss_customer) + AND ws_bill_customer_sk = c_customer_sk + GROUP BY c_last_name, c_first_name)) y +ORDER BY c_last_name, c_first_name, sales +LIMIT 100 diff --git a/src/test/resources/tpcds/q24a.sql b/src/test/resources/tpcds/q24a.sql new file mode 100644 index 000000000..bcc189486 --- /dev/null +++ b/src/test/resources/tpcds/q24a.sql @@ -0,0 +1,34 @@ +WITH ssales AS +(SELECT + c_last_name, + c_first_name, + s_store_name, + ca_state, + s_state, + i_color, + i_current_price, + i_manager_id, + i_units, + i_size, + sum(ss_net_paid) netpaid + FROM store_sales, store_returns, store, item, customer, customer_address + WHERE ss_ticket_number = sr_ticket_number + AND ss_item_sk = sr_item_sk + AND ss_customer_sk = c_customer_sk + AND ss_item_sk = i_item_sk + AND ss_store_sk = s_store_sk + AND c_birth_country = upper(ca_country) + AND s_zip = ca_zip + AND s_market_id = 8 + GROUP BY c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, + i_current_price, i_manager_id, i_units, i_size) +SELECT + c_last_name, + c_first_name, + s_store_name, + sum(netpaid) paid +FROM ssales +WHERE i_color = 'pale' +GROUP BY c_last_name, c_first_name, s_store_name +HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid) +FROM ssales) diff --git a/src/test/resources/tpcds/q24b.sql b/src/test/resources/tpcds/q24b.sql new file mode 100644 index 000000000..830eb670b --- /dev/null +++ b/src/test/resources/tpcds/q24b.sql @@ -0,0 +1,34 @@ +WITH ssales AS +(SELECT + c_last_name, + c_first_name, + s_store_name, + ca_state, + s_state, + i_color, + i_current_price, + i_manager_id, + i_units, + i_size, + sum(ss_net_paid) netpaid + FROM store_sales, store_returns, store, item, customer, customer_address + WHERE ss_ticket_number = sr_ticket_number + AND ss_item_sk = sr_item_sk + AND ss_customer_sk = c_customer_sk + AND ss_item_sk = i_item_sk + AND ss_store_sk = s_store_sk + AND c_birth_country = upper(ca_country) + AND s_zip = ca_zip + AND s_market_id = 8 + GROUP BY c_last_name, c_first_name, s_store_name, ca_state, s_state, + i_color, i_current_price, i_manager_id, i_units, i_size) +SELECT + c_last_name, + c_first_name, + s_store_name, + sum(netpaid) paid +FROM ssales +WHERE i_color = 'chiffon' +GROUP BY c_last_name, c_first_name, s_store_name +HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid) +FROM ssales) diff --git a/src/test/resources/tpcds/q25.sql b/src/test/resources/tpcds/q25.sql new file mode 100644 index 000000000..a4d78a3c5 --- /dev/null +++ b/src/test/resources/tpcds/q25.sql @@ -0,0 +1,33 @@ +SELECT + i_item_id, + i_item_desc, + s_store_id, + s_store_name, + sum(ss_net_profit) AS store_sales_profit, + sum(sr_net_loss) AS store_returns_loss, + sum(cs_net_profit) AS catalog_sales_profit +FROM + store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, + store, item +WHERE + d1.d_moy = 4 + AND d1.d_year = 2001 + AND d1.d_date_sk = ss_sold_date_sk + AND i_item_sk = ss_item_sk + AND s_store_sk = ss_store_sk + AND ss_customer_sk = sr_customer_sk + AND ss_item_sk = sr_item_sk + AND ss_ticket_number = sr_ticket_number + AND sr_returned_date_sk = d2.d_date_sk + AND d2.d_moy BETWEEN 4 AND 10 + AND d2.d_year = 2001 + AND sr_customer_sk = cs_bill_customer_sk + AND sr_item_sk = cs_item_sk + AND cs_sold_date_sk = d3.d_date_sk + AND d3.d_moy BETWEEN 4 AND 10 + AND d3.d_year = 2001 +GROUP BY + i_item_id, i_item_desc, s_store_id, s_store_name +ORDER BY + i_item_id, i_item_desc, s_store_id, s_store_name +LIMIT 100 \ No newline at end of file diff --git a/src/test/resources/tpcds/q26.sql b/src/test/resources/tpcds/q26.sql new file mode 100644 index 000000000..6d395a1d7 --- /dev/null +++ b/src/test/resources/tpcds/q26.sql @@ -0,0 +1,19 @@ +SELECT + i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 +FROM catalog_sales, customer_demographics, date_dim, item, promotion +WHERE cs_sold_date_sk = d_date_sk AND + cs_item_sk = i_item_sk AND + cs_bill_cdemo_sk = cd_demo_sk AND + cs_promo_sk = p_promo_sk AND + cd_gender = 'M' AND + cd_marital_status = 'S' AND + cd_education_status = 'College' AND + (p_channel_email = 'N' OR p_channel_event = 'N') AND + d_year = 2000 +GROUP BY i_item_id +ORDER BY i_item_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q27.sql b/src/test/resources/tpcds/q27.sql new file mode 100644 index 000000000..b0e2fd95f --- /dev/null +++ b/src/test/resources/tpcds/q27.sql @@ -0,0 +1,21 @@ +SELECT + i_item_id, + s_state, + grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 +FROM store_sales, customer_demographics, date_dim, store, item +WHERE ss_sold_date_sk = d_date_sk AND + ss_item_sk = i_item_sk AND + ss_store_sk = s_store_sk AND + ss_cdemo_sk = cd_demo_sk AND + cd_gender = 'M' AND + cd_marital_status = 'S' AND + cd_education_status = 'College' AND + d_year = 2002 AND + s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN') +GROUP BY ROLLUP (i_item_id, s_state) +ORDER BY i_item_id, s_state +LIMIT 100 diff --git a/src/test/resources/tpcds/q28.sql b/src/test/resources/tpcds/q28.sql new file mode 100644 index 000000000..f34c2bb0e --- /dev/null +++ b/src/test/resources/tpcds/q28.sql @@ -0,0 +1,56 @@ +SELECT * +FROM (SELECT + avg(ss_list_price) B1_LP, + count(ss_list_price) B1_CNT, + count(DISTINCT ss_list_price) B1_CNTD +FROM store_sales +WHERE ss_quantity BETWEEN 0 AND 5 + AND (ss_list_price BETWEEN 8 AND 8 + 10 + OR ss_coupon_amt BETWEEN 459 AND 459 + 1000 + OR ss_wholesale_cost BETWEEN 57 AND 57 + 20)) B1, + (SELECT + avg(ss_list_price) B2_LP, + count(ss_list_price) B2_CNT, + count(DISTINCT ss_list_price) B2_CNTD + FROM store_sales + WHERE ss_quantity BETWEEN 6 AND 10 + AND (ss_list_price BETWEEN 90 AND 90 + 10 + OR ss_coupon_amt BETWEEN 2323 AND 2323 + 1000 + OR ss_wholesale_cost BETWEEN 31 AND 31 + 20)) B2, + (SELECT + avg(ss_list_price) B3_LP, + count(ss_list_price) B3_CNT, + count(DISTINCT ss_list_price) B3_CNTD + FROM store_sales + WHERE ss_quantity BETWEEN 11 AND 15 + AND (ss_list_price BETWEEN 142 AND 142 + 10 + OR ss_coupon_amt BETWEEN 12214 AND 12214 + 1000 + OR ss_wholesale_cost BETWEEN 79 AND 79 + 20)) B3, + (SELECT + avg(ss_list_price) B4_LP, + count(ss_list_price) B4_CNT, + count(DISTINCT ss_list_price) B4_CNTD + FROM store_sales + WHERE ss_quantity BETWEEN 16 AND 20 + AND (ss_list_price BETWEEN 135 AND 135 + 10 + OR ss_coupon_amt BETWEEN 6071 AND 6071 + 1000 + OR ss_wholesale_cost BETWEEN 38 AND 38 + 20)) B4, + (SELECT + avg(ss_list_price) B5_LP, + count(ss_list_price) B5_CNT, + count(DISTINCT ss_list_price) B5_CNTD + FROM store_sales + WHERE ss_quantity BETWEEN 21 AND 25 + AND (ss_list_price BETWEEN 122 AND 122 + 10 + OR ss_coupon_amt BETWEEN 836 AND 836 + 1000 + OR ss_wholesale_cost BETWEEN 17 AND 17 + 20)) B5, + (SELECT + avg(ss_list_price) B6_LP, + count(ss_list_price) B6_CNT, + count(DISTINCT ss_list_price) B6_CNTD + FROM store_sales + WHERE ss_quantity BETWEEN 26 AND 30 + AND (ss_list_price BETWEEN 154 AND 154 + 10 + OR ss_coupon_amt BETWEEN 7326 AND 7326 + 1000 + OR ss_wholesale_cost BETWEEN 7 AND 7 + 20)) B6 +LIMIT 100 diff --git a/src/test/resources/tpcds/q29.sql b/src/test/resources/tpcds/q29.sql new file mode 100644 index 000000000..3f1fd553f --- /dev/null +++ b/src/test/resources/tpcds/q29.sql @@ -0,0 +1,32 @@ +SELECT + i_item_id, + i_item_desc, + s_store_id, + s_store_name, + sum(ss_quantity) AS store_sales_quantity, + sum(sr_return_quantity) AS store_returns_quantity, + sum(cs_quantity) AS catalog_sales_quantity +FROM + store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, + date_dim d3, store, item +WHERE + d1.d_moy = 9 + AND d1.d_year = 1999 + AND d1.d_date_sk = ss_sold_date_sk + AND i_item_sk = ss_item_sk + AND s_store_sk = ss_store_sk + AND ss_customer_sk = sr_customer_sk + AND ss_item_sk = sr_item_sk + AND ss_ticket_number = sr_ticket_number + AND sr_returned_date_sk = d2.d_date_sk + AND d2.d_moy BETWEEN 9 AND 9 + 3 + AND d2.d_year = 1999 + AND sr_customer_sk = cs_bill_customer_sk + AND sr_item_sk = cs_item_sk + AND cs_sold_date_sk = d3.d_date_sk + AND d3.d_year IN (1999, 1999 + 1, 1999 + 2) +GROUP BY + i_item_id, i_item_desc, s_store_id, s_store_name +ORDER BY + i_item_id, i_item_desc, s_store_id, s_store_name +LIMIT 100 diff --git a/src/test/resources/tpcds/q3.sql b/src/test/resources/tpcds/q3.sql new file mode 100644 index 000000000..181509df9 --- /dev/null +++ b/src/test/resources/tpcds/q3.sql @@ -0,0 +1,13 @@ +SELECT + dt.d_year, + item.i_brand_id brand_id, + item.i_brand brand, + SUM(ss_ext_sales_price) sum_agg +FROM date_dim dt, store_sales, item +WHERE dt.d_date_sk = store_sales.ss_sold_date_sk + AND store_sales.ss_item_sk = item.i_item_sk + AND item.i_manufact_id = 128 + AND dt.d_moy = 11 +GROUP BY dt.d_year, item.i_brand, item.i_brand_id +ORDER BY dt.d_year, sum_agg DESC, brand_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q30.sql b/src/test/resources/tpcds/q30.sql new file mode 100644 index 000000000..986bef566 --- /dev/null +++ b/src/test/resources/tpcds/q30.sql @@ -0,0 +1,35 @@ +WITH customer_total_return AS +(SELECT + wr_returning_customer_sk AS ctr_customer_sk, + ca_state AS ctr_state, + sum(wr_return_amt) AS ctr_total_return + FROM web_returns, date_dim, customer_address + WHERE wr_returned_date_sk = d_date_sk + AND d_year = 2002 + AND wr_returning_addr_sk = ca_address_sk + GROUP BY wr_returning_customer_sk, ca_state) +SELECT + c_customer_id, + c_salutation, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_day, + c_birth_month, + c_birth_year, + c_birth_country, + c_login, + c_email_address, + c_last_review_date, + ctr_total_return +FROM customer_total_return ctr1, customer_address, customer +WHERE ctr1.ctr_total_return > (SELECT avg(ctr_total_return) * 1.2 +FROM customer_total_return ctr2 +WHERE ctr1.ctr_state = ctr2.ctr_state) + AND ca_address_sk = c_current_addr_sk + AND ca_state = 'GA' + AND ctr1.ctr_customer_sk = c_customer_sk +ORDER BY c_customer_id, c_salutation, c_first_name, c_last_name, c_preferred_cust_flag + , c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address + , c_last_review_date, ctr_total_return +LIMIT 100 diff --git a/src/test/resources/tpcds/q31.sql b/src/test/resources/tpcds/q31.sql new file mode 100644 index 000000000..3e543d543 --- /dev/null +++ b/src/test/resources/tpcds/q31.sql @@ -0,0 +1,60 @@ +WITH ss AS +(SELECT + ca_county, + d_qoy, + d_year, + sum(ss_ext_sales_price) AS store_sales + FROM store_sales, date_dim, customer_address + WHERE ss_sold_date_sk = d_date_sk + AND ss_addr_sk = ca_address_sk + GROUP BY ca_county, d_qoy, d_year), + ws AS + (SELECT + ca_county, + d_qoy, + d_year, + sum(ws_ext_sales_price) AS web_sales + FROM web_sales, date_dim, customer_address + WHERE ws_sold_date_sk = d_date_sk + AND ws_bill_addr_sk = ca_address_sk + GROUP BY ca_county, d_qoy, d_year) +SELECT + ss1.ca_county, + ss1.d_year, + ws2.web_sales / ws1.web_sales web_q1_q2_increase, + ss2.store_sales / ss1.store_sales store_q1_q2_increase, + ws3.web_sales / ws2.web_sales web_q2_q3_increase, + ss3.store_sales / ss2.store_sales store_q2_q3_increase +FROM + ss ss1, ss ss2, ss ss3, ws ws1, ws ws2, ws ws3 +WHERE + ss1.d_qoy = 1 + AND ss1.d_year = 2000 + AND ss1.ca_county = ss2.ca_county + AND ss2.d_qoy = 2 + AND ss2.d_year = 2000 + AND ss2.ca_county = ss3.ca_county + AND ss3.d_qoy = 3 + AND ss3.d_year = 2000 + AND ss1.ca_county = ws1.ca_county + AND ws1.d_qoy = 1 + AND ws1.d_year = 2000 + AND ws1.ca_county = ws2.ca_county + AND ws2.d_qoy = 2 + AND ws2.d_year = 2000 + AND ws1.ca_county = ws3.ca_county + AND ws3.d_qoy = 3 + AND ws3.d_year = 2000 + AND CASE WHEN ws1.web_sales > 0 + THEN ws2.web_sales / ws1.web_sales + ELSE NULL END + > CASE WHEN ss1.store_sales > 0 + THEN ss2.store_sales / ss1.store_sales + ELSE NULL END + AND CASE WHEN ws2.web_sales > 0 + THEN ws3.web_sales / ws2.web_sales + ELSE NULL END + > CASE WHEN ss2.store_sales > 0 + THEN ss3.store_sales / ss2.store_sales + ELSE NULL END +ORDER BY ss1.ca_county diff --git a/src/test/resources/tpcds/q32.sql b/src/test/resources/tpcds/q32.sql new file mode 100644 index 000000000..a6f59ecb8 --- /dev/null +++ b/src/test/resources/tpcds/q32.sql @@ -0,0 +1,15 @@ +SELECT 1 AS `excess discount amount ` +FROM + catalog_sales, item, date_dim +WHERE + i_manufact_id = 977 + AND i_item_sk = cs_item_sk + AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + interval 90 days) + AND d_date_sk = cs_sold_date_sk + AND cs_ext_discount_amt > ( + SELECT 1.3 * avg(cs_ext_discount_amt) + FROM catalog_sales, date_dim + WHERE cs_item_sk = i_item_sk + AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + interval 90 days) + AND d_date_sk = cs_sold_date_sk) +LIMIT 100 diff --git a/src/test/resources/tpcds/q33.sql b/src/test/resources/tpcds/q33.sql new file mode 100644 index 000000000..d24856aa5 --- /dev/null +++ b/src/test/resources/tpcds/q33.sql @@ -0,0 +1,65 @@ +WITH ss AS ( + SELECT + i_manufact_id, + sum(ss_ext_sales_price) total_sales + FROM + store_sales, date_dim, customer_address, item + WHERE + i_manufact_id IN (SELECT i_manufact_id + FROM item + WHERE i_category IN ('Electronics')) + AND ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND d_year = 1998 + AND d_moy = 5 + AND ss_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_manufact_id), cs AS +(SELECT + i_manufact_id, + sum(cs_ext_sales_price) total_sales + FROM catalog_sales, date_dim, customer_address, item + WHERE + i_manufact_id IN ( + SELECT i_manufact_id + FROM item + WHERE + i_category IN ('Electronics')) + AND cs_item_sk = i_item_sk + AND cs_sold_date_sk = d_date_sk + AND d_year = 1998 + AND d_moy = 5 + AND cs_bill_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_manufact_id), + ws AS ( + SELECT + i_manufact_id, + sum(ws_ext_sales_price) total_sales + FROM + web_sales, date_dim, customer_address, item + WHERE + i_manufact_id IN (SELECT i_manufact_id + FROM item + WHERE i_category IN ('Electronics')) + AND ws_item_sk = i_item_sk + AND ws_sold_date_sk = d_date_sk + AND d_year = 1998 + AND d_moy = 5 + AND ws_bill_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_manufact_id) +SELECT + i_manufact_id, + sum(total_sales) total_sales +FROM (SELECT * + FROM ss + UNION ALL + SELECT * + FROM cs + UNION ALL + SELECT * + FROM ws) tmp1 +GROUP BY i_manufact_id +ORDER BY total_sales +LIMIT 100 diff --git a/src/test/resources/tpcds/q34.sql b/src/test/resources/tpcds/q34.sql new file mode 100644 index 000000000..33396bf16 --- /dev/null +++ b/src/test/resources/tpcds/q34.sql @@ -0,0 +1,32 @@ +SELECT + c_last_name, + c_first_name, + c_salutation, + c_preferred_cust_flag, + ss_ticket_number, + cnt +FROM + (SELECT + ss_ticket_number, + ss_customer_sk, + count(*) cnt + FROM store_sales, date_dim, store, household_demographics + WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_store_sk = store.s_store_sk + AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + AND (date_dim.d_dom BETWEEN 1 AND 3 OR date_dim.d_dom BETWEEN 25 AND 28) + AND (household_demographics.hd_buy_potential = '>10000' OR + household_demographics.hd_buy_potential = 'unknown') + AND household_demographics.hd_vehicle_count > 0 + AND (CASE WHEN household_demographics.hd_vehicle_count > 0 + THEN household_demographics.hd_dep_count / household_demographics.hd_vehicle_count + ELSE NULL + END) > 1.2 + AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) + AND store.s_county IN + ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', + 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County') + GROUP BY ss_ticket_number, ss_customer_sk) dn, customer +WHERE ss_customer_sk = c_customer_sk + AND cnt BETWEEN 15 AND 20 +ORDER BY c_last_name, c_first_name, c_salutation, c_preferred_cust_flag DESC diff --git a/src/test/resources/tpcds/q35.sql b/src/test/resources/tpcds/q35.sql new file mode 100644 index 000000000..cfe4342d8 --- /dev/null +++ b/src/test/resources/tpcds/q35.sql @@ -0,0 +1,46 @@ +SELECT + ca_state, + cd_gender, + cd_marital_status, + count(*) cnt1, + min(cd_dep_count), + max(cd_dep_count), + avg(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + min(cd_dep_employed_count), + max(cd_dep_employed_count), + avg(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + min(cd_dep_college_count), + max(cd_dep_college_count), + avg(cd_dep_college_count) +FROM + customer c, customer_address ca, customer_demographics +WHERE + c.c_current_addr_sk = ca.ca_address_sk AND + cd_demo_sk = c.c_current_cdemo_sk AND + exists(SELECT * + FROM store_sales, date_dim + WHERE c.c_customer_sk = ss_customer_sk AND + ss_sold_date_sk = d_date_sk AND + d_year = 2002 AND + d_qoy < 4) AND + (exists(SELECT * + FROM web_sales, date_dim + WHERE c.c_customer_sk = ws_bill_customer_sk AND + ws_sold_date_sk = d_date_sk AND + d_year = 2002 AND + d_qoy < 4) OR + exists(SELECT * + FROM catalog_sales, date_dim + WHERE c.c_customer_sk = cs_ship_customer_sk AND + cs_sold_date_sk = d_date_sk AND + d_year = 2002 AND + d_qoy < 4)) +GROUP BY ca_state, cd_gender, cd_marital_status, cd_dep_count, + cd_dep_employed_count, cd_dep_college_count +ORDER BY ca_state, cd_gender, cd_marital_status, cd_dep_count, + cd_dep_employed_count, cd_dep_college_count +LIMIT 100 diff --git a/src/test/resources/tpcds/q36.sql b/src/test/resources/tpcds/q36.sql new file mode 100644 index 000000000..a8f93df76 --- /dev/null +++ b/src/test/resources/tpcds/q36.sql @@ -0,0 +1,26 @@ +SELECT + sum(ss_net_profit) / sum(ss_ext_sales_price) AS gross_margin, + i_category, + i_class, + grouping(i_category) + grouping(i_class) AS lochierarchy, + rank() + OVER ( + PARTITION BY grouping(i_category) + grouping(i_class), + CASE WHEN grouping(i_class) = 0 + THEN i_category END + ORDER BY sum(ss_net_profit) / sum(ss_ext_sales_price) ASC) AS rank_within_parent +FROM + store_sales, date_dim d1, item, store +WHERE + d1.d_year = 2001 + AND d1.d_date_sk = ss_sold_date_sk + AND i_item_sk = ss_item_sk + AND s_store_sk = ss_store_sk + AND s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN') +GROUP BY ROLLUP (i_category, i_class) +ORDER BY + lochierarchy DESC + , CASE WHEN lochierarchy = 0 + THEN i_category END + , rank_within_parent +LIMIT 100 diff --git a/src/test/resources/tpcds/q37.sql b/src/test/resources/tpcds/q37.sql new file mode 100644 index 000000000..11b3821fa --- /dev/null +++ b/src/test/resources/tpcds/q37.sql @@ -0,0 +1,15 @@ +SELECT + i_item_id, + i_item_desc, + i_current_price +FROM item, inventory, date_dim, catalog_sales +WHERE i_current_price BETWEEN 68 AND 68 + 30 + AND inv_item_sk = i_item_sk + AND d_date_sk = inv_date_sk + AND d_date BETWEEN cast('2000-02-01' AS DATE) AND (cast('2000-02-01' AS DATE) + INTERVAL 60 days) + AND i_manufact_id IN (677, 940, 694, 808) + AND inv_quantity_on_hand BETWEEN 100 AND 500 + AND cs_item_sk = i_item_sk +GROUP BY i_item_id, i_item_desc, i_current_price +ORDER BY i_item_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q38.sql b/src/test/resources/tpcds/q38.sql new file mode 100644 index 000000000..1c8d53ee2 --- /dev/null +++ b/src/test/resources/tpcds/q38.sql @@ -0,0 +1,30 @@ +SELECT count(*) +FROM ( + SELECT DISTINCT + c_last_name, + c_first_name, + d_date + FROM store_sales, date_dim, customer + WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_customer_sk = customer.c_customer_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + INTERSECT + SELECT DISTINCT + c_last_name, + c_first_name, + d_date + FROM catalog_sales, date_dim, customer + WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + INTERSECT + SELECT DISTINCT + c_last_name, + c_first_name, + d_date + FROM web_sales, date_dim, customer + WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk + AND web_sales.ws_bill_customer_sk = customer.c_customer_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + ) hot_cust +LIMIT 100 diff --git a/src/test/resources/tpcds/q39a.sql b/src/test/resources/tpcds/q39a.sql new file mode 100644 index 000000000..9fc4c1701 --- /dev/null +++ b/src/test/resources/tpcds/q39a.sql @@ -0,0 +1,47 @@ +WITH inv AS +(SELECT + w_warehouse_name, + w_warehouse_sk, + i_item_sk, + d_moy, + stdev, + mean, + CASE mean + WHEN 0 + THEN NULL + ELSE stdev / mean END cov + FROM (SELECT + w_warehouse_name, + w_warehouse_sk, + i_item_sk, + d_moy, + stddev_samp(inv_quantity_on_hand) stdev, + avg(inv_quantity_on_hand) mean + FROM inventory, item, warehouse, date_dim + WHERE inv_item_sk = i_item_sk + AND inv_warehouse_sk = w_warehouse_sk + AND inv_date_sk = d_date_sk + AND d_year = 2001 + GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy) foo + WHERE CASE mean + WHEN 0 + THEN 0 + ELSE stdev / mean END > 1) +SELECT + inv1.w_warehouse_sk, + inv1.i_item_sk, + inv1.d_moy, + inv1.mean, + inv1.cov, + inv2.w_warehouse_sk, + inv2.i_item_sk, + inv2.d_moy, + inv2.mean, + inv2.cov +FROM inv inv1, inv inv2 +WHERE inv1.i_item_sk = inv2.i_item_sk + AND inv1.w_warehouse_sk = inv2.w_warehouse_sk + AND inv1.d_moy = 1 + AND inv2.d_moy = 1 + 1 +ORDER BY inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov + , inv2.d_moy, inv2.mean, inv2.cov diff --git a/src/test/resources/tpcds/q39b.sql b/src/test/resources/tpcds/q39b.sql new file mode 100644 index 000000000..6f8493029 --- /dev/null +++ b/src/test/resources/tpcds/q39b.sql @@ -0,0 +1,48 @@ +WITH inv AS +(SELECT + w_warehouse_name, + w_warehouse_sk, + i_item_sk, + d_moy, + stdev, + mean, + CASE mean + WHEN 0 + THEN NULL + ELSE stdev / mean END cov + FROM (SELECT + w_warehouse_name, + w_warehouse_sk, + i_item_sk, + d_moy, + stddev_samp(inv_quantity_on_hand) stdev, + avg(inv_quantity_on_hand) mean + FROM inventory, item, warehouse, date_dim + WHERE inv_item_sk = i_item_sk + AND inv_warehouse_sk = w_warehouse_sk + AND inv_date_sk = d_date_sk + AND d_year = 2001 + GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy) foo + WHERE CASE mean + WHEN 0 + THEN 0 + ELSE stdev / mean END > 1) +SELECT + inv1.w_warehouse_sk, + inv1.i_item_sk, + inv1.d_moy, + inv1.mean, + inv1.cov, + inv2.w_warehouse_sk, + inv2.i_item_sk, + inv2.d_moy, + inv2.mean, + inv2.cov +FROM inv inv1, inv inv2 +WHERE inv1.i_item_sk = inv2.i_item_sk + AND inv1.w_warehouse_sk = inv2.w_warehouse_sk + AND inv1.d_moy = 1 + AND inv2.d_moy = 1 + 1 + AND inv1.cov > 1.5 +ORDER BY inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov + , inv2.d_moy, inv2.mean, inv2.cov diff --git a/src/test/resources/tpcds/q4.sql b/src/test/resources/tpcds/q4.sql new file mode 100644 index 000000000..b9f27fbc9 --- /dev/null +++ b/src/test/resources/tpcds/q4.sql @@ -0,0 +1,120 @@ +WITH year_total AS ( + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + c_preferred_cust_flag customer_preferred_cust_flag, + c_birth_country customer_birth_country, + c_login customer_login, + c_email_address customer_email_address, + d_year dyear, + sum(((ss_ext_list_price - ss_ext_wholesale_cost - ss_ext_discount_amt) + + ss_ext_sales_price) / 2) year_total, + 's' sale_type + FROM customer, store_sales, date_dim + WHERE c_customer_sk = ss_customer_sk AND ss_sold_date_sk = d_date_sk + GROUP BY c_customer_id, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_country, + c_login, + c_email_address, + d_year + UNION ALL + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + c_preferred_cust_flag customer_preferred_cust_flag, + c_birth_country customer_birth_country, + c_login customer_login, + c_email_address customer_email_address, + d_year dyear, + sum((((cs_ext_list_price - cs_ext_wholesale_cost - cs_ext_discount_amt) + + cs_ext_sales_price) / 2)) year_total, + 'c' sale_type + FROM customer, catalog_sales, date_dim + WHERE c_customer_sk = cs_bill_customer_sk AND cs_sold_date_sk = d_date_sk + GROUP BY c_customer_id, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_country, + c_login, + c_email_address, + d_year + UNION ALL + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + c_preferred_cust_flag customer_preferred_cust_flag, + c_birth_country customer_birth_country, + c_login customer_login, + c_email_address customer_email_address, + d_year dyear, + sum((((ws_ext_list_price - ws_ext_wholesale_cost - ws_ext_discount_amt) + ws_ext_sales_price) / + 2)) year_total, + 'w' sale_type + FROM customer, web_sales, date_dim + WHERE c_customer_sk = ws_bill_customer_sk AND ws_sold_date_sk = d_date_sk + GROUP BY c_customer_id, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_country, + c_login, + c_email_address, + d_year) +SELECT + t_s_secyear.customer_id, + t_s_secyear.customer_first_name, + t_s_secyear.customer_last_name, + t_s_secyear.customer_preferred_cust_flag, + t_s_secyear.customer_birth_country, + t_s_secyear.customer_login, + t_s_secyear.customer_email_address +FROM year_total t_s_firstyear, year_total t_s_secyear, year_total t_c_firstyear, + year_total t_c_secyear, year_total t_w_firstyear, year_total t_w_secyear +WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id + AND t_s_firstyear.customer_id = t_c_secyear.customer_id + AND t_s_firstyear.customer_id = t_c_firstyear.customer_id + AND t_s_firstyear.customer_id = t_w_firstyear.customer_id + AND t_s_firstyear.customer_id = t_w_secyear.customer_id + AND t_s_firstyear.sale_type = 's' + AND t_c_firstyear.sale_type = 'c' + AND t_w_firstyear.sale_type = 'w' + AND t_s_secyear.sale_type = 's' + AND t_c_secyear.sale_type = 'c' + AND t_w_secyear.sale_type = 'w' + AND t_s_firstyear.dyear = 2001 + AND t_s_secyear.dyear = 2001 + 1 + AND t_c_firstyear.dyear = 2001 + AND t_c_secyear.dyear = 2001 + 1 + AND t_w_firstyear.dyear = 2001 + AND t_w_secyear.dyear = 2001 + 1 + AND t_s_firstyear.year_total > 0 + AND t_c_firstyear.year_total > 0 + AND t_w_firstyear.year_total > 0 + AND CASE WHEN t_c_firstyear.year_total > 0 + THEN t_c_secyear.year_total / t_c_firstyear.year_total + ELSE NULL END + > CASE WHEN t_s_firstyear.year_total > 0 + THEN t_s_secyear.year_total / t_s_firstyear.year_total + ELSE NULL END + AND CASE WHEN t_c_firstyear.year_total > 0 + THEN t_c_secyear.year_total / t_c_firstyear.year_total + ELSE NULL END + > CASE WHEN t_w_firstyear.year_total > 0 + THEN t_w_secyear.year_total / t_w_firstyear.year_total + ELSE NULL END +ORDER BY + t_s_secyear.customer_id, + t_s_secyear.customer_first_name, + t_s_secyear.customer_last_name, + t_s_secyear.customer_preferred_cust_flag, + t_s_secyear.customer_birth_country, + t_s_secyear.customer_login, + t_s_secyear.customer_email_address +LIMIT 100 diff --git a/src/test/resources/tpcds/q40.sql b/src/test/resources/tpcds/q40.sql new file mode 100644 index 000000000..66d8b73ac --- /dev/null +++ b/src/test/resources/tpcds/q40.sql @@ -0,0 +1,25 @@ +SELECT + w_state, + i_item_id, + sum(CASE WHEN (cast(d_date AS DATE) < cast('2000-03-11' AS DATE)) + THEN cs_sales_price - coalesce(cr_refunded_cash, 0) + ELSE 0 END) AS sales_before, + sum(CASE WHEN (cast(d_date AS DATE) >= cast('2000-03-11' AS DATE)) + THEN cs_sales_price - coalesce(cr_refunded_cash, 0) + ELSE 0 END) AS sales_after +FROM + catalog_sales + LEFT OUTER JOIN catalog_returns ON + (cs_order_number = cr_order_number + AND cs_item_sk = cr_item_sk) + , warehouse, item, date_dim +WHERE + i_current_price BETWEEN 0.99 AND 1.49 + AND i_item_sk = cs_item_sk + AND cs_warehouse_sk = w_warehouse_sk + AND cs_sold_date_sk = d_date_sk + AND d_date BETWEEN (cast('2000-03-11' AS DATE) - INTERVAL 30 days) + AND (cast('2000-03-11' AS DATE) + INTERVAL 30 days) +GROUP BY w_state, i_item_id +ORDER BY w_state, i_item_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q41.sql b/src/test/resources/tpcds/q41.sql new file mode 100644 index 000000000..25e317e0e --- /dev/null +++ b/src/test/resources/tpcds/q41.sql @@ -0,0 +1,49 @@ +SELECT DISTINCT (i_product_name) +FROM item i1 +WHERE i_manufact_id BETWEEN 738 AND 738 + 40 + AND (SELECT count(*) AS item_cnt +FROM item +WHERE (i_manufact = i1.i_manufact AND + ((i_category = 'Women' AND + (i_color = 'powder' OR i_color = 'khaki') AND + (i_units = 'Ounce' OR i_units = 'Oz') AND + (i_size = 'medium' OR i_size = 'extra large') + ) OR + (i_category = 'Women' AND + (i_color = 'brown' OR i_color = 'honeydew') AND + (i_units = 'Bunch' OR i_units = 'Ton') AND + (i_size = 'N/A' OR i_size = 'small') + ) OR + (i_category = 'Men' AND + (i_color = 'floral' OR i_color = 'deep') AND + (i_units = 'N/A' OR i_units = 'Dozen') AND + (i_size = 'petite' OR i_size = 'large') + ) OR + (i_category = 'Men' AND + (i_color = 'light' OR i_color = 'cornflower') AND + (i_units = 'Box' OR i_units = 'Pound') AND + (i_size = 'medium' OR i_size = 'extra large') + ))) OR + (i_manufact = i1.i_manufact AND + ((i_category = 'Women' AND + (i_color = 'midnight' OR i_color = 'snow') AND + (i_units = 'Pallet' OR i_units = 'Gross') AND + (i_size = 'medium' OR i_size = 'extra large') + ) OR + (i_category = 'Women' AND + (i_color = 'cyan' OR i_color = 'papaya') AND + (i_units = 'Cup' OR i_units = 'Dram') AND + (i_size = 'N/A' OR i_size = 'small') + ) OR + (i_category = 'Men' AND + (i_color = 'orange' OR i_color = 'frosted') AND + (i_units = 'Each' OR i_units = 'Tbl') AND + (i_size = 'petite' OR i_size = 'large') + ) OR + (i_category = 'Men' AND + (i_color = 'forest' OR i_color = 'ghost') AND + (i_units = 'Lb' OR i_units = 'Bundle') AND + (i_size = 'medium' OR i_size = 'extra large') + )))) > 0 +ORDER BY i_product_name +LIMIT 100 diff --git a/src/test/resources/tpcds/q42.sql b/src/test/resources/tpcds/q42.sql new file mode 100644 index 000000000..4d2e71760 --- /dev/null +++ b/src/test/resources/tpcds/q42.sql @@ -0,0 +1,18 @@ +SELECT + dt.d_year, + item.i_category_id, + item.i_category, + sum(ss_ext_sales_price) +FROM date_dim dt, store_sales, item +WHERE dt.d_date_sk = store_sales.ss_sold_date_sk + AND store_sales.ss_item_sk = item.i_item_sk + AND item.i_manager_id = 1 + AND dt.d_moy = 11 + AND dt.d_year = 2000 +GROUP BY dt.d_year + , item.i_category_id + , item.i_category +ORDER BY sum(ss_ext_sales_price) DESC, dt.d_year + , item.i_category_id + , item.i_category +LIMIT 100 diff --git a/src/test/resources/tpcds/q43.sql b/src/test/resources/tpcds/q43.sql new file mode 100644 index 000000000..45411772c --- /dev/null +++ b/src/test/resources/tpcds/q43.sql @@ -0,0 +1,33 @@ +SELECT + s_store_name, + s_store_id, + sum(CASE WHEN (d_day_name = 'Sunday') + THEN ss_sales_price + ELSE NULL END) sun_sales, + sum(CASE WHEN (d_day_name = 'Monday') + THEN ss_sales_price + ELSE NULL END) mon_sales, + sum(CASE WHEN (d_day_name = 'Tuesday') + THEN ss_sales_price + ELSE NULL END) tue_sales, + sum(CASE WHEN (d_day_name = 'Wednesday') + THEN ss_sales_price + ELSE NULL END) wed_sales, + sum(CASE WHEN (d_day_name = 'Thursday') + THEN ss_sales_price + ELSE NULL END) thu_sales, + sum(CASE WHEN (d_day_name = 'Friday') + THEN ss_sales_price + ELSE NULL END) fri_sales, + sum(CASE WHEN (d_day_name = 'Saturday') + THEN ss_sales_price + ELSE NULL END) sat_sales +FROM date_dim, store_sales, store +WHERE d_date_sk = ss_sold_date_sk AND + s_store_sk = ss_store_sk AND + s_gmt_offset = -5 AND + d_year = 2000 +GROUP BY s_store_name, s_store_id +ORDER BY s_store_name, s_store_id, sun_sales, mon_sales, tue_sales, wed_sales, + thu_sales, fri_sales, sat_sales +LIMIT 100 diff --git a/src/test/resources/tpcds/q44.sql b/src/test/resources/tpcds/q44.sql new file mode 100644 index 000000000..379e60478 --- /dev/null +++ b/src/test/resources/tpcds/q44.sql @@ -0,0 +1,46 @@ +SELECT + asceding.rnk, + i1.i_product_name best_performing, + i2.i_product_name worst_performing +FROM (SELECT * +FROM (SELECT + item_sk, + rank() + OVER ( + ORDER BY rank_col ASC) rnk +FROM (SELECT + ss_item_sk item_sk, + avg(ss_net_profit) rank_col +FROM store_sales ss1 +WHERE ss_store_sk = 4 +GROUP BY ss_item_sk +HAVING avg(ss_net_profit) > 0.9 * (SELECT avg(ss_net_profit) rank_col +FROM store_sales +WHERE ss_store_sk = 4 + AND ss_addr_sk IS NULL +GROUP BY ss_store_sk)) V1) V11 +WHERE rnk < 11) asceding, + (SELECT * + FROM (SELECT + item_sk, + rank() + OVER ( + ORDER BY rank_col DESC) rnk + FROM (SELECT + ss_item_sk item_sk, + avg(ss_net_profit) rank_col + FROM store_sales ss1 + WHERE ss_store_sk = 4 + GROUP BY ss_item_sk + HAVING avg(ss_net_profit) > 0.9 * (SELECT avg(ss_net_profit) rank_col + FROM store_sales + WHERE ss_store_sk = 4 + AND ss_addr_sk IS NULL + GROUP BY ss_store_sk)) V2) V21 + WHERE rnk < 11) descending, + item i1, item i2 +WHERE asceding.rnk = descending.rnk + AND i1.i_item_sk = asceding.item_sk + AND i2.i_item_sk = descending.item_sk +ORDER BY asceding.rnk +LIMIT 100 diff --git a/src/test/resources/tpcds/q45.sql b/src/test/resources/tpcds/q45.sql new file mode 100644 index 000000000..907438f19 --- /dev/null +++ b/src/test/resources/tpcds/q45.sql @@ -0,0 +1,21 @@ +SELECT + ca_zip, + ca_city, + sum(ws_sales_price) +FROM web_sales, customer, customer_address, date_dim, item +WHERE ws_bill_customer_sk = c_customer_sk + AND c_current_addr_sk = ca_address_sk + AND ws_item_sk = i_item_sk + AND (substr(ca_zip, 1, 5) IN + ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') + OR + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) +) + AND ws_sold_date_sk = d_date_sk + AND d_qoy = 2 AND d_year = 2001 +GROUP BY ca_zip, ca_city +ORDER BY ca_zip, ca_city +LIMIT 100 diff --git a/src/test/resources/tpcds/q46.sql b/src/test/resources/tpcds/q46.sql new file mode 100644 index 000000000..0911677df --- /dev/null +++ b/src/test/resources/tpcds/q46.sql @@ -0,0 +1,32 @@ +SELECT + c_last_name, + c_first_name, + ca_city, + bought_city, + ss_ticket_number, + amt, + profit +FROM + (SELECT + ss_ticket_number, + ss_customer_sk, + ca_city bought_city, + sum(ss_coupon_amt) amt, + sum(ss_net_profit) profit + FROM store_sales, date_dim, store, household_demographics, customer_address + WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_store_sk = store.s_store_sk + AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + AND store_sales.ss_addr_sk = customer_address.ca_address_sk + AND (household_demographics.hd_dep_count = 4 OR + household_demographics.hd_vehicle_count = 3) + AND date_dim.d_dow IN (6, 0) + AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) + AND store.s_city IN ('Fairview', 'Midway', 'Fairview', 'Fairview', 'Fairview') + GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn, customer, + customer_address current_addr +WHERE ss_customer_sk = c_customer_sk + AND customer.c_current_addr_sk = current_addr.ca_address_sk + AND current_addr.ca_city <> bought_city +ORDER BY c_last_name, c_first_name, ca_city, bought_city, ss_ticket_number +LIMIT 100 diff --git a/src/test/resources/tpcds/q47.sql b/src/test/resources/tpcds/q47.sql new file mode 100644 index 000000000..cfc37a4ce --- /dev/null +++ b/src/test/resources/tpcds/q47.sql @@ -0,0 +1,63 @@ +WITH v1 AS ( + SELECT + i_category, + i_brand, + s_store_name, + s_company_name, + d_year, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) + OVER + (PARTITION BY i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() + OVER + (PARTITION BY i_category, i_brand, + s_store_name, s_company_name + ORDER BY d_year, d_moy) rn + FROM item, store_sales, date_dim, store + WHERE ss_item_sk = i_item_sk AND + ss_sold_date_sk = d_date_sk AND + ss_store_sk = s_store_sk AND + ( + d_year = 1999 OR + (d_year = 1999 - 1 AND d_moy = 12) OR + (d_year = 1999 + 1 AND d_moy = 1) + ) + GROUP BY i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 AS ( + SELECT + v1.i_category, + v1.i_brand, + v1.s_store_name, + v1.s_company_name, + v1.d_year, + v1.d_moy, + v1.avg_monthly_sales, + v1.sum_sales, + v1_lag.sum_sales psum, + v1_lead.sum_sales nsum + FROM v1, v1 v1_lag, v1 v1_lead + WHERE v1.i_category = v1_lag.i_category AND + v1.i_category = v1_lead.i_category AND + v1.i_brand = v1_lag.i_brand AND + v1.i_brand = v1_lead.i_brand AND + v1.s_store_name = v1_lag.s_store_name AND + v1.s_store_name = v1_lead.s_store_name AND + v1.s_company_name = v1_lag.s_company_name AND + v1.s_company_name = v1_lead.s_company_name AND + v1.rn = v1_lag.rn + 1 AND + v1.rn = v1_lead.rn - 1) +SELECT * +FROM v2 +WHERE d_year = 1999 AND + avg_monthly_sales > 0 AND + CASE WHEN avg_monthly_sales > 0 + THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales + ELSE NULL END > 0.1 +ORDER BY sum_sales - avg_monthly_sales, 3 +LIMIT 100 diff --git a/src/test/resources/tpcds/q48.sql b/src/test/resources/tpcds/q48.sql new file mode 100644 index 000000000..fdb9f38e2 --- /dev/null +++ b/src/test/resources/tpcds/q48.sql @@ -0,0 +1,63 @@ +SELECT sum(ss_quantity) +FROM store_sales, store, customer_demographics, customer_address, date_dim +WHERE s_store_sk = ss_store_sk + AND ss_sold_date_sk = d_date_sk AND d_year = 2001 + AND + ( + ( + cd_demo_sk = ss_cdemo_sk + AND + cd_marital_status = 'M' + AND + cd_education_status = '4 yr Degree' + AND + ss_sales_price BETWEEN 100.00 AND 150.00 + ) + OR + ( + cd_demo_sk = ss_cdemo_sk + AND + cd_marital_status = 'D' + AND + cd_education_status = '2 yr Degree' + AND + ss_sales_price BETWEEN 50.00 AND 100.00 + ) + OR + ( + cd_demo_sk = ss_cdemo_sk + AND + cd_marital_status = 'S' + AND + cd_education_status = 'College' + AND + ss_sales_price BETWEEN 150.00 AND 200.00 + ) + ) + AND + ( + ( + ss_addr_sk = ca_address_sk + AND + ca_country = 'United States' + AND + ca_state IN ('CO', 'OH', 'TX') + AND ss_net_profit BETWEEN 0 AND 2000 + ) + OR + (ss_addr_sk = ca_address_sk + AND + ca_country = 'United States' + AND + ca_state IN ('OR', 'MN', 'KY') + AND ss_net_profit BETWEEN 150 AND 3000 + ) + OR + (ss_addr_sk = ca_address_sk + AND + ca_country = 'United States' + AND + ca_state IN ('VA', 'CA', 'MS') + AND ss_net_profit BETWEEN 50 AND 25000 + ) + ) diff --git a/src/test/resources/tpcds/q49.sql b/src/test/resources/tpcds/q49.sql new file mode 100644 index 000000000..9568d8b92 --- /dev/null +++ b/src/test/resources/tpcds/q49.sql @@ -0,0 +1,126 @@ +SELECT + 'web' AS channel, + web.item, + web.return_ratio, + web.return_rank, + web.currency_rank +FROM ( + SELECT + item, + return_ratio, + currency_ratio, + rank() + OVER ( + ORDER BY return_ratio) AS return_rank, + rank() + OVER ( + ORDER BY currency_ratio) AS currency_rank + FROM + (SELECT + ws.ws_item_sk AS item, + (cast(sum(coalesce(wr.wr_return_quantity, 0)) AS DECIMAL(15, 4)) / + cast(sum(coalesce(ws.ws_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio, + (cast(sum(coalesce(wr.wr_return_amt, 0)) AS DECIMAL(15, 4)) / + cast(sum(coalesce(ws.ws_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio + FROM + web_sales ws LEFT OUTER JOIN web_returns wr + ON (ws.ws_order_number = wr.wr_order_number AND + ws.ws_item_sk = wr.wr_item_sk) + , date_dim + WHERE + wr.wr_return_amt > 10000 + AND ws.ws_net_profit > 1 + AND ws.ws_net_paid > 0 + AND ws.ws_quantity > 0 + AND ws_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 12 + GROUP BY ws.ws_item_sk + ) in_web + ) web +WHERE (web.return_rank <= 10 OR web.currency_rank <= 10) +UNION +SELECT + 'catalog' AS channel, + catalog.item, + catalog.return_ratio, + catalog.return_rank, + catalog.currency_rank +FROM ( + SELECT + item, + return_ratio, + currency_ratio, + rank() + OVER ( + ORDER BY return_ratio) AS return_rank, + rank() + OVER ( + ORDER BY currency_ratio) AS currency_rank + FROM + (SELECT + cs.cs_item_sk AS item, + (cast(sum(coalesce(cr.cr_return_quantity, 0)) AS DECIMAL(15, 4)) / + cast(sum(coalesce(cs.cs_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio, + (cast(sum(coalesce(cr.cr_return_amount, 0)) AS DECIMAL(15, 4)) / + cast(sum(coalesce(cs.cs_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio + FROM + catalog_sales cs LEFT OUTER JOIN catalog_returns cr + ON (cs.cs_order_number = cr.cr_order_number AND + cs.cs_item_sk = cr.cr_item_sk) + , date_dim + WHERE + cr.cr_return_amount > 10000 + AND cs.cs_net_profit > 1 + AND cs.cs_net_paid > 0 + AND cs.cs_quantity > 0 + AND cs_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 12 + GROUP BY cs.cs_item_sk + ) in_cat + ) catalog +WHERE (catalog.return_rank <= 10 OR catalog.currency_rank <= 10) +UNION +SELECT + 'store' AS channel, + store.item, + store.return_ratio, + store.return_rank, + store.currency_rank +FROM ( + SELECT + item, + return_ratio, + currency_ratio, + rank() + OVER ( + ORDER BY return_ratio) AS return_rank, + rank() + OVER ( + ORDER BY currency_ratio) AS currency_rank + FROM + (SELECT + sts.ss_item_sk AS item, + (cast(sum(coalesce(sr.sr_return_quantity, 0)) AS DECIMAL(15, 4)) / + cast(sum(coalesce(sts.ss_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio, + (cast(sum(coalesce(sr.sr_return_amt, 0)) AS DECIMAL(15, 4)) / + cast(sum(coalesce(sts.ss_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio + FROM + store_sales sts LEFT OUTER JOIN store_returns sr + ON (sts.ss_ticket_number = sr.sr_ticket_number AND sts.ss_item_sk = sr.sr_item_sk) + , date_dim + WHERE + sr.sr_return_amt > 10000 + AND sts.ss_net_profit > 1 + AND sts.ss_net_paid > 0 + AND sts.ss_quantity > 0 + AND ss_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 12 + GROUP BY sts.ss_item_sk + ) in_store + ) store +WHERE (store.return_rank <= 10 OR store.currency_rank <= 10) +ORDER BY 1, 4, 5 +LIMIT 100 diff --git a/src/test/resources/tpcds/q5.sql b/src/test/resources/tpcds/q5.sql new file mode 100644 index 000000000..b87cf3a44 --- /dev/null +++ b/src/test/resources/tpcds/q5.sql @@ -0,0 +1,131 @@ +WITH ssr AS +( SELECT + s_store_id, + sum(sales_price) AS sales, + sum(profit) AS profit, + sum(return_amt) AS RETURNS, + sum(net_loss) AS profit_loss + FROM + (SELECT + ss_store_sk AS store_sk, + ss_sold_date_sk AS date_sk, + ss_ext_sales_price AS sales_price, + ss_net_profit AS profit, + cast(0 AS DECIMAL(7, 2)) AS return_amt, + cast(0 AS DECIMAL(7, 2)) AS net_loss + FROM store_sales + UNION ALL + SELECT + sr_store_sk AS store_sk, + sr_returned_date_sk AS date_sk, + cast(0 AS DECIMAL(7, 2)) AS sales_price, + cast(0 AS DECIMAL(7, 2)) AS profit, + sr_return_amt AS return_amt, + sr_net_loss AS net_loss + FROM store_returns) + salesreturns, date_dim, store + WHERE date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-23' AS DATE) + AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days)) + AND store_sk = s_store_sk + GROUP BY s_store_id), + csr AS + ( SELECT + cp_catalog_page_id, + sum(sales_price) AS sales, + sum(profit) AS profit, + sum(return_amt) AS RETURNS, + sum(net_loss) AS profit_loss + FROM + (SELECT + cs_catalog_page_sk AS page_sk, + cs_sold_date_sk AS date_sk, + cs_ext_sales_price AS sales_price, + cs_net_profit AS profit, + cast(0 AS DECIMAL(7, 2)) AS return_amt, + cast(0 AS DECIMAL(7, 2)) AS net_loss + FROM catalog_sales + UNION ALL + SELECT + cr_catalog_page_sk AS page_sk, + cr_returned_date_sk AS date_sk, + cast(0 AS DECIMAL(7, 2)) AS sales_price, + cast(0 AS DECIMAL(7, 2)) AS profit, + cr_return_amount AS return_amt, + cr_net_loss AS net_loss + FROM catalog_returns + ) salesreturns, date_dim, catalog_page + WHERE date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-23' AS DATE) + AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days)) + AND page_sk = cp_catalog_page_sk + GROUP BY cp_catalog_page_id) + , + wsr AS + ( SELECT + web_site_id, + sum(sales_price) AS sales, + sum(profit) AS profit, + sum(return_amt) AS RETURNS, + sum(net_loss) AS profit_loss + FROM + (SELECT + ws_web_site_sk AS wsr_web_site_sk, + ws_sold_date_sk AS date_sk, + ws_ext_sales_price AS sales_price, + ws_net_profit AS profit, + cast(0 AS DECIMAL(7, 2)) AS return_amt, + cast(0 AS DECIMAL(7, 2)) AS net_loss + FROM web_sales + UNION ALL + SELECT + ws_web_site_sk AS wsr_web_site_sk, + wr_returned_date_sk AS date_sk, + cast(0 AS DECIMAL(7, 2)) AS sales_price, + cast(0 AS DECIMAL(7, 2)) AS profit, + wr_return_amt AS return_amt, + wr_net_loss AS net_loss + FROM web_returns + LEFT OUTER JOIN web_sales ON + (wr_item_sk = ws_item_sk + AND wr_order_number = ws_order_number) + ) salesreturns, date_dim, web_site + WHERE date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-23' AS DATE) + AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days)) + AND wsr_web_site_sk = web_site_sk + GROUP BY web_site_id) +SELECT + channel, + id, + sum(sales) AS sales, + sum(returns) AS returns, + sum(profit) AS profit +FROM + (SELECT + 'store channel' AS channel, + concat('store', s_store_id) AS id, + sales, + returns, + (profit - profit_loss) AS profit + FROM ssr + UNION ALL + SELECT + 'catalog channel' AS channel, + concat('catalog_page', cp_catalog_page_id) AS id, + sales, + returns, + (profit - profit_loss) AS profit + FROM csr + UNION ALL + SELECT + 'web channel' AS channel, + concat('web_site', web_site_id) AS id, + sales, + returns, + (profit - profit_loss) AS profit + FROM wsr + ) x +GROUP BY ROLLUP (channel, id) +ORDER BY channel, id +LIMIT 100 diff --git a/src/test/resources/tpcds/q50.sql b/src/test/resources/tpcds/q50.sql new file mode 100644 index 000000000..f1d4b1544 --- /dev/null +++ b/src/test/resources/tpcds/q50.sql @@ -0,0 +1,47 @@ +SELECT + s_store_name, + s_company_id, + s_street_number, + s_street_name, + s_street_type, + s_suite_number, + s_city, + s_county, + s_state, + s_zip, + sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk <= 30) + THEN 1 + ELSE 0 END) AS `30 days `, + sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 30) AND + (sr_returned_date_sk - ss_sold_date_sk <= 60) + THEN 1 + ELSE 0 END) AS `31 - 60 days `, + sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 60) AND + (sr_returned_date_sk - ss_sold_date_sk <= 90) + THEN 1 + ELSE 0 END) AS `61 - 90 days `, + sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 90) AND + (sr_returned_date_sk - ss_sold_date_sk <= 120) + THEN 1 + ELSE 0 END) AS `91 - 120 days `, + sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 120) + THEN 1 + ELSE 0 END) AS `>120 days ` +FROM + store_sales, store_returns, store, date_dim d1, date_dim d2 +WHERE + d2.d_year = 2001 + AND d2.d_moy = 8 + AND ss_ticket_number = sr_ticket_number + AND ss_item_sk = sr_item_sk + AND ss_sold_date_sk = d1.d_date_sk + AND sr_returned_date_sk = d2.d_date_sk + AND ss_customer_sk = sr_customer_sk + AND ss_store_sk = s_store_sk +GROUP BY + s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, + s_suite_number, s_city, s_county, s_state, s_zip +ORDER BY + s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, + s_suite_number, s_city, s_county, s_state, s_zip +LIMIT 100 diff --git a/src/test/resources/tpcds/q51.sql b/src/test/resources/tpcds/q51.sql new file mode 100644 index 000000000..62b003eb6 --- /dev/null +++ b/src/test/resources/tpcds/q51.sql @@ -0,0 +1,55 @@ +WITH web_v1 AS ( + SELECT + ws_item_sk item_sk, + d_date, + sum(sum(ws_sales_price)) + OVER (PARTITION BY ws_item_sk + ORDER BY d_date + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) cume_sales + FROM web_sales, date_dim + WHERE ws_sold_date_sk = d_date_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + AND ws_item_sk IS NOT NULL + GROUP BY ws_item_sk, d_date), + store_v1 AS ( + SELECT + ss_item_sk item_sk, + d_date, + sum(sum(ss_sales_price)) + OVER (PARTITION BY ss_item_sk + ORDER BY d_date + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) cume_sales + FROM store_sales, date_dim + WHERE ss_sold_date_sk = d_date_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + AND ss_item_sk IS NOT NULL + GROUP BY ss_item_sk, d_date) +SELECT * +FROM (SELECT + item_sk, + d_date, + web_sales, + store_sales, + max(web_sales) + OVER (PARTITION BY item_sk + ORDER BY d_date + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) web_cumulative, + max(store_sales) + OVER (PARTITION BY item_sk + ORDER BY d_date + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) store_cumulative +FROM (SELECT + CASE WHEN web.item_sk IS NOT NULL + THEN web.item_sk + ELSE store.item_sk END item_sk, + CASE WHEN web.d_date IS NOT NULL + THEN web.d_date + ELSE store.d_date END d_date, + web.cume_sales web_sales, + store.cume_sales store_sales +FROM web_v1 web FULL OUTER JOIN store_v1 store ON (web.item_sk = store.item_sk + AND web.d_date = store.d_date) + ) x) y +WHERE web_cumulative > store_cumulative +ORDER BY item_sk, d_date +LIMIT 100 diff --git a/src/test/resources/tpcds/q52.sql b/src/test/resources/tpcds/q52.sql new file mode 100644 index 000000000..467d1ae05 --- /dev/null +++ b/src/test/resources/tpcds/q52.sql @@ -0,0 +1,14 @@ +SELECT + dt.d_year, + item.i_brand_id brand_id, + item.i_brand brand, + sum(ss_ext_sales_price) ext_price +FROM date_dim dt, store_sales, item +WHERE dt.d_date_sk = store_sales.ss_sold_date_sk + AND store_sales.ss_item_sk = item.i_item_sk + AND item.i_manager_id = 1 + AND dt.d_moy = 11 + AND dt.d_year = 2000 +GROUP BY dt.d_year, item.i_brand, item.i_brand_id +ORDER BY dt.d_year, ext_price DESC, brand_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q53.sql b/src/test/resources/tpcds/q53.sql new file mode 100644 index 000000000..b42c68dcf --- /dev/null +++ b/src/test/resources/tpcds/q53.sql @@ -0,0 +1,30 @@ +SELECT * +FROM + (SELECT + i_manufact_id, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) + OVER (PARTITION BY i_manufact_id) avg_quarterly_sales + FROM item, store_sales, date_dim, store + WHERE ss_item_sk = i_item_sk AND + ss_sold_date_sk = d_date_sk AND + ss_store_sk = s_store_sk AND + d_month_seq IN (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, + 1200 + 7, 1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11) AND + ((i_category IN ('Books', 'Children', 'Electronics') AND + i_class IN ('personal', 'portable', 'reference', 'self-help') AND + i_brand IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', + 'exportiunivamalg #9', 'scholaramalgamalg #9')) + OR + (i_category IN ('Women', 'Music', 'Men') AND + i_class IN ('accessories', 'classical', 'fragrances', 'pants') AND + i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', + 'importoamalg #1'))) + GROUP BY i_manufact_id, d_qoy) tmp1 +WHERE CASE WHEN avg_quarterly_sales > 0 + THEN abs(sum_sales - avg_quarterly_sales) / avg_quarterly_sales + ELSE NULL END > 0.1 +ORDER BY avg_quarterly_sales, + sum_sales, + i_manufact_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q54.sql b/src/test/resources/tpcds/q54.sql new file mode 100644 index 000000000..897237fb6 --- /dev/null +++ b/src/test/resources/tpcds/q54.sql @@ -0,0 +1,61 @@ +WITH my_customers AS ( + SELECT DISTINCT + c_customer_sk, + c_current_addr_sk + FROM + (SELECT + cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + FROM catalog_sales + UNION ALL + SELECT + ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + FROM web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + WHERE sold_date_sk = d_date_sk + AND item_sk = i_item_sk + AND i_category = 'Women' + AND i_class = 'maternity' + AND c_customer_sk = cs_or_ws_sales.customer_sk + AND d_moy = 12 + AND d_year = 1998 +) + , my_revenue AS ( + SELECT + c_customer_sk, + sum(ss_ext_sales_price) AS revenue + FROM my_customers, + store_sales, + customer_address, + store, + date_dim + WHERE c_current_addr_sk = ca_address_sk + AND ca_county = s_county + AND ca_state = s_state + AND ss_sold_date_sk = d_date_sk + AND c_customer_sk = ss_customer_sk + AND d_month_seq BETWEEN (SELECT DISTINCT d_month_seq + 1 + FROM date_dim + WHERE d_year = 1998 AND d_moy = 12) + AND (SELECT DISTINCT d_month_seq + 3 + FROM date_dim + WHERE d_year = 1998 AND d_moy = 12) + GROUP BY c_customer_sk +) + , segments AS +(SELECT cast((revenue / 50) AS INT) AS segment + FROM my_revenue) +SELECT + segment, + count(*) AS num_customers, + segment * 50 AS segment_base +FROM segments +GROUP BY segment +ORDER BY segment, num_customers +LIMIT 100 diff --git a/src/test/resources/tpcds/q55.sql b/src/test/resources/tpcds/q55.sql new file mode 100644 index 000000000..bc5d888c9 --- /dev/null +++ b/src/test/resources/tpcds/q55.sql @@ -0,0 +1,13 @@ +SELECT + i_brand_id brand_id, + i_brand brand, + sum(ss_ext_sales_price) ext_price +FROM date_dim, store_sales, item +WHERE d_date_sk = ss_sold_date_sk + AND ss_item_sk = i_item_sk + AND i_manager_id = 28 + AND d_moy = 11 + AND d_year = 1999 +GROUP BY i_brand, i_brand_id +ORDER BY ext_price DESC, brand_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q56.sql b/src/test/resources/tpcds/q56.sql new file mode 100644 index 000000000..2fa1738dc --- /dev/null +++ b/src/test/resources/tpcds/q56.sql @@ -0,0 +1,65 @@ +WITH ss AS ( + SELECT + i_item_id, + sum(ss_ext_sales_price) total_sales + FROM + store_sales, date_dim, customer_address, item + WHERE + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_color IN ('slate', 'blanched', 'burnished')) + AND ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 2 + AND ss_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_item_id), + cs AS ( + SELECT + i_item_id, + sum(cs_ext_sales_price) total_sales + FROM + catalog_sales, date_dim, customer_address, item + WHERE + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_color IN ('slate', 'blanched', 'burnished')) + AND cs_item_sk = i_item_sk + AND cs_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 2 + AND cs_bill_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_item_id), + ws AS ( + SELECT + i_item_id, + sum(ws_ext_sales_price) total_sales + FROM + web_sales, date_dim, customer_address, item + WHERE + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_color IN ('slate', 'blanched', 'burnished')) + AND ws_item_sk = i_item_sk + AND ws_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 2 + AND ws_bill_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_item_id) +SELECT + i_item_id, + sum(total_sales) total_sales +FROM (SELECT * + FROM ss + UNION ALL + SELECT * + FROM cs + UNION ALL + SELECT * + FROM ws) tmp1 +GROUP BY i_item_id +ORDER BY total_sales +LIMIT 100 diff --git a/src/test/resources/tpcds/q57.sql b/src/test/resources/tpcds/q57.sql new file mode 100644 index 000000000..cf70d4b90 --- /dev/null +++ b/src/test/resources/tpcds/q57.sql @@ -0,0 +1,56 @@ +WITH v1 AS ( + SELECT + i_category, + i_brand, + cc_name, + d_year, + d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) + OVER + (PARTITION BY i_category, i_brand, cc_name, d_year) + avg_monthly_sales, + rank() + OVER + (PARTITION BY i_category, i_brand, cc_name + ORDER BY d_year, d_moy) rn + FROM item, catalog_sales, date_dim, call_center + WHERE cs_item_sk = i_item_sk AND + cs_sold_date_sk = d_date_sk AND + cc_call_center_sk = cs_call_center_sk AND + ( + d_year = 1999 OR + (d_year = 1999 - 1 AND d_moy = 12) OR + (d_year = 1999 + 1 AND d_moy = 1) + ) + GROUP BY i_category, i_brand, + cc_name, d_year, d_moy), + v2 AS ( + SELECT + v1.i_category, + v1.i_brand, + v1.cc_name, + v1.d_year, + v1.d_moy, + v1.avg_monthly_sales, + v1.sum_sales, + v1_lag.sum_sales psum, + v1_lead.sum_sales nsum + FROM v1, v1 v1_lag, v1 v1_lead + WHERE v1.i_category = v1_lag.i_category AND + v1.i_category = v1_lead.i_category AND + v1.i_brand = v1_lag.i_brand AND + v1.i_brand = v1_lead.i_brand AND + v1.cc_name = v1_lag.cc_name AND + v1.cc_name = v1_lead.cc_name AND + v1.rn = v1_lag.rn + 1 AND + v1.rn = v1_lead.rn - 1) +SELECT * +FROM v2 +WHERE d_year = 1999 AND + avg_monthly_sales > 0 AND + CASE WHEN avg_monthly_sales > 0 + THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales + ELSE NULL END > 0.1 +ORDER BY sum_sales - avg_monthly_sales, 3 +LIMIT 100 diff --git a/src/test/resources/tpcds/q58.sql b/src/test/resources/tpcds/q58.sql new file mode 100644 index 000000000..5f63f33dc --- /dev/null +++ b/src/test/resources/tpcds/q58.sql @@ -0,0 +1,59 @@ +WITH ss_items AS +(SELECT + i_item_id item_id, + sum(ss_ext_sales_price) ss_item_rev + FROM store_sales, item, date_dim + WHERE ss_item_sk = i_item_sk + AND d_date IN (SELECT d_date + FROM date_dim + WHERE d_week_seq = (SELECT d_week_seq + FROM date_dim + WHERE d_date = '2000-01-03')) + AND ss_sold_date_sk = d_date_sk + GROUP BY i_item_id), + cs_items AS + (SELECT + i_item_id item_id, + sum(cs_ext_sales_price) cs_item_rev + FROM catalog_sales, item, date_dim + WHERE cs_item_sk = i_item_sk + AND d_date IN (SELECT d_date + FROM date_dim + WHERE d_week_seq = (SELECT d_week_seq + FROM date_dim + WHERE d_date = '2000-01-03')) + AND cs_sold_date_sk = d_date_sk + GROUP BY i_item_id), + ws_items AS + (SELECT + i_item_id item_id, + sum(ws_ext_sales_price) ws_item_rev + FROM web_sales, item, date_dim + WHERE ws_item_sk = i_item_sk + AND d_date IN (SELECT d_date + FROM date_dim + WHERE d_week_seq = (SELECT d_week_seq + FROM date_dim + WHERE d_date = '2000-01-03')) + AND ws_sold_date_sk = d_date_sk + GROUP BY i_item_id) +SELECT + ss_items.item_id, + ss_item_rev, + ss_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 ss_dev, + cs_item_rev, + cs_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 cs_dev, + ws_item_rev, + ws_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 ws_dev, + (ss_item_rev + cs_item_rev + ws_item_rev) / 3 average +FROM ss_items, cs_items, ws_items +WHERE ss_items.item_id = cs_items.item_id + AND ss_items.item_id = ws_items.item_id + AND ss_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev + AND ss_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev + AND cs_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev + AND cs_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev + AND ws_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev + AND ws_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev +ORDER BY item_id, ss_item_rev +LIMIT 100 diff --git a/src/test/resources/tpcds/q59.sql b/src/test/resources/tpcds/q59.sql new file mode 100644 index 000000000..3cef20276 --- /dev/null +++ b/src/test/resources/tpcds/q59.sql @@ -0,0 +1,75 @@ +WITH wss AS +(SELECT + d_week_seq, + ss_store_sk, + sum(CASE WHEN (d_day_name = 'Sunday') + THEN ss_sales_price + ELSE NULL END) sun_sales, + sum(CASE WHEN (d_day_name = 'Monday') + THEN ss_sales_price + ELSE NULL END) mon_sales, + sum(CASE WHEN (d_day_name = 'Tuesday') + THEN ss_sales_price + ELSE NULL END) tue_sales, + sum(CASE WHEN (d_day_name = 'Wednesday') + THEN ss_sales_price + ELSE NULL END) wed_sales, + sum(CASE WHEN (d_day_name = 'Thursday') + THEN ss_sales_price + ELSE NULL END) thu_sales, + sum(CASE WHEN (d_day_name = 'Friday') + THEN ss_sales_price + ELSE NULL END) fri_sales, + sum(CASE WHEN (d_day_name = 'Saturday') + THEN ss_sales_price + ELSE NULL END) sat_sales + FROM store_sales, date_dim + WHERE d_date_sk = ss_sold_date_sk + GROUP BY d_week_seq, ss_store_sk +) +SELECT + s_store_name1, + s_store_id1, + d_week_seq1, + sun_sales1 / sun_sales2, + mon_sales1 / mon_sales2, + tue_sales1 / tue_sales2, + wed_sales1 / wed_sales2, + thu_sales1 / thu_sales2, + fri_sales1 / fri_sales2, + sat_sales1 / sat_sales2 +FROM + (SELECT + s_store_name s_store_name1, + wss.d_week_seq d_week_seq1, + s_store_id s_store_id1, + sun_sales sun_sales1, + mon_sales mon_sales1, + tue_sales tue_sales1, + wed_sales wed_sales1, + thu_sales thu_sales1, + fri_sales fri_sales1, + sat_sales sat_sales1 + FROM wss, store, date_dim d + WHERE d.d_week_seq = wss.d_week_seq AND + ss_store_sk = s_store_sk AND + d_month_seq BETWEEN 1212 AND 1212 + 11) y, + (SELECT + s_store_name s_store_name2, + wss.d_week_seq d_week_seq2, + s_store_id s_store_id2, + sun_sales sun_sales2, + mon_sales mon_sales2, + tue_sales tue_sales2, + wed_sales wed_sales2, + thu_sales thu_sales2, + fri_sales fri_sales2, + sat_sales sat_sales2 + FROM wss, store, date_dim d + WHERE d.d_week_seq = wss.d_week_seq AND + ss_store_sk = s_store_sk AND + d_month_seq BETWEEN 1212 + 12 AND 1212 + 23) x +WHERE s_store_id1 = s_store_id2 + AND d_week_seq1 = d_week_seq2 - 52 +ORDER BY s_store_name1, s_store_id1, d_week_seq1 +LIMIT 100 diff --git a/src/test/resources/tpcds/q6.sql b/src/test/resources/tpcds/q6.sql new file mode 100644 index 000000000..f0f5cf05a --- /dev/null +++ b/src/test/resources/tpcds/q6.sql @@ -0,0 +1,21 @@ +SELECT + a.ca_state state, + count(*) cnt +FROM + customer_address a, customer c, store_sales s, date_dim d, item i +WHERE a.ca_address_sk = c.c_current_addr_sk + AND c.c_customer_sk = s.ss_customer_sk + AND s.ss_sold_date_sk = d.d_date_sk + AND s.ss_item_sk = i.i_item_sk + AND d.d_month_seq = + (SELECT DISTINCT (d_month_seq) + FROM date_dim + WHERE d_year = 2000 AND d_moy = 1) + AND i.i_current_price > 1.2 * + (SELECT avg(j.i_current_price) + FROM item j + WHERE j.i_category = i.i_category) +GROUP BY a.ca_state +HAVING count(*) >= 10 +ORDER BY cnt +LIMIT 100 diff --git a/src/test/resources/tpcds/q60.sql b/src/test/resources/tpcds/q60.sql new file mode 100644 index 000000000..41b963f44 --- /dev/null +++ b/src/test/resources/tpcds/q60.sql @@ -0,0 +1,62 @@ +WITH ss AS ( + SELECT + i_item_id, + sum(ss_ext_sales_price) total_sales + FROM store_sales, date_dim, customer_address, item + WHERE + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_category IN ('Music')) + AND ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND d_year = 1998 + AND d_moy = 9 + AND ss_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_item_id), + cs AS ( + SELECT + i_item_id, + sum(cs_ext_sales_price) total_sales + FROM catalog_sales, date_dim, customer_address, item + WHERE + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_category IN ('Music')) + AND cs_item_sk = i_item_sk + AND cs_sold_date_sk = d_date_sk + AND d_year = 1998 + AND d_moy = 9 + AND cs_bill_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_item_id), + ws AS ( + SELECT + i_item_id, + sum(ws_ext_sales_price) total_sales + FROM web_sales, date_dim, customer_address, item + WHERE + i_item_id IN (SELECT i_item_id + FROM item + WHERE i_category IN ('Music')) + AND ws_item_sk = i_item_sk + AND ws_sold_date_sk = d_date_sk + AND d_year = 1998 + AND d_moy = 9 + AND ws_bill_addr_sk = ca_address_sk + AND ca_gmt_offset = -5 + GROUP BY i_item_id) +SELECT + i_item_id, + sum(total_sales) total_sales +FROM (SELECT * + FROM ss + UNION ALL + SELECT * + FROM cs + UNION ALL + SELECT * + FROM ws) tmp1 +GROUP BY i_item_id +ORDER BY i_item_id, total_sales +LIMIT 100 diff --git a/src/test/resources/tpcds/q61.sql b/src/test/resources/tpcds/q61.sql new file mode 100644 index 000000000..b0a872b4b --- /dev/null +++ b/src/test/resources/tpcds/q61.sql @@ -0,0 +1,33 @@ +SELECT + promotions, + total, + cast(promotions AS DECIMAL(15, 4)) / cast(total AS DECIMAL(15, 4)) * 100 +FROM + (SELECT sum(ss_ext_sales_price) promotions + FROM store_sales, store, promotion, date_dim, customer, customer_address, item + WHERE ss_sold_date_sk = d_date_sk + AND ss_store_sk = s_store_sk + AND ss_promo_sk = p_promo_sk + AND ss_customer_sk = c_customer_sk + AND ca_address_sk = c_current_addr_sk + AND ss_item_sk = i_item_sk + AND ca_gmt_offset = -5 + AND i_category = 'Jewelry' + AND (p_channel_dmail = 'Y' OR p_channel_email = 'Y' OR p_channel_tv = 'Y') + AND s_gmt_offset = -5 + AND d_year = 1998 + AND d_moy = 11) promotional_sales, + (SELECT sum(ss_ext_sales_price) total + FROM store_sales, store, date_dim, customer, customer_address, item + WHERE ss_sold_date_sk = d_date_sk + AND ss_store_sk = s_store_sk + AND ss_customer_sk = c_customer_sk + AND ca_address_sk = c_current_addr_sk + AND ss_item_sk = i_item_sk + AND ca_gmt_offset = -5 + AND i_category = 'Jewelry' + AND s_gmt_offset = -5 + AND d_year = 1998 + AND d_moy = 11) all_sales +ORDER BY promotions, total +LIMIT 100 diff --git a/src/test/resources/tpcds/q62.sql b/src/test/resources/tpcds/q62.sql new file mode 100644 index 000000000..8a414f154 --- /dev/null +++ b/src/test/resources/tpcds/q62.sql @@ -0,0 +1,35 @@ +SELECT + substr(w_warehouse_name, 1, 20), + sm_type, + web_name, + sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk <= 30) + THEN 1 + ELSE 0 END) AS `30 days `, + sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 30) AND + (ws_ship_date_sk - ws_sold_date_sk <= 60) + THEN 1 + ELSE 0 END) AS `31 - 60 days `, + sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 60) AND + (ws_ship_date_sk - ws_sold_date_sk <= 90) + THEN 1 + ELSE 0 END) AS `61 - 90 days `, + sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 90) AND + (ws_ship_date_sk - ws_sold_date_sk <= 120) + THEN 1 + ELSE 0 END) AS `91 - 120 days `, + sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 120) + THEN 1 + ELSE 0 END) AS `>120 days ` +FROM + web_sales, warehouse, ship_mode, web_site, date_dim +WHERE + d_month_seq BETWEEN 1200 AND 1200 + 11 + AND ws_ship_date_sk = d_date_sk + AND ws_warehouse_sk = w_warehouse_sk + AND ws_ship_mode_sk = sm_ship_mode_sk + AND ws_web_site_sk = web_site_sk +GROUP BY + substr(w_warehouse_name, 1, 20), sm_type, web_name +ORDER BY + substr(w_warehouse_name, 1, 20), sm_type, web_name +LIMIT 100 diff --git a/src/test/resources/tpcds/q63.sql b/src/test/resources/tpcds/q63.sql new file mode 100644 index 000000000..ef6867e0a --- /dev/null +++ b/src/test/resources/tpcds/q63.sql @@ -0,0 +1,31 @@ +SELECT * +FROM (SELECT + i_manager_id, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) + OVER (PARTITION BY i_manager_id) avg_monthly_sales +FROM item + , store_sales + , date_dim + , store +WHERE ss_item_sk = i_item_sk + AND ss_sold_date_sk = d_date_sk + AND ss_store_sk = s_store_sk + AND d_month_seq IN (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, 1200 + 7, + 1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11) + AND ((i_category IN ('Books', 'Children', 'Electronics') + AND i_class IN ('personal', 'portable', 'refernece', 'self-help') + AND i_brand IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', + 'exportiunivamalg #9', 'scholaramalgamalg #9')) + OR (i_category IN ('Women', 'Music', 'Men') + AND i_class IN ('accessories', 'classical', 'fragrances', 'pants') + AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', + 'importoamalg #1'))) +GROUP BY i_manager_id, d_moy) tmp1 +WHERE CASE WHEN avg_monthly_sales > 0 + THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales + ELSE NULL END > 0.1 +ORDER BY i_manager_id + , avg_monthly_sales + , sum_sales +LIMIT 100 diff --git a/src/test/resources/tpcds/q64.sql b/src/test/resources/tpcds/q64.sql new file mode 100644 index 000000000..8ec1d31b6 --- /dev/null +++ b/src/test/resources/tpcds/q64.sql @@ -0,0 +1,92 @@ +WITH cs_ui AS +(SELECT + cs_item_sk, + sum(cs_ext_list_price) AS sale, + sum(cr_refunded_cash + cr_reversed_charge + cr_store_credit) AS refund + FROM catalog_sales + , catalog_returns + WHERE cs_item_sk = cr_item_sk + AND cs_order_number = cr_order_number + GROUP BY cs_item_sk + HAVING sum(cs_ext_list_price) > 2 * sum(cr_refunded_cash + cr_reversed_charge + cr_store_credit)), + cross_sales AS + (SELECT + i_product_name product_name, + i_item_sk item_sk, + s_store_name store_name, + s_zip store_zip, + ad1.ca_street_number b_street_number, + ad1.ca_street_name b_streen_name, + ad1.ca_city b_city, + ad1.ca_zip b_zip, + ad2.ca_street_number c_street_number, + ad2.ca_street_name c_street_name, + ad2.ca_city c_city, + ad2.ca_zip c_zip, + d1.d_year AS syear, + d2.d_year AS fsyear, + d3.d_year s2year, + count(*) cnt, + sum(ss_wholesale_cost) s1, + sum(ss_list_price) s2, + sum(ss_coupon_amt) s3 + FROM store_sales, store_returns, cs_ui, date_dim d1, date_dim d2, date_dim d3, + store, customer, customer_demographics cd1, customer_demographics cd2, + promotion, household_demographics hd1, household_demographics hd2, + customer_address ad1, customer_address ad2, income_band ib1, income_band ib2, item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk = cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk AND + ss_item_sk = i_item_sk AND + ss_item_sk = sr_item_sk AND + ss_ticket_number = sr_ticket_number AND + ss_item_sk = cs_ui.cs_item_sk AND + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk AND + c_first_sales_date_sk = d2.d_date_sk AND + c_first_shipto_date_sk = d3.d_date_sk AND + ss_promo_sk = p_promo_sk AND + hd1.hd_income_band_sk = ib1.ib_income_band_sk AND + hd2.hd_income_band_sk = ib2.ib_income_band_sk AND + cd1.cd_marital_status <> cd2.cd_marital_status AND + i_color IN ('purple', 'burlywood', 'indian', 'spring', 'floral', 'medium') AND + i_current_price BETWEEN 64 AND 64 + 10 AND + i_current_price BETWEEN 64 + 1 AND 64 + 15 + GROUP BY i_product_name, i_item_sk, s_store_name, s_zip, ad1.ca_street_number, + ad1.ca_street_name, ad1.ca_city, ad1.ca_zip, ad2.ca_street_number, + ad2.ca_street_name, ad2.ca_city, ad2.ca_zip, d1.d_year, d2.d_year, d3.d_year + ) +SELECT + cs1.product_name, + cs1.store_name, + cs1.store_zip, + cs1.b_street_number, + cs1.b_streen_name, + cs1.b_city, + cs1.b_zip, + cs1.c_street_number, + cs1.c_street_name, + cs1.c_city, + cs1.c_zip, + cs1.syear, + cs1.cnt, + cs1.s1, + cs1.s2, + cs1.s3, + cs2.s1, + cs2.s2, + cs2.s3, + cs2.syear, + cs2.cnt +FROM cross_sales cs1, cross_sales cs2 +WHERE cs1.item_sk = cs2.item_sk AND + cs1.syear = 1999 AND + cs2.syear = 1999 + 1 AND + cs2.cnt <= cs1.cnt AND + cs1.store_name = cs2.store_name AND + cs1.store_zip = cs2.store_zip +ORDER BY cs1.product_name, cs1.store_name, cs2.cnt diff --git a/src/test/resources/tpcds/q65.sql b/src/test/resources/tpcds/q65.sql new file mode 100644 index 000000000..aad04be1b --- /dev/null +++ b/src/test/resources/tpcds/q65.sql @@ -0,0 +1,33 @@ +SELECT + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand +FROM store, item, + (SELECT + ss_store_sk, + avg(revenue) AS ave + FROM + (SELECT + ss_store_sk, + ss_item_sk, + sum(ss_sales_price) AS revenue + FROM store_sales, date_dim + WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1176 AND 1176 + 11 + GROUP BY ss_store_sk, ss_item_sk) sa + GROUP BY ss_store_sk) sb, + (SELECT + ss_store_sk, + ss_item_sk, + sum(ss_sales_price) AS revenue + FROM store_sales, date_dim + WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1176 AND 1176 + 11 + GROUP BY ss_store_sk, ss_item_sk) sc +WHERE sb.ss_store_sk = sc.ss_store_sk AND + sc.revenue <= 0.1 * sb.ave AND + s_store_sk = sc.ss_store_sk AND + i_item_sk = sc.ss_item_sk +ORDER BY s_store_name, i_item_desc +LIMIT 100 diff --git a/src/test/resources/tpcds/q66.sql b/src/test/resources/tpcds/q66.sql new file mode 100644 index 000000000..f826b4164 --- /dev/null +++ b/src/test/resources/tpcds/q66.sql @@ -0,0 +1,240 @@ +SELECT + w_warehouse_name, + w_warehouse_sq_ft, + w_city, + w_county, + w_state, + w_country, + ship_carriers, + year, + sum(jan_sales) AS jan_sales, + sum(feb_sales) AS feb_sales, + sum(mar_sales) AS mar_sales, + sum(apr_sales) AS apr_sales, + sum(may_sales) AS may_sales, + sum(jun_sales) AS jun_sales, + sum(jul_sales) AS jul_sales, + sum(aug_sales) AS aug_sales, + sum(sep_sales) AS sep_sales, + sum(oct_sales) AS oct_sales, + sum(nov_sales) AS nov_sales, + sum(dec_sales) AS dec_sales, + sum(jan_sales / w_warehouse_sq_ft) AS jan_sales_per_sq_foot, + sum(feb_sales / w_warehouse_sq_ft) AS feb_sales_per_sq_foot, + sum(mar_sales / w_warehouse_sq_ft) AS mar_sales_per_sq_foot, + sum(apr_sales / w_warehouse_sq_ft) AS apr_sales_per_sq_foot, + sum(may_sales / w_warehouse_sq_ft) AS may_sales_per_sq_foot, + sum(jun_sales / w_warehouse_sq_ft) AS jun_sales_per_sq_foot, + sum(jul_sales / w_warehouse_sq_ft) AS jul_sales_per_sq_foot, + sum(aug_sales / w_warehouse_sq_ft) AS aug_sales_per_sq_foot, + sum(sep_sales / w_warehouse_sq_ft) AS sep_sales_per_sq_foot, + sum(oct_sales / w_warehouse_sq_ft) AS oct_sales_per_sq_foot, + sum(nov_sales / w_warehouse_sq_ft) AS nov_sales_per_sq_foot, + sum(dec_sales / w_warehouse_sq_ft) AS dec_sales_per_sq_foot, + sum(jan_net) AS jan_net, + sum(feb_net) AS feb_net, + sum(mar_net) AS mar_net, + sum(apr_net) AS apr_net, + sum(may_net) AS may_net, + sum(jun_net) AS jun_net, + sum(jul_net) AS jul_net, + sum(aug_net) AS aug_net, + sum(sep_net) AS sep_net, + sum(oct_net) AS oct_net, + sum(nov_net) AS nov_net, + sum(dec_net) AS dec_net +FROM ( + (SELECT + w_warehouse_name, + w_warehouse_sq_ft, + w_city, + w_county, + w_state, + w_country, + concat('DHL', ',', 'BARIAN') AS ship_carriers, + d_year AS year, + sum(CASE WHEN d_moy = 1 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS jan_sales, + sum(CASE WHEN d_moy = 2 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS feb_sales, + sum(CASE WHEN d_moy = 3 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS mar_sales, + sum(CASE WHEN d_moy = 4 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS apr_sales, + sum(CASE WHEN d_moy = 5 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS may_sales, + sum(CASE WHEN d_moy = 6 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS jun_sales, + sum(CASE WHEN d_moy = 7 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS jul_sales, + sum(CASE WHEN d_moy = 8 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS aug_sales, + sum(CASE WHEN d_moy = 9 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS sep_sales, + sum(CASE WHEN d_moy = 10 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS oct_sales, + sum(CASE WHEN d_moy = 11 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS nov_sales, + sum(CASE WHEN d_moy = 12 + THEN ws_ext_sales_price * ws_quantity + ELSE 0 END) AS dec_sales, + sum(CASE WHEN d_moy = 1 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS jan_net, + sum(CASE WHEN d_moy = 2 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS feb_net, + sum(CASE WHEN d_moy = 3 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS mar_net, + sum(CASE WHEN d_moy = 4 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS apr_net, + sum(CASE WHEN d_moy = 5 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS may_net, + sum(CASE WHEN d_moy = 6 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS jun_net, + sum(CASE WHEN d_moy = 7 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS jul_net, + sum(CASE WHEN d_moy = 8 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS aug_net, + sum(CASE WHEN d_moy = 9 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS sep_net, + sum(CASE WHEN d_moy = 10 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS oct_net, + sum(CASE WHEN d_moy = 11 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS nov_net, + sum(CASE WHEN d_moy = 12 + THEN ws_net_paid * ws_quantity + ELSE 0 END) AS dec_net + FROM + web_sales, warehouse, date_dim, time_dim, ship_mode + WHERE + ws_warehouse_sk = w_warehouse_sk + AND ws_sold_date_sk = d_date_sk + AND ws_sold_time_sk = t_time_sk + AND ws_ship_mode_sk = sm_ship_mode_sk + AND d_year = 2001 + AND t_time BETWEEN 30838 AND 30838 + 28800 + AND sm_carrier IN ('DHL', 'BARIAN') + GROUP BY + w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, d_year) + UNION ALL + (SELECT + w_warehouse_name, + w_warehouse_sq_ft, + w_city, + w_county, + w_state, + w_country, + concat('DHL', ',', 'BARIAN') AS ship_carriers, + d_year AS year, + sum(CASE WHEN d_moy = 1 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS jan_sales, + sum(CASE WHEN d_moy = 2 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS feb_sales, + sum(CASE WHEN d_moy = 3 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS mar_sales, + sum(CASE WHEN d_moy = 4 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS apr_sales, + sum(CASE WHEN d_moy = 5 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS may_sales, + sum(CASE WHEN d_moy = 6 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS jun_sales, + sum(CASE WHEN d_moy = 7 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS jul_sales, + sum(CASE WHEN d_moy = 8 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS aug_sales, + sum(CASE WHEN d_moy = 9 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS sep_sales, + sum(CASE WHEN d_moy = 10 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS oct_sales, + sum(CASE WHEN d_moy = 11 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS nov_sales, + sum(CASE WHEN d_moy = 12 + THEN cs_sales_price * cs_quantity + ELSE 0 END) AS dec_sales, + sum(CASE WHEN d_moy = 1 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS jan_net, + sum(CASE WHEN d_moy = 2 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS feb_net, + sum(CASE WHEN d_moy = 3 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS mar_net, + sum(CASE WHEN d_moy = 4 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS apr_net, + sum(CASE WHEN d_moy = 5 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS may_net, + sum(CASE WHEN d_moy = 6 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS jun_net, + sum(CASE WHEN d_moy = 7 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS jul_net, + sum(CASE WHEN d_moy = 8 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS aug_net, + sum(CASE WHEN d_moy = 9 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS sep_net, + sum(CASE WHEN d_moy = 10 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS oct_net, + sum(CASE WHEN d_moy = 11 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS nov_net, + sum(CASE WHEN d_moy = 12 + THEN cs_net_paid_inc_tax * cs_quantity + ELSE 0 END) AS dec_net + FROM + catalog_sales, warehouse, date_dim, time_dim, ship_mode + WHERE + cs_warehouse_sk = w_warehouse_sk + AND cs_sold_date_sk = d_date_sk + AND cs_sold_time_sk = t_time_sk + AND cs_ship_mode_sk = sm_ship_mode_sk + AND d_year = 2001 + AND t_time BETWEEN 30838 AND 30838 + 28800 + AND sm_carrier IN ('DHL', 'BARIAN') + GROUP BY + w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, d_year + ) + ) x +GROUP BY + w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, + ship_carriers, year +ORDER BY w_warehouse_name +LIMIT 100 diff --git a/src/test/resources/tpcds/q67.sql b/src/test/resources/tpcds/q67.sql new file mode 100644 index 000000000..f66e2252b --- /dev/null +++ b/src/test/resources/tpcds/q67.sql @@ -0,0 +1,38 @@ +SELECT * +FROM + (SELECT + i_category, + i_class, + i_brand, + i_product_name, + d_year, + d_qoy, + d_moy, + s_store_id, + sumsales, + rank() + OVER (PARTITION BY i_category + ORDER BY sumsales DESC) rk + FROM + (SELECT + i_category, + i_class, + i_brand, + i_product_name, + d_year, + d_qoy, + d_moy, + s_store_id, + sum(coalesce(ss_sales_price * ss_quantity, 0)) sumsales + FROM store_sales, date_dim, store, item + WHERE ss_sold_date_sk = d_date_sk + AND ss_item_sk = i_item_sk + AND ss_store_sk = s_store_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + GROUP BY ROLLUP (i_category, i_class, i_brand, i_product_name, d_year, d_qoy, + d_moy, s_store_id)) dw1) dw2 +WHERE rk <= 100 +ORDER BY + i_category, i_class, i_brand, i_product_name, d_year, + d_qoy, d_moy, s_store_id, sumsales, rk +LIMIT 100 diff --git a/src/test/resources/tpcds/q68.sql b/src/test/resources/tpcds/q68.sql new file mode 100644 index 000000000..adb8a7189 --- /dev/null +++ b/src/test/resources/tpcds/q68.sql @@ -0,0 +1,34 @@ +SELECT + c_last_name, + c_first_name, + ca_city, + bought_city, + ss_ticket_number, + extended_price, + extended_tax, + list_price +FROM (SELECT + ss_ticket_number, + ss_customer_sk, + ca_city bought_city, + sum(ss_ext_sales_price) extended_price, + sum(ss_ext_list_price) list_price, + sum(ss_ext_tax) extended_tax +FROM store_sales, date_dim, store, household_demographics, customer_address +WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_store_sk = store.s_store_sk + AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + AND store_sales.ss_addr_sk = customer_address.ca_address_sk + AND date_dim.d_dom BETWEEN 1 AND 2 + AND (household_demographics.hd_dep_count = 4 OR + household_demographics.hd_vehicle_count = 3) + AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) + AND store.s_city IN ('Midway', 'Fairview') +GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn, + customer, + customer_address current_addr +WHERE ss_customer_sk = c_customer_sk + AND customer.c_current_addr_sk = current_addr.ca_address_sk + AND current_addr.ca_city <> bought_city +ORDER BY c_last_name, ss_ticket_number +LIMIT 100 diff --git a/src/test/resources/tpcds/q69.sql b/src/test/resources/tpcds/q69.sql new file mode 100644 index 000000000..1f0ee64f5 --- /dev/null +++ b/src/test/resources/tpcds/q69.sql @@ -0,0 +1,38 @@ +SELECT + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 +FROM + customer c, customer_address ca, customer_demographics +WHERE + c.c_current_addr_sk = ca.ca_address_sk AND + ca_state IN ('KY', 'GA', 'NM') AND + cd_demo_sk = c.c_current_cdemo_sk AND + exists(SELECT * + FROM store_sales, date_dim + WHERE c.c_customer_sk = ss_customer_sk AND + ss_sold_date_sk = d_date_sk AND + d_year = 2001 AND + d_moy BETWEEN 4 AND 4 + 2) AND + (NOT exists(SELECT * + FROM web_sales, date_dim + WHERE c.c_customer_sk = ws_bill_customer_sk AND + ws_sold_date_sk = d_date_sk AND + d_year = 2001 AND + d_moy BETWEEN 4 AND 4 + 2) AND + NOT exists(SELECT * + FROM catalog_sales, date_dim + WHERE c.c_customer_sk = cs_ship_customer_sk AND + cs_sold_date_sk = d_date_sk AND + d_year = 2001 AND + d_moy BETWEEN 4 AND 4 + 2)) +GROUP BY cd_gender, cd_marital_status, cd_education_status, + cd_purchase_estimate, cd_credit_rating +ORDER BY cd_gender, cd_marital_status, cd_education_status, + cd_purchase_estimate, cd_credit_rating +LIMIT 100 diff --git a/src/test/resources/tpcds/q7.sql b/src/test/resources/tpcds/q7.sql new file mode 100644 index 000000000..6630a0054 --- /dev/null +++ b/src/test/resources/tpcds/q7.sql @@ -0,0 +1,19 @@ +SELECT + i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 +FROM store_sales, customer_demographics, date_dim, item, promotion +WHERE ss_sold_date_sk = d_date_sk AND + ss_item_sk = i_item_sk AND + ss_cdemo_sk = cd_demo_sk AND + ss_promo_sk = p_promo_sk AND + cd_gender = 'M' AND + cd_marital_status = 'S' AND + cd_education_status = 'College' AND + (p_channel_email = 'N' OR p_channel_event = 'N') AND + d_year = 2000 +GROUP BY i_item_id +ORDER BY i_item_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q70.sql b/src/test/resources/tpcds/q70.sql new file mode 100644 index 000000000..625011b21 --- /dev/null +++ b/src/test/resources/tpcds/q70.sql @@ -0,0 +1,38 @@ +SELECT + sum(ss_net_profit) AS total_sum, + s_state, + s_county, + grouping(s_state) + grouping(s_county) AS lochierarchy, + rank() + OVER ( + PARTITION BY grouping(s_state) + grouping(s_county), + CASE WHEN grouping(s_county) = 0 + THEN s_state END + ORDER BY sum(ss_net_profit) DESC) AS rank_within_parent +FROM + store_sales, date_dim d1, store +WHERE + d1.d_month_seq BETWEEN 1200 AND 1200 + 11 + AND d1.d_date_sk = ss_sold_date_sk + AND s_store_sk = ss_store_sk + AND s_state IN + (SELECT s_state + FROM + (SELECT + s_state AS s_state, + rank() + OVER (PARTITION BY s_state + ORDER BY sum(ss_net_profit) DESC) AS ranking + FROM store_sales, store, date_dim + WHERE d_month_seq BETWEEN 1200 AND 1200 + 11 + AND d_date_sk = ss_sold_date_sk + AND s_store_sk = ss_store_sk + GROUP BY s_state) tmp1 + WHERE ranking <= 5) +GROUP BY ROLLUP (s_state, s_county) +ORDER BY + lochierarchy DESC + , CASE WHEN lochierarchy = 0 + THEN s_state END + , rank_within_parent +LIMIT 100 diff --git a/src/test/resources/tpcds/q71.sql b/src/test/resources/tpcds/q71.sql new file mode 100644 index 000000000..8d724b924 --- /dev/null +++ b/src/test/resources/tpcds/q71.sql @@ -0,0 +1,44 @@ +SELECT + i_brand_id brand_id, + i_brand brand, + t_hour, + t_minute, + sum(ext_price) ext_price +FROM item, + (SELECT + ws_ext_sales_price AS ext_price, + ws_sold_date_sk AS sold_date_sk, + ws_item_sk AS sold_item_sk, + ws_sold_time_sk AS time_sk + FROM web_sales, date_dim + WHERE d_date_sk = ws_sold_date_sk + AND d_moy = 11 + AND d_year = 1999 + UNION ALL + SELECT + cs_ext_sales_price AS ext_price, + cs_sold_date_sk AS sold_date_sk, + cs_item_sk AS sold_item_sk, + cs_sold_time_sk AS time_sk + FROM catalog_sales, date_dim + WHERE d_date_sk = cs_sold_date_sk + AND d_moy = 11 + AND d_year = 1999 + UNION ALL + SELECT + ss_ext_sales_price AS ext_price, + ss_sold_date_sk AS sold_date_sk, + ss_item_sk AS sold_item_sk, + ss_sold_time_sk AS time_sk + FROM store_sales, date_dim + WHERE d_date_sk = ss_sold_date_sk + AND d_moy = 11 + AND d_year = 1999 + ) AS tmp, time_dim +WHERE + sold_item_sk = i_item_sk + AND i_manager_id = 1 + AND time_sk = t_time_sk + AND (t_meal_time = 'breakfast' OR t_meal_time = 'dinner') +GROUP BY i_brand, i_brand_id, t_hour, t_minute +ORDER BY ext_price DESC, brand_id diff --git a/src/test/resources/tpcds/q72.sql b/src/test/resources/tpcds/q72.sql new file mode 100644 index 000000000..99b3eee54 --- /dev/null +++ b/src/test/resources/tpcds/q72.sql @@ -0,0 +1,33 @@ +SELECT + i_item_desc, + w_warehouse_name, + d1.d_week_seq, + count(CASE WHEN p_promo_sk IS NULL + THEN 1 + ELSE 0 END) no_promo, + count(CASE WHEN p_promo_sk IS NOT NULL + THEN 1 + ELSE 0 END) promo, + count(*) total_cnt +FROM catalog_sales + JOIN inventory ON (cs_item_sk = inv_item_sk) + JOIN warehouse ON (w_warehouse_sk = inv_warehouse_sk) + JOIN item ON (i_item_sk = cs_item_sk) + JOIN customer_demographics ON (cs_bill_cdemo_sk = cd_demo_sk) + JOIN household_demographics ON (cs_bill_hdemo_sk = hd_demo_sk) + JOIN date_dim d1 ON (cs_sold_date_sk = d1.d_date_sk) + JOIN date_dim d2 ON (inv_date_sk = d2.d_date_sk) + JOIN date_dim d3 ON (cs_ship_date_sk = d3.d_date_sk) + LEFT OUTER JOIN promotion ON (cs_promo_sk = p_promo_sk) + LEFT OUTER JOIN catalog_returns ON (cr_item_sk = cs_item_sk AND cr_order_number = cs_order_number) +WHERE d1.d_week_seq = d2.d_week_seq + AND inv_quantity_on_hand < cs_quantity + AND d3.d_date > (cast(d1.d_date AS DATE) + interval 5 days) + AND hd_buy_potential = '>10000' + AND d1.d_year = 1999 + AND hd_buy_potential = '>10000' + AND cd_marital_status = 'D' + AND d1.d_year = 1999 +GROUP BY i_item_desc, w_warehouse_name, d1.d_week_seq +ORDER BY total_cnt DESC, i_item_desc, w_warehouse_name, d_week_seq +LIMIT 100 diff --git a/src/test/resources/tpcds/q73.sql b/src/test/resources/tpcds/q73.sql new file mode 100644 index 000000000..881be2e90 --- /dev/null +++ b/src/test/resources/tpcds/q73.sql @@ -0,0 +1,30 @@ +SELECT + c_last_name, + c_first_name, + c_salutation, + c_preferred_cust_flag, + ss_ticket_number, + cnt +FROM + (SELECT + ss_ticket_number, + ss_customer_sk, + count(*) cnt + FROM store_sales, date_dim, store, household_demographics + WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_store_sk = store.s_store_sk + AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + AND date_dim.d_dom BETWEEN 1 AND 2 + AND (household_demographics.hd_buy_potential = '>10000' OR + household_demographics.hd_buy_potential = 'unknown') + AND household_demographics.hd_vehicle_count > 0 + AND CASE WHEN household_demographics.hd_vehicle_count > 0 + THEN + household_demographics.hd_dep_count / household_demographics.hd_vehicle_count + ELSE NULL END > 1 + AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) + AND store.s_county IN ('Williamson County', 'Franklin Parish', 'Bronx County', 'Orange County') + GROUP BY ss_ticket_number, ss_customer_sk) dj, customer +WHERE ss_customer_sk = c_customer_sk + AND cnt BETWEEN 1 AND 5 +ORDER BY cnt DESC diff --git a/src/test/resources/tpcds/q74.sql b/src/test/resources/tpcds/q74.sql new file mode 100644 index 000000000..154b26d68 --- /dev/null +++ b/src/test/resources/tpcds/q74.sql @@ -0,0 +1,58 @@ +WITH year_total AS ( + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + d_year AS year, + sum(ss_net_paid) year_total, + 's' sale_type + FROM + customer, store_sales, date_dim + WHERE c_customer_sk = ss_customer_sk + AND ss_sold_date_sk = d_date_sk + AND d_year IN (2001, 2001 + 1) + GROUP BY + c_customer_id, c_first_name, c_last_name, d_year + UNION ALL + SELECT + c_customer_id customer_id, + c_first_name customer_first_name, + c_last_name customer_last_name, + d_year AS year, + sum(ws_net_paid) year_total, + 'w' sale_type + FROM + customer, web_sales, date_dim + WHERE c_customer_sk = ws_bill_customer_sk + AND ws_sold_date_sk = d_date_sk + AND d_year IN (2001, 2001 + 1) + GROUP BY + c_customer_id, c_first_name, c_last_name, d_year) +SELECT + t_s_secyear.customer_id, + t_s_secyear.customer_first_name, + t_s_secyear.customer_last_name +FROM + year_total t_s_firstyear, year_total t_s_secyear, + year_total t_w_firstyear, year_total t_w_secyear +WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id + AND t_s_firstyear.customer_id = t_w_secyear.customer_id + AND t_s_firstyear.customer_id = t_w_firstyear.customer_id + AND t_s_firstyear.sale_type = 's' + AND t_w_firstyear.sale_type = 'w' + AND t_s_secyear.sale_type = 's' + AND t_w_secyear.sale_type = 'w' + AND t_s_firstyear.year = 2001 + AND t_s_secyear.year = 2001 + 1 + AND t_w_firstyear.year = 2001 + AND t_w_secyear.year = 2001 + 1 + AND t_s_firstyear.year_total > 0 + AND t_w_firstyear.year_total > 0 + AND CASE WHEN t_w_firstyear.year_total > 0 + THEN t_w_secyear.year_total / t_w_firstyear.year_total + ELSE NULL END + > CASE WHEN t_s_firstyear.year_total > 0 + THEN t_s_secyear.year_total / t_s_firstyear.year_total + ELSE NULL END +ORDER BY 1, 1, 1 +LIMIT 100 diff --git a/src/test/resources/tpcds/q75.sql b/src/test/resources/tpcds/q75.sql new file mode 100644 index 000000000..2a143232b --- /dev/null +++ b/src/test/resources/tpcds/q75.sql @@ -0,0 +1,76 @@ +WITH all_sales AS ( + SELECT + d_year, + i_brand_id, + i_class_id, + i_category_id, + i_manufact_id, + SUM(sales_cnt) AS sales_cnt, + SUM(sales_amt) AS sales_amt + FROM ( + SELECT + d_year, + i_brand_id, + i_class_id, + i_category_id, + i_manufact_id, + cs_quantity - COALESCE(cr_return_quantity, 0) AS sales_cnt, + cs_ext_sales_price - COALESCE(cr_return_amount, 0.0) AS sales_amt + FROM catalog_sales + JOIN item ON i_item_sk = cs_item_sk + JOIN date_dim ON d_date_sk = cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number = cr_order_number + AND cs_item_sk = cr_item_sk) + WHERE i_category = 'Books' + UNION + SELECT + d_year, + i_brand_id, + i_class_id, + i_category_id, + i_manufact_id, + ss_quantity - COALESCE(sr_return_quantity, 0) AS sales_cnt, + ss_ext_sales_price - COALESCE(sr_return_amt, 0.0) AS sales_amt + FROM store_sales + JOIN item ON i_item_sk = ss_item_sk + JOIN date_dim ON d_date_sk = ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number = sr_ticket_number + AND ss_item_sk = sr_item_sk) + WHERE i_category = 'Books' + UNION + SELECT + d_year, + i_brand_id, + i_class_id, + i_category_id, + i_manufact_id, + ws_quantity - COALESCE(wr_return_quantity, 0) AS sales_cnt, + ws_ext_sales_price - COALESCE(wr_return_amt, 0.0) AS sales_amt + FROM web_sales + JOIN item ON i_item_sk = ws_item_sk + JOIN date_dim ON d_date_sk = ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number = wr_order_number + AND ws_item_sk = wr_item_sk) + WHERE i_category = 'Books') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) +SELECT + prev_yr.d_year AS prev_year, + curr_yr.d_year AS year, + curr_yr.i_brand_id, + curr_yr.i_class_id, + curr_yr.i_category_id, + curr_yr.i_manufact_id, + prev_yr.sales_cnt AS prev_yr_cnt, + curr_yr.sales_cnt AS curr_yr_cnt, + curr_yr.sales_cnt - prev_yr.sales_cnt AS sales_cnt_diff, + curr_yr.sales_amt - prev_yr.sales_amt AS sales_amt_diff +FROM all_sales curr_yr, all_sales prev_yr +WHERE curr_yr.i_brand_id = prev_yr.i_brand_id + AND curr_yr.i_class_id = prev_yr.i_class_id + AND curr_yr.i_category_id = prev_yr.i_category_id + AND curr_yr.i_manufact_id = prev_yr.i_manufact_id + AND curr_yr.d_year = 2002 + AND prev_yr.d_year = 2002 - 1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17, 2)) / CAST(prev_yr.sales_cnt AS DECIMAL(17, 2)) < 0.9 +ORDER BY sales_cnt_diff +LIMIT 100 diff --git a/src/test/resources/tpcds/q76.sql b/src/test/resources/tpcds/q76.sql new file mode 100644 index 000000000..815fa922b --- /dev/null +++ b/src/test/resources/tpcds/q76.sql @@ -0,0 +1,47 @@ +SELECT + channel, + col_name, + d_year, + d_qoy, + i_category, + COUNT(*) sales_cnt, + SUM(ext_sales_price) sales_amt +FROM ( + SELECT + 'store' AS channel, + ss_store_sk col_name, + d_year, + d_qoy, + i_category, + ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_store_sk IS NULL + AND ss_sold_date_sk = d_date_sk + AND ss_item_sk = i_item_sk + UNION ALL + SELECT + 'web' AS channel, + ws_ship_customer_sk col_name, + d_year, + d_qoy, + i_category, + ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_ship_customer_sk IS NULL + AND ws_sold_date_sk = d_date_sk + AND ws_item_sk = i_item_sk + UNION ALL + SELECT + 'catalog' AS channel, + cs_ship_addr_sk col_name, + d_year, + d_qoy, + i_category, + cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_ship_addr_sk IS NULL + AND cs_sold_date_sk = d_date_sk + AND cs_item_sk = i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +LIMIT 100 diff --git a/src/test/resources/tpcds/q77.sql b/src/test/resources/tpcds/q77.sql new file mode 100644 index 000000000..a69df9fbc --- /dev/null +++ b/src/test/resources/tpcds/q77.sql @@ -0,0 +1,100 @@ +WITH ss AS +(SELECT + s_store_sk, + sum(ss_ext_sales_price) AS sales, + sum(ss_net_profit) AS profit + FROM store_sales, date_dim, store + WHERE ss_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-03' AS DATE) AND + (cast('2000-08-03' AS DATE) + INTERVAL 30 days) + AND ss_store_sk = s_store_sk + GROUP BY s_store_sk), + sr AS + (SELECT + s_store_sk, + sum(sr_return_amt) AS returns, + sum(sr_net_loss) AS profit_loss + FROM store_returns, date_dim, store + WHERE sr_returned_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-03' AS DATE) AND + (cast('2000-08-03' AS DATE) + INTERVAL 30 days) + AND sr_store_sk = s_store_sk + GROUP BY s_store_sk), + cs AS + (SELECT + cs_call_center_sk, + sum(cs_ext_sales_price) AS sales, + sum(cs_net_profit) AS profit + FROM catalog_sales, date_dim + WHERE cs_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-03' AS DATE) AND + (cast('2000-08-03' AS DATE) + INTERVAL 30 days) + GROUP BY cs_call_center_sk), + cr AS + (SELECT + sum(cr_return_amount) AS returns, + sum(cr_net_loss) AS profit_loss + FROM catalog_returns, date_dim + WHERE cr_returned_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-03' AS DATE) AND + (cast('2000-08-03' AS DATE) + INTERVAL 30 days)), + ws AS + (SELECT + wp_web_page_sk, + sum(ws_ext_sales_price) AS sales, + sum(ws_net_profit) AS profit + FROM web_sales, date_dim, web_page + WHERE ws_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-03' AS DATE) AND + (cast('2000-08-03' AS DATE) + INTERVAL 30 days) + AND ws_web_page_sk = wp_web_page_sk + GROUP BY wp_web_page_sk), + wr AS + (SELECT + wp_web_page_sk, + sum(wr_return_amt) AS returns, + sum(wr_net_loss) AS profit_loss + FROM web_returns, date_dim, web_page + WHERE wr_returned_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-03' AS DATE) AND + (cast('2000-08-03' AS DATE) + INTERVAL 30 days) + AND wr_web_page_sk = wp_web_page_sk + GROUP BY wp_web_page_sk) +SELECT + channel, + id, + sum(sales) AS sales, + sum(returns) AS returns, + sum(profit) AS profit +FROM + (SELECT + 'store channel' AS channel, + ss.s_store_sk AS id, + sales, + coalesce(returns, 0) AS returns, + (profit - coalesce(profit_loss, 0)) AS profit + FROM ss + LEFT JOIN sr + ON ss.s_store_sk = sr.s_store_sk + UNION ALL + SELECT + 'catalog channel' AS channel, + cs_call_center_sk AS id, + sales, + returns, + (profit - profit_loss) AS profit + FROM cs, cr + UNION ALL + SELECT + 'web channel' AS channel, + ws.wp_web_page_sk AS id, + sales, + coalesce(returns, 0) returns, + (profit - coalesce(profit_loss, 0)) AS profit + FROM ws + LEFT JOIN wr + ON ws.wp_web_page_sk = wr.wp_web_page_sk + ) x +GROUP BY ROLLUP (channel, id) +ORDER BY channel, id +LIMIT 100 diff --git a/src/test/resources/tpcds/q78.sql b/src/test/resources/tpcds/q78.sql new file mode 100644 index 000000000..07b0940e2 --- /dev/null +++ b/src/test/resources/tpcds/q78.sql @@ -0,0 +1,64 @@ +WITH ws AS +(SELECT + d_year AS ws_sold_year, + ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + FROM web_sales + LEFT JOIN web_returns ON wr_order_number = ws_order_number AND ws_item_sk = wr_item_sk + JOIN date_dim ON ws_sold_date_sk = d_date_sk + WHERE wr_order_number IS NULL + GROUP BY d_year, ws_item_sk, ws_bill_customer_sk +), + cs AS + (SELECT + d_year AS cs_sold_year, + cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + FROM catalog_sales + LEFT JOIN catalog_returns ON cr_order_number = cs_order_number AND cs_item_sk = cr_item_sk + JOIN date_dim ON cs_sold_date_sk = d_date_sk + WHERE cr_order_number IS NULL + GROUP BY d_year, cs_item_sk, cs_bill_customer_sk + ), + ss AS + (SELECT + d_year AS ss_sold_year, + ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + FROM store_sales + LEFT JOIN store_returns ON sr_ticket_number = ss_ticket_number AND ss_item_sk = sr_item_sk + JOIN date_dim ON ss_sold_date_sk = d_date_sk + WHERE sr_ticket_number IS NULL + GROUP BY d_year, ss_item_sk, ss_customer_sk + ) +SELECT + round(ss_qty / (coalesce(ws_qty + cs_qty, 1)), 2) ratio, + ss_qty store_qty, + ss_wc store_wholesale_cost, + ss_sp store_sales_price, + coalesce(ws_qty, 0) + coalesce(cs_qty, 0) other_chan_qty, + coalesce(ws_wc, 0) + coalesce(cs_wc, 0) other_chan_wholesale_cost, + coalesce(ws_sp, 0) + coalesce(cs_sp, 0) other_chan_sales_price +FROM ss + LEFT JOIN ws + ON (ws_sold_year = ss_sold_year AND ws_item_sk = ss_item_sk AND ws_customer_sk = ss_customer_sk) + LEFT JOIN cs + ON (cs_sold_year = ss_sold_year AND cs_item_sk = ss_item_sk AND cs_customer_sk = ss_customer_sk) +WHERE coalesce(ws_qty, 0) > 0 AND coalesce(cs_qty, 0) > 0 AND ss_sold_year = 2000 +ORDER BY + ratio, + ss_qty DESC, ss_wc DESC, ss_sp DESC, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + round(ss_qty / (coalesce(ws_qty + cs_qty, 1)), 2) +LIMIT 100 diff --git a/src/test/resources/tpcds/q79.sql b/src/test/resources/tpcds/q79.sql new file mode 100644 index 000000000..08f86dc20 --- /dev/null +++ b/src/test/resources/tpcds/q79.sql @@ -0,0 +1,27 @@ +SELECT + c_last_name, + c_first_name, + substr(s_city, 1, 30), + ss_ticket_number, + amt, + profit +FROM + (SELECT + ss_ticket_number, + ss_customer_sk, + store.s_city, + sum(ss_coupon_amt) amt, + sum(ss_net_profit) profit + FROM store_sales, date_dim, store, household_demographics + WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_store_sk = store.s_store_sk + AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + AND (household_demographics.hd_dep_count = 6 OR + household_demographics.hd_vehicle_count > 2) + AND date_dim.d_dow = 1 + AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) + AND store.s_number_employees BETWEEN 200 AND 295 + GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, store.s_city) ms, customer +WHERE ss_customer_sk = c_customer_sk +ORDER BY c_last_name, c_first_name, substr(s_city, 1, 30), profit +LIMIT 100 diff --git a/src/test/resources/tpcds/q8.sql b/src/test/resources/tpcds/q8.sql new file mode 100644 index 000000000..497725111 --- /dev/null +++ b/src/test/resources/tpcds/q8.sql @@ -0,0 +1,87 @@ +SELECT + s_store_name, + sum(ss_net_profit) +FROM store_sales, date_dim, store, + (SELECT ca_zip + FROM ( + (SELECT substr(ca_zip, 1, 5) ca_zip + FROM customer_address + WHERE substr(ca_zip, 1, 5) IN ( + '24128','76232','65084','87816','83926','77556','20548', + '26231','43848','15126','91137','61265','98294','25782', + '17920','18426','98235','40081','84093','28577','55565', + '17183','54601','67897','22752','86284','18376','38607', + '45200','21756','29741','96765','23932','89360','29839', + '25989','28898','91068','72550','10390','18845','47770', + '82636','41367','76638','86198','81312','37126','39192', + '88424','72175','81426','53672','10445','42666','66864', + '66708','41248','48583','82276','18842','78890','49448', + '14089','38122','34425','79077','19849','43285','39861', + '66162','77610','13695','99543','83444','83041','12305', + '57665','68341','25003','57834','62878','49130','81096', + '18840','27700','23470','50412','21195','16021','76107', + '71954','68309','18119','98359','64544','10336','86379', + '27068','39736','98569','28915','24206','56529','57647', + '54917','42961','91110','63981','14922','36420','23006', + '67467','32754','30903','20260','31671','51798','72325', + '85816','68621','13955','36446','41766','68806','16725', + '15146','22744','35850','88086','51649','18270','52867', + '39972','96976','63792','11376','94898','13595','10516', + '90225','58943','39371','94945','28587','96576','57855', + '28488','26105','83933','25858','34322','44438','73171', + '30122','34102','22685','71256','78451','54364','13354', + '45375','40558','56458','28286','45266','47305','69399', + '83921','26233','11101','15371','69913','35942','15882', + '25631','24610','44165','99076','33786','70738','26653', + '14328','72305','62496','22152','10144','64147','48425', + '14663','21076','18799','30450','63089','81019','68893', + '24996','51200','51211','45692','92712','70466','79994', + '22437','25280','38935','71791','73134','56571','14060', + '19505','72425','56575','74351','68786','51650','20004', + '18383','76614','11634','18906','15765','41368','73241', + '76698','78567','97189','28545','76231','75691','22246', + '51061','90578','56691','68014','51103','94167','57047', + '14867','73520','15734','63435','25733','35474','24676', + '94627','53535','17879','15559','53268','59166','11928', + '59402','33282','45721','43933','68101','33515','36634', + '71286','19736','58058','55253','67473','41918','19515', + '36495','19430','22351','77191','91393','49156','50298', + '87501','18652','53179','18767','63193','23968','65164', + '68880','21286','72823','58470','67301','13394','31016', + '70372','67030','40604','24317','45748','39127','26065', + '77721','31029','31880','60576','24671','45549','13376', + '50016','33123','19769','22927','97789','46081','72151', + '15723','46136','51949','68100','96888','64528','14171', + '79777','28709','11489','25103','32213','78668','22245', + '15798','27156','37930','62971','21337','51622','67853', + '10567','38415','15455','58263','42029','60279','37125', + '56240','88190','50308','26859','64457','89091','82136', + '62377','36233','63837','58078','17043','30010','60099', + '28810','98025','29178','87343','73273','30469','64034', + '39516','86057','21309','90257','67875','40162','11356', + '73650','61810','72013','30431','22461','19512','13375', + '55307','30625','83849','68908','26689','96451','38193', + '46820','88885','84935','69035','83144','47537','56616', + '94983','48033','69952','25486','61547','27385','61860', + '58048','56910','16807','17871','35258','31387','35458', + '35576')) + INTERSECT + (SELECT ca_zip + FROM + (SELECT + substr(ca_zip, 1, 5) ca_zip, + count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk AND + c_preferred_cust_flag = 'Y' + GROUP BY ca_zip + HAVING count(*) > 10) A1) + ) A2 + ) V1 +WHERE ss_store_sk = s_store_sk + AND ss_sold_date_sk = d_date_sk + AND d_qoy = 2 AND d_year = 1998 + AND (substr(s_zip, 1, 2) = substr(V1.ca_zip, 1, 2)) +GROUP BY s_store_name +ORDER BY s_store_name +LIMIT 100 diff --git a/src/test/resources/tpcds/q80.sql b/src/test/resources/tpcds/q80.sql new file mode 100644 index 000000000..433db87d2 --- /dev/null +++ b/src/test/resources/tpcds/q80.sql @@ -0,0 +1,94 @@ +WITH ssr AS +(SELECT + s_store_id AS store_id, + sum(ss_ext_sales_price) AS sales, + sum(coalesce(sr_return_amt, 0)) AS returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) AS profit + FROM store_sales + LEFT OUTER JOIN store_returns ON + (ss_item_sk = sr_item_sk AND + ss_ticket_number = sr_ticket_number) + , + date_dim, store, item, promotion + WHERE ss_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-23' AS DATE) + AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) + AND ss_store_sk = s_store_sk + AND ss_item_sk = i_item_sk + AND i_current_price > 50 + AND ss_promo_sk = p_promo_sk + AND p_channel_tv = 'N' + GROUP BY s_store_id), + csr AS + (SELECT + cp_catalog_page_id AS catalog_page_id, + sum(cs_ext_sales_price) AS sales, + sum(coalesce(cr_return_amount, 0)) AS returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) AS profit + FROM catalog_sales + LEFT OUTER JOIN catalog_returns ON + (cs_item_sk = cr_item_sk AND + cs_order_number = cr_order_number) + , + date_dim, catalog_page, item, promotion + WHERE cs_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-23' AS DATE) + AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) + AND cs_catalog_page_sk = cp_catalog_page_sk + AND cs_item_sk = i_item_sk + AND i_current_price > 50 + AND cs_promo_sk = p_promo_sk + AND p_channel_tv = 'N' + GROUP BY cp_catalog_page_id), + wsr AS + (SELECT + web_site_id, + sum(ws_ext_sales_price) AS sales, + sum(coalesce(wr_return_amt, 0)) AS returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) AS profit + FROM web_sales + LEFT OUTER JOIN web_returns ON + (ws_item_sk = wr_item_sk AND ws_order_number = wr_order_number) + , + date_dim, web_site, item, promotion + WHERE ws_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('2000-08-23' AS DATE) + AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) + AND ws_web_site_sk = web_site_sk + AND ws_item_sk = i_item_sk + AND i_current_price > 50 + AND ws_promo_sk = p_promo_sk + AND p_channel_tv = 'N' + GROUP BY web_site_id) +SELECT + channel, + id, + sum(sales) AS sales, + sum(returns) AS returns, + sum(profit) AS profit +FROM (SELECT + 'store channel' AS channel, + concat('store', store_id) AS id, + sales, + returns, + profit + FROM ssr + UNION ALL + SELECT + 'catalog channel' AS channel, + concat('catalog_page', catalog_page_id) AS id, + sales, + returns, + profit + FROM csr + UNION ALL + SELECT + 'web channel' AS channel, + concat('web_site', web_site_id) AS id, + sales, + returns, + profit + FROM wsr) x +GROUP BY ROLLUP (channel, id) +ORDER BY channel, id +LIMIT 100 diff --git a/src/test/resources/tpcds/q81.sql b/src/test/resources/tpcds/q81.sql new file mode 100644 index 000000000..18f0ffa7e --- /dev/null +++ b/src/test/resources/tpcds/q81.sql @@ -0,0 +1,38 @@ +WITH customer_total_return AS +(SELECT + cr_returning_customer_sk AS ctr_customer_sk, + ca_state AS ctr_state, + sum(cr_return_amt_inc_tax) AS ctr_total_return + FROM catalog_returns, date_dim, customer_address + WHERE cr_returned_date_sk = d_date_sk + AND d_year = 2000 + AND cr_returning_addr_sk = ca_address_sk + GROUP BY cr_returning_customer_sk, ca_state ) +SELECT + c_customer_id, + c_salutation, + c_first_name, + c_last_name, + ca_street_number, + ca_street_name, + ca_street_type, + ca_suite_number, + ca_city, + ca_county, + ca_state, + ca_zip, + ca_country, + ca_gmt_offset, + ca_location_type, + ctr_total_return +FROM customer_total_return ctr1, customer_address, customer +WHERE ctr1.ctr_total_return > (SELECT avg(ctr_total_return) * 1.2 +FROM customer_total_return ctr2 +WHERE ctr1.ctr_state = ctr2.ctr_state) + AND ca_address_sk = c_current_addr_sk + AND ca_state = 'GA' + AND ctr1.ctr_customer_sk = c_customer_sk +ORDER BY c_customer_id, c_salutation, c_first_name, c_last_name, ca_street_number, ca_street_name + , ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset + , ca_location_type, ctr_total_return +LIMIT 100 diff --git a/src/test/resources/tpcds/q82.sql b/src/test/resources/tpcds/q82.sql new file mode 100644 index 000000000..20942cfeb --- /dev/null +++ b/src/test/resources/tpcds/q82.sql @@ -0,0 +1,15 @@ +SELECT + i_item_id, + i_item_desc, + i_current_price +FROM item, inventory, date_dim, store_sales +WHERE i_current_price BETWEEN 62 AND 62 + 30 + AND inv_item_sk = i_item_sk + AND d_date_sk = inv_date_sk + AND d_date BETWEEN cast('2000-05-25' AS DATE) AND (cast('2000-05-25' AS DATE) + INTERVAL 60 days) + AND i_manufact_id IN (129, 270, 821, 423) + AND inv_quantity_on_hand BETWEEN 100 AND 500 + AND ss_item_sk = i_item_sk +GROUP BY i_item_id, i_item_desc, i_current_price +ORDER BY i_item_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q83.sql b/src/test/resources/tpcds/q83.sql new file mode 100644 index 000000000..53c10c7de --- /dev/null +++ b/src/test/resources/tpcds/q83.sql @@ -0,0 +1,56 @@ +WITH sr_items AS +(SELECT + i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + FROM store_returns, item, date_dim + WHERE sr_item_sk = i_item_sk + AND d_date IN (SELECT d_date + FROM date_dim + WHERE d_week_seq IN + (SELECT d_week_seq + FROM date_dim + WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) + AND sr_returned_date_sk = d_date_sk + GROUP BY i_item_id), + cr_items AS + (SELECT + i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + FROM catalog_returns, item, date_dim + WHERE cr_item_sk = i_item_sk + AND d_date IN (SELECT d_date + FROM date_dim + WHERE d_week_seq IN + (SELECT d_week_seq + FROM date_dim + WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) + AND cr_returned_date_sk = d_date_sk + GROUP BY i_item_id), + wr_items AS + (SELECT + i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + FROM web_returns, item, date_dim + WHERE wr_item_sk = i_item_sk AND d_date IN + (SELECT d_date + FROM date_dim + WHERE d_week_seq IN + (SELECT d_week_seq + FROM date_dim + WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) + AND wr_returned_date_sk = d_date_sk + GROUP BY i_item_id) +SELECT + sr_items.item_id, + sr_item_qty, + sr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 sr_dev, + cr_item_qty, + cr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 cr_dev, + wr_item_qty, + wr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 wr_dev, + (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 average +FROM sr_items, cr_items, wr_items +WHERE sr_items.item_id = cr_items.item_id + AND sr_items.item_id = wr_items.item_id +ORDER BY sr_items.item_id, sr_item_qty +LIMIT 100 diff --git a/src/test/resources/tpcds/q84.sql b/src/test/resources/tpcds/q84.sql new file mode 100644 index 000000000..a1076b57c --- /dev/null +++ b/src/test/resources/tpcds/q84.sql @@ -0,0 +1,19 @@ +SELECT + c_customer_id AS customer_id, + concat(c_last_name, ', ', c_first_name) AS customername +FROM customer + , customer_address + , customer_demographics + , household_demographics + , income_band + , store_returns +WHERE ca_city = 'Edgewood' + AND c_current_addr_sk = ca_address_sk + AND ib_lower_bound >= 38128 + AND ib_upper_bound <= 38128 + 50000 + AND ib_income_band_sk = hd_income_band_sk + AND cd_demo_sk = c_current_cdemo_sk + AND hd_demo_sk = c_current_hdemo_sk + AND sr_cdemo_sk = cd_demo_sk +ORDER BY c_customer_id +LIMIT 100 diff --git a/src/test/resources/tpcds/q85.sql b/src/test/resources/tpcds/q85.sql new file mode 100644 index 000000000..cf718b0f8 --- /dev/null +++ b/src/test/resources/tpcds/q85.sql @@ -0,0 +1,82 @@ +SELECT + substr(r_reason_desc, 1, 20), + avg(ws_quantity), + avg(wr_refunded_cash), + avg(wr_fee) +FROM web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason +WHERE ws_web_page_sk = wp_web_page_sk + AND ws_item_sk = wr_item_sk + AND ws_order_number = wr_order_number + AND ws_sold_date_sk = d_date_sk AND d_year = 2000 + AND cd1.cd_demo_sk = wr_refunded_cdemo_sk + AND cd2.cd_demo_sk = wr_returning_cdemo_sk + AND ca_address_sk = wr_refunded_addr_sk + AND r_reason_sk = wr_reason_sk + AND + ( + ( + cd1.cd_marital_status = 'M' + AND + cd1.cd_marital_status = cd2.cd_marital_status + AND + cd1.cd_education_status = 'Advanced Degree' + AND + cd1.cd_education_status = cd2.cd_education_status + AND + ws_sales_price BETWEEN 100.00 AND 150.00 + ) + OR + ( + cd1.cd_marital_status = 'S' + AND + cd1.cd_marital_status = cd2.cd_marital_status + AND + cd1.cd_education_status = 'College' + AND + cd1.cd_education_status = cd2.cd_education_status + AND + ws_sales_price BETWEEN 50.00 AND 100.00 + ) + OR + ( + cd1.cd_marital_status = 'W' + AND + cd1.cd_marital_status = cd2.cd_marital_status + AND + cd1.cd_education_status = '2 yr Degree' + AND + cd1.cd_education_status = cd2.cd_education_status + AND + ws_sales_price BETWEEN 150.00 AND 200.00 + ) + ) + AND + ( + ( + ca_country = 'United States' + AND + ca_state IN ('IN', 'OH', 'NJ') + AND ws_net_profit BETWEEN 100 AND 200 + ) + OR + ( + ca_country = 'United States' + AND + ca_state IN ('WI', 'CT', 'KY') + AND ws_net_profit BETWEEN 150 AND 300 + ) + OR + ( + ca_country = 'United States' + AND + ca_state IN ('LA', 'IA', 'AR') + AND ws_net_profit BETWEEN 50 AND 250 + ) + ) +GROUP BY r_reason_desc +ORDER BY substr(r_reason_desc, 1, 20) + , avg(ws_quantity) + , avg(wr_refunded_cash) + , avg(wr_fee) +LIMIT 100 diff --git a/src/test/resources/tpcds/q86.sql b/src/test/resources/tpcds/q86.sql new file mode 100644 index 000000000..789a4abf7 --- /dev/null +++ b/src/test/resources/tpcds/q86.sql @@ -0,0 +1,24 @@ +SELECT + sum(ws_net_paid) AS total_sum, + i_category, + i_class, + grouping(i_category) + grouping(i_class) AS lochierarchy, + rank() + OVER ( + PARTITION BY grouping(i_category) + grouping(i_class), + CASE WHEN grouping(i_class) = 0 + THEN i_category END + ORDER BY sum(ws_net_paid) DESC) AS rank_within_parent +FROM + web_sales, date_dim d1, item +WHERE + d1.d_month_seq BETWEEN 1200 AND 1200 + 11 + AND d1.d_date_sk = ws_sold_date_sk + AND i_item_sk = ws_item_sk +GROUP BY ROLLUP (i_category, i_class) +ORDER BY + lochierarchy DESC, + CASE WHEN lochierarchy = 0 + THEN i_category END, + rank_within_parent +LIMIT 100 diff --git a/src/test/resources/tpcds/q87.sql b/src/test/resources/tpcds/q87.sql new file mode 100644 index 000000000..4aaa9f39d --- /dev/null +++ b/src/test/resources/tpcds/q87.sql @@ -0,0 +1,28 @@ +SELECT count(*) +FROM ((SELECT DISTINCT + c_last_name, + c_first_name, + d_date +FROM store_sales, date_dim, customer +WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + AND store_sales.ss_customer_sk = customer.c_customer_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11) + EXCEPT + (SELECT DISTINCT + c_last_name, + c_first_name, + d_date + FROM catalog_sales, date_dim, customer + WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11) + EXCEPT + (SELECT DISTINCT + c_last_name, + c_first_name, + d_date + FROM web_sales, date_dim, customer + WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk + AND web_sales.ws_bill_customer_sk = customer.c_customer_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11) + ) cool_cust diff --git a/src/test/resources/tpcds/q88.sql b/src/test/resources/tpcds/q88.sql new file mode 100644 index 000000000..25bcd90f4 --- /dev/null +++ b/src/test/resources/tpcds/q88.sql @@ -0,0 +1,122 @@ +SELECT * +FROM + (SELECT count(*) h8_30_to_9 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 8 + AND time_dim.t_minute >= 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s1, + (SELECT count(*) h9_to_9_30 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 9 + AND time_dim.t_minute < 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s2, + (SELECT count(*) h9_30_to_10 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 9 + AND time_dim.t_minute >= 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s3, + (SELECT count(*) h10_to_10_30 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 10 + AND time_dim.t_minute < 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s4, + (SELECT count(*) h10_30_to_11 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 10 + AND time_dim.t_minute >= 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s5, + (SELECT count(*) h11_to_11_30 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 11 + AND time_dim.t_minute < 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s6, + (SELECT count(*) h11_30_to_12 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 11 + AND time_dim.t_minute >= 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s7, + (SELECT count(*) h12_to_12_30 + FROM store_sales, household_demographics, time_dim, store + WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 12 + AND time_dim.t_minute < 30 + AND ( + (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) + OR + (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) + OR + (household_demographics.hd_dep_count = 0 AND + household_demographics.hd_vehicle_count <= 0 + 2)) + AND store.s_store_name = 'ese') s8 diff --git a/src/test/resources/tpcds/q89.sql b/src/test/resources/tpcds/q89.sql new file mode 100644 index 000000000..75408cb03 --- /dev/null +++ b/src/test/resources/tpcds/q89.sql @@ -0,0 +1,30 @@ +SELECT * +FROM ( + SELECT + i_category, + i_class, + i_brand, + s_store_name, + s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) + OVER + (PARTITION BY i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales + FROM item, store_sales, date_dim, store + WHERE ss_item_sk = i_item_sk AND + ss_sold_date_sk = d_date_sk AND + ss_store_sk = s_store_sk AND + d_year IN (1999) AND + ((i_category IN ('Books', 'Electronics', 'Sports') AND + i_class IN ('computers', 'stereo', 'football')) + OR (i_category IN ('Men', 'Jewelry', 'Women') AND + i_class IN ('shirts', 'birdal', 'dresses'))) + GROUP BY i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +WHERE CASE WHEN (avg_monthly_sales <> 0) + THEN (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) + ELSE NULL END > 0.1 +ORDER BY sum_sales - avg_monthly_sales, s_store_name +LIMIT 100 diff --git a/src/test/resources/tpcds/q9.sql b/src/test/resources/tpcds/q9.sql new file mode 100644 index 000000000..de3db9d98 --- /dev/null +++ b/src/test/resources/tpcds/q9.sql @@ -0,0 +1,48 @@ +SELECT + CASE WHEN (SELECT count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 1 AND 20) > 62316685 + THEN (SELECT avg(ss_ext_discount_amt) + FROM store_sales + WHERE ss_quantity BETWEEN 1 AND 20) + ELSE (SELECT avg(ss_net_paid) + FROM store_sales + WHERE ss_quantity BETWEEN 1 AND 20) END bucket1, + CASE WHEN (SELECT count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 21 AND 40) > 19045798 + THEN (SELECT avg(ss_ext_discount_amt) + FROM store_sales + WHERE ss_quantity BETWEEN 21 AND 40) + ELSE (SELECT avg(ss_net_paid) + FROM store_sales + WHERE ss_quantity BETWEEN 21 AND 40) END bucket2, + CASE WHEN (SELECT count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 41 AND 60) > 365541424 + THEN (SELECT avg(ss_ext_discount_amt) + FROM store_sales + WHERE ss_quantity BETWEEN 41 AND 60) + ELSE (SELECT avg(ss_net_paid) + FROM store_sales + WHERE ss_quantity BETWEEN 41 AND 60) END bucket3, + CASE WHEN (SELECT count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 61 AND 80) > 216357808 + THEN (SELECT avg(ss_ext_discount_amt) + FROM store_sales + WHERE ss_quantity BETWEEN 61 AND 80) + ELSE (SELECT avg(ss_net_paid) + FROM store_sales + WHERE ss_quantity BETWEEN 61 AND 80) END bucket4, + CASE WHEN (SELECT count(*) + FROM store_sales + WHERE ss_quantity BETWEEN 81 AND 100) > 184483884 + THEN (SELECT avg(ss_ext_discount_amt) + FROM store_sales + WHERE ss_quantity BETWEEN 81 AND 100) + ELSE (SELECT avg(ss_net_paid) + FROM store_sales + WHERE ss_quantity BETWEEN 81 AND 100) END bucket5 +FROM reason +WHERE r_reason_sk = 1 diff --git a/src/test/resources/tpcds/q90.sql b/src/test/resources/tpcds/q90.sql new file mode 100644 index 000000000..85e35bf8b --- /dev/null +++ b/src/test/resources/tpcds/q90.sql @@ -0,0 +1,19 @@ +SELECT cast(amc AS DECIMAL(15, 4)) / cast(pmc AS DECIMAL(15, 4)) am_pm_ratio +FROM (SELECT count(*) amc +FROM web_sales, household_demographics, time_dim, web_page +WHERE ws_sold_time_sk = time_dim.t_time_sk + AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk + AND ws_web_page_sk = web_page.wp_web_page_sk + AND time_dim.t_hour BETWEEN 8 AND 8 + 1 + AND household_demographics.hd_dep_count = 6 + AND web_page.wp_char_count BETWEEN 5000 AND 5200) at, + (SELECT count(*) pmc + FROM web_sales, household_demographics, time_dim, web_page + WHERE ws_sold_time_sk = time_dim.t_time_sk + AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk + AND ws_web_page_sk = web_page.wp_web_page_sk + AND time_dim.t_hour BETWEEN 19 AND 19 + 1 + AND household_demographics.hd_dep_count = 6 + AND web_page.wp_char_count BETWEEN 5000 AND 5200) pt +ORDER BY am_pm_ratio +LIMIT 100 diff --git a/src/test/resources/tpcds/q91.sql b/src/test/resources/tpcds/q91.sql new file mode 100644 index 000000000..9ca7ce00a --- /dev/null +++ b/src/test/resources/tpcds/q91.sql @@ -0,0 +1,23 @@ +SELECT + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +FROM + call_center, catalog_returns, date_dim, customer, customer_address, + customer_demographics, household_demographics +WHERE + cr_call_center_sk = cc_call_center_sk + AND cr_returned_date_sk = d_date_sk + AND cr_returning_customer_sk = c_customer_sk + AND cd_demo_sk = c_current_cdemo_sk + AND hd_demo_sk = c_current_hdemo_sk + AND ca_address_sk = c_current_addr_sk + AND d_year = 1998 + AND d_moy = 11 + AND ((cd_marital_status = 'M' AND cd_education_status = 'Unknown') + OR (cd_marital_status = 'W' AND cd_education_status = 'Advanced Degree')) + AND hd_buy_potential LIKE 'Unknown%' + AND ca_gmt_offset = -7 +GROUP BY cc_call_center_id, cc_name, cc_manager, cd_marital_status, cd_education_status +ORDER BY sum(cr_net_loss) DESC diff --git a/src/test/resources/tpcds/q92.sql b/src/test/resources/tpcds/q92.sql new file mode 100644 index 000000000..99129c3bd --- /dev/null +++ b/src/test/resources/tpcds/q92.sql @@ -0,0 +1,16 @@ +SELECT sum(ws_ext_discount_amt) AS `Excess Discount Amount ` +FROM web_sales, item, date_dim +WHERE i_manufact_id = 350 + AND i_item_sk = ws_item_sk + AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days) + AND d_date_sk = ws_sold_date_sk + AND ws_ext_discount_amt > + ( + SELECT 1.3 * avg(ws_ext_discount_amt) + FROM web_sales, date_dim + WHERE ws_item_sk = i_item_sk + AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days) + AND d_date_sk = ws_sold_date_sk + ) +ORDER BY sum(ws_ext_discount_amt) +LIMIT 100 diff --git a/src/test/resources/tpcds/q93.sql b/src/test/resources/tpcds/q93.sql new file mode 100644 index 000000000..222dc31c1 --- /dev/null +++ b/src/test/resources/tpcds/q93.sql @@ -0,0 +1,19 @@ +SELECT + ss_customer_sk, + sum(act_sales) sumsales +FROM (SELECT + ss_item_sk, + ss_ticket_number, + ss_customer_sk, + CASE WHEN sr_return_quantity IS NOT NULL + THEN (ss_quantity - sr_return_quantity) * ss_sales_price + ELSE (ss_quantity * ss_sales_price) END act_sales +FROM store_sales + LEFT OUTER JOIN store_returns + ON (sr_item_sk = ss_item_sk AND sr_ticket_number = ss_ticket_number) + , + reason +WHERE sr_reason_sk = r_reason_sk AND r_reason_desc = 'reason 28') t +GROUP BY ss_customer_sk +ORDER BY sumsales, ss_customer_sk +LIMIT 100 diff --git a/src/test/resources/tpcds/q94.sql b/src/test/resources/tpcds/q94.sql new file mode 100644 index 000000000..d6de3d75b --- /dev/null +++ b/src/test/resources/tpcds/q94.sql @@ -0,0 +1,23 @@ +SELECT + count(DISTINCT ws_order_number) AS `order count `, + sum(ws_ext_ship_cost) AS `total shipping cost `, + sum(ws_net_profit) AS `total net profit ` +FROM + web_sales ws1, date_dim, customer_address, web_site +WHERE + d_date BETWEEN '1999-02-01' AND + (CAST('1999-02-01' AS DATE) + INTERVAL 60 days) + AND ws1.ws_ship_date_sk = d_date_sk + AND ws1.ws_ship_addr_sk = ca_address_sk + AND ca_state = 'IL' + AND ws1.ws_web_site_sk = web_site_sk + AND web_company_name = 'pri' + AND EXISTS(SELECT * + FROM web_sales ws2 + WHERE ws1.ws_order_number = ws2.ws_order_number + AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + AND NOT EXISTS(SELECT * + FROM web_returns wr1 + WHERE ws1.ws_order_number = wr1.wr_order_number) +ORDER BY count(DISTINCT ws_order_number) +LIMIT 100 diff --git a/src/test/resources/tpcds/q95.sql b/src/test/resources/tpcds/q95.sql new file mode 100644 index 000000000..df71f00bd --- /dev/null +++ b/src/test/resources/tpcds/q95.sql @@ -0,0 +1,29 @@ +WITH ws_wh AS +(SELECT + ws1.ws_order_number, + ws1.ws_warehouse_sk wh1, + ws2.ws_warehouse_sk wh2 + FROM web_sales ws1, web_sales ws2 + WHERE ws1.ws_order_number = ws2.ws_order_number + AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +SELECT + count(DISTINCT ws_order_number) AS `order count `, + sum(ws_ext_ship_cost) AS `total shipping cost `, + sum(ws_net_profit) AS `total net profit ` +FROM + web_sales ws1, date_dim, customer_address, web_site +WHERE + d_date BETWEEN '1999-02-01' AND + (CAST('1999-02-01' AS DATE) + INTERVAL 60 DAY) + AND ws1.ws_ship_date_sk = d_date_sk + AND ws1.ws_ship_addr_sk = ca_address_sk + AND ca_state = 'IL' + AND ws1.ws_web_site_sk = web_site_sk + AND web_company_name = 'pri' + AND ws1.ws_order_number IN (SELECT ws_order_number + FROM ws_wh) + AND ws1.ws_order_number IN (SELECT wr_order_number + FROM web_returns, ws_wh + WHERE wr_order_number = ws_wh.ws_order_number) +ORDER BY count(DISTINCT ws_order_number) +LIMIT 100 diff --git a/src/test/resources/tpcds/q96.sql b/src/test/resources/tpcds/q96.sql new file mode 100644 index 000000000..7ab17e7bc --- /dev/null +++ b/src/test/resources/tpcds/q96.sql @@ -0,0 +1,11 @@ +SELECT count(*) +FROM store_sales, household_demographics, time_dim, store +WHERE ss_sold_time_sk = time_dim.t_time_sk + AND ss_hdemo_sk = household_demographics.hd_demo_sk + AND ss_store_sk = s_store_sk + AND time_dim.t_hour = 20 + AND time_dim.t_minute >= 30 + AND household_demographics.hd_dep_count = 7 + AND store.s_store_name = 'ese' +ORDER BY count(*) +LIMIT 100 diff --git a/src/test/resources/tpcds/q97.sql b/src/test/resources/tpcds/q97.sql new file mode 100644 index 000000000..e7e0b1a05 --- /dev/null +++ b/src/test/resources/tpcds/q97.sql @@ -0,0 +1,30 @@ +WITH ssci AS ( + SELECT + ss_customer_sk customer_sk, + ss_item_sk item_sk + FROM store_sales, date_dim + WHERE ss_sold_date_sk = d_date_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + GROUP BY ss_customer_sk, ss_item_sk), + csci AS ( + SELECT + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + FROM catalog_sales, date_dim + WHERE cs_sold_date_sk = d_date_sk + AND d_month_seq BETWEEN 1200 AND 1200 + 11 + GROUP BY cs_bill_customer_sk, cs_item_sk) +SELECT + sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NULL + THEN 1 + ELSE 0 END) store_only, + sum(CASE WHEN ssci.customer_sk IS NULL AND csci.customer_sk IS NOT NULL + THEN 1 + ELSE 0 END) catalog_only, + sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NOT NULL + THEN 1 + ELSE 0 END) store_and_catalog +FROM ssci + FULL OUTER JOIN csci ON (ssci.customer_sk = csci.customer_sk + AND ssci.item_sk = csci.item_sk) +LIMIT 100 diff --git a/src/test/resources/tpcds/q98.sql b/src/test/resources/tpcds/q98.sql new file mode 100644 index 000000000..bb10d4bf8 --- /dev/null +++ b/src/test/resources/tpcds/q98.sql @@ -0,0 +1,21 @@ +SELECT + i_item_desc, + i_category, + i_class, + i_current_price, + sum(ss_ext_sales_price) AS itemrevenue, + sum(ss_ext_sales_price) * 100 / sum(sum(ss_ext_sales_price)) + OVER + (PARTITION BY i_class) AS revenueratio +FROM + store_sales, item, date_dim +WHERE + ss_item_sk = i_item_sk + AND i_category IN ('Sports', 'Books', 'Home') + AND ss_sold_date_sk = d_date_sk + AND d_date BETWEEN cast('1999-02-22' AS DATE) + AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) +GROUP BY + i_item_id, i_item_desc, i_category, i_class, i_current_price +ORDER BY + i_category, i_class, i_item_id, i_item_desc, revenueratio diff --git a/src/test/resources/tpcds/q99.sql b/src/test/resources/tpcds/q99.sql new file mode 100644 index 000000000..f1a3d4d2b --- /dev/null +++ b/src/test/resources/tpcds/q99.sql @@ -0,0 +1,34 @@ +SELECT + substr(w_warehouse_name, 1, 20), + sm_type, + cc_name, + sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk <= 30) + THEN 1 + ELSE 0 END) AS `30 days `, + sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 30) AND + (cs_ship_date_sk - cs_sold_date_sk <= 60) + THEN 1 + ELSE 0 END) AS `31 - 60 days `, + sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 60) AND + (cs_ship_date_sk - cs_sold_date_sk <= 90) + THEN 1 + ELSE 0 END) AS `61 - 90 days `, + sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 90) AND + (cs_ship_date_sk - cs_sold_date_sk <= 120) + THEN 1 + ELSE 0 END) AS `91 - 120 days `, + sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 120) + THEN 1 + ELSE 0 END) AS `>120 days ` +FROM + catalog_sales, warehouse, ship_mode, call_center, date_dim +WHERE + d_month_seq BETWEEN 1200 AND 1200 + 11 + AND cs_ship_date_sk = d_date_sk + AND cs_warehouse_sk = w_warehouse_sk + AND cs_ship_mode_sk = sm_ship_mode_sk + AND cs_call_center_sk = cc_call_center_sk +GROUP BY + substr(w_warehouse_name, 1, 20), sm_type, cc_name +ORDER BY substr(w_warehouse_name, 1, 20), sm_type, cc_name +LIMIT 100 diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala new file mode 100644 index 000000000..755822967 --- /dev/null +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala @@ -0,0 +1,277 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.microsoft.hyperspace.goldstandard + +import java.io.File +import java.nio.charset.StandardCharsets + +import scala.collection.mutable + +import org.apache.commons.io.FileUtils +import org.apache.spark.internal.Logging +import org.apache.spark.sql.catalyst.expressions.AttributeSet +import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.execution._ +import org.apache.spark.sql.execution.command.ExplainCommand +import org.apache.spark.sql.execution.exchange.{Exchange, ReusedExchangeExec} + +// scalastyle:off filelinelengthchecker +/** + * Check that TPC-DS SparkPlans don't change. + * If there are plan differences, the error message looks like this: + * Plans did not match: + * last approved simplified plan: /path/to/tpcds-plan-stability/approved-plans-xxx/q1/simplified.txt + * last approved explain plan: /path/to/tpcds-plan-stability/approved-plans-xxx/q1/explain.txt + * [last approved simplified plan] + * + * actual simplified plan: /path/to/tmp/q1.actual.simplified.txt + * actual explain plan: /path/to/tmp/q1.actual.explain.txt + * [actual simplified plan] + * + * The explain files are saved to help debug later, they are not checked. Only the simplified + * plans are checked (by string comparison). + * + * + * To run the entire test suite: + * {{{ + * build/sbt "sql/testOnly *PlanStabilitySuite" + * }}} + * + * To run a single test file upon change: + * {{{ + * build/sbt "sql/testOnly *PlanStabilitySuite -- -z (tpcds-v1.4/q49)" + * }}} + * + * To re-generate golden files for entire suite, run: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *PlanStabilitySuite" + * }}} + * + * To re-generate golden file for a single test, run: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *PlanStabilitySuite -- -z (tpcds-v1.4/q49)" + * }}} + */ +// scalastyle:on filelinelengthchecker +trait PlanStabilitySuite extends TPCDSBase with Logging { + + override def beforeAll(): Unit = { + spark.conf.set("spark.sql.crossJoin.enabled", "true") + super.beforeAll() + } + + override def afterAll(): Unit = { + super.afterAll() + } + + private val regenerateGoldenFiles: Boolean = System.getenv("SPARK_GENERATE_GOLDEN_FILES") == "1" + + protected val baseResourcePath = { + // use the same way as `SQLQueryTestSuite` to get the resource path + java.nio.file.Paths.get("src", "test", "resources", "tpcds-plan-stability").toFile + } + + private val referenceRegex = "#\\d+".r + private val normalizeRegex = "#\\d+L?".r + + private val clsName = this.getClass.getCanonicalName + + def goldenFilePath: String + + private def getDirForTest(name: String): File = { + new File(goldenFilePath, name) + } + + private def isApproved(dir: File, actualSimplifiedPlan: String): Boolean = { + val file = new File(dir, "simplified.txt") + val expected = FileUtils.readFileToString(file, StandardCharsets.UTF_8) + expected == actualSimplifiedPlan + } + + /** + * Serialize and save this SparkPlan. + * The resulting file is used by [[checkWithApproved]] to check stability. + * + * @param plan the SparkPlan + * @param name the name of the query + * @param explain the full explain output; this is saved to help debug later as the simplified + * plan is not too useful for debugging + */ + private def generateGoldenFile(plan: SparkPlan, name: String, explain: String): Unit = { + val dir = getDirForTest(name) + val simplified = getSimplifiedPlan(plan) + val foundMatch = dir.exists() && isApproved(dir, simplified) + + if (!foundMatch) { + FileUtils.deleteDirectory(dir) + assert(dir.mkdirs()) + + val file = new File(dir, "simplified.txt") + FileUtils.writeStringToFile(file, simplified, StandardCharsets.UTF_8) + val fileOriginalPlan = new File(dir, "explain.txt") + FileUtils.writeStringToFile(fileOriginalPlan, explain, StandardCharsets.UTF_8) + logDebug(s"APPROVED: $file $fileOriginalPlan") + } + } + + private def checkWithApproved(plan: SparkPlan, name: String, explain: String): Unit = { + val dir = getDirForTest(name) + val tempDir = FileUtils.getTempDirectory + val actualSimplified = getSimplifiedPlan(plan) + val foundMatch = isApproved(dir, actualSimplified) + + if (!foundMatch) { + // show diff with last approved + val approvedSimplifiedFile = new File(dir, "simplified.txt") + val approvedExplainFile = new File(dir, "explain.txt") + + val actualSimplifiedFile = new File(tempDir, s"$name.actual.simplified.txt") + val actualExplainFile = new File(tempDir, s"$name.actual.explain.txt") + + val approvedSimplified = + FileUtils.readFileToString(approvedSimplifiedFile, StandardCharsets.UTF_8) + // write out for debugging + FileUtils.writeStringToFile(actualSimplifiedFile, actualSimplified, StandardCharsets.UTF_8) + FileUtils.writeStringToFile(actualExplainFile, explain, StandardCharsets.UTF_8) + + fail(s""" + |Plans did not match: + |last approved simplified plan: ${approvedSimplifiedFile.getAbsolutePath} + |last approved explain plan: ${approvedExplainFile.getAbsolutePath} + | + |$approvedSimplified + | + |actual simplified plan: ${actualSimplifiedFile.getAbsolutePath} + |actual explain plan: ${actualExplainFile.getAbsolutePath} + | + |$actualSimplified + """.stripMargin) + } + } + + /** + * Get the simplified plan for a specific SparkPlan. In the simplified plan, the node only has + * its name and all the sorted reference and produced attributes names(without ExprId) and its + * simplified children as well. And we'll only identify the performance sensitive nodes, e.g., + * Exchange, Subquery, in the simplified plan. Given such a identical but simplified plan, we'd + * expect to avoid frequent plan changing and catch the possible meaningful regression. + */ + private def getSimplifiedPlan(plan: SparkPlan): String = { + val exchangeIdMap = new mutable.HashMap[SparkPlan, Int]() + val subqueriesMap = new mutable.HashMap[SparkPlan, Int]() + + def getId(plan: SparkPlan): Int = plan match { + case exchange: Exchange => exchangeIdMap.getOrElseUpdate(exchange, exchangeIdMap.size + 1) + case ReusedExchangeExec(_, exchange) => + exchangeIdMap.getOrElseUpdate(exchange, exchangeIdMap.size + 1) + case subquery: SubqueryExec => + subqueriesMap.getOrElseUpdate(subquery, subqueriesMap.size + 1) + case _ => -1 + } + + /** + * Some expression names have ExprId in them due to using things such as + * "sum(sr_return_amt#14)", so we remove all of these using regex + */ + def cleanUpReferences(references: AttributeSet): String = { + referenceRegex.replaceAllIn(references.map(_.name).mkString(","), "") + } + + /** + * Generate a simplified plan as a string + * Example output: + * TakeOrderedAndProject [c_customer_id] + * WholeStageCodegen + * Project [c_customer_id] + */ + def simplifyNode(node: SparkPlan, depth: Int): String = { + val padding = " " * depth + var thisNode = node.nodeName + if (node.references.nonEmpty) { + thisNode += s" [${cleanUpReferences(node.references)}]" + } + if (node.producedAttributes.nonEmpty) { + thisNode += s" [${cleanUpReferences(node.producedAttributes)}]" + } + val id = getId(node) + if (id > 0) { + thisNode += s" #$id" + } + val childrenSimplified = node.children.map(simplifyNode(_, depth + 1)) + val subqueriesSimplified = node.subqueries.map(simplifyNode(_, depth + 1)) + s"$padding$thisNode\n${subqueriesSimplified.mkString("")}${childrenSimplified.mkString("")}" + } + + simplifyNode(plan, 0) + } + + private def normalizeIds(plan: String): String = { + val map = new mutable.HashMap[String, String]() + normalizeRegex + .findAllMatchIn(plan) + .map(_.toString) + .foreach(map.getOrElseUpdate(_, (map.size + 1).toString)) + normalizeRegex.replaceAllIn(plan, regexMatch => s"#${map(regexMatch.toString)}") + } + + private def normalizeLocation(plan: String): String = { + plan.replaceAll( + s"Location.*$clsName/", + "Location [not included in comparison]/{warehouse_dir}/") + } + + /** + * Test a TPC-DS query. Depending on the settings this test will either check if the plan matches + * a golden file or it will create a new golden file. + */ + protected def testQuery(tpcdsGroup: String, query: String, suffix: String = ""): Unit = { + val queryString = resourceToString( + s"$tpcdsGroup/$query.sql", + classLoader = Thread.currentThread().getContextClassLoader) + val qe = spark.sql(queryString).queryExecution + val plan = qe.executedPlan + val explain = normalizeLocation(normalizeIds(explainString(qe))) + + if (regenerateGoldenFiles) { + generateGoldenFile(plan, query + suffix, explain) + } else { + checkWithApproved(plan, query + suffix, explain) + } + } + + def explainString(queryExecution: QueryExecution): String = { + val explain = ExplainCommand(queryExecution.logical, extended = false) + spark.sessionState + .executePlan(explain) + .executedPlan + .executeCollect() + .map(_.getString(0)) + .mkString("\n") + } +} + +class TPCDSV1_4_PlanStabilitySuite extends PlanStabilitySuite { + override val goldenFilePath: String = + new File(baseResourcePath, s"approved-plans-v1_4").getAbsolutePath + + tpcdsQueries.foreach { q => + test(s"check simplified (tpcds-v1.4/$q)") { + testQuery("tpcds", q) + } + } +} diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala new file mode 100644 index 000000000..d8670bd48 --- /dev/null +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala @@ -0,0 +1,588 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.microsoft.hyperspace.goldstandard + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.internal.SQLConf + +import com.microsoft.hyperspace.SparkInvolvedSuite + +trait TPCDSBase extends SparkFunSuite with SparkInvolvedSuite { + + val conf = SQLConf.get + + // The TPCDS queries below are based on v1.4 + val tpcdsQueries = Seq( + "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", + "q12", "q13", "q14a", "q14b", "q15", "q16", "q17", "q18", "q19", "q20", + "q21", "q22", "q23a", "q23b", "q24a", "q24b", "q25", "q26", "q27", "q28", "q29", "q30", + "q31", "q32", "q33", "q34", "q35", "q36", "q37", "q38", "q39a", "q39b", "q40", + "q41", "q42", "q43", "q44", "q45", "q46", "q47", "q48", "q49", "q50", + "q51", "q52", "q53", "q54", "q55", "q56", "q57", "q58", "q59", "q60", + "q61", "q62", "q63", "q64", "q65", "q66", "q67", "q68", "q69", "q70", + "q71", "q72", "q73", "q74", "q75", "q76", "q77", "q78", "q79", "q80", + "q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90", + "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99") + + // This list only includes TPCDS v2.7 queries that are different from v1.4 ones + val tpcdsQueriesV2_7_0 = Seq( + "q5a", "q6", "q10a", "q11", "q12", "q14", "q14a", "q18a", + "q20", "q22", "q22a", "q24", "q27a", "q34", "q35", "q35a", "q36a", "q47", "q49", + "q51a", "q57", "q64", "q67a", "q70a", "q72", "q74", "q75", "q77a", "q78", + "q80a", "q86a", "q98") + + // These queries are from https://github.com/cloudera/impala-tpcds-kit/tree/master/queries + val modifiedTPCDSQueries = Seq( + "q3", "q7", "q10", "q19", "q27", "q34", "q42", "q43", "q46", "q52", "q53", "q55", "q59", + "q63", "q65", "q68", "q73", "q79", "q89", "q98", "ss_max") + + private val tableColumns = Map( + "store_sales" -> + """ + |`ss_sold_date_sk` INT, + |`ss_sold_time_sk` INT, + |`ss_item_sk` INT, + |`ss_customer_sk` INT, + |`ss_cdemo_sk` INT, + |`ss_hdemo_sk` INT, + |`ss_addr_sk` INT, + |`ss_store_sk` INT, + |`ss_promo_sk` INT, + |`ss_ticket_number` INT, + |`ss_quantity` INT, + |`ss_wholesale_cost` DECIMAL(7,2), + |`ss_list_price` DECIMAL(7,2), + |`ss_sales_price` DECIMAL(7,2), + |`ss_ext_discount_amt` DECIMAL(7,2), + |`ss_ext_sales_price` DECIMAL(7,2), + |`ss_ext_wholesale_cost` DECIMAL(7,2), + |`ss_ext_list_price` DECIMAL(7,2), + |`ss_ext_tax` DECIMAL(7,2), + |`ss_coupon_amt` DECIMAL(7,2), + |`ss_net_paid` DECIMAL(7,2), + |`ss_net_paid_inc_tax` DECIMAL(7,2), + |`ss_net_profit` DECIMAL(7,2) + """.stripMargin, + "store_returns" -> + """ + |`sr_returned_date_sk` BIGINT, + |`sr_return_time_sk` BIGINT, + |`sr_item_sk` BIGINT, + |`sr_customer_sk` BIGINT, + |`sr_cdemo_sk` BIGINT, + |`sr_hdemo_sk` BIGINT, + |`sr_addr_sk` BIGINT, + |`sr_store_sk` BIGINT, + |`sr_reason_sk` BIGINT, + |`sr_ticket_number` BIGINT, + |`sr_return_quantity` INT, + |`sr_return_amt` DECIMAL(7,2), + |`sr_return_tax` DECIMAL(7,2), + |`sr_return_amt_inc_tax` DECIMAL(7,2), + |`sr_fee` DECIMAL(7,2), + |`sr_return_ship_cost` DECIMAL(7,2), + |`sr_refunded_cash` DECIMAL(7,2), + |`sr_reversed_charge` DECIMAL(7,2), + |`sr_store_credit` DECIMAL(7,2), + |`sr_net_loss` DECIMAL(7,2) + """.stripMargin, + "catalog_sales" -> + """ + |`cs_sold_date_sk` INT, + |`cs_sold_time_sk` INT, + |`cs_ship_date_sk` INT, + |`cs_bill_customer_sk` INT, + |`cs_bill_cdemo_sk` INT, + |`cs_bill_hdemo_sk` INT, + |`cs_bill_addr_sk` INT, + |`cs_ship_customer_sk` INT, + |`cs_ship_cdemo_sk` INT, + |`cs_ship_hdemo_sk` INT, + |`cs_ship_addr_sk` INT, + |`cs_call_center_sk` INT, + |`cs_catalog_page_sk` INT, + |`cs_ship_mode_sk` INT, + |`cs_warehouse_sk` INT, + |`cs_item_sk` INT, + |`cs_promo_sk` INT, + |`cs_order_number` INT, + |`cs_quantity` INT, + |`cs_wholesale_cost` DECIMAL(7,2), + |`cs_list_price` DECIMAL(7,2), + |`cs_sales_price` DECIMAL(7,2), + |`cs_ext_discount_amt` DECIMAL(7,2), + |`cs_ext_sales_price` DECIMAL(7,2), + |`cs_ext_wholesale_cost` DECIMAL(7,2), + |`cs_ext_list_price` DECIMAL(7,2), + |`cs_ext_tax` DECIMAL(7,2), + |`cs_coupon_amt` DECIMAL(7,2), + |`cs_ext_ship_cost` DECIMAL(7,2), + |`cs_net_paid` DECIMAL(7,2), + |`cs_net_paid_inc_tax` DECIMAL(7,2), + |`cs_net_paid_inc_ship` DECIMAL(7,2), + |`cs_net_paid_inc_ship_tax` DECIMAL(7,2), + |`cs_net_profit` DECIMAL(7,2) + """.stripMargin, + "catalog_returns" -> + """ + |`cr_returned_date_sk` INT, + |`cr_returned_time_sk` INT, + |`cr_item_sk` INT, + |`cr_refunded_customer_sk` INT, + |`cr_refunded_cdemo_sk` INT, + |`cr_refunded_hdemo_sk` INT, + |`cr_refunded_addr_sk` INT, + |`cr_returning_customer_sk` INT, + |`cr_returning_cdemo_sk` INT, + |`cr_returning_hdemo_sk` INT, + |`cr_returning_addr_sk` INT, + |`cr_call_center_sk` INT, + |`cr_catalog_page_sk` INT, + |`cr_ship_mode_sk` INT, + |`cr_warehouse_sk` INT, + |`cr_reason_sk` INT,`cr_order_number` INT, + |`cr_return_quantity` INT, + |`cr_return_amount` DECIMAL(7,2), + |`cr_return_tax` DECIMAL(7,2), + |`cr_return_amt_inc_tax` DECIMAL(7,2), + |`cr_fee` DECIMAL(7,2), + |`cr_return_ship_cost` DECIMAL(7,2), + |`cr_refunded_cash` DECIMAL(7,2), + |`cr_reversed_charge` DECIMAL(7,2), + |`cr_store_credit` DECIMAL(7,2), + |`cr_net_loss` DECIMAL(7,2) + """.stripMargin, + "web_sales" -> + """ + |`ws_sold_date_sk` INT, + |`ws_sold_time_sk` INT, + |`ws_ship_date_sk` INT, + |`ws_item_sk` INT, + |`ws_bill_customer_sk` INT, + |`ws_bill_cdemo_sk` INT, + |`ws_bill_hdemo_sk` INT, + |`ws_bill_addr_sk` INT, + |`ws_ship_customer_sk` INT, + |`ws_ship_cdemo_sk` INT, + |`ws_ship_hdemo_sk` INT, + |`ws_ship_addr_sk` INT, + |`ws_web_page_sk` INT, + |`ws_web_site_sk` INT, + |`ws_ship_mode_sk` INT, + |`ws_warehouse_sk` INT, + |`ws_promo_sk` INT, + |`ws_order_number` INT, + |`ws_quantity` INT, + |`ws_wholesale_cost` DECIMAL(7,2), + |`ws_list_price` DECIMAL(7,2), + |`ws_sales_price` DECIMAL(7,2), + |`ws_ext_discount_amt` DECIMAL(7,2), + |`ws_ext_sales_price` DECIMAL(7,2), + |`ws_ext_wholesale_cost` DECIMAL(7,2), + |`ws_ext_list_price` DECIMAL(7,2), + |`ws_ext_tax` DECIMAL(7,2), + |`ws_coupon_amt` DECIMAL(7,2), + |`ws_ext_ship_cost` DECIMAL(7,2), + |`ws_net_paid` DECIMAL(7,2), + |`ws_net_paid_inc_tax` DECIMAL(7,2), + |`ws_net_paid_inc_ship` DECIMAL(7,2), + |`ws_net_paid_inc_ship_tax` DECIMAL(7,2), + |`ws_net_profit` DECIMAL(7,2) + """.stripMargin, + "web_returns" -> + """ + |`wr_returned_date_sk` BIGINT, + |`wr_returned_time_sk` BIGINT, + |`wr_item_sk` BIGINT, + |`wr_refunded_customer_sk` BIGINT, + |`wr_refunded_cdemo_sk` BIGINT, + |`wr_refunded_hdemo_sk` BIGINT, + |`wr_refunded_addr_sk` BIGINT, + |`wr_returning_customer_sk` BIGINT, + |`wr_returning_cdemo_sk` BIGINT, + |`wr_returning_hdemo_sk` BIGINT, + |`wr_returning_addr_sk` BIGINT, + |`wr_web_page_sk` BIGINT, + |`wr_reason_sk` BIGINT, + |`wr_order_number` BIGINT, + |`wr_return_quantity` INT, + |`wr_return_amt` DECIMAL(7,2), + |`wr_return_tax` DECIMAL(7,2), + |`wr_return_amt_inc_tax` DECIMAL(7,2), + |`wr_fee` DECIMAL(7,2), + |`wr_return_ship_cost` DECIMAL(7,2), + |`wr_refunded_cash` DECIMAL(7,2), + |`wr_reversed_charge` DECIMAL(7,2), + |`wr_account_credit` DECIMAL(7,2), + |`wr_net_loss` DECIMAL(7,2) + """.stripMargin, + "inventory" -> + """ + |`inv_date_sk` INT, + |`inv_item_sk` INT, + |`inv_warehouse_sk` INT, + |`inv_quantity_on_hand` INT + """.stripMargin, + "store" -> + """ + |`s_store_sk` INT, + |`s_store_id` CHAR(16), + |`s_rec_start_date` DATE, + |`s_rec_end_date` DATE, + |`s_closed_date_sk` INT, + |`s_store_name` VARCHAR(50), + |`s_number_employees` INT, + |`s_floor_space` INT, + |`s_hours` CHAR(20), + |`s_manager` VARCHAR(40), + |`s_market_id` INT, + |`s_geography_class` VARCHAR(100), + |`s_market_desc` VARCHAR(100), + |`s_market_manager` VARCHAR(40), + |`s_division_id` INT, + |`s_division_name` VARCHAR(50), + |`s_company_id` INT, + |`s_company_name` VARCHAR(50), + |`s_street_number` VARCHAR(10), + |`s_street_name` VARCHAR(60), + |`s_street_type` CHAR(15), + |`s_suite_number` CHAR(10), + |`s_city` VARCHAR(60), + |`s_county` VARCHAR(30), + |`s_state` CHAR(2), + |`s_zip` CHAR(10), + |`s_country` VARCHAR(20), + |`s_gmt_offset` DECIMAL(5,2), + |`s_tax_percentage` DECIMAL(5,2) + """.stripMargin, + "call_center" -> + """ + |`cc_call_center_sk` INT, + |`cc_call_center_id` CHAR(16), + |`cc_rec_start_date` DATE, + |`cc_rec_end_date` DATE, + |`cc_closed_date_sk` INT, + |`cc_open_date_sk` INT, + |`cc_name` VARCHAR(50), + |`cc_class` VARCHAR(50), + |`cc_employees` INT, + |`cc_sq_ft` INT, + |`cc_hours` CHAR(20), + |`cc_manager` VARCHAR(40), + |`cc_mkt_id` INT, + |`cc_mkt_class` CHAR(50), + |`cc_mkt_desc` VARCHAR(100), + |`cc_market_manager` VARCHAR(40), + |`cc_division` INT, + |`cc_division_name` VARCHAR(50), + |`cc_company` INT, + |`cc_company_name` CHAR(50), + |`cc_street_number` CHAR(10), + |`cc_street_name` VARCHAR(60), + |`cc_street_type` CHAR(15), + |`cc_suite_number` CHAR(10), + |`cc_city` VARCHAR(60), + |`cc_county` VARCHAR(30), + |`cc_state` CHAR(2), + |`cc_zip` CHAR(10), + |`cc_country` VARCHAR(20), + |`cc_gmt_offset` DECIMAL(5,2), + |`cc_tax_percentage` DECIMAL(5,2) + """.stripMargin, + "catalog_page" -> + """ + |`cp_catalog_page_sk` INT, + |`cp_catalog_page_id` CHAR(16), + |`cp_start_date_sk` INT, + |`cp_end_date_sk` INT, + |`cp_department` VARCHAR(50), + |`cp_catalog_number` INT, + |`cp_catalog_page_number` INT, + |`cp_description` VARCHAR(100), + |`cp_type` VARCHAR(100) + """.stripMargin, + "web_site" -> + """ + |`web_site_sk` INT, + |`web_site_id` CHAR(16), + |`web_rec_start_date` DATE, + |`web_rec_end_date` DATE, + |`web_name` VARCHAR(50), + |`web_open_date_sk` INT, + |`web_close_date_sk` INT, + |`web_class` VARCHAR(50), + |`web_manager` VARCHAR(40), + |`web_mkt_id` INT, + |`web_mkt_class` VARCHAR(50), + |`web_mkt_desc` VARCHAR(100), + |`web_market_manager` VARCHAR(40), + |`web_company_id` INT, + |`web_company_name` CHAR(50), + |`web_street_number` CHAR(10), + |`web_street_name` VARCHAR(60), + |`web_street_type` CHAR(15), + |`web_suite_number` CHAR(10), + |`web_city` VARCHAR(60), + |`web_county` VARCHAR(30), + |`web_state` CHAR(2), + |`web_zip` CHAR(10), + |`web_country` VARCHAR(20), + |`web_gmt_offset` DECIMAL(5,2), + |`web_tax_percentage` DECIMAL(5,2) + """.stripMargin, + "web_page" -> + """ + |`wp_web_page_sk` INT, + |`wp_web_page_id` CHAR(16), + |`wp_rec_start_date` DATE, + |`wp_rec_end_date` DATE, + |`wp_creation_date_sk` INT, + |`wp_access_date_sk` INT, + |`wp_autogen_flag` CHAR(1), + |`wp_customer_sk` INT, + |`wp_url` VARCHAR(100), + |`wp_type` CHAR(50), + |`wp_char_count` INT, + |`wp_link_count` INT, + |`wp_image_count` INT, + |`wp_max_ad_count` INT + """.stripMargin, + "warehouse" -> + """ + |`w_warehouse_sk` INT, + |`w_warehouse_id` CHAR(16), + |`w_warehouse_name` VARCHAR(20), + |`w_warehouse_sq_ft` INT, + |`w_street_number` CHAR(10), + |`w_street_name` VARCHAR(20), + |`w_street_type` CHAR(15), + |`w_suite_number` CHAR(10), + |`w_city` VARCHAR(60), + |`w_county` VARCHAR(30), + |`w_state` CHAR(2), + |`w_zip` CHAR(10), + |`w_country` VARCHAR(20), + |`w_gmt_offset` DECIMAL(5,2) + """.stripMargin, + "customer" -> + """ + |`c_customer_sk` INT, + |`c_customer_id` CHAR(16), + |`c_current_cdemo_sk` INT, + |`c_current_hdemo_sk` INT, + |`c_current_addr_sk` INT, + |`c_first_shipto_date_sk` INT, + |`c_first_sales_date_sk` INT, + |`c_salutation` CHAR(10), + |`c_first_name` CHAR(20), + |`c_last_name` CHAR(30), + |`c_preferred_cust_flag` CHAR(1), + |`c_birth_day` INT, + |`c_birth_month` INT, + |`c_birth_year` INT, + |`c_birth_country` VARCHAR(20), + |`c_login` CHAR(13), + |`c_email_address` CHAR(50), + |`c_last_review_date` INT + """.stripMargin, + "customer_address" -> + """ + |`ca_address_sk` INT, + |`ca_address_id` CHAR(16), + |`ca_street_number` CHAR(10), + |`ca_street_name` VARCHAR(60), + |`ca_street_type` CHAR(15), + |`ca_suite_number` CHAR(10), + |`ca_city` VARCHAR(60), + |`ca_county` VARCHAR(30), + |`ca_state` CHAR(2), + |`ca_zip` CHAR(10), + |`ca_country` VARCHAR(20), + |`ca_gmt_offset` DECIMAL(5,2), + |`ca_location_type` CHAR(20) + """.stripMargin, + "customer_demographics" -> + """ + |`cd_demo_sk` INT, + |`cd_gender` CHAR(1), + |`cd_marital_status` CHAR(1), + |`cd_education_status` CHAR(20), + |`cd_purchase_estimate` INT, + |`cd_credit_rating` CHAR(10), + |`cd_dep_count` INT, + |`cd_dep_employed_count` INT, + |`cd_dep_college_count` INT + """.stripMargin, + "date_dim" -> + """ + |`d_date_sk` INT, + |`d_date_id` CHAR(16), + |`d_date` DATE, + |`d_month_seq` INT, + |`d_week_seq` INT, + |`d_quarter_seq` INT, + |`d_year` INT, + |`d_dow` INT, + |`d_moy` INT, + |`d_dom` INT, + |`d_qoy` INT, + |`d_fy_year` INT, + |`d_fy_quarter_seq` INT, + |`d_fy_week_seq` INT, + |`d_day_name` CHAR(9), + |`d_quarter_name` CHAR(1), + |`d_holiday` CHAR(1), + |`d_weekend` CHAR(1), + |`d_following_holiday` CHAR(1), + |`d_first_dom` INT, + |`d_last_dom` INT, + |`d_same_day_ly` INT, + |`d_same_day_lq` INT, + |`d_current_day` CHAR(1), + |`d_current_week` CHAR(1), + |`d_current_month` CHAR(1), + |`d_current_quarter` CHAR(1), + |`d_current_year` CHAR(1) + """.stripMargin, + "household_demographics" -> + """ + |`hd_demo_sk` INT, + |`hd_income_band_sk` INT, + |`hd_buy_potential` CHAR(15), + |`hd_dep_count` INT, + |`hd_vehicle_count` INT + """.stripMargin, + "item" -> + """ + |`i_item_sk` INT, + |`i_item_id` CHAR(16), + |`i_rec_start_date` DATE, + |`i_rec_end_date` DATE, + |`i_item_desc` VARCHAR(200), + |`i_current_price` DECIMAL(7,2), + |`i_wholesale_cost` DECIMAL(7,2), + |`i_brand_id` INT, + |`i_brand` CHAR(50), + |`i_class_id` INT, + |`i_class` CHAR(50), + |`i_category_id` INT, + |`i_category` CHAR(50), + |`i_manufact_id` INT, + |`i_manufact` CHAR(50), + |`i_size` CHAR(20), + |`i_formulation` CHAR(20), + |`i_color` CHAR(20), + |`i_units` CHAR(10), + |`i_container` CHAR(10), + |`i_manager_id` INT, + |`i_product_name` CHAR(50) + """.stripMargin, + "income_band" -> + """ + |`ib_income_band_sk` INT, + |`ib_lower_bound` INT, + |`ib_upper_bound` INT + """.stripMargin, + "promotion" -> + """ + |`p_promo_sk` INT, + |`p_promo_id` CHAR(16), + |`p_start_date_sk` INT, + |`p_end_date_sk` INT, + |`p_item_sk` INT, + |`p_cost` DECIMAL(15,2), + |`p_response_target` INT, + |`p_promo_name` CHAR(50), + |`p_channel_dmail` CHAR(1), + |`p_channel_email` CHAR(1), + |`p_channel_catalog` CHAR(1), + |`p_channel_tv` CHAR(1), + |`p_channel_radio` CHAR(1), + |`p_channel_press` CHAR(1), + |`p_channel_event` CHAR(1), + |`p_channel_demo` CHAR(1), + |`p_channel_details` VARCHAR(100), + |`p_purpose` CHAR(15), + |`p_discount_active` CHAR(1) + """.stripMargin, + "reason" -> + """ + |`r_reason_sk` INT, + |`r_reason_id` CHAR(16), + |`r_reason_desc` CHAR(100) + """.stripMargin, + "ship_mode" -> + """ + |`sm_ship_mode_sk` INT, + |`sm_ship_mode_id` CHAR(16), + |`sm_type` CHAR(30), + |`sm_code` CHAR(10), + |`sm_carrier` CHAR(20), + |`sm_contract` CHAR(20) + """.stripMargin, + "time_dim" -> + """ + |`t_time_sk` INT, + |`t_time_id` CHAR(16), + |`t_time` INT, + |`t_hour` INT, + |`t_minute` INT, + |`t_second` INT, + |`t_am_pm` CHAR(2), + |`t_shift` CHAR(20), + |`t_sub_shift` CHAR(20), + |`t_meal_time` CHAR(20) + """.stripMargin + ) + + val tableNames: Iterable[String] = tableColumns.keys + + def createTable( + spark: SparkSession, + tableName: String, + format: String = "parquet", + options: Seq[String] = Nil): Unit = { + spark.sql( + s""" + |CREATE TABLE `$tableName` (${tableColumns(tableName)}) + |USING $format + |${options.mkString("\n")} + """.stripMargin) + } + + private val originalCBCEnabled = conf.cboEnabled + private val originalJoinReorderEnabled = conf.joinReorderEnabled + + override def beforeAll(): Unit = { + super.beforeAll() + tableNames.foreach { tableName => + createTable(spark, tableName) + } + } + + override def afterAll(): Unit = { + conf.setConf(SQLConf.CBO_ENABLED, originalCBCEnabled) + conf.setConf(SQLConf.JOIN_REORDER_ENABLED, originalJoinReorderEnabled) + tableNames.foreach { tableName => + spark.sessionState.catalog.dropTable(TableIdentifier(tableName), true, true) + } + super.afterAll() + } +} From cc14991be20c78534a1daaea1caedbe9da09de5e Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Thu, 18 Feb 2021 17:27:04 -0800 Subject: [PATCH 02/31] fix q32 --- .../approved-plans-v1_4/q32/explain.txt | 7 ++----- .../approved-plans-v1_4/q32/simplified.txt | 6 +----- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt index f542e4cef..26c877156 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt @@ -25,9 +25,6 @@ CollectLimit 100 : : +- *(3) FileScan parquet default.catalog_sales[cs_sold_date_sk#2,cs_item_sk#5,cs_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(2) Project [d_date_sk#3] - : +- *(2) Filter (((isnotnull(d_date#10) && (cast(d_date#10 as string) >= 2000-01-27])) && (d_date#10 <= 11073)) && isnotnull(d_date_sk#3)) + : +- *(2) Filter (((isnotnull(d_date#10) && (cast(d_date#10 as string) >= 2000-01-27)) && (d_date#10 <= 11073)) && isnotnull(d_date_sk#3)) : +- *(2) FileScan parquet default.date_dim[d_date_sk#3,d_date#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(5) Project [d_date_sk#3] - +- *(5) Filter (((isnotnull(d_date#10) && (cast(d_date#10 as string) >= 2000-01-27)) && (d_date#10 <= 11073)) && isnotnull(d_date_sk#3)) - +- *(5) FileScan parquet default.date_dim[d_date_sk#3,d_date#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file + +- ReusedExchange [d_date_sk#3], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt index 39e8073fa..1da0b46e1 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt @@ -36,8 +36,4 @@ CollectLimit Filter [d_date,d_date_sk] Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + ReusedExchange [d_date_sk] [d_date_sk] #4 From 4b007a53d100f6626703cda2be1dbac5f1cb7daf Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Wed, 10 Mar 2021 10:41:07 -0800 Subject: [PATCH 03/31] keep only tpcds v1.4 and remove others --- .../goldstandard/PlanStabilitySuite.scala | 9 +++++++++ .../hyperspace/goldstandard/TPCDSBase.scala | 20 ++++++++----------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala index 755822967..659b5debb 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala @@ -1,3 +1,11 @@ +// scalastyle:off + +/** + * This trait was built upon: https://github.com/apache/spark/blob/v3.1.1/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala. + * + * The below license was copied from: https://github.com/FasterXML/jackson-module-scala/blob/2.10/src/main/resources/META-INF/LICENSE + */ + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -68,6 +76,7 @@ import org.apache.spark.sql.execution.exchange.{Exchange, ReusedExchangeExec} * }}} */ // scalastyle:on filelinelengthchecker + trait PlanStabilitySuite extends TPCDSBase with Logging { override def beforeAll(): Unit = { diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala index d8670bd48..6c48e37b9 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala @@ -1,3 +1,11 @@ +// scalastyle:off + +/** + * This trait was built upon: https://github.com/apache/spark/blob/v3.1.1/sql/core/src/test/scala/org/apache/spark/sql/TPCDSBase.scala. + * + * The below license was copied from: https://github.com/FasterXML/jackson-module-scala/blob/2.10/src/main/resources/META-INF/LICENSE + */ + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -41,18 +49,6 @@ trait TPCDSBase extends SparkFunSuite with SparkInvolvedSuite { "q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90", "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99") - // This list only includes TPCDS v2.7 queries that are different from v1.4 ones - val tpcdsQueriesV2_7_0 = Seq( - "q5a", "q6", "q10a", "q11", "q12", "q14", "q14a", "q18a", - "q20", "q22", "q22a", "q24", "q27a", "q34", "q35", "q35a", "q36a", "q47", "q49", - "q51a", "q57", "q64", "q67a", "q70a", "q72", "q74", "q75", "q77a", "q78", - "q80a", "q86a", "q98") - - // These queries are from https://github.com/cloudera/impala-tpcds-kit/tree/master/queries - val modifiedTPCDSQueries = Seq( - "q3", "q7", "q10", "q19", "q27", "q34", "q42", "q43", "q46", "q52", "q53", "q55", "q59", - "q63", "q65", "q68", "q73", "q79", "q89", "q98", "ss_max") - private val tableColumns = Map( "store_sales" -> """ From 5c0cee9f51bdfdcb08b3fc1a43befa4328cc31d5 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Wed, 10 Mar 2021 12:13:26 -0800 Subject: [PATCH 04/31] Trigger Build From 900539d913f9dd9551d9187af41076af151358f1 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Wed, 10 Mar 2021 12:19:07 -0800 Subject: [PATCH 05/31] build error: test with sequential run --- azure-pipelines.yml | 40 ---------------------------------------- 1 file changed, 40 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 65c99ff1e..14fc9a3de 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -40,12 +40,6 @@ jobs: inputs: artifactName: 'hyperspace-core' pathtoPublish: '$(Build.ArtifactStagingDirectory)/hyperspace-core/' - - - job: Build_2_12 - displayName: 'Build sources and run unit tests for Scala 2.12' - pool: - vmImage: 'ubuntu-18.04' - steps: - script: sbt ++2.12.8 clean displayName: 'Running $sbt clean' - script: sbt ++2.12.8 update @@ -69,37 +63,3 @@ jobs: inputs: artifactName: 'hyperspace-core' pathtoPublish: '$(Build.ArtifactStagingDirectory)/hyperspace-core/' - - - job: PythonTest - displayName: 'Run Python tests' - pool: - vmImage: 'ubuntu-18.04' - steps: - - task: UsePythonVersion@0 - displayName: 'Set Python version' - inputs: - versionSpec: '2.7' - addToPath: true - - task: JavaToolInstaller@0 - displayName: 'Set Java version' - inputs: - versionSpec: '8' - jdkArchitectureOption: 'x64' - jdkSourceOption: 'PreInstalled' - - script: sbt ++2.11.12 clean - displayName: 'Running $sbt clean' - - script: sbt ++2.11.12 update - displayName: 'Running $sbt update' - - script: sbt ++2.11.12 compile - displayName: 'Running $sbt compile' - - task: Bash@3 - inputs: - filePath: 'script/download_spark.sh' - displayName: 'Downloading spark' - - task: PythonScript@0 - inputs: - scriptSource: 'filePath' - scriptPath: 'run-tests.py' - displayName: 'Running python tests' - env: - SPARK_HOME: $(Build.SourcesDirectory)/spark-2.4.2-bin-hadoop2.7 From 530dfa705aff26337c92a87fcc93f6dcf4f5fb15 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Wed, 10 Mar 2021 12:36:32 -0800 Subject: [PATCH 06/31] revert previous commit --- azure-pipelines.yml | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 14fc9a3de..65c99ff1e 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -40,6 +40,12 @@ jobs: inputs: artifactName: 'hyperspace-core' pathtoPublish: '$(Build.ArtifactStagingDirectory)/hyperspace-core/' + + - job: Build_2_12 + displayName: 'Build sources and run unit tests for Scala 2.12' + pool: + vmImage: 'ubuntu-18.04' + steps: - script: sbt ++2.12.8 clean displayName: 'Running $sbt clean' - script: sbt ++2.12.8 update @@ -63,3 +69,37 @@ jobs: inputs: artifactName: 'hyperspace-core' pathtoPublish: '$(Build.ArtifactStagingDirectory)/hyperspace-core/' + + - job: PythonTest + displayName: 'Run Python tests' + pool: + vmImage: 'ubuntu-18.04' + steps: + - task: UsePythonVersion@0 + displayName: 'Set Python version' + inputs: + versionSpec: '2.7' + addToPath: true + - task: JavaToolInstaller@0 + displayName: 'Set Java version' + inputs: + versionSpec: '8' + jdkArchitectureOption: 'x64' + jdkSourceOption: 'PreInstalled' + - script: sbt ++2.11.12 clean + displayName: 'Running $sbt clean' + - script: sbt ++2.11.12 update + displayName: 'Running $sbt update' + - script: sbt ++2.11.12 compile + displayName: 'Running $sbt compile' + - task: Bash@3 + inputs: + filePath: 'script/download_spark.sh' + displayName: 'Downloading spark' + - task: PythonScript@0 + inputs: + scriptSource: 'filePath' + scriptPath: 'run-tests.py' + displayName: 'Running python tests' + env: + SPARK_HOME: $(Build.SourcesDirectory)/spark-2.4.2-bin-hadoop2.7 From add01f1a52639e78fc2ea325a9ac687c8a5098d8 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Wed, 10 Mar 2021 15:27:32 -0800 Subject: [PATCH 07/31] update plans --- .../approved-plans-v1_4/q1/explain.txt | 6 +- .../approved-plans-v1_4/q1/simplified.txt | 10 +- .../approved-plans-v1_4/q10/explain.txt | 2 +- .../approved-plans-v1_4/q10/simplified.txt | 14 +- .../approved-plans-v1_4/q11/explain.txt | 8 +- .../approved-plans-v1_4/q11/simplified.txt | 60 ++++---- .../approved-plans-v1_4/q12/explain.txt | 4 +- .../approved-plans-v1_4/q12/simplified.txt | 18 +-- .../approved-plans-v1_4/q13/simplified.txt | 20 +-- .../approved-plans-v1_4/q14a/explain.txt | 30 ++-- .../approved-plans-v1_4/q14a/simplified.txt | 56 +++---- .../approved-plans-v1_4/q14b/explain.txt | 54 +++---- .../approved-plans-v1_4/q14b/simplified.txt | 48 +++--- .../approved-plans-v1_4/q16/explain.txt | 2 +- .../approved-plans-v1_4/q16/simplified.txt | 16 +- .../approved-plans-v1_4/q17/explain.txt | 10 +- .../approved-plans-v1_4/q17/simplified.txt | 36 ++--- .../approved-plans-v1_4/q18/explain.txt | 2 +- .../approved-plans-v1_4/q18/simplified.txt | 28 ++-- .../approved-plans-v1_4/q19/explain.txt | 2 +- .../approved-plans-v1_4/q19/simplified.txt | 22 +-- .../approved-plans-v1_4/q2/explain.txt | 6 +- .../approved-plans-v1_4/q2/simplified.txt | 14 +- .../approved-plans-v1_4/q20/explain.txt | 4 +- .../approved-plans-v1_4/q20/simplified.txt | 18 +-- .../approved-plans-v1_4/q21/explain.txt | 2 +- .../approved-plans-v1_4/q21/simplified.txt | 4 +- .../approved-plans-v1_4/q22/explain.txt | 2 +- .../approved-plans-v1_4/q22/simplified.txt | 18 +-- .../approved-plans-v1_4/q23a/explain.txt | 10 +- .../approved-plans-v1_4/q23a/simplified.txt | 16 +- .../approved-plans-v1_4/q23b/explain.txt | 14 +- .../approved-plans-v1_4/q23b/simplified.txt | 24 +-- .../approved-plans-v1_4/q24a/explain.txt | 10 +- .../approved-plans-v1_4/q24a/simplified.txt | 54 +++---- .../approved-plans-v1_4/q24b/explain.txt | 10 +- .../approved-plans-v1_4/q24b/simplified.txt | 54 +++---- .../approved-plans-v1_4/q25/explain.txt | 14 +- .../approved-plans-v1_4/q25/simplified.txt | 38 ++--- .../approved-plans-v1_4/q26/explain.txt | 6 +- .../approved-plans-v1_4/q26/simplified.txt | 20 +-- .../approved-plans-v1_4/q27/explain.txt | 6 +- .../approved-plans-v1_4/q27/simplified.txt | 22 +-- .../approved-plans-v1_4/q28/explain.txt | 12 +- .../approved-plans-v1_4/q28/simplified.txt | 48 +++--- .../approved-plans-v1_4/q29/explain.txt | 10 +- .../approved-plans-v1_4/q29/simplified.txt | 36 ++--- .../approved-plans-v1_4/q3/explain.txt | 2 +- .../approved-plans-v1_4/q3/simplified.txt | 4 +- .../approved-plans-v1_4/q30/explain.txt | 6 +- .../approved-plans-v1_4/q30/simplified.txt | 20 +-- .../approved-plans-v1_4/q31/explain.txt | 14 +- .../approved-plans-v1_4/q31/simplified.txt | 34 ++--- .../approved-plans-v1_4/q32/explain.txt | 2 +- .../approved-plans-v1_4/q32/simplified.txt | 4 +- .../approved-plans-v1_4/q34/explain.txt | 4 +- .../approved-plans-v1_4/q34/simplified.txt | 10 +- .../approved-plans-v1_4/q35/explain.txt | 2 +- .../approved-plans-v1_4/q35/simplified.txt | 14 +- .../approved-plans-v1_4/q36/explain.txt | 4 +- .../approved-plans-v1_4/q36/simplified.txt | 14 +- .../approved-plans-v1_4/q37/explain.txt | 2 +- .../approved-plans-v1_4/q37/simplified.txt | 4 +- .../approved-plans-v1_4/q38/explain.txt | 6 +- .../approved-plans-v1_4/q38/simplified.txt | 4 +- .../approved-plans-v1_4/q39a/explain.txt | 6 +- .../approved-plans-v1_4/q39a/simplified.txt | 24 +-- .../approved-plans-v1_4/q39b/explain.txt | 6 +- .../approved-plans-v1_4/q39b/simplified.txt | 24 +-- .../approved-plans-v1_4/q4/explain.txt | 12 +- .../approved-plans-v1_4/q4/simplified.txt | 120 +++++++-------- .../approved-plans-v1_4/q40/explain.txt | 2 +- .../approved-plans-v1_4/q40/simplified.txt | 16 +- .../approved-plans-v1_4/q41/explain.txt | 4 +- .../approved-plans-v1_4/q41/simplified.txt | 6 +- .../approved-plans-v1_4/q42/explain.txt | 2 +- .../approved-plans-v1_4/q42/simplified.txt | 4 +- .../approved-plans-v1_4/q43/explain.txt | 2 +- .../approved-plans-v1_4/q43/simplified.txt | 6 +- .../approved-plans-v1_4/q44/explain.txt | 16 +- .../approved-plans-v1_4/q44/simplified.txt | 10 +- .../approved-plans-v1_4/q45/explain.txt | 2 +- .../approved-plans-v1_4/q45/simplified.txt | 2 +- .../approved-plans-v1_4/q46/explain.txt | 2 +- .../approved-plans-v1_4/q46/simplified.txt | 24 +-- .../approved-plans-v1_4/q47/explain.txt | 91 ++++++------ .../approved-plans-v1_4/q47/simplified.txt | 127 ++++++++-------- .../approved-plans-v1_4/q48/simplified.txt | 8 +- .../approved-plans-v1_4/q49/explain.txt | 90 +++++------ .../approved-plans-v1_4/q49/simplified.txt | 140 +++++++++--------- .../approved-plans-v1_4/q5/explain.txt | 8 +- .../approved-plans-v1_4/q5/simplified.txt | 36 ++--- .../approved-plans-v1_4/q50/explain.txt | 6 +- .../approved-plans-v1_4/q50/simplified.txt | 28 ++-- .../approved-plans-v1_4/q51/explain.txt | 14 +- .../approved-plans-v1_4/q51/simplified.txt | 8 +- .../approved-plans-v1_4/q52/explain.txt | 2 +- .../approved-plans-v1_4/q52/simplified.txt | 4 +- .../approved-plans-v1_4/q53/explain.txt | 4 +- .../approved-plans-v1_4/q53/simplified.txt | 4 +- .../approved-plans-v1_4/q54/explain.txt | 24 +-- .../approved-plans-v1_4/q54/simplified.txt | 2 +- .../approved-plans-v1_4/q55/explain.txt | 2 +- .../approved-plans-v1_4/q55/simplified.txt | 4 +- .../approved-plans-v1_4/q57/explain.txt | 14 +- .../approved-plans-v1_4/q57/simplified.txt | 50 +++---- .../approved-plans-v1_4/q58/explain.txt | 24 +-- .../approved-plans-v1_4/q58/simplified.txt | 4 +- .../approved-plans-v1_4/q59/explain.txt | 4 +- .../approved-plans-v1_4/q59/simplified.txt | 20 +-- .../approved-plans-v1_4/q6/explain.txt | 14 +- .../approved-plans-v1_4/q6/simplified.txt | 6 +- .../approved-plans-v1_4/q61/simplified.txt | 12 +- .../approved-plans-v1_4/q62/explain.txt | 2 +- .../approved-plans-v1_4/q62/simplified.txt | 18 +-- .../approved-plans-v1_4/q63/explain.txt | 4 +- .../approved-plans-v1_4/q63/simplified.txt | 4 +- .../approved-plans-v1_4/q64/explain.txt | 8 +- .../approved-plans-v1_4/q64/simplified.txt | 124 ++++++++-------- .../approved-plans-v1_4/q65/explain.txt | 6 +- .../approved-plans-v1_4/q65/simplified.txt | 16 +- .../approved-plans-v1_4/q66/explain.txt | 6 +- .../approved-plans-v1_4/q66/simplified.txt | 50 +++---- .../approved-plans-v1_4/q67/explain.txt | 4 +- .../approved-plans-v1_4/q67/simplified.txt | 26 ++-- .../approved-plans-v1_4/q68/explain.txt | 2 +- .../approved-plans-v1_4/q68/simplified.txt | 24 +-- .../approved-plans-v1_4/q69/explain.txt | 2 +- .../approved-plans-v1_4/q69/simplified.txt | 14 +- .../approved-plans-v1_4/q7/explain.txt | 6 +- .../approved-plans-v1_4/q7/simplified.txt | 20 +-- .../approved-plans-v1_4/q70/explain.txt | 6 +- .../approved-plans-v1_4/q70/simplified.txt | 8 +- .../approved-plans-v1_4/q71/explain.txt | 4 +- .../approved-plans-v1_4/q71/simplified.txt | 6 +- .../approved-plans-v1_4/q72/explain.txt | 6 +- .../approved-plans-v1_4/q72/simplified.txt | 34 ++--- .../approved-plans-v1_4/q73/explain.txt | 4 +- .../approved-plans-v1_4/q73/simplified.txt | 10 +- .../approved-plans-v1_4/q74/explain.txt | 8 +- .../approved-plans-v1_4/q74/simplified.txt | 36 ++--- .../approved-plans-v1_4/q75/explain.txt | 16 +- .../approved-plans-v1_4/q75/simplified.txt | 118 +++++++-------- .../approved-plans-v1_4/q76/explain.txt | 2 +- .../approved-plans-v1_4/q76/simplified.txt | 14 +- .../approved-plans-v1_4/q77/explain.txt | 12 +- .../approved-plans-v1_4/q77/simplified.txt | 38 ++--- .../approved-plans-v1_4/q78/explain.txt | 8 +- .../approved-plans-v1_4/q78/simplified.txt | 46 +++--- .../approved-plans-v1_4/q79/explain.txt | 2 +- .../approved-plans-v1_4/q79/simplified.txt | 18 +-- .../approved-plans-v1_4/q80/explain.txt | 8 +- .../approved-plans-v1_4/q80/simplified.txt | 62 ++++---- .../approved-plans-v1_4/q81/explain.txt | 6 +- .../approved-plans-v1_4/q81/simplified.txt | 24 +-- .../approved-plans-v1_4/q82/explain.txt | 2 +- .../approved-plans-v1_4/q82/simplified.txt | 4 +- .../approved-plans-v1_4/q83/explain.txt | 6 +- .../approved-plans-v1_4/q83/simplified.txt | 2 +- .../approved-plans-v1_4/q84/simplified.txt | 10 +- .../approved-plans-v1_4/q85/explain.txt | 10 +- .../approved-plans-v1_4/q85/simplified.txt | 34 ++--- .../approved-plans-v1_4/q86/explain.txt | 4 +- .../approved-plans-v1_4/q86/simplified.txt | 8 +- .../approved-plans-v1_4/q87/explain.txt | 6 +- .../approved-plans-v1_4/q87/simplified.txt | 4 +- .../approved-plans-v1_4/q89/explain.txt | 4 +- .../approved-plans-v1_4/q89/simplified.txt | 18 +-- .../approved-plans-v1_4/q9/explain.txt | 32 ++-- .../approved-plans-v1_4/q9/simplified.txt | 20 +-- .../approved-plans-v1_4/q91/explain.txt | 4 +- .../approved-plans-v1_4/q91/simplified.txt | 18 +-- .../approved-plans-v1_4/q92/explain.txt | 2 +- .../approved-plans-v1_4/q92/simplified.txt | 4 +- .../approved-plans-v1_4/q93/explain.txt | 2 +- .../approved-plans-v1_4/q93/simplified.txt | 4 +- .../approved-plans-v1_4/q94/explain.txt | 2 +- .../approved-plans-v1_4/q94/simplified.txt | 16 +- .../approved-plans-v1_4/q95/explain.txt | 2 +- .../approved-plans-v1_4/q95/simplified.txt | 14 +- .../approved-plans-v1_4/q97/explain.txt | 8 +- .../approved-plans-v1_4/q97/simplified.txt | 4 +- .../approved-plans-v1_4/q98/explain.txt | 6 +- .../approved-plans-v1_4/q98/simplified.txt | 22 +-- .../approved-plans-v1_4/q99/explain.txt | 2 +- .../approved-plans-v1_4/q99/simplified.txt | 18 +-- 186 files changed, 1621 insertions(+), 1613 deletions(-) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt index 97885fe7f..1e2caffbe 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt @@ -8,7 +8,7 @@ TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST], outp : : +- *(9) BroadcastHashJoin [ctr_store_sk#4], [ctr_store_sk#4#6], Inner, BuildRight, (cast(ctr_total_return#7 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#8) : : :- *(9) Filter isnotnull(ctr_total_return#7) : : : +- *(9) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[sum(UnscaledValue(sr_return_amt#11))]) - : : : +- Exchange hashpartitioning(sr_customer_sk#9, sr_store_sk#10, 200) + : : : +- Exchange hashpartitioning(sr_customer_sk#9, sr_store_sk#10, 5) : : : +- *(2) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[partial_sum(UnscaledValue(sr_return_amt#11))]) : : : +- *(2) Project [sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] : : : +- *(2) BroadcastHashJoin [sr_returned_date_sk#12], [cast(d_date_sk#13 as bigint)], Inner, BuildRight @@ -22,10 +22,10 @@ TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST], outp : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true])) : : +- *(6) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#8) : : +- *(6) HashAggregate(keys=[ctr_store_sk#4], functions=[avg(ctr_total_return#7)]) - : : +- Exchange hashpartitioning(ctr_store_sk#4, 200) + : : +- Exchange hashpartitioning(ctr_store_sk#4, 5) : : +- *(5) HashAggregate(keys=[ctr_store_sk#4], functions=[partial_avg(ctr_total_return#7)]) : : +- *(5) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[sum(UnscaledValue(sr_return_amt#11))]) - : : +- Exchange hashpartitioning(sr_customer_sk#9, sr_store_sk#10, 200) + : : +- Exchange hashpartitioning(sr_customer_sk#9, sr_store_sk#10, 5) : : +- *(4) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[partial_sum(UnscaledValue(sr_return_amt#11))]) : : +- *(4) Project [sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] : : +- *(4) BroadcastHashJoin [sr_returned_date_sk#12], [cast(d_date_sk#13 as bigint)], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt index 6104e2ac9..a27c58fe2 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt @@ -7,11 +7,11 @@ TakeOrderedAndProject [c_customer_id] Project [ctr_customer_sk,ctr_store_sk] BroadcastHashJoin [ctr_store_sk,ctr_store_skL,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] Filter [ctr_total_return] - HashAggregate [sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] [ctr_customer_sk,ctr_total_return,ctr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] + HashAggregate [sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] [ctr_customer_sk,ctr_total_return,sum,ctr_store_sk,sum(UnscaledValue(sr_return_amt))] InputAdapter Exchange [sr_customer_sk,sr_store_sk] #1 WholeStageCodegen - HashAggregate [sr_customer_sk,sum,sr_return_amt,sum,sr_store_sk] [sum,sum] + HashAggregate [sum,sr_return_amt,sr_customer_sk,sr_store_sk,sum] [sum,sum] Project [sr_customer_sk,sr_store_sk,sr_return_amt] BroadcastHashJoin [sr_returned_date_sk,d_date_sk] Project [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] @@ -27,16 +27,16 @@ TakeOrderedAndProject [c_customer_id] BroadcastExchange #3 WholeStageCodegen Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] - HashAggregate [ctr_store_sk,sum,count,avg(ctr_total_return)] [avg(ctr_total_return),ctr_store_skL,sum,count,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + HashAggregate [ctr_store_sk,sum,count,avg(ctr_total_return)] [sum,ctr_store_skL,avg(ctr_total_return),(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),count] InputAdapter Exchange [ctr_store_sk] #4 WholeStageCodegen - HashAggregate [ctr_store_sk,ctr_total_return,sum,count,sum,count] [sum,count,sum,count] + HashAggregate [sum,ctr_total_return,ctr_store_sk,count,sum,count] [sum,count,sum,count] HashAggregate [sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] [sum(UnscaledValue(sr_return_amt)),ctr_store_sk,ctr_total_return,sum] InputAdapter Exchange [sr_customer_sk,sr_store_sk] #5 WholeStageCodegen - HashAggregate [sum,sr_customer_sk,sum,sr_return_amt,sr_store_sk] [sum,sum] + HashAggregate [sr_return_amt,sr_customer_sk,sum,sr_store_sk,sum] [sum,sum] Project [sr_customer_sk,sr_store_sk,sr_return_amt] BroadcastHashJoin [sr_returned_date_sk,d_date_sk] Project [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt index a108b1a61..678ed7254 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[cd_gender#1 ASC NULLS FIRST,cd_marital_status#2 ASC NULLS FIRST,cd_education_status#3 ASC NULLS FIRST,cd_purchase_estimate#4 ASC NULLS FIRST,cd_credit_rating#5 ASC NULLS FIRST,cd_dep_count#6 ASC NULLS FIRST,cd_dep_employed_count#7 ASC NULLS FIRST,cd_dep_college_count#8 ASC NULLS FIRST], output=[cd_gender#1,cd_marital_status#2,cd_education_status#3,cnt1#9,cd_purchase_estimate#4,cnt2#10,cd_credit_rating#5,cnt3#11,cd_dep_count#6,cnt4#12,cd_dep_employed_count#7,cnt5#13,cd_dep_college_count#8,cnt6#14]) +- *(10) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8], functions=[count(1)]) - +- Exchange hashpartitioning(cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8, 200) + +- Exchange hashpartitioning(cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8, 5) +- *(9) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8], functions=[partial_count(1)]) +- *(9) Project [cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8] +- *(9) BroadcastHashJoin [c_current_cdemo_sk#15], [cd_demo_sk#16], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/simplified.txt index 876a5eab0..78aec4ebf 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/simplified.txt @@ -1,11 +1,11 @@ -TakeOrderedAndProject [cnt5,cnt6,cnt1,cnt4,cd_purchase_estimate,cnt2,cnt3,cd_education_status,cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_marital_status,cd_gender,cd_dep_employed_count] +TakeOrderedAndProject [cnt5,cd_dep_employed_count,cd_gender,cd_dep_college_count,cd_education_status,cd_dep_count,cnt6,cd_credit_rating,cnt4,cd_purchase_estimate,cd_marital_status,cnt2,cnt1,cnt3] WholeStageCodegen - HashAggregate [count,count(1),cd_purchase_estimate,cd_education_status,cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_marital_status,cd_gender,cd_dep_employed_count] [cnt5,cnt6,cnt1,count,cnt4,count(1),cnt2,cnt3] + HashAggregate [cd_dep_employed_count,cd_gender,cd_dep_college_count,cd_education_status,cd_dep_count,cd_credit_rating,cd_purchase_estimate,cd_marital_status,count,count(1)] [cnt5,cnt6,cnt4,count,count(1),cnt2,cnt1,cnt3] InputAdapter - Exchange [cd_purchase_estimate,cd_education_status,cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_marital_status,cd_gender,cd_dep_employed_count] #1 + Exchange [cd_dep_employed_count,cd_gender,cd_dep_college_count,cd_education_status,cd_dep_count,cd_credit_rating,cd_purchase_estimate,cd_marital_status] #1 WholeStageCodegen - HashAggregate [count,cd_purchase_estimate,count,cd_education_status,cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_marital_status,cd_gender,cd_dep_employed_count] [count,count] - Project [cd_dep_college_count,cd_dep_employed_count,cd_marital_status,cd_dep_count,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] + HashAggregate [count,cd_dep_employed_count,cd_gender,cd_dep_college_count,cd_education_status,cd_dep_count,cd_credit_rating,cd_purchase_estimate,cd_marital_status,count] [count,count] + Project [cd_gender,cd_dep_count,cd_education_status,cd_dep_college_count,cd_marital_status,cd_credit_rating,cd_purchase_estimate,cd_dep_employed_count] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] Project [c_current_cdemo_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] @@ -60,6 +60,6 @@ TakeOrderedAndProject [cnt5,cnt6,cnt1,cnt4,cd_purchase_estimate,cnt2,cnt3,cd_edu InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [cd_dep_college_count,cd_dep_employed_count,cd_demo_sk,cd_marital_status,cd_dep_count,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] + Project [cd_gender,cd_dep_count,cd_education_status,cd_dep_college_count,cd_marital_status,cd_credit_rating,cd_demo_sk,cd_purchase_estimate,cd_dep_employed_count] Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_dep_college_count,cd_dep_employed_count,cd_demo_sk,cd_marital_status,cd_dep_count,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] [cd_dep_college_count,cd_dep_employed_count,cd_demo_sk,cd_marital_status,cd_dep_count,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] + Scan parquet default.customer_demographics [cd_gender,cd_dep_count,cd_education_status,cd_dep_college_count,cd_marital_status,cd_credit_rating,cd_demo_sk,cd_purchase_estimate,cd_dep_employed_count] [cd_gender,cd_dep_count,cd_education_status,cd_dep_college_count,cd_marital_status,cd_credit_rating,cd_demo_sk,cd_purchase_estimate,cd_dep_employed_count] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/explain.txt index f8275bad5..480f583e0 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/explain.txt @@ -9,7 +9,7 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_preferred_cust_flag#1 ASC NUL : : :- Union : : : :- *(4) Filter (isnotnull(year_total#6) && (year_total#6 > 0.00)) : : : : +- *(4) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) - : : : : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, 200) + : : : : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, 5) : : : : +- *(3) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) : : : : +- *(3) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_ext_discount_amt#19, ss_ext_list_price#18, d_year#13] : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight @@ -30,7 +30,7 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_preferred_cust_flag#1 ASC NUL : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : : +- Union : : :- *(8) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) - : : : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, 200) + : : : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, 5) : : : +- *(7) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) : : : +- *(7) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_ext_discount_amt#19, ss_ext_list_price#18, d_year#13] : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight @@ -50,7 +50,7 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_preferred_cust_flag#1 ASC NUL : :- LocalTableScan , [customer_id#8, year_total#4] : +- *(12) Filter (isnotnull(year_total#25) && (year_total#25 > 0.00)) : +- *(12) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) - : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13, 200) + : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13, 5) : +- *(11) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) : +- *(11) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_ext_discount_amt#28, ws_ext_list_price#27, d_year#13] : +- *(11) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#21], Inner, BuildRight @@ -68,7 +68,7 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_preferred_cust_flag#1 ASC NUL +- Union :- LocalTableScan , [customer_id#3, year_total#5] +- *(16) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) - +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13, 200) + +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13, 5) +- *(15) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) +- *(15) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_ext_discount_amt#28, ws_ext_list_price#27, d_year#13] +- *(15) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#21], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt index 75734357c..dd6939bc5 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt @@ -1,8 +1,8 @@ TakeOrderedAndProject [customer_preferred_cust_flag] WholeStageCodegen Project [customer_preferred_cust_flag] - BroadcastHashJoin [year_total,year_total,customer_id,year_total,year_total,customer_id] - Project [customer_preferred_cust_flag,year_total,year_total,customer_id,year_total] + BroadcastHashJoin [year_total,customer_id,year_total,customer_id,year_total,year_total] + Project [year_total,year_total,customer_id,year_total,customer_preferred_cust_flag] BroadcastHashJoin [customer_id,customer_id] Project [customer_id,year_total,customer_preferred_cust_flag,year_total] BroadcastHashJoin [customer_id,customer_id] @@ -10,18 +10,18 @@ TakeOrderedAndProject [customer_preferred_cust_flag] Union WholeStageCodegen Filter [year_total] - HashAggregate [d_year,sum,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),c_first_name,c_email_address] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] + HashAggregate [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),c_email_address,sum,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #1 + Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #1 WholeStageCodegen - HashAggregate [d_year,sum,ss_ext_discount_amt,c_login,sum,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,ss_ext_list_price,c_first_name,c_email_address] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,d_year,c_login,c_last_name,c_first_name,c_birth_country,ss_ext_list_price] + HashAggregate [c_email_address,sum,ss_ext_list_price,c_login,c_last_name,c_customer_id,d_year,c_birth_country,sum,c_preferred_cust_flag,c_first_name,ss_ext_discount_amt] [sum,sum] + Project [d_year,c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ss_ext_discount_amt,ss_ext_list_price] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,c_login,c_last_name,c_first_name,ss_sold_date_sk,c_birth_country,ss_ext_list_price] + Project [c_customer_id,c_birth_country,c_login,ss_sold_date_sk,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ss_ext_discount_amt,ss_ext_list_price] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] InputAdapter BroadcastExchange #2 WholeStageCodegen @@ -39,18 +39,18 @@ TakeOrderedAndProject [customer_preferred_cust_flag] BroadcastExchange #4 Union WholeStageCodegen - HashAggregate [d_year,sum,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),c_first_name,c_email_address] [customer_preferred_cust_flag,sum,year_total,customer_id,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2))))] + HashAggregate [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),c_email_address,c_login,c_last_name,sum,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),year_total,sum,customer_id,customer_preferred_cust_flag] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #5 + Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #5 WholeStageCodegen - HashAggregate [d_year,sum,ss_ext_discount_amt,c_login,sum,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,ss_ext_list_price,c_first_name,c_email_address] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,d_year,c_login,c_last_name,c_first_name,c_birth_country,ss_ext_list_price] + HashAggregate [c_email_address,ss_ext_list_price,c_login,c_last_name,sum,c_customer_id,d_year,c_birth_country,sum,c_preferred_cust_flag,c_first_name,ss_ext_discount_amt] [sum,sum] + Project [d_year,c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ss_ext_discount_amt,ss_ext_list_price] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,c_login,c_last_name,c_first_name,ss_sold_date_sk,c_birth_country,ss_ext_list_price] + Project [c_customer_id,c_birth_country,c_login,ss_sold_date_sk,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ss_ext_discount_amt,ss_ext_list_price] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] InputAdapter ReusedExchange [ss_sold_date_sk,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price] [ss_sold_date_sk,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price] #2 InputAdapter @@ -66,18 +66,18 @@ TakeOrderedAndProject [customer_preferred_cust_flag] LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen Filter [year_total] - HashAggregate [d_year,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),sum,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] + HashAggregate [c_email_address,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),c_login,c_last_name,c_customer_id,d_year,c_birth_country,sum,c_preferred_cust_flag,c_first_name] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #8 + Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #8 WholeStageCodegen - HashAggregate [d_year,ws_ext_discount_amt,sum,c_login,sum,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,ws_ext_list_price,c_first_name,c_email_address] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,d_year,ws_ext_discount_amt,c_login,c_last_name,ws_ext_list_price,c_first_name,c_birth_country] + HashAggregate [sum,c_email_address,ws_ext_list_price,c_login,c_last_name,ws_ext_discount_amt,c_customer_id,d_year,c_birth_country,sum,c_preferred_cust_flag,c_first_name] [sum,sum] + Project [d_year,c_customer_id,c_birth_country,c_login,ws_ext_list_price,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ws_ext_discount_amt] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ws_ext_discount_amt,c_login,c_last_name,ws_sold_date_sk,ws_ext_list_price,c_first_name,c_birth_country] + Project [c_customer_id,c_birth_country,c_login,ws_ext_list_price,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ws_ext_discount_amt,ws_sold_date_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] InputAdapter BroadcastExchange #9 WholeStageCodegen @@ -91,18 +91,18 @@ TakeOrderedAndProject [customer_preferred_cust_flag] Union LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen - HashAggregate [d_year,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] + HashAggregate [c_email_address,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),c_login,c_last_name,sum,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #11 + Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #11 WholeStageCodegen - HashAggregate [d_year,ws_ext_discount_amt,sum,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,ws_ext_list_price,c_first_name,c_email_address,sum] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,d_year,ws_ext_discount_amt,c_login,c_last_name,ws_ext_list_price,c_first_name,c_birth_country] + HashAggregate [c_email_address,ws_ext_list_price,c_login,c_last_name,sum,ws_ext_discount_amt,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name,sum] [sum,sum] + Project [d_year,c_customer_id,c_birth_country,c_login,ws_ext_list_price,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ws_ext_discount_amt] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ws_ext_discount_amt,c_login,c_last_name,ws_sold_date_sk,ws_ext_list_price,c_first_name,c_birth_country] + Project [c_customer_id,c_birth_country,c_login,ws_ext_list_price,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ws_ext_discount_amt,ws_sold_date_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] InputAdapter ReusedExchange [ws_sold_date_sk,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price] [ws_sold_date_sk,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price] #9 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/explain.txt index 4008850ff..ed4ddfefe 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/explain.txt @@ -3,9 +3,9 @@ TakeOrderedAndProject(limit=100, orderBy=[i_category#1 ASC NULLS FIRST,i_class#2 +- *(6) Project [i_item_desc#4, i_category#1, i_class#2, i_current_price#6, itemrevenue#7, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#8) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#9)), DecimalType(38,17)) AS revenueratio#5, i_item_id#3] +- Window [sum(_w1#10) windowspecdefinition(i_class#2, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#9], [i_class#2] +- *(5) Sort [i_class#2 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(i_class#2, 200) + +- Exchange hashpartitioning(i_class#2, 5) +- *(4) HashAggregate(keys=[i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6], functions=[sum(UnscaledValue(ws_ext_sales_price#11))]) - +- Exchange hashpartitioning(i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6, 200) + +- Exchange hashpartitioning(i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6, 5) +- *(3) HashAggregate(keys=[i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#11))]) +- *(3) Project [ws_ext_sales_price#11, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] +- *(3) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt index 497db4cde..6bd7cd99e 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt @@ -1,6 +1,6 @@ -TakeOrderedAndProject [i_item_id,i_item_desc,itemrevenue,i_category,revenueratio,i_current_price,i_class] +TakeOrderedAndProject [i_current_price,revenueratio,i_item_desc,i_item_id,i_category,itemrevenue,i_class] WholeStageCodegen - Project [i_item_id,i_item_desc,itemrevenue,_we0,i_category,i_current_price,i_class,_w0] + Project [i_current_price,_we0,_w0,i_item_desc,i_item_id,i_category,itemrevenue,i_class] InputAdapter Window [_w1,i_class] WholeStageCodegen @@ -8,14 +8,14 @@ TakeOrderedAndProject [i_item_id,i_item_desc,itemrevenue,i_category,revenueratio InputAdapter Exchange [i_class] #1 WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,i_category,sum,i_current_price,sum(UnscaledValue(ws_ext_sales_price)),i_class] [itemrevenue,_w1,sum,sum(UnscaledValue(ws_ext_sales_price)),_w0] + HashAggregate [i_current_price,sum,i_item_desc,sum(UnscaledValue(ws_ext_sales_price)),i_item_id,i_category,i_class] [_w0,sum,_w1,sum(UnscaledValue(ws_ext_sales_price)),itemrevenue] InputAdapter - Exchange [i_item_id,i_item_desc,i_category,i_current_price,i_class] #2 + Exchange [i_current_price,i_item_desc,i_item_id,i_category,i_class] #2 WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,ws_ext_sales_price,i_category,sum,i_current_price,i_class,sum] [sum,sum] - Project [i_class,i_current_price,ws_ext_sales_price,i_category,i_item_desc,i_item_id] + HashAggregate [i_current_price,sum,sum,i_item_desc,ws_ext_sales_price,i_item_id,i_category,i_class] [sum,sum] + Project [i_current_price,i_category,i_item_id,ws_ext_sales_price,i_class,i_item_desc] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [i_class,i_current_price,ws_ext_sales_price,ws_sold_date_sk,i_category,i_item_desc,i_item_id] + Project [i_current_price,i_category,i_item_id,ws_ext_sales_price,i_class,i_item_desc,ws_sold_date_sk] BroadcastHashJoin [ws_item_sk,i_item_sk] Project [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] Filter [ws_item_sk,ws_sold_date_sk] @@ -23,9 +23,9 @@ TakeOrderedAndProject [i_item_id,i_item_desc,itemrevenue,i_category,revenueratio InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + Project [i_current_price,i_category,i_item_sk,i_item_id,i_class,i_item_desc] Filter [i_category,i_item_sk] - Scan parquet default.item [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + Scan parquet default.item [i_current_price,i_category,i_item_sk,i_item_id,i_class,i_item_desc] [i_current_price,i_category,i_item_sk,i_item_id,i_class,i_item_desc] InputAdapter BroadcastExchange #4 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt index 0b02236c0..a5dc71c58 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt @@ -1,22 +1,22 @@ WholeStageCodegen - HashAggregate [sum,sum,sum(UnscaledValue(ss_ext_wholesale_cost)),sum,avg(cast(ss_quantity as bigint)),count,sum,avg(UnscaledValue(ss_ext_sales_price)),count,avg(UnscaledValue(ss_ext_wholesale_cost)),count] [sum,avg(ss_ext_sales_price),sum,sum(UnscaledValue(ss_ext_wholesale_cost)),sum,sum(ss_ext_wholesale_cost),avg(ss_ext_wholesale_cost),avg(cast(ss_quantity as bigint)),avg(ss_quantity),count,sum,avg(UnscaledValue(ss_ext_sales_price)),count,avg(UnscaledValue(ss_ext_wholesale_cost)),count] + HashAggregate [avg(UnscaledValue(ss_ext_wholesale_cost)),count,sum,sum,avg(cast(ss_quantity as bigint)),sum,sum,sum(UnscaledValue(ss_ext_wholesale_cost)),avg(UnscaledValue(ss_ext_sales_price)),count,count] [avg(UnscaledValue(ss_ext_wholesale_cost)),count,sum,sum(ss_ext_wholesale_cost),sum,avg(cast(ss_quantity as bigint)),avg(ss_ext_sales_price),sum,sum,avg(ss_ext_wholesale_cost),avg(ss_quantity),sum(UnscaledValue(ss_ext_wholesale_cost)),avg(UnscaledValue(ss_ext_sales_price)),count,count] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [ss_ext_wholesale_cost,count,sum,sum,count,sum,count,sum,sum,count,sum,sum,sum,ss_ext_sales_price,count,ss_quantity,count] [count,sum,sum,count,sum,count,sum,sum,count,sum,sum,sum,count,count] + HashAggregate [sum,count,ss_quantity,count,sum,sum,count,ss_ext_sales_price,sum,sum,sum,sum,ss_ext_wholesale_cost,count,count,count,sum] [sum,count,count,sum,sum,count,sum,sum,sum,sum,count,count,count,sum] Project [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] - BroadcastHashJoin [hd_dep_count,ss_hdemo_sk,ss_sales_price,cd_education_status,hd_demo_sk,cd_marital_status] - Project [ss_quantity,cd_marital_status,ss_ext_sales_price,ss_sales_price,ss_hdemo_sk,cd_education_status,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_hdemo_sk,cd_education_status,hd_demo_sk,hd_dep_count,cd_marital_status,ss_sales_price] + Project [ss_sales_price,ss_hdemo_sk,cd_education_status,ss_quantity,cd_marital_status,ss_ext_sales_price,ss_ext_wholesale_cost] BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [ss_quantity,ss_ext_sales_price,ss_sales_price,ss_cdemo_sk,ss_hdemo_sk,ss_ext_wholesale_cost] + Project [ss_sales_price,ss_hdemo_sk,ss_cdemo_sk,ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_ext_sales_price,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] + Project [ss_sales_price,ss_hdemo_sk,ss_cdemo_sk,ss_quantity,ss_sold_date_sk,ss_ext_sales_price,ss_ext_wholesale_cost] BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] - Project [ss_addr_sk,ss_quantity,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] + Project [ss_sales_price,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit,ss_ext_wholesale_cost] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_addr_sk,ss_quantity,ss_store_sk,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] - Filter [ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_addr_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_quantity,ss_store_sk,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] [ss_addr_sk,ss_quantity,ss_store_sk,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] + Project [ss_sales_price,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_ext_wholesale_cost] + Filter [ss_hdemo_sk,ss_store_sk,ss_sold_date_sk,ss_addr_sk,ss_cdemo_sk] + Scan parquet default.store_sales [ss_sales_price,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_ext_wholesale_cost] [ss_sales_price,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_ext_wholesale_cost] InputAdapter BroadcastExchange #2 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt index 27a206912..cb9881765 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt @@ -1,13 +1,13 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 ASC NULLS FIRST,i_class_id#3 ASC NULLS FIRST,i_category_id#4 ASC NULLS FIRST], output=[channel#1,i_brand_id#2,i_class_id#3,i_category_id#4,sum(sales)#5,sum(number_sales)#6]) +- *(80) HashAggregate(keys=[channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7], functions=[sum(sales#8), sum(number_sales#9)]) - +- Exchange hashpartitioning(channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7, 200) + +- Exchange hashpartitioning(channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7, 5) +- *(79) HashAggregate(keys=[channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7], functions=[partial_sum(sales#8), partial_sum(number_sales#9)]) +- *(79) Expand [List(sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, i_category_id#13, 0), List(sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, null, 1), List(sales#8, number_sales#9, channel#10, i_brand_id#11, null, null, 3), List(sales#8, number_sales#9, channel#10, null, null, null, 7), List(sales#8, number_sales#9, null, null, null, null, 15)], [sales#8, number_sales#9, channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7] +- Union :- *(26) Project [sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, i_category_id#13] - : +- *(26) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2)))#16) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2)))#16 as decimal(32,6)) > cast(Subquery subquery1662 as decimal(32,6)))) - : : +- Subquery subquery1662 + : +- *(26) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2)))#16) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2)))#16 as decimal(32,6)) > cast(Subquery subquery3902 as decimal(32,6)))) + : : +- Subquery subquery3902 : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) : : +- Exchange SinglePartition : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) @@ -34,7 +34,7 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(26) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2))), count(1)]) - : +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 200) + : +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 5) : +- *(25) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) : +- *(25) Project [ss_quantity#14, ss_list_price#15, i_brand_id#11, i_class_id#12, i_category_id#13] : +- *(25) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight @@ -48,14 +48,14 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : +- *(11) Project [i_item_sk#33 AS ss_item_sk#34] : : : +- *(11) BroadcastHashJoin [i_brand_id#11, i_class_id#12, i_category_id#13], [brand_id#35, class_id#36, category_id#37], Inner, BuildRight : : : :- *(11) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] - : : : : +- *(11) Filter ((isnotnull(i_class_id#12) && isnotnull(i_brand_id#11)) && isnotnull(i_category_id#13)) - : : : : +- *(11) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : : +- *(11) Filter ((isnotnull(i_category_id#13) && isnotnull(i_brand_id#11)) && isnotnull(i_class_id#12)) + : : : : +- *(11) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category_id), IsNotNull(i_brand_id), IsNotNull(i_class_id)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) : : : +- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) : : : +- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) : : : +- *(10) BroadcastHashJoin [coalesce(brand_id#35, 0), coalesce(class_id#36, 0), coalesce(category_id#37, 0)], [coalesce(i_brand_id#11, 0), coalesce(i_class_id#12, 0), coalesce(i_category_id#13, 0)], LeftSemi, BuildRight, (((brand_id#35 <=> i_brand_id#11) && (class_id#36 <=> i_class_id#12)) && (category_id#37 <=> i_category_id#13)) : : : :- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) - : : : : +- Exchange hashpartitioning(brand_id#35, class_id#36, category_id#37, 200) + : : : : +- Exchange hashpartitioning(brand_id#35, class_id#36, category_id#37, 5) : : : : +- *(6) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) : : : : +- *(6) BroadcastHashJoin [coalesce(brand_id#35, 0), coalesce(class_id#36, 0), coalesce(category_id#37, 0)], [coalesce(i_brand_id#11, 0), coalesce(i_class_id#12, 0), coalesce(i_category_id#13, 0)], LeftSemi, BuildRight, (((brand_id#35 <=> i_brand_id#11) && (class_id#36 <=> i_class_id#12)) && (category_id#37 <=> i_category_id#13)) : : : : :- *(6) Project [i_brand_id#11 AS brand_id#35, i_class_id#12 AS class_id#36, i_category_id#13 AS category_id#37] @@ -67,8 +67,8 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : +- *(1) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] - : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#33) && isnotnull(i_class_id#12)) && isnotnull(i_brand_id#11)) && isnotnull(i_category_id#13)) - : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#33) && isnotnull(i_category_id#13)) && isnotnull(i_brand_id#11)) && isnotnull(i_class_id#12)) + : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category_id), IsNotNull(i_brand_id), IsNotNull(i_class_id)], ReadSchema: struct : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- *(2) Project [d_date_sk#20] : : : : : +- *(2) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) @@ -107,8 +107,8 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : +- *(24) Filter ((((isnotnull(d_year#21) && isnotnull(d_moy#40)) && (d_year#21 = 2001)) && (d_moy#40 = 11)) && isnotnull(d_date_sk#20)) : +- *(24) FileScan parquet default.date_dim[d_date_sk#20,d_year#21,d_moy#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct :- *(52) Project [sales#41, number_sales#42, channel#43, i_brand_id#11, i_class_id#12, i_category_id#13] - : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44) && (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44 as decimal(32,6)) > cast(Subquery subquery1667 as decimal(32,6)))) - : : +- Subquery subquery1667 + : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44) && (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44 as decimal(32,6)) > cast(Subquery subquery3907 as decimal(32,6)))) + : : +- Subquery subquery3907 : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) : : +- Exchange SinglePartition : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) @@ -135,7 +135,7 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(52) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2))), count(1)]) - : +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 200) + : +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 5) : +- *(51) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) : +- *(51) Project [cs_quantity#22, cs_list_price#24, i_brand_id#11, i_class_id#12, i_category_id#13] : +- *(51) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight @@ -149,8 +149,8 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(78) Project [sales#45, number_sales#46, channel#47, i_brand_id#11, i_class_id#12, i_category_id#13] - +- *(78) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2)))#48) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2)))#48 as decimal(32,6)) > cast(Subquery subquery1672 as decimal(32,6)))) - : +- Subquery subquery1672 + +- *(78) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2)))#48) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2)))#48 as decimal(32,6)) > cast(Subquery subquery3912 as decimal(32,6)))) + : +- Subquery subquery3912 : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) : +- Exchange SinglePartition : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) @@ -177,7 +177,7 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(78) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2))), count(1)]) - +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 200) + +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 5) +- *(77) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) +- *(77) Project [ws_quantity#27, ws_list_price#29, i_brand_id#11, i_class_id#12, i_category_id#13] +- *(77) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt index 93bae0792..a35247783 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt @@ -1,15 +1,15 @@ -TakeOrderedAndProject [sum(sales),i_brand_id,sum(number_sales),channel,i_category_id,i_class_id] +TakeOrderedAndProject [i_category_id,sum(sales),channel,sum(number_sales),i_brand_id,i_class_id] WholeStageCodegen - HashAggregate [sum(sales),spark_grouping_id,i_brand_id,sum,sum,channel,i_category_id,i_class_id,sum(number_salesL)] [sum(sales),sum(sales),sum(number_sales),sum,sum,sum(number_salesL)] + HashAggregate [i_category_id,channel,sum,sum(number_salesL),sum(sales),i_brand_id,i_class_id,spark_grouping_id,sum] [sum(sales),sum(number_sales),sum,sum(number_salesL),sum(sales),sum] InputAdapter - Exchange [spark_grouping_id,i_brand_id,channel,i_category_id,i_class_id] #1 + Exchange [i_category_id,channel,i_brand_id,i_class_id,spark_grouping_id] #1 WholeStageCodegen - HashAggregate [sales,spark_grouping_id,sum,i_brand_id,sum,number_sales,sum,sum,channel,i_category_id,i_class_id] [sum,sum,sum,sum] - Expand [channel,sales,i_category_id,number_sales,i_class_id,i_brand_id] + HashAggregate [sum,sum,i_category_id,channel,sum,sales,number_sales,i_brand_id,i_class_id,spark_grouping_id,sum] [sum,sum,sum,sum] + Expand [sales,i_brand_id,number_sales,channel,i_category_id,i_class_id] InputAdapter Union WholeStageCodegen - Project [channel,sales,i_category_id,number_sales,i_class_id,i_brand_id] + Project [sales,i_brand_id,number_sales,channel,i_category_id,i_class_id] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #1 WholeStageCodegen @@ -17,7 +17,7 @@ TakeOrderedAndProject [sum(sales),i_brand_id,sum(number_sales),channel,i_categor InputAdapter Exchange #13 WholeStageCodegen - HashAggregate [count,list_price,sum,quantity,sum,count] [sum,count,sum,count] + HashAggregate [sum,list_price,sum,quantity,count,count] [sum,count,sum,count] InputAdapter Union WholeStageCodegen @@ -48,14 +48,14 @@ TakeOrderedAndProject [sum(sales),i_brand_id,sum(number_sales),channel,i_categor Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #14 - HashAggregate [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,i_category_id,i_brand_id,sum,i_class_id] [sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,number_sales,channel,sum] + HashAggregate [i_brand_id,count(1),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count,sum,i_class_id,i_category_id] [count(1),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),channel,sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count,sum,number_sales] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #2 WholeStageCodegen - HashAggregate [count,count,sum,ss_list_price,i_category_id,i_brand_id,ss_quantity,sum,i_class_id] [sum,count,sum,count] - Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,i_class_id] + HashAggregate [i_brand_id,ss_quantity,count,count,sum,i_class_id,ss_list_price,i_category_id,sum] [sum,count,sum,count] + Project [ss_list_price,i_class_id,i_category_id,i_brand_id,ss_quantity] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,ss_sold_date_sk,i_class_id] + Project [ss_list_price,i_class_id,i_category_id,i_brand_id,ss_quantity,ss_sold_date_sk] BroadcastHashJoin [ss_item_sk,i_item_sk] BroadcastHashJoin [ss_item_sk,ss_item_sk] Project [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] @@ -65,22 +65,22 @@ TakeOrderedAndProject [sum(sales),i_brand_id,sum(number_sales),channel,i_categor BroadcastExchange #3 WholeStageCodegen Project [i_item_sk] - BroadcastHashJoin [category_id,class_id,i_category_id,i_brand_id,i_class_id,brand_id] + BroadcastHashJoin [i_brand_id,category_id,class_id,i_class_id,brand_id,i_category_id] Project [i_item_sk,i_brand_id,i_class_id,i_category_id] - Filter [i_class_id,i_brand_id,i_category_id] + Filter [i_category_id,i_brand_id,i_class_id] Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter BroadcastExchange #4 WholeStageCodegen HashAggregate [brand_id,class_id,category_id] HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [class_id,i_class_id,i_category_id,brand_id,i_brand_id,category_id] + BroadcastHashJoin [i_category_id,class_id,brand_id,i_brand_id,i_class_id,category_id] HashAggregate [brand_id,class_id,category_id] InputAdapter Exchange [brand_id,class_id,category_id] #5 WholeStageCodegen HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,class_id,brand_id,category_id] + BroadcastHashJoin [class_id,i_category_id,i_class_id,brand_id,category_id,i_brand_id] Project [i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] @@ -92,7 +92,7 @@ TakeOrderedAndProject [sum(sales),i_brand_id,sum(number_sales),channel,i_categor BroadcastExchange #6 WholeStageCodegen Project [i_item_sk,i_brand_id,i_class_id,i_category_id] - Filter [i_item_sk,i_class_id,i_brand_id,i_category_id] + Filter [i_item_sk,i_category_id,i_brand_id,i_class_id] Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter BroadcastExchange #7 @@ -148,7 +148,7 @@ TakeOrderedAndProject [sum(sales),i_brand_id,sum(number_sales),channel,i_categor Filter [d_year,d_moy,d_date_sk] Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] WholeStageCodegen - Project [sales,channel,i_category_id,i_brand_id,number_sales,i_class_id] + Project [i_brand_id,number_sales,i_class_id,channel,i_category_id,sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #2 WholeStageCodegen @@ -156,7 +156,7 @@ TakeOrderedAndProject [sum(sales),i_brand_id,sum(number_sales),channel,i_categor InputAdapter Exchange #13 WholeStageCodegen - HashAggregate [count,list_price,sum,quantity,sum,count] [sum,count,sum,count] + HashAggregate [sum,list_price,sum,quantity,count,count] [sum,count,sum,count] InputAdapter Union WholeStageCodegen @@ -187,14 +187,14 @@ TakeOrderedAndProject [sum(sales),i_brand_id,sum(number_sales),channel,i_categor Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #14 - HashAggregate [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sum,i_category_id,i_brand_id,count,count(1),i_class_id] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sales,channel,sum,number_sales,count,count(1)] + HashAggregate [i_brand_id,count(1),sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),count,i_class_id,i_category_id] [count(1),number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),count,channel,sales] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #15 WholeStageCodegen - HashAggregate [sum,cs_list_price,sum,i_category_id,count,i_brand_id,cs_quantity,count,i_class_id] [sum,count,sum,count] - Project [cs_quantity,i_category_id,i_brand_id,i_class_id,cs_list_price] + HashAggregate [cs_quantity,i_brand_id,sum,sum,cs_list_price,count,i_class_id,count,i_category_id] [sum,count,sum,count] + Project [i_class_id,cs_quantity,i_category_id,cs_list_price,i_brand_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_quantity,i_category_id,cs_sold_date_sk,i_brand_id,i_class_id,cs_list_price] + Project [i_class_id,cs_quantity,i_category_id,cs_list_price,i_brand_id,cs_sold_date_sk] BroadcastHashJoin [cs_item_sk,i_item_sk] BroadcastHashJoin [cs_item_sk,ss_item_sk] Project [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] @@ -207,7 +207,7 @@ TakeOrderedAndProject [sum(sales),i_brand_id,sum(number_sales),channel,i_categor InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #12 WholeStageCodegen - Project [number_sales,i_category_id,i_brand_id,i_class_id,sales,channel] + Project [i_brand_id,sales,i_class_id,i_category_id,number_sales,channel] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #3 WholeStageCodegen @@ -215,7 +215,7 @@ TakeOrderedAndProject [sum(sales),i_brand_id,sum(number_sales),channel,i_categor InputAdapter Exchange #13 WholeStageCodegen - HashAggregate [count,list_price,sum,quantity,sum,count] [sum,count,sum,count] + HashAggregate [sum,list_price,sum,quantity,count,count] [sum,count,sum,count] InputAdapter Union WholeStageCodegen @@ -246,14 +246,14 @@ TakeOrderedAndProject [sum(sales),i_brand_id,sum(number_sales),channel,i_categor Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #14 - HashAggregate [count(1),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum,count,i_category_id,i_brand_id,i_class_id] [count(1),number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum,count,sales,channel] + HashAggregate [i_brand_id,sum,count,count(1),i_class_id,i_category_id,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum,count,sales,count(1),number_sales,channel,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #16 WholeStageCodegen - HashAggregate [count,sum,sum,count,i_category_id,i_brand_id,i_class_id,ws_list_price,ws_quantity] [sum,count,sum,count] - Project [ws_quantity,i_category_id,ws_list_price,i_brand_id,i_class_id] + HashAggregate [i_brand_id,sum,count,count,sum,ws_quantity,i_class_id,i_category_id,ws_list_price] [sum,count,sum,count] + Project [i_class_id,i_category_id,i_brand_id,ws_list_price,ws_quantity] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_quantity,i_category_id,ws_list_price,ws_sold_date_sk,i_brand_id,i_class_id] + Project [i_class_id,i_category_id,i_brand_id,ws_list_price,ws_quantity,ws_sold_date_sk] BroadcastHashJoin [ws_item_sk,i_item_sk] BroadcastHashJoin [ws_item_sk,ss_item_sk] Project [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt index b3130331d..25a3c636b 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt @@ -2,8 +2,8 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 ASC NULLS FIRST,i_class_id#3 ASC NULLS FIRST,i_category_id#4 ASC NULLS FIRST], output=[channel#1,i_brand_id#2,i_class_id#3,i_category_id#4,sales#5,number_sales#6,channel#7,i_brand_id#8,i_class_id#9,i_category_id#10,sales#11,number_sales#12]) +- *(52) BroadcastHashJoin [i_brand_id#2, i_class_id#3, i_category_id#4], [i_brand_id#8, i_class_id#9, i_category_id#10], Inner, BuildRight :- *(52) Project [channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, sales#5, number_sales#6] - : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#15) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#15 as decimal(32,6)) > cast(Subquery subquery1884 as decimal(32,6)))) - : : +- Subquery subquery1884 + : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#15) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#15 as decimal(32,6)) > cast(Subquery subquery4124 as decimal(32,6)))) + : : +- Subquery subquery4124 : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) : : +- Exchange SinglePartition : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) @@ -30,7 +30,7 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_quantity#26,ws_list_price#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(52) HashAggregate(keys=[i_brand_id#2, i_class_id#3, i_category_id#4], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), count(1)]) - : +- Exchange hashpartitioning(i_brand_id#2, i_class_id#3, i_category_id#4, 200) + : +- Exchange hashpartitioning(i_brand_id#2, i_class_id#3, i_category_id#4, 5) : +- *(25) HashAggregate(keys=[i_brand_id#2, i_class_id#3, i_category_id#4], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) : +- *(25) Project [ss_quantity#13, ss_list_price#14, i_brand_id#2, i_class_id#3, i_category_id#4] : +- *(25) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight @@ -44,14 +44,14 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : +- *(11) Project [i_item_sk#32 AS ss_item_sk#33] : : : +- *(11) BroadcastHashJoin [i_brand_id#2, i_class_id#3, i_category_id#4], [brand_id#34, class_id#35, category_id#36], Inner, BuildRight : : : :- *(11) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : : +- *(11) Filter ((isnotnull(i_class_id#3) && isnotnull(i_brand_id#2)) && isnotnull(i_category_id#4)) - : : : : +- *(11) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : : +- *(11) Filter ((isnotnull(i_category_id#4) && isnotnull(i_brand_id#2)) && isnotnull(i_class_id#3)) + : : : : +- *(11) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category_id), IsNotNull(i_brand_id), IsNotNull(i_class_id)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) : : : +- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) : : : +- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) : : : +- *(10) BroadcastHashJoin [coalesce(brand_id#34, 0), coalesce(class_id#35, 0), coalesce(category_id#36, 0)], [coalesce(i_brand_id#2, 0), coalesce(i_class_id#3, 0), coalesce(i_category_id#4, 0)], LeftSemi, BuildRight, (((brand_id#34 <=> i_brand_id#2) && (class_id#35 <=> i_class_id#3)) && (category_id#36 <=> i_category_id#4)) : : : :- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) - : : : : +- Exchange hashpartitioning(brand_id#34, class_id#35, category_id#36, 200) + : : : : +- Exchange hashpartitioning(brand_id#34, class_id#35, category_id#36, 5) : : : : +- *(6) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) : : : : +- *(6) BroadcastHashJoin [coalesce(brand_id#34, 0), coalesce(class_id#35, 0), coalesce(category_id#36, 0)], [coalesce(i_brand_id#2, 0), coalesce(i_class_id#3, 0), coalesce(i_category_id#4, 0)], LeftSemi, BuildRight, (((brand_id#34 <=> i_brand_id#2) && (class_id#35 <=> i_class_id#3)) && (category_id#36 <=> i_category_id#4)) : : : : :- *(6) Project [i_brand_id#2 AS brand_id#34, i_class_id#3 AS class_id#35, i_category_id#4 AS category_id#36] @@ -63,8 +63,8 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : +- *(1) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_class_id#3)) && isnotnull(i_brand_id#2)) && isnotnull(i_category_id#4)) - : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_category_id#4)) && isnotnull(i_brand_id#2)) && isnotnull(i_class_id#3)) + : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category_id), IsNotNull(i_brand_id), IsNotNull(i_class_id)], ReadSchema: struct : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- *(2) Project [d_date_sk#19] : : : : : +- *(2) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) @@ -95,25 +95,25 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(23) BroadcastHashJoin [i_item_sk#32], [ss_item_sk#33], LeftSemi, BuildRight : : :- *(23) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : +- *(23) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_class_id#3)) && isnotnull(i_category_id#4)) && isnotnull(i_brand_id#2)) - : : : +- *(23) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_brand_id)], ReadSchema: struct + : : : +- *(23) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_category_id#4)) && isnotnull(i_class_id#3)) && isnotnull(i_brand_id#2)) + : : : +- *(23) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category_id), IsNotNull(i_class_id), IsNotNull(i_brand_id)], ReadSchema: struct : : +- ReusedExchange [ss_item_sk#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(24) Project [d_date_sk#19] - : +- *(24) Filter ((isnotnull(d_week_seq#39) && (d_week_seq#39 = Subquery subquery1883)) && isnotnull(d_date_sk#19)) - : : +- Subquery subquery1883 + : +- *(24) Filter ((isnotnull(d_week_seq#39) && (d_week_seq#39 = Subquery subquery4123)) && isnotnull(d_date_sk#19)) + : : +- Subquery subquery4123 : : +- *(1) Project [d_week_seq#39] - : : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - : : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct + : : +- *(1) Filter (((((isnotnull(d_dom#40) && isnotnull(d_year#20)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + : : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct : +- *(24) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct - : +- Subquery subquery1883 + : +- Subquery subquery4123 : +- *(1) Project [d_week_seq#39] - : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct + : +- *(1) Filter (((((isnotnull(d_dom#40) && isnotnull(d_year#20)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true])) +- *(51) Project [channel#7, i_brand_id#8, i_class_id#9, i_category_id#10, sales#11, number_sales#12] - +- *(51) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42 as decimal(32,6)) > cast(Subquery subquery1890 as decimal(32,6)))) - : +- Subquery subquery1890 + +- *(51) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42 as decimal(32,6)) > cast(Subquery subquery4130 as decimal(32,6)))) + : +- Subquery subquery4130 : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) : +- Exchange SinglePartition : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) @@ -140,7 +140,7 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_quantity#26,ws_list_price#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(51) HashAggregate(keys=[i_brand_id#8, i_class_id#9, i_category_id#10], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), count(1)]) - +- Exchange hashpartitioning(i_brand_id#8, i_class_id#9, i_category_id#10, 200) + +- Exchange hashpartitioning(i_brand_id#8, i_class_id#9, i_category_id#10, 5) +- *(50) HashAggregate(keys=[i_brand_id#8, i_class_id#9, i_category_id#10], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) +- *(50) Project [ss_quantity#13, ss_list_price#14, i_brand_id#8, i_class_id#9, i_category_id#10] +- *(50) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight @@ -154,13 +154,13 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : +- ReusedExchange [i_item_sk#43, i_brand_id#8, i_class_id#9, i_category_id#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(49) Project [d_date_sk#19] - +- *(49) Filter ((isnotnull(d_week_seq#39) && (d_week_seq#39 = Subquery subquery1889)) && isnotnull(d_date_sk#19)) - : +- Subquery subquery1889 + +- *(49) Filter ((isnotnull(d_week_seq#39) && (d_week_seq#39 = Subquery subquery4129)) && isnotnull(d_date_sk#19)) + : +- Subquery subquery4129 : +- *(1) Project [d_week_seq#39] - : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct + : +- *(1) Filter (((((isnotnull(d_dom#40) && isnotnull(d_year#20)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct +- *(49) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct - +- Subquery subquery1889 + +- Subquery subquery4129 +- *(1) Project [d_week_seq#39] - +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct \ No newline at end of file + +- *(1) Filter (((((isnotnull(d_dom#40) && isnotnull(d_year#20)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt index d0ac00bfe..bc05af485 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt @@ -1,7 +1,7 @@ -TakeOrderedAndProject [i_brand_id,i_category_id,number_sales,i_class_id,i_brand_id,sales,sales,number_sales,i_category_id,channel,channel,i_class_id] +TakeOrderedAndProject [number_sales,sales,i_category_id,i_class_id,channel,channel,i_class_id,sales,i_brand_id,i_brand_id,i_category_id,number_sales] WholeStageCodegen - BroadcastHashJoin [i_category_id,i_category_id,i_brand_id,i_class_id,i_brand_id,i_class_id] - Project [number_sales,i_category_id,channel,sales,i_brand_id,i_class_id] + BroadcastHashJoin [i_category_id,i_brand_id,i_category_id,i_class_id,i_class_id,i_brand_id] + Project [number_sales,i_brand_id,sales,channel,i_class_id,i_category_id] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #2 WholeStageCodegen @@ -9,7 +9,7 @@ TakeOrderedAndProject [i_brand_id,i_category_id,number_sales,i_class_id,i_brand_ InputAdapter Exchange #12 WholeStageCodegen - HashAggregate [quantity,sum,count,list_price,count,sum] [sum,count,sum,count] + HashAggregate [list_price,sum,count,count,quantity,sum] [sum,count,sum,count] InputAdapter Union WholeStageCodegen @@ -40,14 +40,14 @@ TakeOrderedAndProject [i_brand_id,i_category_id,number_sales,i_class_id,i_brand_ Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #13 - HashAggregate [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),sum,i_category_id,i_brand_id,count,i_class_id] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),number_sales,count(1),sum,channel,sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count] + HashAggregate [i_brand_id,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum,count(1),count,i_class_id,i_category_id] [number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sales,count(1),channel,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 WholeStageCodegen - HashAggregate [sum,sum,ss_list_price,i_category_id,i_brand_id,ss_quantity,count,i_class_id,count] [sum,count,sum,count] - Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,i_class_id] + HashAggregate [i_brand_id,sum,sum,ss_quantity,count,count,i_class_id,ss_list_price,i_category_id] [sum,count,sum,count] + Project [ss_list_price,i_class_id,i_category_id,i_brand_id,ss_quantity] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,ss_sold_date_sk,i_class_id] + Project [ss_list_price,i_class_id,i_category_id,i_brand_id,ss_quantity,ss_sold_date_sk] BroadcastHashJoin [ss_item_sk,i_item_sk] BroadcastHashJoin [ss_item_sk,ss_item_sk] Project [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] @@ -57,22 +57,22 @@ TakeOrderedAndProject [i_brand_id,i_category_id,number_sales,i_class_id,i_brand_ BroadcastExchange #2 WholeStageCodegen Project [i_item_sk] - BroadcastHashJoin [brand_id,category_id,i_category_id,i_brand_id,class_id,i_class_id] + BroadcastHashJoin [i_brand_id,category_id,brand_id,class_id,i_class_id,i_category_id] Project [i_item_sk,i_brand_id,i_class_id,i_category_id] - Filter [i_class_id,i_brand_id,i_category_id] + Filter [i_category_id,i_brand_id,i_class_id] Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter BroadcastExchange #3 WholeStageCodegen HashAggregate [brand_id,class_id,category_id] HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [i_class_id,i_category_id,class_id,category_id,i_brand_id,brand_id] + BroadcastHashJoin [brand_id,category_id,i_category_id,i_brand_id,i_class_id,class_id] HashAggregate [brand_id,class_id,category_id] InputAdapter Exchange [brand_id,class_id,category_id] #4 WholeStageCodegen HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,class_id,category_id,brand_id] + BroadcastHashJoin [brand_id,category_id,i_category_id,i_class_id,class_id,i_brand_id] Project [i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] @@ -84,7 +84,7 @@ TakeOrderedAndProject [i_brand_id,i_category_id,number_sales,i_class_id,i_brand_ BroadcastExchange #5 WholeStageCodegen Project [i_item_sk,i_brand_id,i_class_id,i_category_id] - Filter [i_item_sk,i_class_id,i_brand_id,i_category_id] + Filter [i_item_sk,i_category_id,i_brand_id,i_class_id] Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter BroadcastExchange #6 @@ -129,7 +129,7 @@ TakeOrderedAndProject [i_brand_id,i_category_id,number_sales,i_class_id,i_brand_ WholeStageCodegen BroadcastHashJoin [i_item_sk,ss_item_sk] Project [i_item_sk,i_brand_id,i_class_id,i_category_id] - Filter [i_item_sk,i_class_id,i_category_id,i_brand_id] + Filter [i_item_sk,i_category_id,i_class_id,i_brand_id] Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter ReusedExchange [ss_item_sk] [ss_item_sk] #2 @@ -141,18 +141,18 @@ TakeOrderedAndProject [i_brand_id,i_category_id,number_sales,i_class_id,i_brand_ Subquery #1 WholeStageCodegen Project [d_week_seq] - Filter [d_year,d_dom,d_moy] + Filter [d_dom,d_year,d_moy] Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] Subquery #1 WholeStageCodegen Project [d_week_seq] - Filter [d_year,d_dom,d_moy] + Filter [d_dom,d_year,d_moy] Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] InputAdapter BroadcastExchange #14 WholeStageCodegen - Project [i_brand_id,i_class_id,sales,number_sales,i_category_id,channel] + Project [sales,i_class_id,channel,i_brand_id,i_category_id,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #4 WholeStageCodegen @@ -160,7 +160,7 @@ TakeOrderedAndProject [i_brand_id,i_category_id,number_sales,i_class_id,i_brand_ InputAdapter Exchange #12 WholeStageCodegen - HashAggregate [quantity,sum,count,list_price,count,sum] [sum,count,sum,count] + HashAggregate [list_price,sum,count,count,quantity,sum] [sum,count,sum,count] InputAdapter Union WholeStageCodegen @@ -191,14 +191,14 @@ TakeOrderedAndProject [i_brand_id,i_category_id,number_sales,i_class_id,i_brand_ Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #13 - HashAggregate [i_brand_id,sum,i_class_id,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,i_category_id] [sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),channel] + HashAggregate [i_class_id,count(1),i_brand_id,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),i_category_id,sum,count] [sales,channel,count(1),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum,count,number_sales] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #15 WholeStageCodegen - HashAggregate [i_brand_id,sum,sum,i_class_id,count,count,ss_list_price,i_category_id,ss_quantity] [sum,count,sum,count] - Project [ss_quantity,i_category_id,ss_list_price,i_brand_id,i_class_id] + HashAggregate [i_class_id,sum,ss_quantity,i_brand_id,i_category_id,sum,count,count,ss_list_price] [sum,count,sum,count] + Project [ss_list_price,ss_quantity,i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,i_category_id,ss_list_price,i_brand_id,ss_sold_date_sk,i_class_id] + Project [ss_list_price,ss_quantity,i_brand_id,ss_sold_date_sk,i_class_id,i_category_id] BroadcastHashJoin [ss_item_sk,i_item_sk] BroadcastHashJoin [ss_item_sk,ss_item_sk] Project [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] @@ -216,11 +216,11 @@ TakeOrderedAndProject [i_brand_id,i_category_id,number_sales,i_class_id,i_brand_ Subquery #3 WholeStageCodegen Project [d_week_seq] - Filter [d_year,d_dom,d_moy] + Filter [d_dom,d_year,d_moy] Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] Subquery #3 WholeStageCodegen Project [d_week_seq] - Filter [d_year,d_dom,d_moy] + Filter [d_dom,d_year,d_moy] Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt index f1ebfcb6e..589464c44 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt @@ -4,7 +4,7 @@ TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], outpu +- Exchange SinglePartition +- *(7) HashAggregate(keys=[], functions=[merge_sum(UnscaledValue(cs_ext_ship_cost#4)), merge_sum(UnscaledValue(cs_net_profit#5)), partial_count(distinct cs_order_number#6)]) +- *(7) HashAggregate(keys=[cs_order_number#6], functions=[merge_sum(UnscaledValue(cs_ext_ship_cost#4)), merge_sum(UnscaledValue(cs_net_profit#5))]) - +- Exchange hashpartitioning(cs_order_number#6, 200) + +- Exchange hashpartitioning(cs_order_number#6, 5) +- *(6) HashAggregate(keys=[cs_order_number#6], functions=[partial_sum(UnscaledValue(cs_ext_ship_cost#4)), partial_sum(UnscaledValue(cs_net_profit#5))]) +- *(6) Project [cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] +- *(6) BroadcastHashJoin [cs_call_center_sk#7], [cc_call_center_sk#8], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt index 57b72893e..4f13dbdf2 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt @@ -1,27 +1,27 @@ TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] WholeStageCodegen - HashAggregate [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),total net profit ,order count ,count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),total shipping cost ] + HashAggregate [sum,count,sum(UnscaledValue(cs_ext_ship_cost)),count(cs_order_number),sum(UnscaledValue(cs_net_profit)),sum] [sum,count,sum(UnscaledValue(cs_ext_ship_cost)),total net profit ,total shipping cost ,count(cs_order_number),order count ,sum(UnscaledValue(cs_net_profit)),sum] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),cs_order_number] [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] - HashAggregate [sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),cs_order_number] [sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] + HashAggregate [sum,sum(UnscaledValue(cs_ext_ship_cost)),count(cs_order_number),sum,sum(UnscaledValue(cs_net_profit)),sum,count,cs_order_number,sum] [sum,count,sum(UnscaledValue(cs_ext_ship_cost)),count(cs_order_number),sum,sum(UnscaledValue(cs_net_profit)),sum,count,sum] + HashAggregate [sum,sum(UnscaledValue(cs_ext_ship_cost)),sum,sum(UnscaledValue(cs_net_profit)),sum,cs_order_number,sum] [sum,sum(UnscaledValue(cs_ext_ship_cost)),sum,sum(UnscaledValue(cs_net_profit)),sum,sum] InputAdapter Exchange [cs_order_number] #2 WholeStageCodegen - HashAggregate [cs_ext_ship_cost,sum(UnscaledValue(cs_net_profit)),sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),cs_order_number,cs_net_profit] [sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] + HashAggregate [cs_net_profit,sum(UnscaledValue(cs_ext_ship_cost)),sum,sum(UnscaledValue(cs_net_profit)),cs_order_number,cs_ext_ship_cost,sum] [sum,sum(UnscaledValue(cs_ext_ship_cost)),sum,sum(UnscaledValue(cs_net_profit)),sum,sum] Project [cs_order_number,cs_ext_ship_cost,cs_net_profit] BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] Project [cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] BroadcastHashJoin [cs_ship_addr_sk,ca_address_sk] - Project [cs_ship_addr_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] + Project [cs_call_center_sk,cs_ship_addr_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number] BroadcastHashJoin [cs_ship_date_sk,d_date_sk] BroadcastHashJoin [cs_order_number,cr_order_number] - Project [cs_ship_addr_sk,cs_ship_date_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] + Project [cs_call_center_sk,cs_ship_date_sk,cs_ship_addr_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number] BroadcastHashJoin [cs_order_number,cs_order_number,cs_warehouse_sk,cs_warehouse_sk] - Project [cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] + Project [cs_call_center_sk,cs_warehouse_sk,cs_ship_date_sk,cs_ship_addr_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number] Filter [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk] - Scan parquet default.catalog_sales [cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] [cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] + Scan parquet default.catalog_sales [cs_call_center_sk,cs_warehouse_sk,cs_ship_date_sk,cs_ship_addr_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number] [cs_call_center_sk,cs_warehouse_sk,cs_ship_date_sk,cs_ship_addr_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number] InputAdapter BroadcastExchange #3 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt index 62f104f27..898520b2a 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST,s_state#3 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,s_state#3,store_sales_quantitycount#4,store_sales_quantityave#5,store_sales_quantitystdev#6,store_sales_quantitycov#7,as_store_returns_quantitycount#8,as_store_returns_quantityave#9,as_store_returns_quantitystdev#10,store_returns_quantitycov#11,catalog_sales_quantitycount#12,catalog_sales_quantityave#13,catalog_sales_quantitystdev#14,catalog_sales_quantitycov#15]) +- *(9) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_state#3], functions=[count(ss_quantity#16), avg(cast(ss_quantity#16 as bigint)), stddev_samp(cast(ss_quantity#16 as double)), count(sr_return_quantity#17), avg(cast(sr_return_quantity#17 as bigint)), stddev_samp(cast(sr_return_quantity#17 as double)), count(cs_quantity#18), avg(cast(cs_quantity#18 as bigint)), stddev_samp(cast(cs_quantity#18 as double))]) - +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, s_state#3, 200) + +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, s_state#3, 5) +- *(8) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_state#3], functions=[partial_count(ss_quantity#16), partial_avg(cast(ss_quantity#16 as bigint)), partial_stddev_samp(cast(ss_quantity#16 as double)), partial_count(sr_return_quantity#17), partial_avg(cast(sr_return_quantity#17 as bigint)), partial_stddev_samp(cast(sr_return_quantity#17 as double)), partial_count(cs_quantity#18), partial_avg(cast(cs_quantity#18 as bigint)), partial_stddev_samp(cast(cs_quantity#18 as double))]) +- *(8) Project [ss_quantity#16, sr_return_quantity#17, cs_quantity#18, s_state#3, i_item_id#1, i_item_desc#2] +- *(8) BroadcastHashJoin [ss_item_sk#19], [i_item_sk#20], Inner, BuildRight @@ -22,12 +22,12 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_des : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#27,ss_item_sk#19,ss_customer_sk#33,ss_store_sk#21,ss_ticket_number#34,ss_quantity#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct + : : : : : +- *(2) Filter ((isnotnull(cs_item_sk#32) && isnotnull(cs_bill_customer_sk#31)) && isnotnull(cs_sold_date_sk#23)) + : : : : : +- *(2) FileScan parquet default.catalog_sales[cs_sold_date_sk#23,cs_bill_customer_sk#31,cs_item_sk#32,cs_quantity#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(3) Project [d_date_sk#28] : : : : +- *(3) Filter ((isnotnull(d_quarter_name#36) && (d_quarter_name#36 = 2001Q1)) && isnotnull(d_date_sk#28)) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt index 454790ddb..6db08032d 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt @@ -1,38 +1,38 @@ -TakeOrderedAndProject [as_store_returns_quantityave,catalog_sales_quantitycount,i_item_id,i_item_desc,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitystdev,catalog_sales_quantitycov,s_state,store_returns_quantitycov,catalog_sales_quantityave,store_sales_quantitycov,as_store_returns_quantitystdev,as_store_returns_quantitycount,catalog_sales_quantitystdev] +TakeOrderedAndProject [as_store_returns_quantitystdev,catalog_sales_quantitycov,store_sales_quantitycount,catalog_sales_quantityave,as_store_returns_quantitycount,catalog_sales_quantitycount,store_sales_quantitycov,s_state,store_sales_quantityave,as_store_returns_quantityave,i_item_desc,store_sales_quantitystdev,i_item_id,store_returns_quantitycov,catalog_sales_quantitystdev] WholeStageCodegen - HashAggregate [count,avg(cast(ss_quantity as bigint)),i_item_id,stddev_samp(cast(ss_quantity as double)),i_item_desc,stddev_samp(cast(sr_return_quantity as double)),m2,avg(cast(sr_return_quantity as bigint)),n,count,m2,avg,avg,s_state,count(sr_return_quantity),sum,avg,n,count,m2,count(cs_quantity),sum,stddev_samp(cast(cs_quantity as double)),avg(cast(cs_quantity as bigint)),count,n,sum,count,count(ss_quantity),count] [count,avg(cast(ss_quantity as bigint)),as_store_returns_quantityave,catalog_sales_quantitycount,stddev_samp(cast(ss_quantity as double)),stddev_samp(cast(sr_return_quantity as double)),m2,avg(cast(sr_return_quantity as bigint)),n,count,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitystdev,m2,avg,avg,catalog_sales_quantitycov,count(sr_return_quantity),store_returns_quantitycov,sum,catalog_sales_quantityave,store_sales_quantitycov,avg,n,count,as_store_returns_quantitystdev,m2,count(cs_quantity),sum,stddev_samp(cast(cs_quantity as double)),avg(cast(cs_quantity as bigint)),count,as_store_returns_quantitycount,n,sum,count,count(ss_quantity),count,catalog_sales_quantitystdev] + HashAggregate [count,sum,stddev_samp(cast(sr_return_quantity as double)),count,avg(cast(sr_return_quantity as bigint)),sum,m2,count,count,n,n,n,s_state,m2,count,avg,sum,stddev_samp(cast(ss_quantity as double)),i_item_desc,count(ss_quantity),avg(cast(ss_quantity as bigint)),count(sr_return_quantity),stddev_samp(cast(cs_quantity as double)),avg,i_item_id,m2,avg(cast(cs_quantity as bigint)),count,avg,count(cs_quantity)] [as_store_returns_quantitystdev,catalog_sales_quantitycov,store_sales_quantitycount,catalog_sales_quantityave,count,sum,stddev_samp(cast(sr_return_quantity as double)),count,avg(cast(sr_return_quantity as bigint)),as_store_returns_quantitycount,sum,m2,catalog_sales_quantitycount,count,count,n,n,n,store_sales_quantitycov,store_sales_quantityave,m2,count,avg,sum,as_store_returns_quantityave,stddev_samp(cast(ss_quantity as double)),count(ss_quantity),store_sales_quantitystdev,avg(cast(ss_quantity as bigint)),count(sr_return_quantity),stddev_samp(cast(cs_quantity as double)),avg,m2,avg(cast(cs_quantity as bigint)),store_returns_quantitycov,catalog_sales_quantitystdev,count,avg,count(cs_quantity)] InputAdapter Exchange [i_item_id,i_item_desc,s_state] #1 WholeStageCodegen - HashAggregate [count,m2,i_item_id,m2,i_item_desc,m2,count,avg,avg,m2,count,n,count,sum,n,sum,n,count,m2,avg,avg,s_state,avg,sum,sr_return_quantity,avg,count,count,count,n,count,m2,sum,count,ss_quantity,cs_quantity,n,sum,count,n,sum,count] [count,m2,m2,m2,count,avg,avg,m2,count,n,count,sum,n,sum,n,count,m2,avg,avg,avg,sum,avg,count,count,count,n,count,m2,sum,count,n,sum,count,n,sum,count] - Project [ss_quantity,cs_quantity,sr_return_quantity,s_state,i_item_desc,i_item_id] + HashAggregate [count,cs_quantity,avg,count,sum,n,sr_return_quantity,count,count,avg,sum,m2,ss_quantity,count,count,n,n,n,s_state,sum,m2,count,count,avg,sum,i_item_desc,m2,n,avg,count,n,avg,i_item_id,m2,m2,m2,sum,count,count,avg,sum,count] [count,avg,count,sum,n,count,count,avg,sum,m2,count,count,n,n,n,sum,m2,count,count,avg,sum,m2,n,avg,count,n,avg,m2,m2,m2,sum,count,count,avg,sum,count] + Project [cs_quantity,ss_quantity,i_item_id,sr_return_quantity,i_item_desc,s_state] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_quantity,ss_item_sk,cs_quantity,sr_return_quantity,s_state] + Project [cs_quantity,ss_item_sk,ss_quantity,sr_return_quantity,s_state] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity] + Project [cs_quantity,ss_item_sk,ss_quantity,sr_return_quantity,ss_store_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk] + Project [cs_quantity,ss_item_sk,ss_quantity,cs_sold_date_sk,sr_return_quantity,ss_store_sk] BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,sr_returned_date_sk] + Project [cs_quantity,ss_item_sk,ss_quantity,cs_sold_date_sk,sr_return_quantity,ss_store_sk,sr_returned_date_sk] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,ss_sold_date_sk,sr_returned_date_sk] + Project [cs_quantity,ss_item_sk,ss_quantity,cs_sold_date_sk,ss_sold_date_sk,sr_return_quantity,ss_store_sk,sr_returned_date_sk] BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] - Project [ss_quantity,ss_item_sk,ss_store_sk,sr_return_quantity,sr_customer_sk,sr_item_sk,ss_sold_date_sk,sr_returned_date_sk] - BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] - Project [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] - Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] - Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + Project [sr_item_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,ss_store_sk,sr_customer_sk,sr_returned_date_sk] + BroadcastHashJoin [ss_customer_sk,sr_item_sk,ss_item_sk,sr_ticket_number,sr_customer_sk,ss_ticket_number] + Project [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_sold_date_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] - Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] - Scan parquet default.store_returns [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] + Project [sr_ticket_number,sr_item_sk,sr_return_quantity,sr_customer_sk,sr_returned_date_sk] + Filter [sr_ticket_number,sr_item_sk,sr_customer_sk,sr_returned_date_sk] + Scan parquet default.store_returns [sr_ticket_number,sr_item_sk,sr_return_quantity,sr_customer_sk,sr_returned_date_sk] [sr_ticket_number,sr_item_sk,sr_return_quantity,sr_customer_sk,sr_returned_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Filter [cs_item_sk,cs_bill_customer_sk,cs_sold_date_sk] Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] InputAdapter BroadcastExchange #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt index 9c1bcddcf..fd9786c06 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[ca_country#1 ASC NULLS FIRST,ca_state#2 ASC NULLS FIRST,ca_county#3 ASC NULLS FIRST,i_item_id#4 ASC NULLS FIRST], output=[i_item_id#4,ca_country#1,ca_state#2,ca_county#3,agg1#5,agg2#6,agg3#7,agg4#8,agg5#9,agg6#10,agg7#11]) +- *(8) HashAggregate(keys=[i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12], functions=[avg(cast(cs_quantity#13 as decimal(12,2))), avg(cast(cs_list_price#14 as decimal(12,2))), avg(cast(cs_coupon_amt#15 as decimal(12,2))), avg(cast(cs_sales_price#16 as decimal(12,2))), avg(cast(cs_net_profit#17 as decimal(12,2))), avg(cast(c_birth_year#18 as decimal(12,2))), avg(cast(cd_dep_count#19 as decimal(12,2)))]) - +- Exchange hashpartitioning(i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12, 200) + +- Exchange hashpartitioning(i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12, 5) +- *(7) HashAggregate(keys=[i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12], functions=[partial_avg(cast(cs_quantity#13 as decimal(12,2))), partial_avg(cast(cs_list_price#14 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#15 as decimal(12,2))), partial_avg(cast(cs_sales_price#16 as decimal(12,2))), partial_avg(cast(cs_net_profit#17 as decimal(12,2))), partial_avg(cast(c_birth_year#18 as decimal(12,2))), partial_avg(cast(cd_dep_count#19 as decimal(12,2)))]) +- *(7) Expand [List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, ca_country#21, ca_state#22, ca_county#23, 0), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, ca_country#21, ca_state#22, null, 1), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, ca_country#21, null, null, 3), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, null, null, null, 7), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, null, null, null, null, 15)], [cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12] +- *(7) Project [cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#24 AS i_item_id#20, ca_country#25 AS ca_country#21, ca_state#26 AS ca_state#22, ca_county#27 AS ca_county#23] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt index d045bd95e..37081499f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt @@ -1,26 +1,26 @@ -TakeOrderedAndProject [ca_country,agg7,agg6,ca_county,agg2,agg1,agg4,agg3,ca_state,agg5,i_item_id] +TakeOrderedAndProject [ca_country,agg6,agg4,ca_state,agg7,agg2,i_item_id,agg1,agg3,agg5,ca_county] WholeStageCodegen - HashAggregate [ca_country,avg(cast(cs_quantity as decimal(12,2))),count,sum,avg(cast(cs_coupon_amt as decimal(12,2))),sum,sum,ca_county,avg(cast(cs_list_price as decimal(12,2))),sum,sum,count,sum,count,avg(cast(cs_net_profit as decimal(12,2))),spark_grouping_id,sum,count,ca_state,avg(cast(cd_dep_count as decimal(12,2))),count,avg(cast(cs_sales_price as decimal(12,2))),count,count,avg(cast(c_birth_year as decimal(12,2))),i_item_id] [avg(cast(cs_quantity as decimal(12,2))),count,agg7,sum,avg(cast(cs_coupon_amt as decimal(12,2))),sum,sum,agg6,avg(cast(cs_list_price as decimal(12,2))),agg2,agg1,sum,sum,count,sum,count,agg4,agg3,avg(cast(cs_net_profit as decimal(12,2))),sum,count,agg5,avg(cast(cd_dep_count as decimal(12,2))),count,avg(cast(cs_sales_price as decimal(12,2))),count,count,avg(cast(c_birth_year as decimal(12,2)))] + HashAggregate [avg(cast(cs_net_profit as decimal(12,2))),ca_country,count,count,count,sum,avg(cast(cs_list_price as decimal(12,2))),sum,avg(cast(cs_quantity as decimal(12,2))),count,ca_state,sum,count,sum,i_item_id,avg(cast(cd_dep_count as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),sum,sum,count,spark_grouping_id,sum,avg(cast(c_birth_year as decimal(12,2))),count,ca_county,avg(cast(cs_sales_price as decimal(12,2)))] [avg(cast(cs_net_profit as decimal(12,2))),count,count,count,agg6,sum,avg(cast(cs_list_price as decimal(12,2))),sum,avg(cast(cs_quantity as decimal(12,2))),agg4,count,sum,count,sum,agg7,agg2,avg(cast(cd_dep_count as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),sum,sum,count,agg1,agg3,sum,avg(cast(c_birth_year as decimal(12,2))),count,agg5,avg(cast(cs_sales_price as decimal(12,2)))] InputAdapter - Exchange [ca_country,ca_county,spark_grouping_id,ca_state,i_item_id] #1 + Exchange [ca_country,ca_state,i_item_id,spark_grouping_id,ca_county] #1 WholeStageCodegen - HashAggregate [ca_country,cs_list_price,cs_coupon_amt,sum,count,sum,count,count,sum,sum,count,sum,sum,count,sum,count,ca_county,sum,sum,count,sum,count,count,sum,sum,sum,spark_grouping_id,cs_net_profit,sum,count,ca_state,cs_sales_price,cs_quantity,cd_dep_count,count,count,c_birth_year,count,count,i_item_id] [sum,count,sum,count,count,sum,sum,count,sum,sum,count,sum,count,sum,sum,count,sum,count,count,sum,sum,sum,sum,count,count,count,count,count] - Expand [cs_list_price,cs_coupon_amt,ca_country,i_item_id,ca_county,cs_net_profit,cs_sales_price,cs_quantity,cd_dep_count,ca_state,c_birth_year] - Project [cs_list_price,cs_coupon_amt,i_item_id,ca_county,ca_country,cs_net_profit,cs_sales_price,cs_quantity,cd_dep_count,c_birth_year,ca_state] + HashAggregate [cs_quantity,ca_country,count,count,count,count,sum,sum,cs_net_profit,cd_dep_count,count,sum,sum,sum,cs_sales_price,count,ca_state,sum,count,cs_coupon_amt,sum,sum,count,sum,cs_list_price,i_item_id,count,count,sum,sum,count,c_birth_year,spark_grouping_id,sum,sum,sum,count,count,count,ca_county] [count,count,count,count,sum,sum,count,sum,sum,sum,count,sum,count,sum,sum,count,sum,count,count,sum,sum,count,sum,sum,sum,count,count,count] + Expand [cs_quantity,i_item_id,ca_country,cs_net_profit,cd_dep_count,cs_sales_price,cs_coupon_amt,ca_county,cs_list_price,c_birth_year,ca_state] + Project [cs_quantity,ca_country,cs_net_profit,cd_dep_count,cs_sales_price,cs_coupon_amt,cs_list_price,c_birth_year,i_item_id,ca_state,ca_county] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [ca_state,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cd_dep_count,ca_county,cs_item_sk,cs_net_profit,ca_country,cs_list_price] + Project [cs_quantity,cd_dep_count,cs_list_price,cs_item_sk,ca_state,cs_coupon_amt,ca_county,c_birth_year,ca_country,cs_net_profit,cs_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [ca_state,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cs_sold_date_sk,cd_dep_count,ca_county,cs_item_sk,cs_net_profit,ca_country,cs_list_price] + Project [cs_quantity,cd_dep_count,cs_list_price,cs_item_sk,ca_state,cs_sold_date_sk,cs_coupon_amt,ca_county,c_birth_year,ca_country,cs_net_profit,cs_sales_price] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cs_sold_date_sk,cd_dep_count,cs_item_sk,cs_net_profit,cs_list_price] + Project [cs_quantity,cd_dep_count,cs_list_price,cs_item_sk,cs_sold_date_sk,cs_coupon_amt,c_birth_year,cs_net_profit,c_current_addr_sk,cs_sales_price] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] - Project [c_current_cdemo_sk,c_current_addr_sk,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cs_sold_date_sk,cd_dep_count,cs_item_sk,cs_net_profit,cs_list_price] + Project [cs_quantity,cd_dep_count,cs_list_price,cs_item_sk,c_current_cdemo_sk,cs_sold_date_sk,cs_coupon_amt,c_birth_year,cs_net_profit,c_current_addr_sk,cs_sales_price] BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_coupon_amt,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cd_dep_count,cs_item_sk,cs_net_profit,cs_list_price] + Project [cs_quantity,cd_dep_count,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk,cs_coupon_amt,cs_net_profit,cs_sales_price] BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] - Project [cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_net_profit,cs_list_price] + Project [cs_bill_cdemo_sk,cs_quantity,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk,cs_coupon_amt,cs_net_profit,cs_sales_price] Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk] - Scan parquet default.catalog_sales [cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_net_profit,cs_list_price] [cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_net_profit,cs_list_price] + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_quantity,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk,cs_coupon_amt,cs_net_profit,cs_sales_price] [cs_bill_cdemo_sk,cs_quantity,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk,cs_coupon_amt,cs_net_profit,cs_sales_price] InputAdapter BroadcastExchange #2 WholeStageCodegen @@ -32,7 +32,7 @@ TakeOrderedAndProject [ca_country,agg7,agg6,ca_county,agg2,agg1,agg4,agg3,ca_sta WholeStageCodegen Project [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] Filter [c_birth_month,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] - Scan parquet default.customer [c_current_cdemo_sk,c_current_addr_sk,c_birth_year,c_customer_sk,c_birth_month] [c_current_cdemo_sk,c_current_addr_sk,c_birth_year,c_customer_sk,c_birth_month] + Scan parquet default.customer [c_current_cdemo_sk,c_birth_year,c_customer_sk,c_current_addr_sk,c_birth_month] [c_current_cdemo_sk,c_birth_year,c_customer_sk,c_current_addr_sk,c_birth_month] InputAdapter BroadcastExchange #4 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt index 97442ee59..a3cb20be7 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[ext_price#1 DESC NULLS LAST,brand#2 ASC NULLS FIRST,brand_id#3 ASC NULLS FIRST,i_manufact_id#4 ASC NULLS FIRST,i_manufact#5 ASC NULLS FIRST], output=[brand_id#3,brand#2,i_manufact_id#4,i_manufact#5,ext_price#1]) +- *(7) HashAggregate(keys=[i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5], functions=[sum(UnscaledValue(ss_ext_sales_price#8))]) - +- Exchange hashpartitioning(i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5, 200) + +- Exchange hashpartitioning(i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5, 5) +- *(6) HashAggregate(keys=[i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#8))]) +- *(6) Project [ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5] +- *(6) BroadcastHashJoin [ss_store_sk#9], [s_store_sk#10], Inner, BuildRight, NOT (substring(ca_zip#11, 1, 5) = substring(s_zip#12, 1, 5)) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt index c008da8ae..810d37bad 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt @@ -1,17 +1,17 @@ -TakeOrderedAndProject [ext_price,brand_id,i_manufact,i_manufact_id,brand] +TakeOrderedAndProject [brand_id,brand,i_manufact,ext_price,i_manufact_id] WholeStageCodegen - HashAggregate [i_manufact,sum(UnscaledValue(ss_ext_sales_price)),i_brand,i_brand_id,i_manufact_id,sum] [ext_price,brand_id,sum(UnscaledValue(ss_ext_sales_price)),brand,sum] + HashAggregate [i_brand_id,i_manufact,sum(UnscaledValue(ss_ext_sales_price)),sum,i_manufact_id,i_brand] [brand_id,brand,sum(UnscaledValue(ss_ext_sales_price)),ext_price,sum] InputAdapter Exchange [i_brand,i_brand_id,i_manufact_id,i_manufact] #1 WholeStageCodegen - HashAggregate [i_manufact,i_brand,sum,ss_ext_sales_price,i_brand_id,i_manufact_id,sum] [sum,sum] - Project [i_manufact,i_manufact_id,ss_ext_sales_price,i_brand_id,i_brand] + HashAggregate [i_brand_id,sum,i_manufact,ss_ext_sales_price,sum,i_manufact_id,i_brand] [sum,sum] + Project [i_manufact_id,i_manufact,i_brand_id,i_brand,ss_ext_sales_price] BroadcastHashJoin [ss_store_sk,s_store_sk,ca_zip,s_zip] - Project [i_manufact,i_manufact_id,ca_zip,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand] + Project [i_manufact_id,i_manufact,i_brand_id,i_brand,ca_zip,ss_store_sk,ss_ext_sales_price] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,i_manufact,i_manufact_id,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand] + Project [i_manufact_id,i_manufact,i_brand_id,i_brand,ss_store_sk,ss_ext_sales_price,c_current_addr_sk] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [i_manufact,i_manufact_id,ss_store_sk,ss_customer_sk,ss_ext_sales_price,i_brand_id,i_brand] + Project [i_manufact_id,i_manufact,ss_customer_sk,i_brand_id,i_brand,ss_store_sk,ss_ext_sales_price] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] @@ -21,15 +21,15 @@ TakeOrderedAndProject [ext_price,brand_id,i_manufact,i_manufact_id,brand] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + Project [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price] Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price] [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [i_manufact,i_manufact_id,i_item_sk,i_brand_id,i_brand] + Project [i_manufact_id,i_manufact,i_brand_id,i_brand,i_item_sk] Filter [i_manager_id,i_item_sk] - Scan parquet default.item [i_manufact,i_manufact_id,i_item_sk,i_manager_id,i_brand_id,i_brand] [i_manufact,i_manufact_id,i_item_sk,i_manager_id,i_brand_id,i_brand] + Scan parquet default.item [i_manufact_id,i_manager_id,i_manufact,i_brand_id,i_brand,i_item_sk] [i_manufact_id,i_manager_id,i_manufact,i_brand_id,i_brand,i_item_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt index aff2e2bdb..27134fbb9 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt @@ -1,12 +1,12 @@ == Physical Plan == *(13) Sort [d_week_seq1#1 ASC NULLS FIRST], true, 0 -+- Exchange rangepartitioning(d_week_seq1#1 ASC NULLS FIRST, 200) ++- Exchange rangepartitioning(d_week_seq1#1 ASC NULLS FIRST, 5) +- *(12) Project [d_week_seq1#1, round(CheckOverflow((promote_precision(sun_sales1#2) / promote_precision(sun_sales2#3)), DecimalType(37,20)), 2) AS round((sun_sales1 / sun_sales2), 2)#4, round(CheckOverflow((promote_precision(mon_sales1#5) / promote_precision(mon_sales2#6)), DecimalType(37,20)), 2) AS round((mon_sales1 / mon_sales2), 2)#7, round(CheckOverflow((promote_precision(tue_sales1#8) / promote_precision(tue_sales2#9)), DecimalType(37,20)), 2) AS round((tue_sales1 / tue_sales2), 2)#10, round(CheckOverflow((promote_precision(wed_sales1#11) / promote_precision(wed_sales2#12)), DecimalType(37,20)), 2) AS round((wed_sales1 / wed_sales2), 2)#13, round(CheckOverflow((promote_precision(thu_sales1#14) / promote_precision(thu_sales2#15)), DecimalType(37,20)), 2) AS round((thu_sales1 / thu_sales2), 2)#16, round(CheckOverflow((promote_precision(fri_sales1#17) / promote_precision(fri_sales2#18)), DecimalType(37,20)), 2) AS round((fri_sales1 / fri_sales2), 2)#19, round(CheckOverflow((promote_precision(sat_sales1#20) / promote_precision(sat_sales2#21)), DecimalType(37,20)), 2) AS round((sat_sales1 / sat_sales2), 2)#22] +- *(12) BroadcastHashJoin [d_week_seq1#1], [(d_week_seq2#23 - 53)], Inner, BuildRight :- *(12) Project [d_week_seq#24 AS d_week_seq1#1, sun_sales#25 AS sun_sales1#2, mon_sales#26 AS mon_sales1#5, tue_sales#27 AS tue_sales1#8, wed_sales#28 AS wed_sales1#11, thu_sales#29 AS thu_sales1#14, fri_sales#30 AS fri_sales1#17, sat_sales#31 AS sat_sales1#20] : +- *(12) BroadcastHashJoin [d_week_seq#24], [d_week_seq#32], Inner, BuildRight : :- *(12) HashAggregate(keys=[d_week_seq#24], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#33 = Sunday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Monday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Tuesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Wednesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Thursday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Friday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Saturday) THEN sales_price#34 ELSE null END))]) - : : +- Exchange hashpartitioning(d_week_seq#24, 200) + : : +- Exchange hashpartitioning(d_week_seq#24, 5) : : +- *(4) HashAggregate(keys=[d_week_seq#24], functions=[partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Sunday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Monday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Tuesday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Wednesday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Thursday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Friday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Saturday) THEN sales_price#34 ELSE null END))]) : : +- *(4) Project [sales_price#34, d_week_seq#24, d_day_name#33] : : +- *(4) BroadcastHashJoin [sold_date_sk#35], [d_date_sk#36], Inner, BuildRight @@ -29,7 +29,7 @@ +- *(11) Project [d_week_seq#24 AS d_week_seq2#23, sun_sales#25 AS sun_sales2#3, mon_sales#26 AS mon_sales2#6, tue_sales#27 AS tue_sales2#9, wed_sales#28 AS wed_sales2#12, thu_sales#29 AS thu_sales2#15, fri_sales#30 AS fri_sales2#18, sat_sales#31 AS sat_sales2#21] +- *(11) BroadcastHashJoin [d_week_seq#24], [d_week_seq#44], Inner, BuildRight :- *(11) HashAggregate(keys=[d_week_seq#24], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#33 = Sunday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Monday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Tuesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Wednesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Thursday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Friday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Saturday) THEN sales_price#34 ELSE null END))]) - : +- ReusedExchange [d_week_seq#24, sum#45, sum#46, sum#47, sum#48, sum#49, sum#50, sum#51], Exchange hashpartitioning(d_week_seq#24, 200) + : +- ReusedExchange [d_week_seq#24, sum#45, sum#46, sum#47, sum#48, sum#49, sum#50, sum#51], Exchange hashpartitioning(d_week_seq#24, 5) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(10) Project [d_week_seq#44] +- *(10) Filter ((isnotnull(d_year#52) && (d_year#52 = 2002)) && isnotnull(d_week_seq#44)) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt index a3121688c..6f69879cb 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt @@ -3,15 +3,15 @@ WholeStageCodegen InputAdapter Exchange [d_week_seq1] #1 WholeStageCodegen - Project [wed_sales2,mon_sales1,sat_sales2,d_week_seq1,fri_sales2,thu_sales2,sat_sales1,tue_sales1,sun_sales2,sun_sales1,tue_sales2,wed_sales1,mon_sales2,thu_sales1,fri_sales1] + Project [mon_sales1,fri_sales1,thu_sales1,wed_sales1,sun_sales2,mon_sales2,fri_sales2,thu_sales2,sat_sales2,tue_sales2,d_week_seq1,wed_sales2,sun_sales1,tue_sales1,sat_sales1] BroadcastHashJoin [d_week_seq1,d_week_seq2] - Project [sun_sales,thu_sales,sat_sales,wed_sales,fri_sales,tue_sales,d_week_seq,mon_sales] + Project [sat_sales,wed_sales,sun_sales,d_week_seq,mon_sales,fri_sales,tue_sales,thu_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - HashAggregate [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),mon_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum,thu_sales,sum,fri_sales,tue_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sun_sales,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum,sum,sat_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum,wed_sales] + HashAggregate [sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum,sum,sum,d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END))] [sun_sales,thu_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),fri_sales,mon_sales,sum,sum,sum,wed_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum,sum,sum,tue_sales,sat_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END))] InputAdapter Exchange [d_week_seq] #2 WholeStageCodegen - HashAggregate [d_day_name,sum,sum,sum,sum,sum,sum,sum,d_week_seq,sum,sum,sum,sum,sales_price,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + HashAggregate [sum,sum,sum,sum,sum,sum,d_week_seq,d_day_name,sum,sum,sales_price,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,d_week_seq,d_day_name] BroadcastHashJoin [sold_date_sk,d_date_sk] InputAdapter @@ -39,11 +39,11 @@ WholeStageCodegen InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [sun_sales,thu_sales,sat_sales,wed_sales,fri_sales,tue_sales,d_week_seq,mon_sales] + Project [sat_sales,wed_sales,sun_sales,d_week_seq,mon_sales,fri_sales,tue_sales,thu_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - HashAggregate [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum] [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),mon_sales,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),thu_sales,fri_sales,tue_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum,sun_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sat_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum,wed_sales] + HashAggregate [sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum] [sum,sun_sales,thu_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),fri_sales,mon_sales,wed_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum,sum,sum,sum,tue_sales,sat_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum] InputAdapter - ReusedExchange [sum,sum,sum,sum,d_week_seq,sum,sum,sum] [sum,sum,sum,sum,d_week_seq,sum,sum,sum] #2 + ReusedExchange [sum,sum,d_week_seq,sum,sum,sum,sum,sum] [sum,sum,d_week_seq,sum,sum,sum,sum,sum] #2 InputAdapter BroadcastExchange #6 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt index 186e8daef..f0c1bc89c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt @@ -3,9 +3,9 @@ TakeOrderedAndProject(limit=100, orderBy=[i_category#1 ASC NULLS FIRST,i_class#2 +- *(6) Project [i_item_desc#4, i_category#1, i_class#2, i_current_price#6, itemrevenue#7, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#8) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#9)), DecimalType(38,17)) AS revenueratio#5, i_item_id#3] +- Window [sum(_w1#10) windowspecdefinition(i_class#2, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#9], [i_class#2] +- *(5) Sort [i_class#2 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(i_class#2, 200) + +- Exchange hashpartitioning(i_class#2, 5) +- *(4) HashAggregate(keys=[i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6], functions=[sum(UnscaledValue(cs_ext_sales_price#11))]) - +- Exchange hashpartitioning(i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6, 200) + +- Exchange hashpartitioning(i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6, 5) +- *(3) HashAggregate(keys=[i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#11))]) +- *(3) Project [cs_ext_sales_price#11, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] +- *(3) BroadcastHashJoin [cs_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt index ebfef4216..bca0f3ec5 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt @@ -1,6 +1,6 @@ -TakeOrderedAndProject [i_item_id,i_item_desc,itemrevenue,revenueratio,i_category,i_current_price,i_class] +TakeOrderedAndProject [i_current_price,revenueratio,i_item_desc,i_item_id,itemrevenue,i_category,i_class] WholeStageCodegen - Project [i_item_id,i_item_desc,itemrevenue,_we0,i_category,_w0,i_current_price,i_class] + Project [i_current_price,i_item_desc,i_item_id,itemrevenue,i_category,_we0,i_class,_w0] InputAdapter Window [_w1,i_class] WholeStageCodegen @@ -8,14 +8,14 @@ TakeOrderedAndProject [i_item_id,i_item_desc,itemrevenue,revenueratio,i_category InputAdapter Exchange [i_class] #1 WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,sum(UnscaledValue(cs_ext_sales_price)),i_category,sum,i_current_price,i_class] [itemrevenue,sum(UnscaledValue(cs_ext_sales_price)),_w1,sum,_w0] + HashAggregate [i_current_price,sum(UnscaledValue(cs_ext_sales_price)),sum,i_item_desc,i_item_id,i_category,i_class] [sum(UnscaledValue(cs_ext_sales_price)),sum,itemrevenue,_w1,_w0] InputAdapter - Exchange [i_item_id,i_item_desc,i_category,i_current_price,i_class] #2 + Exchange [i_current_price,i_item_desc,i_item_id,i_category,i_class] #2 WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,sum,i_category,sum,i_current_price,cs_ext_sales_price,i_class] [sum,sum] - Project [i_class,i_current_price,i_category,i_item_desc,cs_ext_sales_price,i_item_id] + HashAggregate [i_current_price,sum,sum,cs_ext_sales_price,i_item_desc,i_item_id,i_category,i_class] [sum,sum] + Project [i_current_price,i_category,cs_ext_sales_price,i_item_id,i_class,i_item_desc] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [i_class,i_current_price,cs_sold_date_sk,i_category,i_item_desc,cs_ext_sales_price,i_item_id] + Project [i_current_price,i_category,cs_ext_sales_price,cs_sold_date_sk,i_item_id,i_class,i_item_desc] BroadcastHashJoin [cs_item_sk,i_item_sk] Project [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] Filter [cs_item_sk,cs_sold_date_sk] @@ -23,9 +23,9 @@ TakeOrderedAndProject [i_item_id,i_item_desc,itemrevenue,revenueratio,i_category InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + Project [i_current_price,i_category,i_item_sk,i_item_id,i_class,i_item_desc] Filter [i_category,i_item_sk] - Scan parquet default.item [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + Scan parquet default.item [i_current_price,i_category,i_item_sk,i_item_id,i_class,i_item_desc] [i_current_price,i_category,i_item_sk,i_item_id,i_class,i_item_desc] InputAdapter BroadcastExchange #4 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt index 2950af5ff..7c1b02f29 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt @@ -2,7 +2,7 @@ TakeOrderedAndProject(limit=100, orderBy=[w_warehouse_name#1 ASC NULLS FIRST,i_item_id#2 ASC NULLS FIRST], output=[w_warehouse_name#1,i_item_id#2,inv_before#3,inv_after#4]) +- *(5) Filter ((CASE WHEN (inv_before#3 > 0) THEN (cast(inv_after#4 as double) / cast(inv_before#3 as double)) ELSE null END >= 0.666667) && (CASE WHEN (inv_before#3 > 0) THEN (cast(inv_after#4 as double) / cast(inv_before#3 as double)) ELSE null END <= 1.5)) +- *(5) HashAggregate(keys=[w_warehouse_name#1, i_item_id#2], functions=[sum(cast(CASE WHEN (d_date#5 < 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint)), sum(cast(CASE WHEN (d_date#5 >= 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint))]) - +- Exchange hashpartitioning(w_warehouse_name#1, i_item_id#2, 200) + +- Exchange hashpartitioning(w_warehouse_name#1, i_item_id#2, 5) +- *(4) HashAggregate(keys=[w_warehouse_name#1, i_item_id#2], functions=[partial_sum(cast(CASE WHEN (d_date#5 < 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (d_date#5 >= 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint))]) +- *(4) Project [inv_quantity_on_hand#6, w_warehouse_name#1, i_item_id#2, d_date#5] +- *(4) BroadcastHashJoin [inv_date_sk#7], [d_date_sk#8], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt index b74976c2f..f3c16ce10 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt @@ -1,11 +1,11 @@ TakeOrderedAndProject [w_warehouse_name,i_item_id,inv_before,inv_after] WholeStageCodegen Filter [inv_before,inv_after] - HashAggregate [i_item_id,sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum,w_warehouse_name,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum] [sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum,inv_before,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum,inv_after] + HashAggregate [sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),w_warehouse_name,sum,sum,i_item_id,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint))] [inv_before,sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum,inv_after,sum,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint))] InputAdapter Exchange [w_warehouse_name,i_item_id] #1 WholeStageCodegen - HashAggregate [i_item_id,sum,d_date,w_warehouse_name,sum,inv_quantity_on_hand,sum,sum] [sum,sum,sum,sum] + HashAggregate [sum,inv_quantity_on_hand,w_warehouse_name,sum,d_date,sum,i_item_id,sum] [sum,sum,sum,sum] Project [inv_quantity_on_hand,w_warehouse_name,i_item_id,d_date] BroadcastHashJoin [inv_date_sk,d_date_sk] Project [inv_date_sk,inv_quantity_on_hand,w_warehouse_name,i_item_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt index 2a265ae4f..171688e3a 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[qoh#1 ASC NULLS FIRST,i_product_name#2 ASC NULLS FIRST,i_brand#3 ASC NULLS FIRST,i_class#4 ASC NULLS FIRST,i_category#5 ASC NULLS FIRST], output=[i_product_name#2,i_brand#3,i_class#4,i_category#5,qoh#1]) +- *(5) HashAggregate(keys=[i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6], functions=[avg(cast(inv_quantity_on_hand#7 as bigint))]) - +- Exchange hashpartitioning(i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6, 200) + +- Exchange hashpartitioning(i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6, 5) +- *(4) HashAggregate(keys=[i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6], functions=[partial_avg(cast(inv_quantity_on_hand#7 as bigint))]) +- *(4) Expand [List(inv_quantity_on_hand#7, i_product_name#8, i_brand#9, i_class#10, i_category#11, 0), List(inv_quantity_on_hand#7, i_product_name#8, i_brand#9, i_class#10, null, 1), List(inv_quantity_on_hand#7, i_product_name#8, i_brand#9, null, null, 3), List(inv_quantity_on_hand#7, i_product_name#8, null, null, null, 7), List(inv_quantity_on_hand#7, null, null, null, null, 15)], [inv_quantity_on_hand#7, i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6] +- *(4) Project [inv_quantity_on_hand#7, i_product_name#12 AS i_product_name#8, i_brand#13 AS i_brand#9, i_class#14 AS i_class#10, i_category#15 AS i_category#11] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt index 49b575687..156a01dd0 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt @@ -1,14 +1,14 @@ -TakeOrderedAndProject [i_category,i_class,i_product_name,i_brand,qoh] +TakeOrderedAndProject [i_category,qoh,i_brand,i_class,i_product_name] WholeStageCodegen - HashAggregate [avg(cast(inv_quantity_on_hand as bigint)),i_category,count,spark_grouping_id,sum,i_class,i_product_name,i_brand] [avg(cast(inv_quantity_on_hand as bigint)),qoh,sum,count] + HashAggregate [i_category,avg(cast(inv_quantity_on_hand as bigint)),i_brand,sum,i_class,count,spark_grouping_id,i_product_name] [avg(cast(inv_quantity_on_hand as bigint)),qoh,sum,count] InputAdapter - Exchange [i_category,spark_grouping_id,i_class,i_product_name,i_brand] #1 + Exchange [i_category,i_brand,i_class,spark_grouping_id,i_product_name] #1 WholeStageCodegen - HashAggregate [sum,i_category,count,count,spark_grouping_id,sum,i_class,i_product_name,inv_quantity_on_hand,i_brand] [sum,count,sum,count] - Expand [i_product_name,i_class,i_brand,i_category,inv_quantity_on_hand] - Project [i_brand,i_category,inv_quantity_on_hand,i_class,i_product_name] + HashAggregate [i_category,inv_quantity_on_hand,count,i_brand,sum,sum,i_class,count,spark_grouping_id,i_product_name] [sum,count,sum,count] + Expand [i_brand,i_product_name,inv_quantity_on_hand,i_category,i_class] + Project [inv_quantity_on_hand,i_category,i_brand,i_product_name,i_class] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [i_class,inv_quantity_on_hand,inv_warehouse_sk,i_product_name,i_category,i_brand] + Project [i_product_name,inv_quantity_on_hand,i_brand,i_category,inv_warehouse_sk,i_class] BroadcastHashJoin [inv_item_sk,i_item_sk] Project [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] BroadcastHashJoin [inv_date_sk,d_date_sk] @@ -24,9 +24,9 @@ TakeOrderedAndProject [i_category,i_class,i_product_name,i_brand,qoh] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [i_class,i_item_sk,i_product_name,i_category,i_brand] + Project [i_product_name,i_brand,i_category,i_item_sk,i_class] Filter [i_item_sk] - Scan parquet default.item [i_class,i_item_sk,i_product_name,i_category,i_brand] [i_class,i_item_sk,i_product_name,i_category,i_brand] + Scan parquet default.item [i_product_name,i_brand,i_category,i_item_sk,i_class] [i_product_name,i_brand,i_category,i_item_sk,i_class] InputAdapter BroadcastExchange #4 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt index 5dbd09984..9e3d90e72 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt @@ -17,7 +17,7 @@ CollectLimit 100 : : : +- *(4) Project [item_sk#9] : : : +- *(4) Filter (count(1)#10 > 4) : : : +- *(4) HashAggregate(keys=[substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14], functions=[count(1)]) - : : : +- Exchange hashpartitioning(substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14, 200) + : : : +- Exchange hashpartitioning(substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14, 5) : : : +- *(3) HashAggregate(keys=[substring(i_item_desc#11, 1, 30) AS substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14], functions=[partial_count(1)]) : : : +- *(3) Project [d_date#14, i_item_sk#13, i_item_desc#11] : : : +- *(3) BroadcastHashJoin [ss_item_sk#15], [i_item_sk#13], Inner, BuildRight @@ -36,13 +36,13 @@ CollectLimit 100 : : : +- *(2) FileScan parquet default.item[i_item_sk#13,i_item_desc#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(7) Project [c_customer_sk#7] - : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery3052 as decimal(32,6)))), DecimalType(38,8)))) - : : : +- Subquery subquery3052 + : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery5292 as decimal(32,6)))), DecimalType(38,8)))) + : : : +- Subquery subquery5292 : : : +- *(5) HashAggregate(keys=[], functions=[max(csales#21)]) : : : +- Exchange SinglePartition : : : +- *(4) HashAggregate(keys=[], functions=[partial_max(csales#21)]) : : : +- *(4) HashAggregate(keys=[c_customer_sk#7], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) - : : : +- Exchange hashpartitioning(c_customer_sk#7, 200) + : : : +- Exchange hashpartitioning(c_customer_sk#7, 5) : : : +- *(3) HashAggregate(keys=[c_customer_sk#7], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) : : : +- *(3) Project [ss_quantity#18, ss_sales_price#19, c_customer_sk#7] : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#5], Inner, BuildRight @@ -60,7 +60,7 @@ CollectLimit 100 : : : +- *(2) Filter (d_year#17 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#5)) : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#5,d_year#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct : : +- *(7) HashAggregate(keys=[c_customer_sk#7], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) - : : +- Exchange hashpartitioning(c_customer_sk#7, 200) + : : +- Exchange hashpartitioning(c_customer_sk#7, 5) : : +- *(6) HashAggregate(keys=[c_customer_sk#7], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) : : +- *(6) Project [ss_quantity#18, ss_sales_price#19, c_customer_sk#7] : : +- *(6) BroadcastHashJoin [ss_customer_sk#22], [c_customer_sk#7], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt index e3c9d92e2..3686a996b 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt @@ -14,19 +14,19 @@ CollectLimit BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] BroadcastHashJoin [cs_item_sk,item_sk] - Project [cs_quantity,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk,cs_list_price] + Project [cs_quantity,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_quantity,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk,cs_list_price] [cs_quantity,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk,cs_list_price] + Scan parquet default.catalog_sales [cs_quantity,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] [cs_quantity,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [item_sk] Filter [count(1)] - HashAggregate [count(1),d_date,i_item_sk,substring(i_item_desc, 1, 30),count] [count(1),item_sk,count(1),count] + HashAggregate [d_date,count,i_item_sk,count(1),substring(i_item_desc, 1, 30)] [count(1),item_sk,count(1),count] InputAdapter Exchange [substring(i_item_desc, 1, 30),i_item_sk,d_date] #3 WholeStageCodegen - HashAggregate [i_item_desc,count,d_date,i_item_sk,substring(i_item_desc, 1, 30),count] [count,substring(i_item_desc, 1, 30),count] + HashAggregate [d_date,count,i_item_desc,i_item_sk,count,substring(i_item_desc, 1, 30)] [count,substring(i_item_desc, 1, 30),count] Project [d_date,i_item_sk,i_item_desc] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ss_item_sk,d_date] @@ -62,7 +62,7 @@ CollectLimit InputAdapter Exchange [c_customer_sk] #10 WholeStageCodegen - HashAggregate [sum,c_customer_sk,ss_sales_price,sum,ss_quantity] [sum,sum] + HashAggregate [sum,ss_quantity,c_customer_sk,ss_sales_price,sum] [sum,sum] Project [ss_quantity,ss_sales_price,c_customer_sk] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,ss_quantity,ss_sales_price,c_customer_sk] @@ -86,7 +86,7 @@ CollectLimit InputAdapter Exchange [c_customer_sk] #7 WholeStageCodegen - HashAggregate [sum,sum,c_customer_sk,ss_sales_price,ss_quantity] [sum,sum] + HashAggregate [ss_quantity,sum,c_customer_sk,ss_sales_price,sum] [sum,sum] Project [ss_quantity,ss_sales_price,c_customer_sk] BroadcastHashJoin [ss_customer_sk,c_customer_sk] Project [ss_customer_sk,ss_quantity,ss_sales_price] @@ -111,9 +111,9 @@ CollectLimit BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] BroadcastHashJoin [ws_item_sk,item_sk] - Project [ws_quantity,ws_list_price,ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] + Project [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_item_sk,ws_sold_date_sk] Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] [ws_quantity,ws_list_price,ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_item_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_item_sk,ws_sold_date_sk] InputAdapter ReusedExchange [item_sk] [item_sk] #2 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt index 462549469..134e9552e 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt @@ -2,7 +2,7 @@ TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_name#2 ASC NULLS FIRST,sales#3 ASC NULLS FIRST], output=[c_last_name#1,c_first_name#2,sales#3]) +- Union :- *(14) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2)))]) - : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, 200) + : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, 5) : +- *(13) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2)))]) : +- *(13) Project [cs_quantity#4, cs_list_price#5, c_first_name#2, c_last_name#1] : +- *(13) BroadcastHashJoin [cs_sold_date_sk#6], [d_date_sk#7], Inner, BuildRight @@ -18,7 +18,7 @@ TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_ : : : : +- *(4) Project [item_sk#12] : : : : +- *(4) Filter (count(1)#13 > 4) : : : : +- *(4) HashAggregate(keys=[substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17], functions=[count(1)]) - : : : : +- Exchange hashpartitioning(substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17, 200) + : : : : +- Exchange hashpartitioning(substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17, 5) : : : : +- *(3) HashAggregate(keys=[substring(i_item_desc#14, 1, 30) AS substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17], functions=[partial_count(1)]) : : : : +- *(3) Project [d_date#17, i_item_sk#16, i_item_desc#14] : : : : +- *(3) BroadcastHashJoin [ss_item_sk#18], [i_item_sk#16], Inner, BuildRight @@ -37,13 +37,13 @@ TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_ : : : : +- *(2) FileScan parquet default.item[i_item_sk#16,i_item_desc#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(7) Project [c_customer_sk#9 AS c_customer_sk#9#10] - : : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery3144 as decimal(32,6)))), DecimalType(38,8)))) - : : : : +- Subquery subquery3144 + : : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery5384 as decimal(32,6)))), DecimalType(38,8)))) + : : : : +- Subquery subquery5384 : : : : +- *(5) HashAggregate(keys=[], functions=[max(csales#24)]) : : : : +- Exchange SinglePartition : : : : +- *(4) HashAggregate(keys=[], functions=[partial_max(csales#24)]) : : : : +- *(4) HashAggregate(keys=[c_customer_sk#9], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) - : : : : +- Exchange hashpartitioning(c_customer_sk#9, 200) + : : : : +- Exchange hashpartitioning(c_customer_sk#9, 5) : : : : +- *(3) HashAggregate(keys=[c_customer_sk#9], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) : : : : +- *(3) Project [ss_quantity#21, ss_sales_price#22, c_customer_sk#9] : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#7], Inner, BuildRight @@ -61,7 +61,7 @@ TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_ : : : : +- *(2) Filter (d_year#20 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#7)) : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct : : : +- *(7) HashAggregate(keys=[c_customer_sk#9], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) - : : : +- Exchange hashpartitioning(c_customer_sk#9, 200) + : : : +- Exchange hashpartitioning(c_customer_sk#9, 5) : : : +- *(6) HashAggregate(keys=[c_customer_sk#9], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) : : : +- *(6) Project [ss_quantity#21, ss_sales_price#22, c_customer_sk#9] : : : +- *(6) BroadcastHashJoin [ss_customer_sk#25], [c_customer_sk#9], Inner, BuildRight @@ -83,7 +83,7 @@ TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_ : +- *(12) Filter ((((isnotnull(d_year#20) && isnotnull(d_moy#26)) && (d_year#20 = 2000)) && (d_moy#26 = 2)) && isnotnull(d_date_sk#7)) : +- *(12) FileScan parquet default.date_dim[d_date_sk#7,d_year#20,d_moy#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct +- *(28) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#28 as decimal(12,2)))), DecimalType(18,2)))]) - +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, 200) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, 5) +- *(27) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#28 as decimal(12,2)))), DecimalType(18,2)))]) +- *(27) Project [ws_quantity#27, ws_list_price#28, c_first_name#2, c_last_name#1] +- *(27) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#7], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt index ead663422..b8e3c71f2 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt @@ -5,27 +5,27 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] InputAdapter Exchange [c_last_name,c_first_name] #1 WholeStageCodegen - HashAggregate [cs_list_price,sum,sum,c_last_name,cs_quantity,c_first_name] [sum,sum] + HashAggregate [cs_quantity,sum,c_last_name,cs_list_price,sum,c_first_name] [sum,sum] Project [cs_quantity,cs_list_price,c_first_name,c_last_name] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_quantity,c_last_name,cs_sold_date_sk,c_first_name,cs_list_price] + Project [cs_quantity,cs_list_price,cs_sold_date_sk,c_last_name,c_first_name] BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] BroadcastHashJoin [cs_item_sk,item_sk] - Project [cs_quantity,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk,cs_list_price] + Project [cs_quantity,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] Filter [cs_bill_customer_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_quantity,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk,cs_list_price] [cs_quantity,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk,cs_list_price] + Scan parquet default.catalog_sales [cs_quantity,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] [cs_quantity,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [item_sk] Filter [count(1)] - HashAggregate [d_date,count,i_item_sk,count(1),substring(i_item_desc, 1, 30)] [count(1),item_sk,count(1),count] + HashAggregate [count,d_date,substring(i_item_desc, 1, 30),i_item_sk,count(1)] [count(1),item_sk,count(1),count] InputAdapter Exchange [substring(i_item_desc, 1, 30),i_item_sk,d_date] #3 WholeStageCodegen - HashAggregate [i_item_desc,d_date,count,count,i_item_sk,substring(i_item_desc, 1, 30)] [count,substring(i_item_desc, 1, 30),count] + HashAggregate [count,d_date,count,i_item_desc,substring(i_item_desc, 1, 30),i_item_sk] [count,substring(i_item_desc, 1, 30),count] Project [d_date,i_item_sk,i_item_desc] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ss_item_sk,d_date] @@ -61,7 +61,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] InputAdapter Exchange [c_customer_sk] #10 WholeStageCodegen - HashAggregate [sum,sum,c_customer_sk,ss_sales_price,ss_quantity] [sum,sum] + HashAggregate [sum,ss_quantity,c_customer_sk,sum,ss_sales_price] [sum,sum] Project [ss_quantity,ss_sales_price,c_customer_sk] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,ss_quantity,ss_sales_price,c_customer_sk] @@ -85,7 +85,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] InputAdapter Exchange [c_customer_sk] #7 WholeStageCodegen - HashAggregate [sum,c_customer_sk,ss_sales_price,sum,ss_quantity] [sum,sum] + HashAggregate [ss_quantity,c_customer_sk,sum,ss_sales_price,sum] [sum,sum] Project [ss_quantity,ss_sales_price,c_customer_sk] BroadcastHashJoin [ss_customer_sk,c_customer_sk] Project [ss_customer_sk,ss_quantity,ss_sales_price] @@ -117,17 +117,17 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] InputAdapter Exchange [c_last_name,c_first_name] #14 WholeStageCodegen - HashAggregate [sum,c_last_name,ws_list_price,c_first_name,sum,ws_quantity] [sum,sum] + HashAggregate [sum,c_last_name,ws_quantity,sum,c_first_name,ws_list_price] [sum,sum] Project [ws_quantity,ws_list_price,c_first_name,c_last_name] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_quantity,c_last_name,ws_list_price,ws_sold_date_sk,c_first_name] + Project [ws_list_price,c_last_name,c_first_name,ws_quantity,ws_sold_date_sk] BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] BroadcastHashJoin [ws_item_sk,item_sk] - Project [ws_quantity,ws_list_price,ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] + Project [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_item_sk,ws_sold_date_sk] Filter [ws_bill_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] [ws_quantity,ws_list_price,ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_item_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_item_sk,ws_sold_date_sk] InputAdapter ReusedExchange [item_sk] [item_sk] #2 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt index f1e56aef9..1103d6e02 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt @@ -1,12 +1,12 @@ == Physical Plan == *(8) Project [c_last_name#1, c_first_name#2, s_store_name#3, paid#4] -+- *(8) Filter (isnotnull(sum(netpaid#5)#6) && (cast(sum(netpaid#5)#6 as decimal(33,8)) > cast(Subquery subquery3246 as decimal(33,8)))) - : +- Subquery subquery3246 ++- *(8) Filter (isnotnull(sum(netpaid#5)#6) && (cast(sum(netpaid#5)#6 as decimal(33,8)) > cast(Subquery subquery5486 as decimal(33,8)))) + : +- Subquery subquery5486 : +- *(8) HashAggregate(keys=[], functions=[avg(netpaid#5)]) : +- Exchange SinglePartition : +- *(7) HashAggregate(keys=[], functions=[partial_avg(netpaid#5)]) : +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) - : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 200) + : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 5) : +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) : +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] : +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight @@ -42,10 +42,10 @@ : +- *(5) Filter isnotnull(ca_zip#18) : +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct +- *(8) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[sum(netpaid#5)]) - +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, 200) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, 5) +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[partial_sum(netpaid#5)]) +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) - +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 200) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 5) +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt index 08aae40a6..71544ba9b 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt @@ -7,25 +7,25 @@ WholeStageCodegen InputAdapter Exchange #8 WholeStageCodegen - HashAggregate [sum,count,count,netpaid,sum] [sum,count,sum,count] - HashAggregate [i_units,i_color,i_size,s_state,sum(UnscaledValue(ss_net_paid)),sum,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + HashAggregate [count,sum,netpaid,count,sum] [sum,count,sum,count] + HashAggregate [i_current_price,i_manager_id,s_store_name,c_last_name,s_state,i_size,sum,sum(UnscaledValue(ss_net_paid)),i_color,c_first_name,i_units,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] InputAdapter - Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #9 + Exchange [i_current_price,i_manager_id,s_store_name,c_last_name,s_state,i_size,i_color,c_first_name,i_units,ca_state] #9 WholeStageCodegen - HashAggregate [i_units,i_color,i_size,ss_net_paid,s_state,sum,i_manager_id,c_last_name,sum,i_current_price,c_first_name,s_store_name,ca_state] [sum,sum] - Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] + HashAggregate [i_current_price,sum,i_manager_id,s_store_name,c_last_name,s_state,i_size,ss_net_paid,sum,i_color,c_first_name,i_units,ca_state] [sum,sum] + Project [i_manager_id,i_units,ca_state,i_current_price,i_size,s_store_name,c_last_name,c_first_name,i_color,ss_net_paid,s_state] BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] - Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] + Project [i_manager_id,c_birth_country,i_units,i_current_price,s_zip,i_size,s_store_name,c_last_name,c_first_name,i_color,ss_net_paid,s_state] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] + Project [i_manager_id,ss_customer_sk,i_units,i_current_price,s_zip,i_size,s_store_name,i_color,ss_net_paid,s_state] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] + Project [ss_customer_sk,ss_item_sk,s_zip,s_store_name,ss_net_paid,s_state] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + Project [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -37,13 +37,13 @@ WholeStageCodegen WholeStageCodegen Project [s_store_sk,s_store_name,s_state,s_zip] Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] + Scan parquet default.store [s_store_sk,s_zip,s_store_name,s_market_id,s_state] [s_store_sk,s_zip,s_store_name,s_market_id,s_state] InputAdapter BroadcastExchange #10 WholeStageCodegen - Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Project [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] Filter [i_item_sk] - Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Scan parquet default.item [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] InputAdapter BroadcastExchange #6 WholeStageCodegen @@ -56,29 +56,29 @@ WholeStageCodegen Project [ca_state,ca_zip,ca_country] Filter [ca_zip] Scan parquet default.customer_address [ca_state,ca_zip,ca_country] [ca_state,ca_zip,ca_country] - HashAggregate [c_first_name,s_store_name,c_last_name,sum(netpaid),sum] [sum(netpaid),paid,sum(netpaid),sum] + HashAggregate [c_last_name,c_first_name,s_store_name,sum,sum(netpaid)] [sum(netpaid),paid,sum(netpaid),sum] InputAdapter Exchange [c_last_name,c_first_name,s_store_name] #1 WholeStageCodegen - HashAggregate [c_first_name,s_store_name,c_last_name,sum,netpaid,sum] [sum,sum] - HashAggregate [i_units,i_color,i_size,sum,s_state,sum(UnscaledValue(ss_net_paid)),i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + HashAggregate [c_last_name,c_first_name,sum,netpaid,s_store_name,sum] [sum,sum] + HashAggregate [i_current_price,i_manager_id,s_store_name,c_last_name,s_state,i_size,sum(UnscaledValue(ss_net_paid)),i_color,c_first_name,sum,i_units,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] InputAdapter - Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #2 + Exchange [i_current_price,i_manager_id,s_store_name,c_last_name,s_state,i_size,i_color,c_first_name,i_units,ca_state] #2 WholeStageCodegen - HashAggregate [i_units,sum,i_color,i_size,ss_net_paid,sum,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] [sum,sum] - Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] + HashAggregate [i_current_price,i_manager_id,s_store_name,c_last_name,s_state,i_size,sum,ss_net_paid,i_color,c_first_name,sum,i_units,ca_state] [sum,sum] + Project [i_manager_id,i_units,ca_state,i_current_price,i_size,s_store_name,c_last_name,c_first_name,i_color,ss_net_paid,s_state] BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] - Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] + Project [i_manager_id,c_birth_country,i_units,i_current_price,s_zip,i_size,s_store_name,c_last_name,c_first_name,i_color,ss_net_paid,s_state] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] + Project [i_manager_id,ss_customer_sk,i_units,i_current_price,s_zip,i_size,s_store_name,i_color,ss_net_paid,s_state] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] + Project [ss_customer_sk,ss_item_sk,s_zip,s_store_name,ss_net_paid,s_state] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + Project [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -90,13 +90,13 @@ WholeStageCodegen WholeStageCodegen Project [s_store_sk,s_store_name,s_state,s_zip] Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] + Scan parquet default.store [s_store_sk,s_zip,s_store_name,s_market_id,s_state] [s_store_sk,s_zip,s_store_name,s_market_id,s_state] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Project [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] Filter [i_color,i_item_sk] - Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Scan parquet default.item [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] InputAdapter BroadcastExchange #6 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt index fb2c9c939..8afda776c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt @@ -1,12 +1,12 @@ == Physical Plan == *(8) Project [c_last_name#1, c_first_name#2, s_store_name#3, paid#4] -+- *(8) Filter (isnotnull(sum(netpaid#5)#6) && (cast(sum(netpaid#5)#6 as decimal(33,8)) > cast(Subquery subquery3290 as decimal(33,8)))) - : +- Subquery subquery3290 ++- *(8) Filter (isnotnull(sum(netpaid#5)#6) && (cast(sum(netpaid#5)#6 as decimal(33,8)) > cast(Subquery subquery5530 as decimal(33,8)))) + : +- Subquery subquery5530 : +- *(8) HashAggregate(keys=[], functions=[avg(netpaid#5)]) : +- Exchange SinglePartition : +- *(7) HashAggregate(keys=[], functions=[partial_avg(netpaid#5)]) : +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) - : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 200) + : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 5) : +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) : +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] : +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight @@ -42,10 +42,10 @@ : +- *(5) Filter isnotnull(ca_zip#18) : +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct +- *(8) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[sum(netpaid#5)]) - +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, 200) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, 5) +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[partial_sum(netpaid#5)]) +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) - +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 200) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 5) +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt index 87db312c3..3eca25bd7 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt @@ -7,25 +7,25 @@ WholeStageCodegen InputAdapter Exchange #8 WholeStageCodegen - HashAggregate [count,sum,count,netpaid,sum] [sum,count,sum,count] - HashAggregate [i_units,sum,i_color,i_size,s_state,i_manager_id,c_last_name,sum(UnscaledValue(ss_net_paid)),i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + HashAggregate [netpaid,count,sum,count,sum] [sum,count,sum,count] + HashAggregate [i_current_price,sum,sum(UnscaledValue(ss_net_paid)),i_manager_id,s_store_name,c_last_name,s_state,i_size,i_color,c_first_name,i_units,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] InputAdapter - Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #9 + Exchange [i_current_price,i_manager_id,s_store_name,c_last_name,s_state,i_size,i_color,c_first_name,i_units,ca_state] #9 WholeStageCodegen - HashAggregate [i_units,sum,i_color,i_size,ss_net_paid,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,sum,s_store_name,ca_state] [sum,sum] - Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] + HashAggregate [i_current_price,sum,i_manager_id,s_store_name,c_last_name,s_state,i_size,ss_net_paid,i_color,c_first_name,i_units,ca_state,sum] [sum,sum] + Project [i_manager_id,i_units,ca_state,i_current_price,i_size,s_store_name,c_last_name,c_first_name,i_color,ss_net_paid,s_state] BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] - Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] + Project [i_manager_id,c_birth_country,i_units,i_current_price,s_zip,i_size,s_store_name,c_last_name,c_first_name,i_color,ss_net_paid,s_state] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] + Project [i_manager_id,ss_customer_sk,i_units,i_current_price,s_zip,i_size,s_store_name,i_color,ss_net_paid,s_state] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] + Project [ss_customer_sk,ss_item_sk,s_zip,s_store_name,ss_net_paid,s_state] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + Project [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -37,13 +37,13 @@ WholeStageCodegen WholeStageCodegen Project [s_store_sk,s_store_name,s_state,s_zip] Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] + Scan parquet default.store [s_store_sk,s_zip,s_store_name,s_market_id,s_state] [s_store_sk,s_zip,s_store_name,s_market_id,s_state] InputAdapter BroadcastExchange #10 WholeStageCodegen - Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Project [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] Filter [i_item_sk] - Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Scan parquet default.item [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] InputAdapter BroadcastExchange #6 WholeStageCodegen @@ -56,29 +56,29 @@ WholeStageCodegen Project [ca_state,ca_zip,ca_country] Filter [ca_zip] Scan parquet default.customer_address [ca_state,ca_zip,ca_country] [ca_state,ca_zip,ca_country] - HashAggregate [c_first_name,s_store_name,c_last_name,sum(netpaid),sum] [sum(netpaid),paid,sum(netpaid),sum] + HashAggregate [c_last_name,c_first_name,sum,s_store_name,sum(netpaid)] [sum(netpaid),paid,sum(netpaid),sum] InputAdapter Exchange [c_last_name,c_first_name,s_store_name] #1 WholeStageCodegen - HashAggregate [c_first_name,s_store_name,c_last_name,netpaid,sum,sum] [sum,sum] - HashAggregate [i_units,sum,i_color,i_size,s_state,i_manager_id,c_last_name,sum(UnscaledValue(ss_net_paid)),i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + HashAggregate [c_last_name,c_first_name,netpaid,sum,s_store_name,sum] [sum,sum] + HashAggregate [i_current_price,sum(UnscaledValue(ss_net_paid)),i_manager_id,s_store_name,sum,c_last_name,s_state,i_size,i_color,c_first_name,i_units,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] InputAdapter - Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #2 + Exchange [i_current_price,i_manager_id,s_store_name,c_last_name,s_state,i_size,i_color,c_first_name,i_units,ca_state] #2 WholeStageCodegen - HashAggregate [i_units,sum,i_color,i_size,ss_net_paid,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,sum,s_store_name,ca_state] [sum,sum] - Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] + HashAggregate [i_current_price,i_manager_id,s_store_name,sum,c_last_name,s_state,i_size,ss_net_paid,sum,i_color,c_first_name,i_units,ca_state] [sum,sum] + Project [i_manager_id,i_units,ca_state,i_current_price,i_size,s_store_name,c_last_name,c_first_name,i_color,ss_net_paid,s_state] BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] - Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] + Project [i_manager_id,c_birth_country,i_units,i_current_price,s_zip,i_size,s_store_name,c_last_name,c_first_name,i_color,ss_net_paid,s_state] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] + Project [i_manager_id,ss_customer_sk,i_units,i_current_price,s_zip,i_size,s_store_name,i_color,ss_net_paid,s_state] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] + Project [ss_customer_sk,ss_item_sk,s_zip,s_store_name,ss_net_paid,s_state] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + Project [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -90,13 +90,13 @@ WholeStageCodegen WholeStageCodegen Project [s_store_sk,s_store_name,s_state,s_zip] Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] + Scan parquet default.store [s_store_sk,s_zip,s_store_name,s_market_id,s_state] [s_store_sk,s_zip,s_store_name,s_market_id,s_state] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Project [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] Filter [i_color,i_item_sk] - Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Scan parquet default.item [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] InputAdapter BroadcastExchange #6 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt index 0548dd397..afab62a0f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST,s_store_id#3 ASC NULLS FIRST,s_store_name#4 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,s_store_id#3,s_store_name#4,store_sales_profit#5,store_returns_loss#6,catalog_sales_profit#7]) +- *(9) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4], functions=[sum(UnscaledValue(ss_net_profit#8)), sum(UnscaledValue(sr_net_loss#9)), sum(UnscaledValue(cs_net_profit#10))]) - +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4, 200) + +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4, 5) +- *(8) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4], functions=[partial_sum(UnscaledValue(ss_net_profit#8)), partial_sum(UnscaledValue(sr_net_loss#9)), partial_sum(UnscaledValue(cs_net_profit#10))]) +- *(8) Project [ss_net_profit#8, sr_net_loss#9, cs_net_profit#10, s_store_id#3, s_store_name#4, i_item_id#1, i_item_desc#2] +- *(8) BroadcastHashJoin [ss_item_sk#11], [i_item_sk#12], Inner, BuildRight @@ -22,12 +22,12 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_des : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#11,ss_customer_sk#25,ss_store_sk#13,ss_ticket_number#26,ss_net_profit#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct + : : : : : +- *(2) Filter ((isnotnull(cs_item_sk#24) && isnotnull(cs_bill_customer_sk#23)) && isnotnull(cs_sold_date_sk#15)) + : : : : : +- *(2) FileScan parquet default.catalog_sales[cs_sold_date_sk#15,cs_bill_customer_sk#23,cs_item_sk#24,cs_net_profit#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(3) Project [d_date_sk#20] : : : : +- *(3) Filter ((((isnotnull(d_moy#28) && isnotnull(d_year#29)) && (d_moy#28 = 4)) && (d_year#29 = 2001)) && isnotnull(d_date_sk#20)) @@ -38,8 +38,8 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_des : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#30,d_moy#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(5) Project [d_date_sk#16] - : : +- *(5) Filter (((((isnotnull(d_year#32) && isnotnull(d_moy#33)) && (d_moy#33 >= 4)) && (d_moy#33 <= 10)) && (d_year#32 = 2001)) && isnotnull(d_date_sk#16)) - : : +- *(5) FileScan parquet default.date_dim[d_date_sk#16,d_year#32,d_moy#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct + : : +- *(5) Filter (((((isnotnull(d_moy#32) && isnotnull(d_year#33)) && (d_moy#32 >= 4)) && (d_moy#32 <= 10)) && (d_year#33 = 2001)) && isnotnull(d_date_sk#16)) + : : +- *(5) FileScan parquet default.date_dim[d_date_sk#16,d_year#33,d_moy#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(6) Project [s_store_sk#14, s_store_id#3, s_store_name#4] : +- *(6) Filter isnotnull(s_store_sk#14) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt index 20dec06b1..4d8bbb2d8 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt @@ -1,38 +1,38 @@ -TakeOrderedAndProject [i_item_id,i_item_desc,store_sales_profit,catalog_sales_profit,s_store_id,store_returns_loss,s_store_name] +TakeOrderedAndProject [s_store_id,s_store_name,store_sales_profit,i_item_desc,catalog_sales_profit,i_item_id,store_returns_loss] WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,sum,sum,s_store_id,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(ss_net_profit)),s_store_name,sum] [sum,store_sales_profit,sum,catalog_sales_profit,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),store_returns_loss,sum(UnscaledValue(ss_net_profit)),sum] + HashAggregate [sum(UnscaledValue(ss_net_profit)),s_store_id,sum(UnscaledValue(cs_net_profit)),s_store_name,sum(UnscaledValue(sr_net_loss)),sum,sum,i_item_desc,i_item_id,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),store_sales_profit,sum,sum,catalog_sales_profit,sum,store_returns_loss] InputAdapter Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,sum,sum,s_store_id,cs_net_profit,sum,sum,sum,s_store_name,sum,sr_net_loss,ss_net_profit] [sum,sum,sum,sum,sum,sum] - Project [s_store_id,s_store_name,ss_net_profit,cs_net_profit,i_item_desc,sr_net_loss,i_item_id] + HashAggregate [s_store_id,s_store_name,sum,sum,cs_net_profit,ss_net_profit,sum,sum,sum,i_item_desc,sr_net_loss,i_item_id,sum] [sum,sum,sum,sum,sum,sum] + Project [sr_net_loss,s_store_name,i_item_id,s_store_id,ss_net_profit,cs_net_profit,i_item_desc] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [s_store_id,s_store_name,ss_item_sk,ss_net_profit,cs_net_profit,sr_net_loss] + Project [ss_item_sk,sr_net_loss,s_store_name,s_store_id,ss_net_profit,cs_net_profit] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_net_profit,cs_net_profit,sr_net_loss] + Project [ss_item_sk,sr_net_loss,ss_store_sk,ss_net_profit,cs_net_profit] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [ss_item_sk,ss_store_sk,cs_sold_date_sk,ss_net_profit,cs_net_profit,sr_net_loss] + Project [ss_item_sk,sr_net_loss,cs_sold_date_sk,ss_store_sk,ss_net_profit,cs_net_profit] BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [ss_item_sk,ss_store_sk,cs_sold_date_sk,ss_net_profit,cs_net_profit,sr_returned_date_sk,sr_net_loss] + Project [ss_item_sk,sr_net_loss,cs_sold_date_sk,ss_store_sk,sr_returned_date_sk,ss_net_profit,cs_net_profit] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_item_sk,ss_store_sk,cs_sold_date_sk,ss_net_profit,cs_net_profit,ss_sold_date_sk,sr_returned_date_sk,sr_net_loss] + Project [ss_item_sk,sr_net_loss,cs_sold_date_sk,ss_sold_date_sk,ss_store_sk,sr_returned_date_sk,ss_net_profit,cs_net_profit] BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] - Project [ss_item_sk,ss_store_sk,sr_customer_sk,sr_item_sk,ss_net_profit,ss_sold_date_sk,sr_returned_date_sk,sr_net_loss] - BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] - Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_ticket_number] - Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_ticket_number] + Project [sr_item_sk,ss_item_sk,sr_net_loss,ss_sold_date_sk,ss_store_sk,sr_customer_sk,sr_returned_date_sk,ss_net_profit] + BroadcastHashJoin [ss_customer_sk,sr_item_sk,ss_item_sk,sr_ticket_number,sr_customer_sk,ss_ticket_number] + Project [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_net_profit] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_sold_date_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_net_profit] [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_net_profit] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk,sr_net_loss] - Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] - Scan parquet default.store_returns [sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk,sr_net_loss] [sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk,sr_net_loss] + Project [sr_ticket_number,sr_item_sk,sr_net_loss,sr_customer_sk,sr_returned_date_sk] + Filter [sr_ticket_number,sr_item_sk,sr_customer_sk,sr_returned_date_sk] + Scan parquet default.store_returns [sr_ticket_number,sr_item_sk,sr_net_loss,sr_customer_sk,sr_returned_date_sk] [sr_ticket_number,sr_item_sk,sr_net_loss,sr_customer_sk,sr_returned_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Filter [cs_item_sk,cs_bill_customer_sk,cs_sold_date_sk] Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] InputAdapter BroadcastExchange #4 @@ -50,7 +50,7 @@ TakeOrderedAndProject [i_item_id,i_item_desc,store_sales_profit,catalog_sales_pr BroadcastExchange #6 WholeStageCodegen Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] + Filter [d_year,d_moy,d_date_sk] Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #7 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt index ac72aff5b..5937dee3e 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[i_item_id#1,agg1#2,agg2#3,agg3#4,agg4#5]) +- *(6) HashAggregate(keys=[i_item_id#1], functions=[avg(cast(cs_quantity#6 as bigint)), avg(UnscaledValue(cs_list_price#7)), avg(UnscaledValue(cs_coupon_amt#8)), avg(UnscaledValue(cs_sales_price#9))]) - +- Exchange hashpartitioning(i_item_id#1, 200) + +- Exchange hashpartitioning(i_item_id#1, 5) +- *(5) HashAggregate(keys=[i_item_id#1], functions=[partial_avg(cast(cs_quantity#6 as bigint)), partial_avg(UnscaledValue(cs_list_price#7)), partial_avg(UnscaledValue(cs_coupon_amt#8)), partial_avg(UnscaledValue(cs_sales_price#9))]) +- *(5) Project [cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8, i_item_id#1] +- *(5) BroadcastHashJoin [cs_promo_sk#10], [p_promo_sk#11], Inner, BuildRight @@ -16,8 +16,8 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[ : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#14,cs_bill_cdemo_sk#16,cs_item_sk#12,cs_promo_sk#10,cs_quantity#6,cs_list_price#7,cs_sales_price#9,cs_coupon_amt#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_pro..., ReadSchema: struct + : : : +- *(1) Filter ((((((isnotnull(cd_education_status#18) && isnotnull(cd_gender#19)) && isnotnull(cd_marital_status#20)) && (cd_gender#19 = M)) && (cd_marital_status#20 = S)) && (cd_education_status#18 = College)) && isnotnull(cd_demo_sk#17)) + : : : +- *(1) FileScan parquet default.customer_demographics[cd_demo_sk#17,cd_gender#19,cd_marital_status#20,cd_education_status#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_education_status), IsNotNull(cd_gender), IsNotNull(cd_marital_status), EqualTo(cd_g..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [d_date_sk#15] : : +- *(2) Filter ((isnotnull(d_year#21) && (d_year#21 = 2000)) && isnotnull(d_date_sk#15)) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt index da9dc6cdf..667950cad 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt @@ -1,26 +1,26 @@ -TakeOrderedAndProject [i_item_id,agg3,agg1,agg2,agg4] +TakeOrderedAndProject [agg2,agg1,agg3,i_item_id,agg4] WholeStageCodegen - HashAggregate [i_item_id,count,avg(UnscaledValue(cs_list_price)),sum,sum,count,sum,avg(UnscaledValue(cs_sales_price)),avg(UnscaledValue(cs_coupon_amt)),count,avg(cast(cs_quantity as bigint)),sum,count] [count,avg(UnscaledValue(cs_list_price)),sum,sum,count,sum,avg(UnscaledValue(cs_sales_price)),avg(UnscaledValue(cs_coupon_amt)),agg3,count,agg1,agg2,agg4,avg(cast(cs_quantity as bigint)),sum,count] + HashAggregate [count,avg(UnscaledValue(cs_coupon_amt)),count,sum,sum,count,avg(cast(cs_quantity as bigint)),sum,i_item_id,sum,count,avg(UnscaledValue(cs_sales_price)),avg(UnscaledValue(cs_list_price))] [count,avg(UnscaledValue(cs_coupon_amt)),count,sum,agg2,sum,count,agg1,avg(cast(cs_quantity as bigint)),sum,agg3,sum,count,avg(UnscaledValue(cs_sales_price)),agg4,avg(UnscaledValue(cs_list_price))] InputAdapter Exchange [i_item_id] #1 WholeStageCodegen - HashAggregate [cs_list_price,cs_coupon_amt,i_item_id,count,count,sum,sum,count,sum,sum,count,count,count,sum,sum,sum,count,cs_sales_price,cs_quantity,sum,count] [count,count,sum,sum,count,sum,sum,count,count,count,sum,sum,sum,count,sum,count] - Project [cs_coupon_amt,cs_quantity,cs_sales_price,cs_list_price,i_item_id] + HashAggregate [count,cs_quantity,count,count,count,sum,cs_sales_price,sum,cs_coupon_amt,count,count,cs_list_price,count,sum,sum,i_item_id,sum,count,sum,sum,sum] [count,count,count,count,sum,sum,count,count,count,sum,sum,sum,count,sum,sum,sum] + Project [cs_quantity,cs_list_price,cs_coupon_amt,i_item_id,cs_sales_price] BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_promo_sk,cs_coupon_amt,cs_quantity,cs_sales_price,cs_list_price,i_item_id] + Project [cs_quantity,cs_list_price,cs_promo_sk,cs_coupon_amt,i_item_id,cs_sales_price] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_promo_sk,cs_coupon_amt,cs_quantity,cs_sales_price,cs_item_sk,cs_list_price] + Project [cs_quantity,cs_list_price,cs_item_sk,cs_promo_sk,cs_coupon_amt,cs_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_promo_sk,cs_coupon_amt,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] + Project [cs_quantity,cs_list_price,cs_item_sk,cs_promo_sk,cs_sold_date_sk,cs_coupon_amt,cs_sales_price] BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] - Project [cs_promo_sk,cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] + Project [cs_bill_cdemo_sk,cs_quantity,cs_list_price,cs_item_sk,cs_promo_sk,cs_sold_date_sk,cs_coupon_amt,cs_sales_price] Filter [cs_bill_cdemo_sk,cs_sold_date_sk,cs_item_sk,cs_promo_sk] - Scan parquet default.catalog_sales [cs_promo_sk,cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] [cs_promo_sk,cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_quantity,cs_list_price,cs_item_sk,cs_promo_sk,cs_sold_date_sk,cs_coupon_amt,cs_sales_price] [cs_bill_cdemo_sk,cs_quantity,cs_list_price,cs_item_sk,cs_promo_sk,cs_sold_date_sk,cs_coupon_amt,cs_sales_price] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [cd_demo_sk] - Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + Filter [cd_education_status,cd_gender,cd_marital_status,cd_demo_sk] Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt index 8d5a61224..5f523556c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,s_state#2 ASC NULLS FIRST], output=[i_item_id#1,s_state#2,g_state#3,agg1#4,agg2#5,agg3#6,agg4#7]) +- *(6) HashAggregate(keys=[i_item_id#1, s_state#2, spark_grouping_id#8], functions=[avg(cast(ss_quantity#9 as bigint)), avg(UnscaledValue(ss_list_price#10)), avg(UnscaledValue(ss_coupon_amt#11)), avg(UnscaledValue(ss_sales_price#12))]) - +- Exchange hashpartitioning(i_item_id#1, s_state#2, spark_grouping_id#8, 200) + +- Exchange hashpartitioning(i_item_id#1, s_state#2, spark_grouping_id#8, 5) +- *(5) HashAggregate(keys=[i_item_id#1, s_state#2, spark_grouping_id#8], functions=[partial_avg(cast(ss_quantity#9 as bigint)), partial_avg(UnscaledValue(ss_list_price#10)), partial_avg(UnscaledValue(ss_coupon_amt#11)), partial_avg(UnscaledValue(ss_sales_price#12))]) +- *(5) Expand [List(ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#13, s_state#14, 0), List(ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#13, null, 1), List(ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, null, null, 3)], [ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#1, s_state#2, spark_grouping_id#8] +- *(5) Project [ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#15 AS i_item_id#13, s_state#16 AS s_state#14] @@ -17,8 +17,8 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,s_state#2 : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#21,ss_item_sk#17,ss_cdemo_sk#23,ss_store_sk#19,ss_quantity#9,ss_list_price#10,ss_sales_price#12,ss_coupon_amt#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : +- *(1) Filter ((((((isnotnull(cd_education_status#25) && isnotnull(cd_gender#26)) && isnotnull(cd_marital_status#27)) && (cd_gender#26 = M)) && (cd_marital_status#27 = S)) && (cd_education_status#25 = College)) && isnotnull(cd_demo_sk#24)) + : : : +- *(1) FileScan parquet default.customer_demographics[cd_demo_sk#24,cd_gender#26,cd_marital_status#27,cd_education_status#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_education_status), IsNotNull(cd_gender), IsNotNull(cd_marital_status), EqualTo(cd_g..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [d_date_sk#22] : : +- *(2) Filter ((isnotnull(d_year#28) && (d_year#28 = 2002)) && isnotnull(d_date_sk#22)) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt index 5eead43da..a4169a745 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt @@ -1,27 +1,27 @@ -TakeOrderedAndProject [agg4,i_item_id,s_state,agg3,g_state,agg1,agg2] +TakeOrderedAndProject [g_state,i_item_id,s_state,agg4,agg1,agg3,agg2] WholeStageCodegen - HashAggregate [i_item_id,sum,count,count,avg(UnscaledValue(ss_list_price)),sum,count,avg(cast(ss_quantity as bigint)),sum,s_state,count,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),sum,spark_grouping_id] [agg4,sum,count,count,avg(UnscaledValue(ss_list_price)),sum,count,avg(cast(ss_quantity as bigint)),sum,agg3,count,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),sum,g_state,agg1,agg2] + HashAggregate [avg(cast(ss_quantity as bigint)),sum,spark_grouping_id,i_item_id,count,s_state,count,sum,avg(UnscaledValue(ss_list_price)),sum,count,avg(UnscaledValue(ss_sales_price)),sum,count,avg(UnscaledValue(ss_coupon_amt))] [g_state,avg(cast(ss_quantity as bigint)),sum,count,count,agg4,agg1,agg3,sum,avg(UnscaledValue(ss_list_price)),sum,count,avg(UnscaledValue(ss_sales_price)),sum,agg2,count,avg(UnscaledValue(ss_coupon_amt))] InputAdapter Exchange [i_item_id,s_state,spark_grouping_id] #1 WholeStageCodegen - HashAggregate [i_item_id,sum,sum,count,sum,count,count,sum,sum,count,sum,ss_coupon_amt,count,ss_sales_price,sum,ss_list_price,count,s_state,count,count,ss_quantity,sum,spark_grouping_id] [sum,sum,count,sum,count,count,sum,sum,count,sum,count,sum,count,count,count,sum] - Expand [i_item_id,ss_coupon_amt,ss_sales_price,ss_list_price,ss_quantity,s_state] - Project [i_item_id,ss_coupon_amt,s_state,ss_sales_price,ss_list_price,ss_quantity] + HashAggregate [sum,sum,spark_grouping_id,i_item_id,count,s_state,ss_quantity,count,count,ss_coupon_amt,count,sum,sum,sum,count,count,sum,sum,sum,ss_sales_price,count,ss_list_price,count] [sum,sum,count,count,count,count,sum,sum,sum,count,count,sum,sum,sum,count,count] + Expand [ss_quantity,ss_coupon_amt,i_item_id,s_state,ss_sales_price,ss_list_price] + Project [ss_quantity,ss_coupon_amt,s_state,ss_sales_price,i_item_id,ss_list_price] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,s_state,ss_sales_price] + Project [ss_list_price,ss_sales_price,ss_item_sk,ss_quantity,ss_coupon_amt,s_state] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price] + Project [ss_list_price,ss_sales_price,ss_item_sk,ss_quantity,ss_store_sk,ss_coupon_amt] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + Project [ss_list_price,ss_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk] + Project [ss_list_price,ss_sales_price,ss_cdemo_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] Filter [ss_cdemo_sk,ss_sold_date_sk,ss_store_sk,ss_item_sk] - Scan parquet default.store_sales [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk] [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_list_price,ss_sales_price,ss_cdemo_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] [ss_list_price,ss_sales_price,ss_cdemo_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [cd_demo_sk] - Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + Filter [cd_education_status,cd_gender,cd_marital_status,cd_demo_sk] Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt index 2679a8bf5..b76609a84 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt @@ -9,7 +9,7 @@ CollectLimit 100 : : : : : +- Exchange SinglePartition : : : : : +- *(2) HashAggregate(keys=[], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1), partial_count(distinct ss_list_price#1)]) : : : : : +- *(2) HashAggregate(keys=[ss_list_price#1], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1)]) - : : : : : +- Exchange hashpartitioning(ss_list_price#1, 200) + : : : : : +- Exchange hashpartitioning(ss_list_price#1, 5) : : : : : +- *(1) HashAggregate(keys=[ss_list_price#1], functions=[partial_avg(UnscaledValue(ss_list_price#1)), partial_count(ss_list_price#1)]) : : : : : +- *(1) Project [ss_list_price#1] : : : : : +- *(1) Filter (((isnotnull(ss_quantity#2) && (ss_quantity#2 >= 0)) && (ss_quantity#2 <= 5)) && ((((ss_list_price#1 >= 8.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 18.00)) || ((ss_coupon_amt#3 >= 459.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 1459.00))) || ((ss_wholesale_cost#4 >= 57.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 77.00)))) @@ -19,7 +19,7 @@ CollectLimit 100 : : : : +- Exchange SinglePartition : : : : +- *(5) HashAggregate(keys=[], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1), partial_count(distinct ss_list_price#1)]) : : : : +- *(5) HashAggregate(keys=[ss_list_price#1], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1)]) - : : : : +- Exchange hashpartitioning(ss_list_price#1, 200) + : : : : +- Exchange hashpartitioning(ss_list_price#1, 5) : : : : +- *(4) HashAggregate(keys=[ss_list_price#1], functions=[partial_avg(UnscaledValue(ss_list_price#1)), partial_count(ss_list_price#1)]) : : : : +- *(4) Project [ss_list_price#1] : : : : +- *(4) Filter (((isnotnull(ss_quantity#2) && (ss_quantity#2 >= 6)) && (ss_quantity#2 <= 10)) && ((((ss_list_price#1 >= 90.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 100.00)) || ((ss_coupon_amt#3 >= 2323.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 3323.00))) || ((ss_wholesale_cost#4 >= 31.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 51.00)))) @@ -29,7 +29,7 @@ CollectLimit 100 : : : +- Exchange SinglePartition : : : +- *(8) HashAggregate(keys=[], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1), partial_count(distinct ss_list_price#1)]) : : : +- *(8) HashAggregate(keys=[ss_list_price#1], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1)]) - : : : +- Exchange hashpartitioning(ss_list_price#1, 200) + : : : +- Exchange hashpartitioning(ss_list_price#1, 5) : : : +- *(7) HashAggregate(keys=[ss_list_price#1], functions=[partial_avg(UnscaledValue(ss_list_price#1)), partial_count(ss_list_price#1)]) : : : +- *(7) Project [ss_list_price#1] : : : +- *(7) Filter (((isnotnull(ss_quantity#2) && (ss_quantity#2 >= 11)) && (ss_quantity#2 <= 15)) && ((((ss_list_price#1 >= 142.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 152.00)) || ((ss_coupon_amt#3 >= 12214.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 13214.00))) || ((ss_wholesale_cost#4 >= 79.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 99.00)))) @@ -39,7 +39,7 @@ CollectLimit 100 : : +- Exchange SinglePartition : : +- *(11) HashAggregate(keys=[], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1), partial_count(distinct ss_list_price#1)]) : : +- *(11) HashAggregate(keys=[ss_list_price#1], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1)]) - : : +- Exchange hashpartitioning(ss_list_price#1, 200) + : : +- Exchange hashpartitioning(ss_list_price#1, 5) : : +- *(10) HashAggregate(keys=[ss_list_price#1], functions=[partial_avg(UnscaledValue(ss_list_price#1)), partial_count(ss_list_price#1)]) : : +- *(10) Project [ss_list_price#1] : : +- *(10) Filter (((isnotnull(ss_quantity#2) && (ss_quantity#2 >= 16)) && (ss_quantity#2 <= 20)) && ((((ss_list_price#1 >= 135.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 145.00)) || ((ss_coupon_amt#3 >= 6071.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 7071.00))) || ((ss_wholesale_cost#4 >= 38.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 58.00)))) @@ -49,7 +49,7 @@ CollectLimit 100 : +- Exchange SinglePartition : +- *(14) HashAggregate(keys=[], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1), partial_count(distinct ss_list_price#1)]) : +- *(14) HashAggregate(keys=[ss_list_price#1], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1)]) - : +- Exchange hashpartitioning(ss_list_price#1, 200) + : +- Exchange hashpartitioning(ss_list_price#1, 5) : +- *(13) HashAggregate(keys=[ss_list_price#1], functions=[partial_avg(UnscaledValue(ss_list_price#1)), partial_count(ss_list_price#1)]) : +- *(13) Project [ss_list_price#1] : +- *(13) Filter (((isnotnull(ss_quantity#2) && (ss_quantity#2 >= 21)) && (ss_quantity#2 <= 25)) && ((((ss_list_price#1 >= 122.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 132.00)) || ((ss_coupon_amt#3 >= 836.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 1836.00))) || ((ss_wholesale_cost#4 >= 17.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 37.00)))) @@ -59,7 +59,7 @@ CollectLimit 100 +- Exchange SinglePartition +- *(17) HashAggregate(keys=[], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1), partial_count(distinct ss_list_price#1)]) +- *(17) HashAggregate(keys=[ss_list_price#1], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1)]) - +- Exchange hashpartitioning(ss_list_price#1, 200) + +- Exchange hashpartitioning(ss_list_price#1, 5) +- *(16) HashAggregate(keys=[ss_list_price#1], functions=[partial_avg(UnscaledValue(ss_list_price#1)), partial_count(ss_list_price#1)]) +- *(16) Project [ss_list_price#1] +- *(16) Filter (((isnotnull(ss_quantity#2) && (ss_quantity#2 >= 26)) && (ss_quantity#2 <= 30)) && ((((ss_list_price#1 >= 154.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 164.00)) || ((ss_coupon_amt#3 >= 7326.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 8326.00))) || ((ss_wholesale_cost#4 >= 7.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 27.00)))) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt index 2b1d35702..a64d8fda6 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt @@ -5,91 +5,91 @@ CollectLimit BroadcastNestedLoopJoin BroadcastNestedLoopJoin WholeStageCodegen - HashAggregate [count(ss_list_price),sum,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count] [count(ss_list_price),sum,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),B1_CNTD,B1_LP,B1_CNT,count,count] + HashAggregate [count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count,count(ss_list_price),count] [B1_CNTD,B1_CNT,count,count(ss_list_price),B1_LP,avg(UnscaledValue(ss_list_price)),sum,count,count(ss_list_price),count] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [count(ss_list_price),count,sum,sum,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),ss_list_price,count,count] [count(ss_list_price),count,sum,sum,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count,count] - HashAggregate [count(ss_list_price),sum,sum,count,count,avg(UnscaledValue(ss_list_price)),ss_list_price,count,count] [count(ss_list_price),sum,sum,count,count,avg(UnscaledValue(ss_list_price)),count,count] + HashAggregate [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count,sum,count(ss_list_price),count,ss_list_price,count] [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count,count,sum,count(ss_list_price),count,count] + HashAggregate [count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count,sum,count,ss_list_price,count] [count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count,sum,count,count] InputAdapter Exchange [ss_list_price] #2 WholeStageCodegen - HashAggregate [count(ss_list_price),sum,count,avg(UnscaledValue(ss_list_price)),ss_list_price,count] [count(ss_list_price),sum,sum,count,count,avg(UnscaledValue(ss_list_price)),count,count] + HashAggregate [count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,sum,ss_list_price,count] [count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count,sum,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] BroadcastExchange #3 WholeStageCodegen - HashAggregate [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,count(ss_list_price),sum] [count,B2_CNT,count,count(ss_list_price),B2_LP,avg(UnscaledValue(ss_list_price)),count,B2_CNTD,count(ss_list_price),sum] + HashAggregate [count,avg(UnscaledValue(ss_list_price)),count,count(ss_list_price),count(ss_list_price),count,sum] [B2_LP,count,avg(UnscaledValue(ss_list_price)),count,count(ss_list_price),B2_CNT,count(ss_list_price),count,B2_CNTD,sum] InputAdapter Exchange #4 WholeStageCodegen - HashAggregate [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,ss_list_price,sum,count,count,count(ss_list_price),sum] [count,count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,sum,count,count,count(ss_list_price),sum] - HashAggregate [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,ss_list_price,sum,count,sum] [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,sum,count,sum] + HashAggregate [count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),count,sum,sum,ss_list_price,count,count] [count,count,avg(UnscaledValue(ss_list_price)),count,count(ss_list_price),count(ss_list_price),count,sum,sum,count,count] + HashAggregate [count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,sum,sum,ss_list_price,count] [count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,sum,sum,count] InputAdapter Exchange [ss_list_price] #5 WholeStageCodegen - HashAggregate [count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),ss_list_price,sum,count] [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,sum,count,sum] + HashAggregate [count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,ss_list_price,count] [count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,sum,sum,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] BroadcastExchange #6 WholeStageCodegen - HashAggregate [count(ss_list_price),count,count,count,count(ss_list_price),sum,avg(UnscaledValue(ss_list_price))] [count(ss_list_price),B3_CNTD,B3_CNT,count,B3_LP,count,count,count(ss_list_price),sum,avg(UnscaledValue(ss_list_price))] + HashAggregate [count,avg(UnscaledValue(ss_list_price)),sum,count,count(ss_list_price),count,count(ss_list_price)] [B3_CNTD,count,avg(UnscaledValue(ss_list_price)),sum,count,B3_CNT,B3_LP,count(ss_list_price),count,count(ss_list_price)] InputAdapter Exchange #7 WholeStageCodegen - HashAggregate [count(ss_list_price),sum,count,count,ss_list_price,count,count(ss_list_price),count,count,sum,avg(UnscaledValue(ss_list_price))] [count(ss_list_price),sum,count,count,count,count,count(ss_list_price),count,count,sum,avg(UnscaledValue(ss_list_price))] - HashAggregate [sum,count,ss_list_price,count,count(ss_list_price),count,count,sum,avg(UnscaledValue(ss_list_price))] [sum,count,count,count(ss_list_price),count,count,sum,avg(UnscaledValue(ss_list_price))] + HashAggregate [avg(UnscaledValue(ss_list_price)),sum,count,count,count,ss_list_price,count(ss_list_price),count,count,count(ss_list_price),sum] [count,avg(UnscaledValue(ss_list_price)),sum,count,count,count,count(ss_list_price),count,count,count(ss_list_price),sum] + HashAggregate [avg(UnscaledValue(ss_list_price)),sum,count,count,count,ss_list_price,count,count(ss_list_price),sum] [avg(UnscaledValue(ss_list_price)),sum,count,count,count,count,count(ss_list_price),sum] InputAdapter Exchange [ss_list_price] #8 WholeStageCodegen - HashAggregate [sum,ss_list_price,count(ss_list_price),count,count,avg(UnscaledValue(ss_list_price))] [sum,count,count,count(ss_list_price),count,count,sum,avg(UnscaledValue(ss_list_price))] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,ss_list_price,count(ss_list_price),sum] [avg(UnscaledValue(ss_list_price)),sum,count,count,count,count,count(ss_list_price),sum] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] BroadcastExchange #9 WholeStageCodegen - HashAggregate [count,count,count(ss_list_price),count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum] [count,count,count(ss_list_price),count,avg(UnscaledValue(ss_list_price)),B4_LP,count(ss_list_price),B4_CNTD,sum,B4_CNT] + HashAggregate [count,sum,count,avg(UnscaledValue(ss_list_price)),count,count(ss_list_price),count(ss_list_price)] [B4_CNT,B4_CNTD,count,sum,count,avg(UnscaledValue(ss_list_price)),count,B4_LP,count(ss_list_price),count(ss_list_price)] InputAdapter Exchange #10 WholeStageCodegen - HashAggregate [count,count,count(ss_list_price),count,count,count,avg(UnscaledValue(ss_list_price)),ss_list_price,count(ss_list_price),sum,sum] [count,count,count,count(ss_list_price),count,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,sum] - HashAggregate [count,count,count(ss_list_price),count,count,avg(UnscaledValue(ss_list_price)),ss_list_price,sum,sum] [count,count,count(ss_list_price),count,count,avg(UnscaledValue(ss_list_price)),sum,sum] + HashAggregate [sum,count,count,sum,count,avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),count(ss_list_price),ss_list_price] [sum,count,count,count,sum,count,avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),count(ss_list_price)] + HashAggregate [sum,count,sum,count,avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),ss_list_price] [sum,count,sum,count,avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price)] InputAdapter Exchange [ss_list_price] #11 WholeStageCodegen - HashAggregate [count,count(ss_list_price),count,avg(UnscaledValue(ss_list_price)),ss_list_price,sum] [count,count,count(ss_list_price),count,count,avg(UnscaledValue(ss_list_price)),sum,sum] + HashAggregate [sum,count,avg(UnscaledValue(ss_list_price)),count,count(ss_list_price),ss_list_price] [sum,count,sum,count,avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price)] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] BroadcastExchange #12 WholeStageCodegen - HashAggregate [count(ss_list_price),count,sum,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count] [count(ss_list_price),count,B5_LP,sum,avg(UnscaledValue(ss_list_price)),B5_CNT,count(ss_list_price),count,B5_CNTD,count] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,sum,count(ss_list_price),count(ss_list_price),count] [avg(UnscaledValue(ss_list_price)),count,B5_CNT,B5_CNTD,count,sum,count(ss_list_price),count(ss_list_price),count,B5_LP] InputAdapter Exchange #13 WholeStageCodegen - HashAggregate [count(ss_list_price),sum,avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),count,count,ss_list_price,count,count,count] [count(ss_list_price),count,sum,avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),count,count,count,count,count] - HashAggregate [sum,avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),count,ss_list_price,count,count,count] [sum,avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),count,count,count,count] + HashAggregate [avg(UnscaledValue(ss_list_price)),sum,count,sum,count(ss_list_price),count,count(ss_list_price),count,ss_list_price,count,count] [avg(UnscaledValue(ss_list_price)),count,sum,count,sum,count(ss_list_price),count,count(ss_list_price),count,count,count] + HashAggregate [avg(UnscaledValue(ss_list_price)),sum,count,sum,count(ss_list_price),count,ss_list_price,count,count] [avg(UnscaledValue(ss_list_price)),sum,count,sum,count(ss_list_price),count,count,count] InputAdapter Exchange [ss_list_price] #14 WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),count,ss_list_price,count] [sum,avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),count,count,count,count] + HashAggregate [avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),ss_list_price,count,count] [avg(UnscaledValue(ss_list_price)),sum,count,sum,count(ss_list_price),count,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] BroadcastExchange #15 WholeStageCodegen - HashAggregate [count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count,count(ss_list_price),sum] [count,B6_CNT,B6_LP,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count,B6_CNTD,count(ss_list_price),sum] + HashAggregate [count,sum,count(ss_list_price),count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count] [count,B6_CNTD,B6_LP,sum,count(ss_list_price),count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),B6_CNT,count] InputAdapter Exchange #16 WholeStageCodegen - HashAggregate [sum,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count,ss_list_price,count(ss_list_price),count,count,count,sum] [count,sum,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count,count(ss_list_price),count,count,count,sum] - HashAggregate [sum,avg(UnscaledValue(ss_list_price)),count,count,ss_list_price,count(ss_list_price),count,count,sum] [sum,avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),count,count,sum] + HashAggregate [count,count,count,sum,count(ss_list_price),count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count,count,ss_list_price] [count,count,count,sum,count(ss_list_price),count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count,count] + HashAggregate [count,count,count,sum,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count,ss_list_price] [count,count,count,sum,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count] InputAdapter Exchange [ss_list_price] #17 WholeStageCodegen - HashAggregate [sum,avg(UnscaledValue(ss_list_price)),ss_list_price,count(ss_list_price),count,count] [sum,avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),count,count,sum] + HashAggregate [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,ss_list_price] [count,count,count,sum,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/explain.txt index 48094eb90..f4df69036 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST,s_store_id#3 ASC NULLS FIRST,s_store_name#4 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,s_store_id#3,s_store_name#4,store_sales_quantity#5,store_returns_quantity#6,catalog_sales_quantity#7]) +- *(9) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4], functions=[sum(cast(ss_quantity#8 as bigint)), sum(cast(sr_return_quantity#9 as bigint)), sum(cast(cs_quantity#10 as bigint))]) - +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4, 200) + +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4, 5) +- *(8) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4], functions=[partial_sum(cast(ss_quantity#8 as bigint)), partial_sum(cast(sr_return_quantity#9 as bigint)), partial_sum(cast(cs_quantity#10 as bigint))]) +- *(8) Project [ss_quantity#8, sr_return_quantity#9, cs_quantity#10, s_store_id#3, s_store_name#4, i_item_id#1, i_item_desc#2] +- *(8) BroadcastHashJoin [ss_item_sk#11], [i_item_sk#12], Inner, BuildRight @@ -22,12 +22,12 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_des : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#11,ss_customer_sk#25,ss_store_sk#13,ss_ticket_number#26,ss_quantity#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct + : : : : : +- *(2) Filter ((isnotnull(cs_item_sk#24) && isnotnull(cs_bill_customer_sk#23)) && isnotnull(cs_sold_date_sk#15)) + : : : : : +- *(2) FileScan parquet default.catalog_sales[cs_sold_date_sk#15,cs_bill_customer_sk#23,cs_item_sk#24,cs_quantity#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(3) Project [d_date_sk#20] : : : : +- *(3) Filter ((((isnotnull(d_moy#28) && isnotnull(d_year#29)) && (d_moy#28 = 9)) && (d_year#29 = 1999)) && isnotnull(d_date_sk#20)) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt index 64fa8afa9..12dc662c5 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt @@ -1,38 +1,38 @@ -TakeOrderedAndProject [store_returns_quantity,i_item_id,i_item_desc,store_sales_quantity,s_store_id,catalog_sales_quantity,s_store_name] +TakeOrderedAndProject [s_store_id,store_returns_quantity,s_store_name,catalog_sales_quantity,i_item_desc,i_item_id,store_sales_quantity] WholeStageCodegen - HashAggregate [sum(cast(cs_quantity as bigint)),i_item_id,i_item_desc,sum,sum(cast(ss_quantity as bigint)),sum,sum(cast(sr_return_quantity as bigint)),s_store_id,sum,s_store_name] [sum(cast(cs_quantity as bigint)),store_returns_quantity,sum,sum(cast(ss_quantity as bigint)),store_sales_quantity,sum,sum(cast(sr_return_quantity as bigint)),sum,catalog_sales_quantity] + HashAggregate [s_store_id,s_store_name,sum,i_item_desc,sum(cast(ss_quantity as bigint)),sum(cast(sr_return_quantity as bigint)),sum,sum(cast(cs_quantity as bigint)),i_item_id,sum] [store_returns_quantity,sum,catalog_sales_quantity,sum(cast(ss_quantity as bigint)),sum(cast(sr_return_quantity as bigint)),sum,sum(cast(cs_quantity as bigint)),sum,store_sales_quantity] InputAdapter Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 WholeStageCodegen - HashAggregate [i_item_id,sum,i_item_desc,sum,sum,sum,s_store_id,sr_return_quantity,sum,sum,ss_quantity,cs_quantity,s_store_name] [sum,sum,sum,sum,sum,sum] - Project [s_store_id,s_store_name,ss_quantity,cs_quantity,sr_return_quantity,i_item_desc,i_item_id] + HashAggregate [cs_quantity,s_store_id,sr_return_quantity,s_store_name,sum,ss_quantity,sum,sum,i_item_desc,sum,sum,i_item_id,sum] [sum,sum,sum,sum,sum,sum] + Project [cs_quantity,ss_quantity,s_store_name,i_item_id,sr_return_quantity,s_store_id,i_item_desc] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [s_store_id,s_store_name,ss_quantity,ss_item_sk,cs_quantity,sr_return_quantity] + Project [cs_quantity,ss_item_sk,ss_quantity,s_store_name,sr_return_quantity,s_store_id] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity] + Project [cs_quantity,ss_item_sk,ss_quantity,sr_return_quantity,ss_store_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk] + Project [cs_quantity,ss_item_sk,ss_quantity,cs_sold_date_sk,sr_return_quantity,ss_store_sk] BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,sr_returned_date_sk] + Project [cs_quantity,ss_item_sk,ss_quantity,cs_sold_date_sk,sr_return_quantity,ss_store_sk,sr_returned_date_sk] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,ss_sold_date_sk,sr_returned_date_sk] + Project [cs_quantity,ss_item_sk,ss_quantity,cs_sold_date_sk,ss_sold_date_sk,sr_return_quantity,ss_store_sk,sr_returned_date_sk] BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] - Project [ss_quantity,ss_item_sk,ss_store_sk,sr_return_quantity,sr_customer_sk,sr_item_sk,ss_sold_date_sk,sr_returned_date_sk] - BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] - Project [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] - Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] - Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + Project [sr_item_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,ss_store_sk,sr_customer_sk,sr_returned_date_sk] + BroadcastHashJoin [ss_customer_sk,sr_item_sk,ss_item_sk,sr_ticket_number,sr_customer_sk,ss_ticket_number] + Project [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_sold_date_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] - Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] - Scan parquet default.store_returns [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] + Project [sr_ticket_number,sr_item_sk,sr_return_quantity,sr_customer_sk,sr_returned_date_sk] + Filter [sr_ticket_number,sr_item_sk,sr_customer_sk,sr_returned_date_sk] + Scan parquet default.store_returns [sr_ticket_number,sr_item_sk,sr_return_quantity,sr_customer_sk,sr_returned_date_sk] [sr_ticket_number,sr_item_sk,sr_return_quantity,sr_customer_sk,sr_returned_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Filter [cs_item_sk,cs_bill_customer_sk,cs_sold_date_sk] Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] InputAdapter BroadcastExchange #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt index 8d08a8b03..55173cb25 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[d_year#1 ASC NULLS FIRST,sum_agg#2 DESC NULLS LAST,brand_id#3 ASC NULLS FIRST], output=[d_year#1,brand_id#3,brand#4,sum_agg#2]) +- *(4) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[sum(UnscaledValue(ss_ext_sales_price#7))]) - +- Exchange hashpartitioning(d_year#1, i_brand#5, i_brand_id#6, 200) + +- Exchange hashpartitioning(d_year#1, i_brand#5, i_brand_id#6, 5) +- *(3) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#7))]) +- *(3) Project [d_year#1, ss_ext_sales_price#7, i_brand_id#6, i_brand#5] +- *(3) BroadcastHashJoin [ss_item_sk#8], [i_item_sk#9], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt index a8432c52f..752b64b70 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt @@ -1,10 +1,10 @@ TakeOrderedAndProject [d_year,sum_agg,brand_id,brand] WholeStageCodegen - HashAggregate [d_year,i_brand,sum(UnscaledValue(ss_ext_sales_price)),i_brand_id,sum] [brand_id,sum(UnscaledValue(ss_ext_sales_price)),sum_agg,sum,brand] + HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),i_brand_id,sum,d_year,i_brand] [sum(UnscaledValue(ss_ext_sales_price)),brand,sum,sum_agg,brand_id] InputAdapter Exchange [d_year,i_brand,i_brand_id] #1 WholeStageCodegen - HashAggregate [sum,d_year,i_brand,ss_ext_sales_price,i_brand_id,sum] [sum,sum] + HashAggregate [i_brand_id,sum,sum,d_year,ss_ext_sales_price,i_brand] [sum,sum] Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [d_year,ss_item_sk,ss_ext_sales_price] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt index 758cb7159..d96b8f5b0 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt @@ -8,7 +8,7 @@ TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST,c_salu : : +- *(11) BroadcastHashJoin [ctr_state#18], [ctr_state#18#19], Inner, BuildRight, (cast(ctr_total_return#13 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#20) : : :- *(11) Filter isnotnull(ctr_total_return#13) : : : +- *(11) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[sum(UnscaledValue(wr_return_amt#23))]) - : : : +- Exchange hashpartitioning(wr_returning_customer_sk#21, ca_state#22, 200) + : : : +- Exchange hashpartitioning(wr_returning_customer_sk#21, ca_state#22, 5) : : : +- *(3) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[partial_sum(UnscaledValue(wr_return_amt#23))]) : : : +- *(3) Project [wr_returning_customer_sk#21, wr_return_amt#23, ca_state#22] : : : +- *(3) BroadcastHashJoin [wr_returning_addr_sk#24], [cast(ca_address_sk#15 as bigint)], Inner, BuildRight @@ -28,10 +28,10 @@ TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST,c_salu : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) : : +- *(8) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#20) : : +- *(8) HashAggregate(keys=[ctr_state#18], functions=[avg(ctr_total_return#13)]) - : : +- Exchange hashpartitioning(ctr_state#18, 200) + : : +- Exchange hashpartitioning(ctr_state#18, 5) : : +- *(7) HashAggregate(keys=[ctr_state#18], functions=[partial_avg(ctr_total_return#13)]) : : +- *(7) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[sum(UnscaledValue(wr_return_amt#23))]) - : : +- Exchange hashpartitioning(wr_returning_customer_sk#21, ca_state#22, 200) + : : +- Exchange hashpartitioning(wr_returning_customer_sk#21, ca_state#22, 5) : : +- *(6) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[partial_sum(UnscaledValue(wr_return_amt#23))]) : : +- *(6) Project [wr_returning_customer_sk#21, wr_return_amt#23, ca_state#22] : : +- *(6) BroadcastHashJoin [wr_returning_addr_sk#24], [cast(ca_address_sk#15 as bigint)], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt index 1be056d32..877ba4e42 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt @@ -1,17 +1,17 @@ -TakeOrderedAndProject [ctr_total_return,c_birth_day,c_login,c_salutation,c_birth_month,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,c_birth_year,c_last_review_date] +TakeOrderedAndProject [c_last_review_date,ctr_total_return,c_email_address,c_birth_month,c_login,c_last_name,c_customer_id,c_birth_country,c_birth_day,c_birth_year,c_preferred_cust_flag,c_first_name,c_salutation] WholeStageCodegen - Project [ctr_total_return,c_birth_day,c_login,c_salutation,c_birth_month,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,c_birth_year,c_last_review_date] + Project [c_last_review_date,ctr_total_return,c_email_address,c_birth_month,c_login,c_last_name,c_customer_id,c_birth_country,c_birth_day,c_birth_year,c_preferred_cust_flag,c_first_name,c_salutation] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_preferred_cust_flag,c_current_addr_sk,ctr_total_return,c_email_address,c_customer_id,c_birth_year,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] + Project [c_birth_day,c_customer_id,c_birth_country,c_login,ctr_total_return,c_birth_year,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,c_salutation,c_current_addr_sk,c_last_review_date,c_birth_month] BroadcastHashJoin [ctr_customer_sk,c_customer_sk] Project [ctr_customer_sk,ctr_total_return] BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] Filter [ctr_total_return] - HashAggregate [wr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(wr_return_amt))] [ctr_total_return,ctr_state,sum(UnscaledValue(wr_return_amt)),sum,ctr_customer_sk] + HashAggregate [wr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(wr_return_amt))] [sum(UnscaledValue(wr_return_amt)),ctr_state,sum,ctr_total_return,ctr_customer_sk] InputAdapter Exchange [wr_returning_customer_sk,ca_state] #1 WholeStageCodegen - HashAggregate [wr_return_amt,sum,wr_returning_customer_sk,sum,ca_state] [sum,sum] + HashAggregate [sum,sum,wr_returning_customer_sk,wr_return_amt,ca_state] [sum,sum] Project [wr_returning_customer_sk,wr_return_amt,ca_state] BroadcastHashJoin [wr_returning_addr_sk,ca_address_sk] Project [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] @@ -35,16 +35,16 @@ TakeOrderedAndProject [ctr_total_return,c_birth_day,c_login,c_salutation,c_birth BroadcastExchange #4 WholeStageCodegen Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] - HashAggregate [ctr_state,sum,count,avg(ctr_total_return)] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),sum,ctr_state,avg(ctr_total_return),count] + HashAggregate [ctr_state,sum,count,avg(ctr_total_return)] [sum,avg(ctr_total_return),count,ctr_state,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] InputAdapter Exchange [ctr_state] #5 WholeStageCodegen - HashAggregate [ctr_state,count,sum,ctr_total_return,count,sum] [sum,count,sum,count] + HashAggregate [sum,ctr_state,sum,ctr_total_return,count,count] [sum,count,sum,count] HashAggregate [wr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(wr_return_amt))] [sum(UnscaledValue(wr_return_amt)),ctr_state,ctr_total_return,sum] InputAdapter Exchange [wr_returning_customer_sk,ca_state] #6 WholeStageCodegen - HashAggregate [sum,wr_return_amt,sum,wr_returning_customer_sk,ca_state] [sum,sum] + HashAggregate [sum,sum,wr_returning_customer_sk,wr_return_amt,ca_state] [sum,sum] Project [wr_returning_customer_sk,wr_return_amt,ca_state] BroadcastHashJoin [wr_returning_addr_sk,ca_address_sk] Project [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] @@ -59,9 +59,9 @@ TakeOrderedAndProject [ctr_total_return,c_birth_day,c_login,c_salutation,c_birth InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [c_preferred_cust_flag,c_current_addr_sk,c_email_address,c_customer_id,c_birth_year,c_customer_sk,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] + Project [c_birth_day,c_customer_id,c_birth_country,c_login,c_birth_year,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address,c_salutation,c_current_addr_sk,c_last_review_date,c_birth_month] Filter [c_customer_sk,c_current_addr_sk] - Scan parquet default.customer [c_preferred_cust_flag,c_current_addr_sk,c_email_address,c_customer_id,c_birth_year,c_customer_sk,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] [c_preferred_cust_flag,c_current_addr_sk,c_email_address,c_customer_id,c_birth_year,c_customer_sk,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] + Scan parquet default.customer [c_birth_day,c_customer_id,c_birth_country,c_login,c_birth_year,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address,c_salutation,c_current_addr_sk,c_last_review_date,c_birth_month] [c_birth_day,c_customer_id,c_birth_country,c_login,c_birth_year,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address,c_salutation,c_current_addr_sk,c_last_review_date,c_birth_month] InputAdapter BroadcastExchange #8 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt index 22739d4c7..991349307 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt @@ -1,6 +1,6 @@ == Physical Plan == *(25) Sort [ca_county#1 ASC NULLS FIRST], true, 0 -+- Exchange rangepartitioning(ca_county#1 ASC NULLS FIRST, 200) ++- Exchange rangepartitioning(ca_county#1 ASC NULLS FIRST, 5) +- *(24) Project [ca_county#1, d_year#2, CheckOverflow((promote_precision(web_sales#3) / promote_precision(web_sales#4)), DecimalType(37,20)) AS web_q1_q2_increase#5, CheckOverflow((promote_precision(store_sales#6) / promote_precision(store_sales#7)), DecimalType(37,20)) AS store_q1_q2_increase#8, CheckOverflow((promote_precision(web_sales#9) / promote_precision(web_sales#3)), DecimalType(37,20)) AS web_q2_q3_increase#10, CheckOverflow((promote_precision(store_sales#11) / promote_precision(store_sales#6)), DecimalType(37,20)) AS store_q2_q3_increase#12] +- *(24) BroadcastHashJoin [ca_county#13], [ca_county#14], Inner, BuildRight, (CASE WHEN (web_sales#3 > 0.00) THEN CheckOverflow((promote_precision(web_sales#9) / promote_precision(web_sales#3)), DecimalType(37,20)) ELSE null END > CASE WHEN (store_sales#6 > 0.00) THEN CheckOverflow((promote_precision(store_sales#11) / promote_precision(store_sales#6)), DecimalType(37,20)) ELSE null END) :- *(24) Project [ca_county#1, d_year#2, store_sales#7, store_sales#6, store_sales#11, ca_county#13, web_sales#4, web_sales#3] @@ -10,7 +10,7 @@ : : : +- *(24) BroadcastHashJoin [ca_county#16], [ca_county#17], Inner, BuildRight : : : :- *(24) BroadcastHashJoin [ca_county#1], [ca_county#16], Inner, BuildRight : : : : :- *(24) HashAggregate(keys=[ca_county#1, d_qoy#18, d_year#2], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) - : : : : : +- Exchange hashpartitioning(ca_county#1, d_qoy#18, d_year#2, 200) + : : : : : +- Exchange hashpartitioning(ca_county#1, d_qoy#18, d_year#2, 5) : : : : : +- *(3) HashAggregate(keys=[ca_county#1, d_qoy#18, d_year#2], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#19))]) : : : : : +- *(3) Project [ss_ext_sales_price#19, d_year#2, d_qoy#18, ca_county#1] : : : : : +- *(3) BroadcastHashJoin [ss_addr_sk#20], [ca_address_sk#21], Inner, BuildRight @@ -29,7 +29,7 @@ : : : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#21,ca_county#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : : : : +- *(7) HashAggregate(keys=[ca_county#16, d_qoy#24, d_year#25], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) - : : : : +- Exchange hashpartitioning(ca_county#16, d_qoy#24, d_year#25, 200) + : : : : +- Exchange hashpartitioning(ca_county#16, d_qoy#24, d_year#25, 5) : : : : +- *(6) HashAggregate(keys=[ca_county#16, d_qoy#24, d_year#25], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#19))]) : : : : +- *(6) Project [ss_ext_sales_price#19, d_year#25, d_qoy#24, ca_county#16] : : : : +- *(6) BroadcastHashJoin [ss_addr_sk#20], [ca_address_sk#26], Inner, BuildRight @@ -45,7 +45,7 @@ : : : : +- ReusedExchange [ca_address_sk#26, ca_county#16], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : : : +- *(11) HashAggregate(keys=[ca_county#17, d_qoy#28, d_year#29], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) - : : : +- Exchange hashpartitioning(ca_county#17, d_qoy#28, d_year#29, 200) + : : : +- Exchange hashpartitioning(ca_county#17, d_qoy#28, d_year#29, 5) : : : +- *(10) HashAggregate(keys=[ca_county#17, d_qoy#28, d_year#29], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#19))]) : : : +- *(10) Project [ss_ext_sales_price#19, d_year#29, d_qoy#28, ca_county#17] : : : +- *(10) BroadcastHashJoin [ss_addr_sk#20], [ca_address_sk#30], Inner, BuildRight @@ -61,7 +61,7 @@ : : : +- ReusedExchange [ca_address_sk#30, ca_county#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : : +- *(15) HashAggregate(keys=[ca_county#13, d_qoy#32, d_year#33], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) - : : +- Exchange hashpartitioning(ca_county#13, d_qoy#32, d_year#33, 200) + : : +- Exchange hashpartitioning(ca_county#13, d_qoy#32, d_year#33, 5) : : +- *(14) HashAggregate(keys=[ca_county#13, d_qoy#32, d_year#33], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#34))]) : : +- *(14) Project [ws_ext_sales_price#34, d_year#33, d_qoy#32, ca_county#13] : : +- *(14) BroadcastHashJoin [ws_bill_addr_sk#35], [ca_address_sk#36], Inner, BuildRight @@ -74,7 +74,7 @@ : : +- ReusedExchange [ca_address_sk#36, ca_county#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : +- *(19) HashAggregate(keys=[ca_county#15, d_qoy#39, d_year#40], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) - : +- Exchange hashpartitioning(ca_county#15, d_qoy#39, d_year#40, 200) + : +- Exchange hashpartitioning(ca_county#15, d_qoy#39, d_year#40, 5) : +- *(18) HashAggregate(keys=[ca_county#15, d_qoy#39, d_year#40], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#34))]) : +- *(18) Project [ws_ext_sales_price#34, d_year#40, d_qoy#39, ca_county#15] : +- *(18) BroadcastHashJoin [ws_bill_addr_sk#35], [ca_address_sk#41], Inner, BuildRight @@ -87,7 +87,7 @@ : +- ReusedExchange [ca_address_sk#41, ca_county#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) +- *(23) HashAggregate(keys=[ca_county#14, d_qoy#43, d_year#44], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) - +- Exchange hashpartitioning(ca_county#14, d_qoy#43, d_year#44, 200) + +- Exchange hashpartitioning(ca_county#14, d_qoy#43, d_year#44, 5) +- *(22) HashAggregate(keys=[ca_county#14, d_qoy#43, d_year#44], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#34))]) +- *(22) Project [ws_ext_sales_price#34, d_year#44, d_qoy#43, ca_county#14] +- *(22) BroadcastHashJoin [ws_bill_addr_sk#35], [ca_address_sk#45], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt index 78c6383a4..ce6e8c51c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt @@ -3,19 +3,19 @@ WholeStageCodegen InputAdapter Exchange [ca_county] #1 WholeStageCodegen - Project [web_sales,store_sales,store_sales,web_sales,d_year,store_sales,web_sales,ca_county] - BroadcastHashJoin [web_sales,ca_county,store_sales,ca_county,store_sales,web_sales] - Project [d_year,store_sales,store_sales,ca_county,web_sales,ca_county,store_sales,web_sales] - BroadcastHashJoin [ca_county,ca_county,store_sales,web_sales,store_sales,web_sales] + Project [d_year,store_sales,web_sales,web_sales,store_sales,ca_county,store_sales,web_sales] + BroadcastHashJoin [web_sales,ca_county,store_sales,store_sales,ca_county,web_sales] + Project [web_sales,ca_county,store_sales,web_sales,d_year,store_sales,store_sales,ca_county] + BroadcastHashJoin [store_sales,web_sales,store_sales,ca_county,ca_county,web_sales] BroadcastHashJoin [ca_county,ca_county] - Project [d_year,store_sales,store_sales,ca_county,store_sales] + Project [store_sales,d_year,store_sales,store_sales,ca_county] BroadcastHashJoin [ca_county,ca_county] BroadcastHashJoin [ca_county,ca_county] - HashAggregate [d_year,d_qoy,ca_county,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + HashAggregate [sum,sum(UnscaledValue(ss_ext_sales_price)),d_year,d_qoy,ca_county] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] InputAdapter Exchange [ca_county,d_qoy,d_year] #2 WholeStageCodegen - HashAggregate [d_year,d_qoy,sum,ca_county,sum,ss_ext_sales_price] [sum,sum] + HashAggregate [sum,sum,d_year,ss_ext_sales_price,d_qoy,ca_county] [sum,sum] Project [ss_ext_sales_price,d_year,d_qoy,ca_county] BroadcastHashJoin [ss_addr_sk,ca_address_sk] Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] @@ -38,11 +38,11 @@ WholeStageCodegen InputAdapter BroadcastExchange #5 WholeStageCodegen - HashAggregate [d_qoy,d_year,sum(UnscaledValue(ss_ext_sales_price)),sum,ca_county] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),ca_county,d_year,d_qoy,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] InputAdapter Exchange [ca_county,d_qoy,d_year] #6 WholeStageCodegen - HashAggregate [sum,d_qoy,d_year,sum,ss_ext_sales_price,ca_county] [sum,sum] + HashAggregate [ca_county,d_year,d_qoy,ss_ext_sales_price,sum,sum] [sum,sum] Project [ss_ext_sales_price,d_year,d_qoy,ca_county] BroadcastHashJoin [ss_addr_sk,ca_address_sk] Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] @@ -61,11 +61,11 @@ WholeStageCodegen InputAdapter BroadcastExchange #8 WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(ss_ext_sales_price)),d_year,d_qoy,ca_county] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),ca_county,d_year,d_qoy,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] InputAdapter Exchange [ca_county,d_qoy,d_year] #9 WholeStageCodegen - HashAggregate [sum,d_year,d_qoy,sum,ss_ext_sales_price,ca_county] [sum,sum] + HashAggregate [sum,ca_county,ss_ext_sales_price,d_year,d_qoy,sum] [sum,sum] Project [ss_ext_sales_price,d_year,d_qoy,ca_county] BroadcastHashJoin [ss_addr_sk,ca_address_sk] Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] @@ -84,11 +84,11 @@ WholeStageCodegen InputAdapter BroadcastExchange #11 WholeStageCodegen - HashAggregate [sum,ca_county,d_qoy,d_year,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + HashAggregate [ca_county,d_year,sum(UnscaledValue(ws_ext_sales_price)),sum,d_qoy] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] InputAdapter Exchange [ca_county,d_qoy,d_year] #12 WholeStageCodegen - HashAggregate [sum,ws_ext_sales_price,ca_county,d_qoy,d_year,sum] [sum,sum] + HashAggregate [ca_county,d_year,sum,d_qoy,ws_ext_sales_price,sum] [sum,sum] Project [ws_ext_sales_price,d_year,d_qoy,ca_county] BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] @@ -103,11 +103,11 @@ WholeStageCodegen InputAdapter BroadcastExchange #13 WholeStageCodegen - HashAggregate [d_qoy,ca_county,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + HashAggregate [sum,sum(UnscaledValue(ws_ext_sales_price)),ca_county,d_qoy,d_year] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] InputAdapter Exchange [ca_county,d_qoy,d_year] #14 WholeStageCodegen - HashAggregate [d_qoy,ca_county,d_year,ws_ext_sales_price,sum,sum] [sum,sum] + HashAggregate [sum,sum,ws_ext_sales_price,ca_county,d_qoy,d_year] [sum,sum] Project [ws_ext_sales_price,d_year,d_qoy,ca_county] BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] @@ -122,11 +122,11 @@ WholeStageCodegen InputAdapter BroadcastExchange #15 WholeStageCodegen - HashAggregate [sum,d_qoy,ca_county,d_year,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + HashAggregate [sum,d_year,d_qoy,sum(UnscaledValue(ws_ext_sales_price)),ca_county] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] InputAdapter Exchange [ca_county,d_qoy,d_year] #16 WholeStageCodegen - HashAggregate [sum,sum,d_qoy,ca_county,ws_ext_sales_price,d_year] [sum,sum] + HashAggregate [sum,d_year,d_qoy,ws_ext_sales_price,sum,ca_county] [sum,sum] Project [ws_ext_sales_price,d_year,d_qoy,ca_county] BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt index 26c877156..f677286fe 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt @@ -16,7 +16,7 @@ CollectLimit 100 : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) : +- *(4) Filter isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#8) : +- *(4) HashAggregate(keys=[cs_item_sk#5], functions=[avg(UnscaledValue(cs_ext_discount_amt#7))]) - : +- Exchange hashpartitioning(cs_item_sk#5, 200) + : +- Exchange hashpartitioning(cs_item_sk#5, 5) : +- *(3) HashAggregate(keys=[cs_item_sk#5], functions=[partial_avg(UnscaledValue(cs_ext_discount_amt#7))]) : +- *(3) Project [cs_item_sk#5, cs_ext_discount_amt#7] : +- *(3) BroadcastHashJoin [cs_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt index 1da0b46e1..cd99b716a 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt @@ -19,11 +19,11 @@ CollectLimit BroadcastExchange #2 WholeStageCodegen Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))] - HashAggregate [cs_item_sk,sum,count,avg(UnscaledValue(cs_ext_discount_amt))] [avg(UnscaledValue(cs_ext_discount_amt)),(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),sum,count,cs_item_sk] + HashAggregate [cs_item_sk,sum,count,avg(UnscaledValue(cs_ext_discount_amt))] [count,avg(UnscaledValue(cs_ext_discount_amt)),sum,(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),cs_item_sk] InputAdapter Exchange [cs_item_sk] #3 WholeStageCodegen - HashAggregate [cs_ext_discount_amt,count,sum,cs_item_sk,sum,count] [sum,count,sum,count] + HashAggregate [count,sum,cs_item_sk,sum,cs_ext_discount_amt,count] [sum,count,sum,count] Project [cs_item_sk,cs_ext_discount_amt] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] Project [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt index e71e76c27..30008ef45 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt @@ -1,11 +1,11 @@ == Physical Plan == *(7) Sort [c_last_name#1 ASC NULLS FIRST, c_first_name#2 ASC NULLS FIRST, c_salutation#3 ASC NULLS FIRST, c_preferred_cust_flag#4 DESC NULLS LAST], true, 0 -+- Exchange rangepartitioning(c_last_name#1 ASC NULLS FIRST, c_first_name#2 ASC NULLS FIRST, c_salutation#3 ASC NULLS FIRST, c_preferred_cust_flag#4 DESC NULLS LAST, 200) ++- Exchange rangepartitioning(c_last_name#1 ASC NULLS FIRST, c_first_name#2 ASC NULLS FIRST, c_salutation#3 ASC NULLS FIRST, c_preferred_cust_flag#4 DESC NULLS LAST, 5) +- *(6) Project [c_last_name#1, c_first_name#2, c_salutation#3, c_preferred_cust_flag#4, ss_ticket_number#5, cnt#6] +- *(6) BroadcastHashJoin [ss_customer_sk#7], [c_customer_sk#8], Inner, BuildRight :- *(6) Filter ((cnt#6 >= 15) && (cnt#6 <= 20)) : +- *(6) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#7], functions=[count(1)]) - : +- Exchange hashpartitioning(ss_ticket_number#5, ss_customer_sk#7, 200) + : +- Exchange hashpartitioning(ss_ticket_number#5, ss_customer_sk#7, 5) : +- *(4) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#7], functions=[partial_count(1)]) : +- *(4) Project [ss_customer_sk#7, ss_ticket_number#5] : +- *(4) BroadcastHashJoin [ss_hdemo_sk#9], [hd_demo_sk#10], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt index 70572bb58..8518531e3 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt @@ -3,7 +3,7 @@ WholeStageCodegen InputAdapter Exchange [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag] #1 WholeStageCodegen - Project [ss_ticket_number,c_salutation,cnt,c_last_name,c_preferred_cust_flag,c_first_name] + Project [cnt,ss_ticket_number,c_last_name,c_preferred_cust_flag,c_first_name,c_salutation] BroadcastHashJoin [ss_customer_sk,c_customer_sk] Filter [cnt] HashAggregate [ss_ticket_number,ss_customer_sk,count,count(1)] [count(1),cnt,count] @@ -17,9 +17,9 @@ WholeStageCodegen BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] Filter [ss_sold_date_sk,ss_store_sk,ss_hdemo_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] [ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -41,6 +41,6 @@ WholeStageCodegen InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [c_preferred_cust_flag,c_customer_sk,c_last_name,c_first_name,c_salutation] + Project [c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_salutation] Filter [c_customer_sk] - Scan parquet default.customer [c_preferred_cust_flag,c_customer_sk,c_last_name,c_first_name,c_salutation] [c_preferred_cust_flag,c_customer_sk,c_last_name,c_first_name,c_salutation] + Scan parquet default.customer [c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_salutation] [c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_salutation] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/explain.txt index bce571c86..c79025c2f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[ca_state#1 ASC NULLS FIRST,cd_gender#2 ASC NULLS FIRST,cd_marital_status#3 ASC NULLS FIRST,aggOrder#4 ASC NULLS FIRST,cd_dep_employed_count#5 ASC NULLS FIRST,cd_dep_college_count#6 ASC NULLS FIRST], output=[ca_state#1,cd_gender#2,cd_marital_status#3,cnt1#7,min(cd_dep_count)#8,max(cd_dep_count)#9,avg(cd_dep_count)#10,cd_dep_employed_count#5,cnt2#11,min(cd_dep_employed_count)#12,max(cd_dep_employed_count)#13,avg(cd_dep_employed_count)#14,cd_dep_college_count#6,cnt3#15,min(cd_dep_college_count)#16,max(cd_dep_college_count)#17,avg(cd_dep_college_count)#18]) +- *(10) HashAggregate(keys=[ca_state#1, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6], functions=[count(1), min(cd_dep_count#19), max(cd_dep_count#19), avg(cast(cd_dep_count#19 as bigint)), min(cd_dep_employed_count#5), max(cd_dep_employed_count#5), avg(cast(cd_dep_employed_count#5 as bigint)), min(cd_dep_college_count#6), max(cd_dep_college_count#6), avg(cast(cd_dep_college_count#6 as bigint))]) - +- Exchange hashpartitioning(ca_state#1, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6, 200) + +- Exchange hashpartitioning(ca_state#1, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6, 5) +- *(9) HashAggregate(keys=[ca_state#1, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6], functions=[partial_count(1), partial_min(cd_dep_count#19), partial_max(cd_dep_count#19), partial_avg(cast(cd_dep_count#19 as bigint)), partial_min(cd_dep_employed_count#5), partial_max(cd_dep_employed_count#5), partial_avg(cast(cd_dep_employed_count#5 as bigint)), partial_min(cd_dep_college_count#6), partial_max(cd_dep_college_count#6), partial_avg(cast(cd_dep_college_count#6 as bigint))]) +- *(9) Project [ca_state#1, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6] +- *(9) BroadcastHashJoin [c_current_cdemo_sk#20], [cd_demo_sk#21], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/simplified.txt index 5fd7a355f..26426d01b 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/simplified.txt @@ -1,11 +1,11 @@ -TakeOrderedAndProject [cnt3,max(cd_dep_employed_count),max(cd_dep_count),min(cd_dep_count),min(cd_dep_college_count),min(cd_dep_employed_count),max(cd_dep_college_count),ca_state,avg(cd_dep_count),avg(cd_dep_college_count),avg(cd_dep_employed_count),aggOrder,cd_dep_college_count,cnt2,cd_marital_status,cd_gender,cd_dep_employed_count,cnt1] +TakeOrderedAndProject [cd_dep_employed_count,max(cd_dep_employed_count),cnt2,max(cd_dep_count),cd_gender,cd_dep_college_count,avg(cd_dep_count),avg(cd_dep_college_count),max(cd_dep_college_count),cnt3,cd_marital_status,ca_state,aggOrder,avg(cd_dep_employed_count),min(cd_dep_employed_count),cnt1,min(cd_dep_count),min(cd_dep_college_count)] WholeStageCodegen - HashAggregate [avg(cast(cd_dep_employed_count as bigint)),min,min(cd_dep_college_count),sum,max(cd_dep_college_count),sum,sum,ca_state,count,max,count,min,avg(cast(cd_dep_college_count as bigint)),max(cd_dep_count),max,avg(cast(cd_dep_count as bigint)),min(cd_dep_employed_count),cd_dep_college_count,count(1),cd_dep_count,min(cd_dep_count),cd_marital_status,cd_gender,count,max(cd_dep_employed_count),count,max,cd_dep_employed_count,min] [cnt3,avg(cast(cd_dep_employed_count as bigint)),min,max(cd_dep_employed_count),min(cd_dep_college_count),sum,max(cd_dep_college_count),max(cd_dep_count),sum,min(cd_dep_count),sum,min(cd_dep_college_count),min(cd_dep_employed_count),max(cd_dep_college_count),count,avg(cd_dep_count),max,count,min,avg(cast(cd_dep_college_count as bigint)),max(cd_dep_count),avg(cd_dep_college_count),max,avg(cd_dep_employed_count),avg(cast(cd_dep_count as bigint)),min(cd_dep_employed_count),aggOrder,count(1),cnt2,min(cd_dep_count),count,max(cd_dep_employed_count),count,max,min,cnt1] + HashAggregate [count,min,min(cd_dep_college_count),cd_dep_employed_count,min(cd_dep_employed_count),max,cd_gender,cd_dep_college_count,count,cd_dep_count,min(cd_dep_count),count,max,avg(cast(cd_dep_employed_count as bigint)),cd_marital_status,max(cd_dep_college_count),min,max(cd_dep_count),sum,ca_state,max(cd_dep_employed_count),sum,avg(cast(cd_dep_college_count as bigint)),min,count(1),sum,max,count,avg(cast(cd_dep_count as bigint))] [count,min,min(cd_dep_college_count),max(cd_dep_employed_count),min(cd_dep_employed_count),cnt2,max(cd_dep_count),max,count,min(cd_dep_count),avg(cd_dep_count),count,max,avg(cd_dep_college_count),avg(cast(cd_dep_employed_count as bigint)),max(cd_dep_college_count),cnt3,max(cd_dep_college_count),min,max(cd_dep_count),sum,aggOrder,avg(cd_dep_employed_count),max(cd_dep_employed_count),sum,avg(cast(cd_dep_college_count as bigint)),min,count(1),sum,min(cd_dep_employed_count),max,cnt1,min(cd_dep_count),min(cd_dep_college_count),count,avg(cast(cd_dep_count as bigint))] InputAdapter - Exchange [ca_state,cd_dep_college_count,cd_dep_count,cd_marital_status,cd_gender,cd_dep_employed_count] #1 + Exchange [cd_dep_employed_count,cd_gender,cd_dep_college_count,cd_dep_count,cd_marital_status,ca_state] #1 WholeStageCodegen - HashAggregate [sum,min,sum,sum,min,sum,sum,sum,max,ca_state,max,count,count,max,count,count,min,count,min,max,max,cd_dep_college_count,cd_dep_count,cd_marital_status,cd_gender,count,count,min,count,max,cd_dep_employed_count,min] [sum,min,sum,sum,min,sum,sum,sum,max,max,count,count,max,count,count,min,count,min,max,max,count,count,min,count,max,min] - Project [cd_dep_college_count,ca_state,cd_dep_employed_count,cd_marital_status,cd_dep_count,cd_gender] + HashAggregate [count,min,max,min,cd_dep_employed_count,count,max,cd_gender,cd_dep_college_count,count,cd_dep_count,count,max,count,max,cd_marital_status,min,sum,ca_state,max,sum,min,sum,sum,min,sum,max,min,count,sum,count,count] [count,min,max,min,count,max,count,count,max,count,max,min,sum,max,sum,min,sum,sum,min,sum,max,min,count,sum,count,count] + Project [cd_gender,cd_dep_count,ca_state,cd_dep_college_count,cd_marital_status,cd_dep_employed_count] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] Project [c_current_cdemo_sk,ca_state] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] @@ -60,6 +60,6 @@ TakeOrderedAndProject [cnt3,max(cd_dep_employed_count),max(cd_dep_count),min(cd_ InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [cd_dep_college_count,cd_dep_employed_count,cd_demo_sk,cd_marital_status,cd_dep_count,cd_gender] + Project [cd_gender,cd_dep_count,cd_dep_college_count,cd_marital_status,cd_demo_sk,cd_dep_employed_count] Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_dep_college_count,cd_dep_employed_count,cd_demo_sk,cd_marital_status,cd_dep_count,cd_gender] [cd_dep_college_count,cd_dep_employed_count,cd_demo_sk,cd_marital_status,cd_dep_count,cd_gender] + Scan parquet default.customer_demographics [cd_gender,cd_dep_count,cd_dep_college_count,cd_marital_status,cd_demo_sk,cd_dep_employed_count] [cd_gender,cd_dep_count,cd_dep_college_count,cd_marital_status,cd_demo_sk,cd_dep_employed_count] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/explain.txt index cf2d25ccb..a0ee3b344 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/explain.txt @@ -3,9 +3,9 @@ TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WH +- *(7) Project [gross_margin#4, i_category#2, i_class#5, lochierarchy#1, rank_within_parent#3] +- Window [rank(_w3#6) windowspecdefinition(_w1#7, _w2#8, _w3#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#3], [_w1#7, _w2#8], [_w3#6 ASC NULLS FIRST] +- *(6) Sort [_w1#7 ASC NULLS FIRST, _w2#8 ASC NULLS FIRST, _w3#6 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(_w1#7, _w2#8, 200) + +- Exchange hashpartitioning(_w1#7, _w2#8, 5) +- *(5) HashAggregate(keys=[i_category#2, i_class#5, spark_grouping_id#9], functions=[sum(UnscaledValue(ss_net_profit#10)), sum(UnscaledValue(ss_ext_sales_price#11))]) - +- Exchange hashpartitioning(i_category#2, i_class#5, spark_grouping_id#9, 200) + +- Exchange hashpartitioning(i_category#2, i_class#5, spark_grouping_id#9, 5) +- *(4) HashAggregate(keys=[i_category#2, i_class#5, spark_grouping_id#9], functions=[partial_sum(UnscaledValue(ss_net_profit#10)), partial_sum(UnscaledValue(ss_ext_sales_price#11))]) +- *(4) Expand [List(ss_ext_sales_price#11, ss_net_profit#10, i_category#12, i_class#13, 0), List(ss_ext_sales_price#11, ss_net_profit#10, i_category#12, null, 1), List(ss_ext_sales_price#11, ss_net_profit#10, null, null, 3)], [ss_ext_sales_price#11, ss_net_profit#10, i_category#2, i_class#5, spark_grouping_id#9] +- *(4) Project [ss_ext_sales_price#11, ss_net_profit#10, i_category#14 AS i_category#12, i_class#15 AS i_class#13] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt index ac9f32c0a..271b19e1b 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt @@ -1,6 +1,6 @@ -TakeOrderedAndProject [i_class,i_category,lochierarchy,gross_margin,rank_within_parent] +TakeOrderedAndProject [gross_margin,i_category,rank_within_parent,lochierarchy,i_class] WholeStageCodegen - Project [i_class,i_category,lochierarchy,gross_margin,rank_within_parent] + Project [gross_margin,i_category,rank_within_parent,lochierarchy,i_class] InputAdapter Window [_w3,_w1,_w2] WholeStageCodegen @@ -8,21 +8,21 @@ TakeOrderedAndProject [i_class,i_category,lochierarchy,gross_margin,rank_within_ InputAdapter Exchange [_w1,_w2] #1 WholeStageCodegen - HashAggregate [sum,i_class,i_category,sum,sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),spark_grouping_id] [_w2,sum,_w1,sum,lochierarchy,sum(UnscaledValue(ss_net_profit)),_w3,sum(UnscaledValue(ss_ext_sales_price)),gross_margin] + HashAggregate [i_category,sum,sum,sum(UnscaledValue(ss_net_profit)),i_class,sum(UnscaledValue(ss_ext_sales_price)),spark_grouping_id] [_w2,gross_margin,_w3,sum,_w1,sum,lochierarchy,sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price))] InputAdapter Exchange [i_category,i_class,spark_grouping_id] #2 WholeStageCodegen - HashAggregate [sum,sum,sum,i_class,i_category,sum,ss_ext_sales_price,spark_grouping_id,ss_net_profit] [sum,sum,sum,sum] + HashAggregate [i_category,sum,ss_net_profit,sum,ss_ext_sales_price,sum,i_class,sum,spark_grouping_id] [sum,sum,sum,sum] Expand [ss_ext_sales_price,ss_net_profit,i_category,i_class] Project [ss_ext_sales_price,ss_net_profit,i_category,i_class] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [i_class,ss_store_sk,ss_ext_sales_price,i_category,ss_net_profit] + Project [i_category,ss_store_sk,ss_ext_sales_price,i_class,ss_net_profit] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + Project [ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] Filter [ss_sold_date_sk,ss_item_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] [ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] InputAdapter BroadcastExchange #3 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt index d9db47342..9ffa2c5ee 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,i_current_price#3]) +- *(5) HashAggregate(keys=[i_item_id#1, i_item_desc#2, i_current_price#3], functions=[]) - +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, i_current_price#3, 200) + +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, i_current_price#3, 5) +- *(4) HashAggregate(keys=[i_item_id#1, i_item_desc#2, i_current_price#3], functions=[]) +- *(4) Project [i_item_id#1, i_item_desc#2, i_current_price#3] +- *(4) BroadcastHashJoin [i_item_sk#4], [cs_item_sk#5], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt index afe830f9d..919606c5d 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt @@ -9,11 +9,11 @@ TakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] BroadcastHashJoin [i_item_sk,cs_item_sk] Project [i_item_sk,i_item_id,i_item_desc,i_current_price] BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [i_current_price,i_item_sk,inv_date_sk,i_item_desc,i_item_id] + Project [i_current_price,i_item_sk,inv_date_sk,i_item_id,i_item_desc] BroadcastHashJoin [i_item_sk,inv_item_sk] Project [i_item_sk,i_item_id,i_item_desc,i_current_price] Filter [i_current_price,i_manufact_id,i_item_sk] - Scan parquet default.item [i_current_price,i_manufact_id,i_item_sk,i_item_desc,i_item_id] [i_current_price,i_manufact_id,i_item_sk,i_item_desc,i_item_id] + Scan parquet default.item [i_manufact_id,i_current_price,i_item_sk,i_item_id,i_item_desc] [i_manufact_id,i_current_price,i_item_sk,i_item_id,i_item_desc] InputAdapter BroadcastExchange #2 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt index ac423d3c3..8072eda0e 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt @@ -10,7 +10,7 @@ CollectLimit 100 : +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) : +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#7, ), coalesce(c_first_name#8, ), coalesce(d_date#9, 0)], LeftSemi, BuildRight, (((c_last_name#1 <=> c_last_name#7) && (c_first_name#2 <=> c_first_name#8)) && (d_date#3 <=> d_date#9)) : :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - : : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, d_date#3, 200) + : : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, d_date#3, 5) : : +- *(3) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) : : +- *(3) Project [c_last_name#1, c_first_name#2, d_date#3] : : +- *(3) BroadcastHashJoin [ss_customer_sk#10], [c_customer_sk#11], Inner, BuildRight @@ -29,7 +29,7 @@ CollectLimit 100 : : +- *(2) FileScan parquet default.customer[c_customer_sk#11,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) : +- *(7) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) - : +- Exchange hashpartitioning(c_last_name#7, c_first_name#8, d_date#9, 200) + : +- Exchange hashpartitioning(c_last_name#7, c_first_name#8, d_date#9, 5) : +- *(6) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) : +- *(6) Project [c_last_name#7, c_first_name#8, d_date#9] : +- *(6) BroadcastHashJoin [cs_bill_customer_sk#15], [c_customer_sk#16], Inner, BuildRight @@ -42,7 +42,7 @@ CollectLimit 100 : +- ReusedExchange [c_customer_sk#16, c_first_name#8, c_last_name#7], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) +- *(11) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) - +- Exchange hashpartitioning(c_last_name#4, c_first_name#5, d_date#6, 200) + +- Exchange hashpartitioning(c_last_name#4, c_first_name#5, d_date#6, 5) +- *(10) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) +- *(10) Project [c_last_name#4, c_first_name#5, d_date#6] +- *(10) BroadcastHashJoin [ws_bill_customer_sk#19], [c_customer_sk#20], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt index caec9c677..33d987376 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt @@ -7,10 +7,10 @@ CollectLimit HashAggregate [count,count] [count,count] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,d_date,c_first_name] + BroadcastHashJoin [c_first_name,d_date,c_last_name,d_date,c_last_name,c_first_name] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_first_name,d_date,d_date,c_last_name,c_last_name,c_first_name] + BroadcastHashJoin [d_date,d_date,c_last_name,c_first_name,c_first_name,c_last_name] HashAggregate [c_last_name,c_first_name,d_date] InputAdapter Exchange [c_last_name,c_first_name,d_date] #2 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt index 5d89d65da..5455e1849 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt @@ -1,11 +1,11 @@ == Physical Plan == *(11) Sort [w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST], true, 0 -+- Exchange rangepartitioning(w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST, 200) ++- Exchange rangepartitioning(w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST, 5) +- *(10) BroadcastHashJoin [i_item_sk#2, w_warehouse_sk#1], [i_item_sk#9, w_warehouse_sk#10], Inner, BuildRight :- *(10) Project [w_warehouse_sk#1, i_item_sk#2, d_moy#3, mean#4, CASE WHEN (mean#4 = 0.0) THEN null ELSE (stdev#11 / mean#4) END AS cov#5] : +- *(10) Filter (CASE WHEN (mean#4 = 0.0) THEN 0.0 ELSE (stdev#11 / mean#4) END > 1.0) : +- *(10) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) - : +- Exchange hashpartitioning(w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3, 200) + : +- Exchange hashpartitioning(w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3, 5) : +- *(4) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) : +- *(4) Project [inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12, d_moy#3] : +- *(4) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#15], Inner, BuildRight @@ -32,7 +32,7 @@ +- *(9) Project [w_warehouse_sk#10, i_item_sk#9, d_moy#6, mean#7, CASE WHEN (mean#7 = 0.0) THEN null ELSE (stdev#19 / mean#7) END AS cov#8] +- *(9) Filter (CASE WHEN (mean#7 = 0.0) THEN 0.0 ELSE (stdev#19 / mean#7) END > 1.0) +- *(9) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) - +- Exchange hashpartitioning(w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6, 200) + +- Exchange hashpartitioning(w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6, 5) +- *(8) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) +- *(8) Project [inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20, d_moy#6] +- *(8) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#21], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt index 0fadf4ea4..1618f51b2 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt @@ -1,19 +1,19 @@ WholeStageCodegen - Sort [d_moy,mean,i_item_sk,w_warehouse_sk,d_moy,mean,cov,cov] + Sort [mean,i_item_sk,mean,cov,d_moy,d_moy,cov,w_warehouse_sk] InputAdapter - Exchange [d_moy,mean,i_item_sk,w_warehouse_sk,d_moy,mean,cov,cov] #1 + Exchange [mean,i_item_sk,mean,cov,d_moy,d_moy,cov,w_warehouse_sk] #1 WholeStageCodegen BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] - Project [d_moy,mean,w_warehouse_sk,i_item_sk,stdev] + Project [w_warehouse_sk,i_item_sk,stdev,mean,d_moy] Filter [mean,stdev] - HashAggregate [m2,w_warehouse_sk,d_moy,sum,count,n,w_warehouse_name,avg,stddev_samp(cast(inv_quantity_on_hand as double)),i_item_sk,avg(cast(inv_quantity_on_hand as bigint))] [stdev,mean,m2,sum,count,n,avg,stddev_samp(cast(inv_quantity_on_hand as double)),avg(cast(inv_quantity_on_hand as bigint))] + HashAggregate [avg(cast(inv_quantity_on_hand as bigint)),w_warehouse_sk,count,d_moy,w_warehouse_name,avg,n,m2,sum,i_item_sk,stddev_samp(cast(inv_quantity_on_hand as double))] [avg(cast(inv_quantity_on_hand as bigint)),count,avg,n,mean,m2,sum,stdev,stddev_samp(cast(inv_quantity_on_hand as double))] InputAdapter Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 WholeStageCodegen - HashAggregate [m2,w_warehouse_sk,d_moy,m2,sum,count,n,w_warehouse_name,sum,avg,i_item_sk,inv_quantity_on_hand,avg,count,n] [m2,m2,sum,count,n,sum,avg,avg,count,n] - Project [d_moy,inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name] + HashAggregate [sum,w_warehouse_sk,count,d_moy,inv_quantity_on_hand,w_warehouse_name,avg,n,m2,n,m2,sum,count,avg,i_item_sk] [sum,count,avg,n,m2,n,m2,sum,count,avg] + Project [w_warehouse_name,inv_quantity_on_hand,i_item_sk,w_warehouse_sk,d_moy] BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name,inv_date_sk] + Project [w_warehouse_name,inv_quantity_on_hand,i_item_sk,inv_date_sk,w_warehouse_sk] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] BroadcastHashJoin [inv_item_sk,i_item_sk] @@ -41,16 +41,16 @@ WholeStageCodegen InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [i_item_sk,d_moy,mean,stdev,w_warehouse_sk] + Project [d_moy,mean,i_item_sk,stdev,w_warehouse_sk] Filter [mean,stdev] - HashAggregate [m2,i_item_sk,sum,count,w_warehouse_name,n,d_moy,avg,stddev_samp(cast(inv_quantity_on_hand as double)),w_warehouse_sk,avg(cast(inv_quantity_on_hand as bigint))] [m2,sum,count,n,mean,avg,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,avg(cast(inv_quantity_on_hand as bigint))] + HashAggregate [w_warehouse_name,avg(cast(inv_quantity_on_hand as bigint)),count,d_moy,i_item_sk,avg,n,w_warehouse_sk,m2,sum,stddev_samp(cast(inv_quantity_on_hand as double))] [avg(cast(inv_quantity_on_hand as bigint)),mean,count,avg,n,m2,sum,stdev,stddev_samp(cast(inv_quantity_on_hand as double))] InputAdapter Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 WholeStageCodegen - HashAggregate [m2,m2,i_item_sk,sum,count,w_warehouse_name,n,d_moy,sum,avg,w_warehouse_sk,inv_quantity_on_hand,avg,count,n] [m2,m2,sum,count,n,sum,avg,avg,count,n] - Project [w_warehouse_sk,inv_quantity_on_hand,w_warehouse_name,i_item_sk,d_moy] + HashAggregate [w_warehouse_name,sum,count,d_moy,i_item_sk,inv_quantity_on_hand,avg,n,w_warehouse_sk,m2,n,m2,sum,count,avg] [sum,count,avg,n,m2,n,m2,sum,count,avg] + Project [i_item_sk,w_warehouse_name,inv_quantity_on_hand,w_warehouse_sk,d_moy] BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [w_warehouse_sk,inv_quantity_on_hand,w_warehouse_name,i_item_sk,inv_date_sk] + Project [i_item_sk,w_warehouse_name,inv_quantity_on_hand,w_warehouse_sk,inv_date_sk] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] BroadcastHashJoin [inv_item_sk,i_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt index bd660d199..3c845e77b 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt @@ -1,11 +1,11 @@ == Physical Plan == *(11) Sort [w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST], true, 0 -+- Exchange rangepartitioning(w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST, 200) ++- Exchange rangepartitioning(w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST, 5) +- *(10) BroadcastHashJoin [i_item_sk#2, w_warehouse_sk#1], [i_item_sk#9, w_warehouse_sk#10], Inner, BuildRight :- *(10) Project [w_warehouse_sk#1, i_item_sk#2, d_moy#3, mean#4, CASE WHEN (mean#4 = 0.0) THEN null ELSE (stdev#11 / mean#4) END AS cov#5] : +- *(10) Filter ((CASE WHEN (mean#4 = 0.0) THEN 0.0 ELSE (stdev#11 / mean#4) END > 1.0) && (CASE WHEN (mean#4 = 0.0) THEN null ELSE (stdev#11 / mean#4) END > 1.5)) : +- *(10) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) - : +- Exchange hashpartitioning(w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3, 200) + : +- Exchange hashpartitioning(w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3, 5) : +- *(4) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) : +- *(4) Project [inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12, d_moy#3] : +- *(4) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#15], Inner, BuildRight @@ -32,7 +32,7 @@ +- *(9) Project [w_warehouse_sk#10, i_item_sk#9, d_moy#6, mean#7, CASE WHEN (mean#7 = 0.0) THEN null ELSE (stdev#19 / mean#7) END AS cov#8] +- *(9) Filter (CASE WHEN (mean#7 = 0.0) THEN 0.0 ELSE (stdev#19 / mean#7) END > 1.0) +- *(9) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) - +- Exchange hashpartitioning(w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6, 200) + +- Exchange hashpartitioning(w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6, 5) +- *(8) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) +- *(8) Project [inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20, d_moy#6] +- *(8) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#21], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt index 846b7cca1..83d28dd06 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt @@ -1,19 +1,19 @@ WholeStageCodegen - Sort [i_item_sk,mean,w_warehouse_sk,d_moy,mean,cov,d_moy,cov] + Sort [cov,mean,i_item_sk,d_moy,cov,w_warehouse_sk,d_moy,mean] InputAdapter - Exchange [i_item_sk,mean,w_warehouse_sk,d_moy,mean,cov,d_moy,cov] #1 + Exchange [cov,mean,i_item_sk,d_moy,cov,w_warehouse_sk,d_moy,mean] #1 WholeStageCodegen BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] - Project [d_moy,w_warehouse_sk,i_item_sk,stdev,mean] + Project [stdev,mean,w_warehouse_sk,i_item_sk,d_moy] Filter [mean,stdev] - HashAggregate [avg,w_warehouse_sk,d_moy,w_warehouse_name,m2,avg(cast(inv_quantity_on_hand as bigint)),i_item_sk,n,count,sum,stddev_samp(cast(inv_quantity_on_hand as double))] [avg,m2,stdev,avg(cast(inv_quantity_on_hand as bigint)),n,count,mean,sum,stddev_samp(cast(inv_quantity_on_hand as double))] + HashAggregate [avg(cast(inv_quantity_on_hand as bigint)),m2,count,w_warehouse_sk,avg,d_moy,w_warehouse_name,sum,stddev_samp(cast(inv_quantity_on_hand as double)),i_item_sk,n] [avg(cast(inv_quantity_on_hand as bigint)),m2,count,avg,mean,stdev,sum,stddev_samp(cast(inv_quantity_on_hand as double)),n] InputAdapter Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 WholeStageCodegen - HashAggregate [avg,n,w_warehouse_sk,d_moy,count,w_warehouse_name,m2,sum,i_item_sk,inv_quantity_on_hand,m2,n,avg,count,sum] [avg,n,count,m2,sum,m2,n,avg,count,sum] - Project [d_moy,inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name] + HashAggregate [m2,sum,count,w_warehouse_sk,avg,d_moy,inv_quantity_on_hand,w_warehouse_name,m2,avg,n,sum,i_item_sk,n,count] [m2,sum,count,avg,m2,avg,n,sum,n,count] + Project [w_warehouse_name,inv_quantity_on_hand,i_item_sk,w_warehouse_sk,d_moy] BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name,inv_date_sk] + Project [w_warehouse_name,inv_quantity_on_hand,i_item_sk,inv_date_sk,w_warehouse_sk] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] BroadcastHashJoin [inv_item_sk,i_item_sk] @@ -41,16 +41,16 @@ WholeStageCodegen InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [mean,stdev,d_moy,w_warehouse_sk,i_item_sk] + Project [i_item_sk,d_moy,stdev,w_warehouse_sk,mean] Filter [mean,stdev] - HashAggregate [avg,m2,avg(cast(inv_quantity_on_hand as bigint)),d_moy,w_warehouse_sk,n,i_item_sk,count,w_warehouse_name,sum,stddev_samp(cast(inv_quantity_on_hand as double))] [avg,stdev,m2,avg(cast(inv_quantity_on_hand as bigint)),n,count,sum,mean,stddev_samp(cast(inv_quantity_on_hand as double))] + HashAggregate [avg(cast(inv_quantity_on_hand as bigint)),m2,count,avg,w_warehouse_name,sum,i_item_sk,stddev_samp(cast(inv_quantity_on_hand as double)),n,d_moy,w_warehouse_sk] [avg(cast(inv_quantity_on_hand as bigint)),m2,count,avg,mean,sum,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,n] InputAdapter Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 WholeStageCodegen - HashAggregate [avg,n,count,m2,sum,d_moy,w_warehouse_sk,inv_quantity_on_hand,m2,n,i_item_sk,avg,count,w_warehouse_name,sum] [avg,n,count,m2,sum,m2,n,avg,count,sum] - Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,d_moy,w_warehouse_name] + HashAggregate [m2,sum,count,avg,inv_quantity_on_hand,m2,avg,w_warehouse_name,n,sum,i_item_sk,n,d_moy,w_warehouse_sk,count] [m2,sum,count,avg,m2,avg,n,sum,n,count] + Project [inv_quantity_on_hand,w_warehouse_sk,d_moy,i_item_sk,w_warehouse_name] BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,inv_date_sk,w_warehouse_name] + Project [inv_quantity_on_hand,w_warehouse_sk,inv_date_sk,i_item_sk,w_warehouse_name] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] BroadcastHashJoin [inv_item_sk,i_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt index 7284720b7..16fa8a95c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt @@ -12,7 +12,7 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer : : : : :- Union : : : : : :- *(4) Filter (isnotnull(year_total#16) && (year_total#16 > 0.000000)) : : : : : : +- *(4) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 200) + : : : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) : : : : : : +- *(3) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) : : : : : : +- *(3) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27, d_year#26] : : : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight @@ -34,7 +34,7 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : : : : +- Union : : : : :- *(8) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 200) + : : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) : : : : : +- *(7) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) : : : : : +- *(7) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27, d_year#26] : : : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight @@ -55,7 +55,7 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer : : : :- LocalTableScan , [customer_id#18, year_total#10] : : : :- *(12) Filter (isnotnull(year_total#36) && (year_total#36 > 0.000000)) : : : : +- *(12) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 200) + : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) : : : : +- *(11) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) : : : : +- *(11) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51, d_year#26] : : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#55], [d_date_sk#32], Inner, BuildRight @@ -74,7 +74,7 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer : : +- Union : : :- LocalTableScan , [customer_id#15, year_total#11] : : :- *(16) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 200) + : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) : : : +- *(15) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) : : : +- *(15) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51, d_year#26] : : : +- *(15) BroadcastHashJoin [cs_sold_date_sk#55], [d_date_sk#32], Inner, BuildRight @@ -92,7 +92,7 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer : :- LocalTableScan , [customer_id#35, year_total#36] : +- *(20) Filter (isnotnull(year_total#38) && (year_total#38 > 0.000000)) : +- *(20) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 200) + : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) : +- *(19) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) : +- *(19) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57, d_year#26] : +- *(19) BroadcastHashJoin [ws_sold_date_sk#61], [d_date_sk#32], Inner, BuildRight @@ -111,7 +111,7 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer :- LocalTableScan , [customer_id#9, year_total#13] :- LocalTableScan , [customer_id#35, year_total#36] +- *(24) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 200) + +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) +- *(23) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) +- *(23) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57, d_year#26] +- *(23) BroadcastHashJoin [ws_sold_date_sk#61], [d_date_sk#32], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt index b055f5dbc..c03b8aee2 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt @@ -1,36 +1,36 @@ -TakeOrderedAndProject [customer_email_address,customer_login,customer_first_name,customer_last_name,customer_birth_country,customer_id,customer_preferred_cust_flag] +TakeOrderedAndProject [customer_birth_country,customer_login,customer_last_name,customer_email_address,customer_first_name,customer_preferred_cust_flag,customer_id] WholeStageCodegen - Project [customer_email_address,customer_login,customer_first_name,customer_last_name,customer_birth_country,customer_id,customer_preferred_cust_flag] - BroadcastHashJoin [year_total,year_total,customer_id,year_total,year_total,customer_id] - Project [year_total,customer_birth_country,customer_preferred_cust_flag,year_total,customer_email_address,customer_first_name,year_total,customer_login,customer_id,customer_id,customer_last_name] + Project [customer_birth_country,customer_login,customer_last_name,customer_email_address,customer_first_name,customer_preferred_cust_flag,customer_id] + BroadcastHashJoin [year_total,customer_id,year_total,year_total,customer_id,year_total] + Project [year_total,customer_last_name,customer_first_name,customer_id,customer_login,customer_email_address,customer_id,customer_preferred_cust_flag,customer_birth_country,year_total,year_total] BroadcastHashJoin [customer_id,customer_id] - Project [year_total,customer_birth_country,customer_preferred_cust_flag,customer_email_address,customer_first_name,year_total,customer_login,customer_id,customer_id,customer_last_name] - BroadcastHashJoin [year_total,year_total,year_total,year_total,customer_id,customer_id] - Project [customer_birth_country,customer_preferred_cust_flag,customer_email_address,customer_first_name,year_total,customer_login,customer_id,customer_id,customer_last_name,year_total,year_total] + Project [year_total,customer_last_name,customer_first_name,customer_id,customer_login,customer_email_address,customer_id,customer_preferred_cust_flag,customer_birth_country,year_total] + BroadcastHashJoin [year_total,year_total,customer_id,customer_id,year_total,year_total] + Project [year_total,year_total,customer_last_name,customer_first_name,customer_id,customer_login,year_total,customer_email_address,customer_id,customer_preferred_cust_flag,customer_birth_country] BroadcastHashJoin [customer_id,customer_id] BroadcastHashJoin [customer_id,customer_id] InputAdapter Union WholeStageCodegen Filter [year_total] - HashAggregate [d_year,c_login,c_birth_country,c_customer_id,c_last_name,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_preferred_cust_flag,sum,c_first_name,c_email_address] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] + HashAggregate [c_email_address,c_login,c_last_name,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name,sum] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #1 + Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #1 WholeStageCodegen - HashAggregate [ss_ext_wholesale_cost,d_year,ss_ext_discount_amt,c_login,sum,c_birth_country,c_customer_id,c_last_name,ss_ext_sales_price,c_preferred_cust_flag,sum,ss_ext_list_price,c_first_name,c_email_address] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,d_year,c_login,ss_ext_sales_price,c_last_name,c_first_name,c_birth_country,ss_ext_list_price,ss_ext_wholesale_cost] + HashAggregate [c_email_address,ss_ext_list_price,c_login,c_last_name,c_customer_id,d_year,ss_ext_sales_price,c_birth_country,ss_ext_wholesale_cost,c_preferred_cust_flag,sum,c_first_name,sum,ss_ext_discount_amt] [sum,sum] + Project [d_year,c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ss_ext_sales_price,ss_ext_discount_amt,ss_ext_wholesale_cost,ss_ext_list_price] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,c_login,ss_ext_sales_price,c_last_name,c_first_name,ss_sold_date_sk,c_birth_country,ss_ext_list_price,ss_ext_wholesale_cost] + Project [c_customer_id,c_birth_country,c_login,ss_sold_date_sk,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ss_ext_sales_price,ss_ext_discount_amt,ss_ext_wholesale_cost,ss_ext_list_price] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [ss_ext_discount_amt,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ext_list_price,ss_ext_wholesale_cost] + Project [ss_customer_sk,ss_sold_date_sk,ss_ext_sales_price,ss_ext_discount_amt,ss_ext_wholesale_cost,ss_ext_list_price] Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_ext_discount_amt,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ext_list_price,ss_ext_wholesale_cost] [ss_ext_discount_amt,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ext_list_price,ss_ext_wholesale_cost] + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk,ss_ext_sales_price,ss_ext_discount_amt,ss_ext_wholesale_cost,ss_ext_list_price] [ss_customer_sk,ss_sold_date_sk,ss_ext_sales_price,ss_ext_discount_amt,ss_ext_wholesale_cost,ss_ext_list_price] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -43,52 +43,52 @@ TakeOrderedAndProject [customer_email_address,customer_login,customer_first_name BroadcastExchange #4 Union WholeStageCodegen - HashAggregate [d_year,c_login,c_birth_country,sum,c_customer_id,c_last_name,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_preferred_cust_flag,c_first_name,c_email_address] [customer_birth_country,customer_preferred_cust_flag,customer_email_address,customer_first_name,sum,customer_login,customer_id,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_last_name,year_total] + HashAggregate [sum,c_email_address,c_login,c_last_name,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] [sum,year_total,customer_last_name,customer_first_name,customer_id,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_login,customer_email_address,customer_preferred_cust_flag,customer_birth_country] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #5 + Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #5 WholeStageCodegen - HashAggregate [ss_ext_wholesale_cost,d_year,ss_ext_discount_amt,c_login,sum,c_birth_country,sum,c_customer_id,c_last_name,ss_ext_sales_price,c_preferred_cust_flag,ss_ext_list_price,c_first_name,c_email_address] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,d_year,c_login,ss_ext_sales_price,c_last_name,c_first_name,c_birth_country,ss_ext_list_price,ss_ext_wholesale_cost] + HashAggregate [sum,c_email_address,ss_ext_list_price,c_login,c_last_name,c_customer_id,d_year,ss_ext_sales_price,c_birth_country,ss_ext_wholesale_cost,c_preferred_cust_flag,c_first_name,sum,ss_ext_discount_amt] [sum,sum] + Project [d_year,c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ss_ext_sales_price,ss_ext_discount_amt,ss_ext_wholesale_cost,ss_ext_list_price] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,c_login,ss_ext_sales_price,c_last_name,c_first_name,ss_sold_date_sk,c_birth_country,ss_ext_list_price,ss_ext_wholesale_cost] + Project [c_customer_id,c_birth_country,c_login,ss_sold_date_sk,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ss_ext_sales_price,ss_ext_discount_amt,ss_ext_wholesale_cost,ss_ext_list_price] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] InputAdapter - ReusedExchange [ss_ext_discount_amt,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ext_list_price,ss_ext_wholesale_cost] [ss_ext_discount_amt,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ext_list_price,ss_ext_wholesale_cost] #2 + ReusedExchange [ss_customer_sk,ss_sold_date_sk,ss_ext_sales_price,ss_ext_discount_amt,ss_ext_wholesale_cost,ss_ext_list_price] [ss_customer_sk,ss_sold_date_sk,ss_ext_sales_price,ss_ext_discount_amt,ss_ext_wholesale_cost,ss_ext_list_price] #2 InputAdapter BroadcastExchange #6 WholeStageCodegen Project [d_date_sk,d_year] Filter [d_year,d_date_sk] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - LocalTableScan [customer_id,year_total,customer_preferred_cust_flag,customer_last_name,customer_email_address,customer_birth_country,customer_login,customer_first_name] [customer_id,year_total,customer_preferred_cust_flag,customer_last_name,customer_email_address,customer_birth_country,customer_login,customer_first_name] - LocalTableScan [customer_first_name,customer_birth_country,customer_preferred_cust_flag,customer_id,customer_email_address,customer_last_name,customer_login,year_total] [customer_first_name,customer_birth_country,customer_preferred_cust_flag,customer_id,customer_email_address,customer_last_name,customer_login,year_total] + LocalTableScan [year_total,customer_login,customer_id,customer_last_name,customer_email_address,customer_birth_country,customer_first_name,customer_preferred_cust_flag] [year_total,customer_login,customer_id,customer_last_name,customer_email_address,customer_birth_country,customer_first_name,customer_preferred_cust_flag] + LocalTableScan [year_total,customer_login,customer_email_address,customer_first_name,customer_id,customer_preferred_cust_flag,customer_last_name,customer_birth_country] [year_total,customer_login,customer_email_address,customer_first_name,customer_id,customer_preferred_cust_flag,customer_last_name,customer_birth_country] InputAdapter BroadcastExchange #7 Union LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen Filter [year_total] - HashAggregate [d_year,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_login,sum,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] + HashAggregate [c_email_address,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),sum,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #8 + Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #8 WholeStageCodegen - HashAggregate [d_year,sum,c_login,sum,cs_ext_discount_amt,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,cs_ext_wholesale_cost,c_first_name,cs_ext_sales_price,c_email_address,cs_ext_list_price] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,d_year,cs_ext_discount_amt,c_login,c_last_name,c_first_name,c_birth_country,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] + HashAggregate [c_email_address,cs_ext_wholesale_cost,sum,sum,c_login,c_last_name,cs_ext_list_price,cs_ext_sales_price,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name,cs_ext_discount_amt] [sum,sum] + Project [cs_ext_list_price,d_year,c_customer_id,c_birth_country,cs_ext_wholesale_cost,c_login,cs_ext_sales_price,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,cs_ext_discount_amt] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,cs_ext_discount_amt,c_login,c_last_name,cs_sold_date_sk,c_first_name,c_birth_country,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] + Project [cs_ext_list_price,c_customer_id,c_birth_country,cs_ext_wholesale_cost,c_login,cs_ext_sales_price,cs_sold_date_sk,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,cs_ext_discount_amt] BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] InputAdapter BroadcastExchange #9 WholeStageCodegen - Project [cs_ext_discount_amt,cs_bill_customer_sk,cs_sold_date_sk,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] + Project [cs_ext_list_price,cs_bill_customer_sk,cs_ext_wholesale_cost,cs_ext_sales_price,cs_sold_date_sk,cs_ext_discount_amt] Filter [cs_bill_customer_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ext_discount_amt,cs_bill_customer_sk,cs_sold_date_sk,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] [cs_ext_discount_amt,cs_bill_customer_sk,cs_sold_date_sk,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] + Scan parquet default.catalog_sales [cs_ext_list_price,cs_bill_customer_sk,cs_ext_wholesale_cost,cs_ext_sales_price,cs_sold_date_sk,cs_ext_discount_amt] [cs_ext_list_price,cs_bill_customer_sk,cs_ext_wholesale_cost,cs_ext_sales_price,cs_sold_date_sk,cs_ext_discount_amt] InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 LocalTableScan [customer_id,year_total] [customer_id,year_total] @@ -97,20 +97,20 @@ TakeOrderedAndProject [customer_email_address,customer_login,customer_first_name Union LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen - HashAggregate [d_year,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] + HashAggregate [c_email_address,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name,sum] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #11 + Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #11 WholeStageCodegen - HashAggregate [d_year,sum,c_login,cs_ext_discount_amt,sum,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,cs_ext_wholesale_cost,c_first_name,cs_ext_sales_price,c_email_address,cs_ext_list_price] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,d_year,cs_ext_discount_amt,c_login,c_last_name,c_first_name,c_birth_country,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] + HashAggregate [c_email_address,cs_ext_wholesale_cost,c_login,c_last_name,cs_ext_list_price,cs_ext_sales_price,sum,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name,sum,cs_ext_discount_amt] [sum,sum] + Project [cs_ext_list_price,d_year,c_customer_id,c_birth_country,cs_ext_wholesale_cost,c_login,cs_ext_sales_price,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,cs_ext_discount_amt] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,cs_ext_discount_amt,c_login,c_last_name,cs_sold_date_sk,c_first_name,c_birth_country,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] + Project [cs_ext_list_price,c_customer_id,c_birth_country,cs_ext_wholesale_cost,c_login,cs_ext_sales_price,cs_sold_date_sk,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,cs_ext_discount_amt] BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] InputAdapter - ReusedExchange [cs_ext_discount_amt,cs_bill_customer_sk,cs_sold_date_sk,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] [cs_ext_discount_amt,cs_bill_customer_sk,cs_sold_date_sk,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] #9 + ReusedExchange [cs_ext_list_price,cs_bill_customer_sk,cs_ext_wholesale_cost,cs_ext_sales_price,cs_sold_date_sk,cs_ext_discount_amt] [cs_ext_list_price,cs_bill_customer_sk,cs_ext_wholesale_cost,cs_ext_sales_price,cs_sold_date_sk,cs_ext_discount_amt] #9 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #6 LocalTableScan [customer_id,year_total] [customer_id,year_total] @@ -121,24 +121,24 @@ TakeOrderedAndProject [customer_email_address,customer_login,customer_first_name LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen Filter [year_total] - HashAggregate [d_year,c_login,c_birth_country,c_customer_id,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),sum,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] + HashAggregate [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,sum,c_preferred_cust_flag,c_first_name] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #13 + Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #13 WholeStageCodegen - HashAggregate [d_year,ws_ext_wholesale_cost,ws_ext_discount_amt,c_login,sum,ws_ext_sales_price,c_birth_country,c_customer_id,sum,c_last_name,c_preferred_cust_flag,ws_ext_list_price,c_first_name,c_email_address] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,d_year,ws_ext_sales_price,ws_ext_discount_amt,c_login,c_last_name,ws_ext_list_price,c_first_name,c_birth_country,ws_ext_wholesale_cost] + HashAggregate [c_email_address,ws_ext_list_price,c_login,c_last_name,ws_ext_wholesale_cost,ws_ext_discount_amt,c_customer_id,d_year,c_birth_country,sum,ws_ext_sales_price,sum,c_preferred_cust_flag,c_first_name] [sum,sum] + Project [d_year,c_customer_id,c_birth_country,c_login,ws_ext_list_price,ws_ext_wholesale_cost,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ws_ext_sales_price,ws_ext_discount_amt] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ws_ext_sales_price,ws_ext_discount_amt,c_login,c_last_name,ws_sold_date_sk,ws_ext_list_price,c_first_name,c_birth_country,ws_ext_wholesale_cost] + Project [c_customer_id,c_birth_country,c_login,ws_ext_list_price,ws_ext_wholesale_cost,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] InputAdapter BroadcastExchange #14 WholeStageCodegen - Project [ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk,ws_ext_list_price,ws_bill_customer_sk,ws_ext_wholesale_cost] + Project [ws_bill_customer_sk,ws_ext_list_price,ws_ext_wholesale_cost,ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk] Filter [ws_bill_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk,ws_ext_list_price,ws_bill_customer_sk,ws_ext_wholesale_cost] [ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk,ws_ext_list_price,ws_bill_customer_sk,ws_ext_wholesale_cost] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_ext_list_price,ws_ext_wholesale_cost,ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk] [ws_bill_customer_sk,ws_ext_list_price,ws_ext_wholesale_cost,ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk] InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 InputAdapter @@ -147,19 +147,19 @@ TakeOrderedAndProject [customer_email_address,customer_login,customer_first_name LocalTableScan [customer_id,year_total] [customer_id,year_total] LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen - HashAggregate [d_year,c_login,c_birth_country,c_customer_id,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,sum] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] + HashAggregate [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,sum,c_preferred_cust_flag,c_first_name] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #16 + Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #16 WholeStageCodegen - HashAggregate [d_year,ws_ext_wholesale_cost,ws_ext_discount_amt,c_login,ws_ext_sales_price,c_birth_country,c_customer_id,sum,c_last_name,c_preferred_cust_flag,ws_ext_list_price,c_first_name,c_email_address,sum] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,d_year,ws_ext_sales_price,ws_ext_discount_amt,c_login,c_last_name,ws_ext_list_price,c_first_name,c_birth_country,ws_ext_wholesale_cost] + HashAggregate [c_email_address,sum,ws_ext_list_price,c_login,c_last_name,ws_ext_wholesale_cost,ws_ext_discount_amt,c_customer_id,d_year,c_birth_country,sum,ws_ext_sales_price,c_preferred_cust_flag,c_first_name] [sum,sum] + Project [d_year,c_customer_id,c_birth_country,c_login,ws_ext_list_price,ws_ext_wholesale_cost,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ws_ext_sales_price,ws_ext_discount_amt] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ws_ext_sales_price,ws_ext_discount_amt,c_login,c_last_name,ws_sold_date_sk,ws_ext_list_price,c_first_name,c_birth_country,ws_ext_wholesale_cost] + Project [c_customer_id,c_birth_country,c_login,ws_ext_list_price,ws_ext_wholesale_cost,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] InputAdapter - ReusedExchange [ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk,ws_ext_list_price,ws_bill_customer_sk,ws_ext_wholesale_cost] [ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk,ws_ext_list_price,ws_bill_customer_sk,ws_ext_wholesale_cost] #14 + ReusedExchange [ws_bill_customer_sk,ws_ext_list_price,ws_ext_wholesale_cost,ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk] [ws_bill_customer_sk,ws_ext_list_price,ws_ext_wholesale_cost,ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk] #14 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #6 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/explain.txt index 7f0c5cf00..7bd7c9667 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[w_state#1 ASC NULLS FIRST,i_item_id#2 ASC NULLS FIRST], output=[w_state#1,i_item_id#2,sales_before#3,sales_after#4]) +- *(6) HashAggregate(keys=[w_state#1, i_item_id#2], functions=[sum(CASE WHEN (d_date#5 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), sum(CASE WHEN (d_date#5 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)]) - +- Exchange hashpartitioning(w_state#1, i_item_id#2, 200) + +- Exchange hashpartitioning(w_state#1, i_item_id#2, 5) +- *(5) HashAggregate(keys=[w_state#1, i_item_id#2], functions=[partial_sum(CASE WHEN (d_date#5 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#5 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)]) +- *(5) Project [cs_sales_price#6, cr_refunded_cash#7, w_state#1, i_item_id#2, d_date#5] +- *(5) BroadcastHashJoin [cs_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt index 2e480b9ea..44fc2c9ca 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt @@ -1,21 +1,21 @@ TakeOrderedAndProject [w_state,i_item_id,sales_before,sales_after] WholeStageCodegen - HashAggregate [i_item_id,sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),w_state,sum] [sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sales_before,sales_after,sum] + HashAggregate [w_state,sum,i_item_id,sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] [sales_before,sales_after,sum,sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] InputAdapter Exchange [w_state,i_item_id] #1 WholeStageCodegen - HashAggregate [i_item_id,sum,sum,d_date,w_state,sum,cs_sales_price,cr_refunded_cash,sum] [sum,sum,sum,sum] - Project [d_date,cs_sales_price,w_state,cr_refunded_cash,i_item_id] + HashAggregate [sum,w_state,d_date,sum,cs_sales_price,cr_refunded_cash,i_item_id,sum,sum] [sum,sum,sum,sum] + Project [cr_refunded_cash,w_state,i_item_id,d_date,cs_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sales_price,w_state,cs_sold_date_sk,cr_refunded_cash,i_item_id] + Project [cr_refunded_cash,cs_sold_date_sk,w_state,i_item_id,cs_sales_price] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_sales_price,w_state,cs_sold_date_sk,cs_item_sk,cr_refunded_cash] + Project [cr_refunded_cash,cs_item_sk,cs_sold_date_sk,w_state,cs_sales_price] BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] - Project [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cr_refunded_cash] + Project [cs_warehouse_sk,cr_refunded_cash,cs_item_sk,cs_sold_date_sk,cs_sales_price] BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] + Project [cs_warehouse_sk,cs_item_sk,cs_sold_date_sk,cs_sales_price,cs_order_number] Filter [cs_warehouse_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] + Scan parquet default.catalog_sales [cs_warehouse_sk,cs_item_sk,cs_sold_date_sk,cs_sales_price,cs_order_number] [cs_warehouse_sk,cs_item_sk,cs_sold_date_sk,cs_sales_price,cs_order_number] InputAdapter BroadcastExchange #2 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt index 886b4e209..31bc2e514 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[i_product_name#1 ASC NULLS FIRST], output=[i_product_name#1]) +- *(4) HashAggregate(keys=[i_product_name#1], functions=[]) - +- Exchange hashpartitioning(i_product_name#1, 200) + +- Exchange hashpartitioning(i_product_name#1, 5) +- *(3) HashAggregate(keys=[i_product_name#1], functions=[]) +- *(3) Project [i_product_name#1] +- *(3) BroadcastHashJoin [i_manufact#2], [i_manufact#2#3], Inner, BuildRight @@ -12,7 +12,7 @@ TakeOrderedAndProject(limit=100, orderBy=[i_product_name#1 ASC NULLS FIRST], out +- *(2) Project [i_manufact#2#3] +- *(2) Filter (if (isnull(alwaysTrue#5)) 0 else item_cnt#6 > 0) +- *(2) HashAggregate(keys=[i_manufact#2], functions=[count(1)]) - +- Exchange hashpartitioning(i_manufact#2, 200) + +- Exchange hashpartitioning(i_manufact#2, 5) +- *(1) HashAggregate(keys=[i_manufact#2], functions=[partial_count(1)]) +- *(1) Project [i_manufact#2] +- *(1) Filter (((((i_category#7 = Women) && (((((i_color#8 = powder) || (i_color#8 = khaki)) && ((i_units#9 = Ounce) || (i_units#9 = Oz))) && ((i_size#10 = medium) || (i_size#10 = extra large))) || ((((i_color#8 = brown) || (i_color#8 = honeydew)) && ((i_units#9 = Bunch) || (i_units#9 = Ton))) && ((i_size#10 = N/A) || (i_size#10 = small))))) || ((i_category#7 = Men) && (((((i_color#8 = floral) || (i_color#8 = deep)) && ((i_units#9 = N/A) || (i_units#9 = Dozen))) && ((i_size#10 = petite) || (i_size#10 = large))) || ((((i_color#8 = light) || (i_color#8 = cornflower)) && ((i_units#9 = Box) || (i_units#9 = Pound))) && ((i_size#10 = medium) || (i_size#10 = extra large)))))) || (((i_category#7 = Women) && (((((i_color#8 = midnight) || (i_color#8 = snow)) && ((i_units#9 = Pallet) || (i_units#9 = Gross))) && ((i_size#10 = medium) || (i_size#10 = extra large))) || ((((i_color#8 = cyan) || (i_color#8 = papaya)) && ((i_units#9 = Cup) || (i_units#9 = Dram))) && ((i_size#10 = N/A) || (i_size#10 = small))))) || ((i_category#7 = Men) && (((((i_color#8 = orange) || (i_color#8 = frosted)) && ((i_units#9 = Each) || (i_units#9 = Tbl))) && ((i_size#10 = petite) || (i_size#10 = large))) || ((((i_color#8 = forest) || (i_color#8 = ghost)) && ((i_units#9 = Lb) || (i_units#9 = Bundle))) && ((i_size#10 = medium) || (i_size#10 = extra large))))))) && isnotnull(i_manufact#2)) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt index caac67bf3..542a3d114 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt @@ -15,11 +15,11 @@ TakeOrderedAndProject [i_product_name] WholeStageCodegen Project [i_manufact] Filter [alwaysTrue,item_cnt] - HashAggregate [i_manufact,count,count(1)] [i_manufact,alwaysTrue,item_cnt,count,count(1)] + HashAggregate [i_manufact,count,count(1)] [i_manufact,count,count(1),item_cnt,alwaysTrue] InputAdapter Exchange [i_manufact] #3 WholeStageCodegen HashAggregate [i_manufact,count,count] [count,count] Project [i_manufact] - Filter [i_units,i_manufact,i_color,i_size,i_category] - Scan parquet default.item [i_manufact,i_units,i_size,i_category,i_color] [i_manufact,i_units,i_size,i_category,i_color] + Filter [i_manufact,i_size,i_color,i_category,i_units] + Scan parquet default.item [i_manufact,i_units,i_category,i_size,i_color] [i_manufact,i_units,i_category,i_size,i_color] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt index 6fb079e4a..1e560817c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[sum(ss_ext_sales_price)#1 DESC NULLS LAST,d_year#2 ASC NULLS FIRST,i_category_id#3 ASC NULLS FIRST,i_category#4 ASC NULLS FIRST], output=[d_year#2,i_category_id#3,i_category#4,sum(ss_ext_sales_price)#1]) +- *(4) HashAggregate(keys=[d_year#2, i_category_id#3, i_category#4], functions=[sum(UnscaledValue(ss_ext_sales_price#5))]) - +- Exchange hashpartitioning(d_year#2, i_category_id#3, i_category#4, 200) + +- Exchange hashpartitioning(d_year#2, i_category_id#3, i_category#4, 5) +- *(3) HashAggregate(keys=[d_year#2, i_category_id#3, i_category#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#5))]) +- *(3) Project [d_year#2, ss_ext_sales_price#5, i_category_id#3, i_category#4] +- *(3) BroadcastHashJoin [ss_item_sk#6], [i_item_sk#7], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt index abcbca371..016028e76 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt @@ -1,10 +1,10 @@ TakeOrderedAndProject [sum(ss_ext_sales_price),d_year,i_category_id,i_category] WholeStageCodegen - HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),sum,d_year,i_category,i_category_id] [sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price),sum] + HashAggregate [d_year,sum,i_category,i_category_id,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price),sum] InputAdapter Exchange [d_year,i_category_id,i_category] #1 WholeStageCodegen - HashAggregate [sum,d_year,sum,i_category,i_category_id,ss_ext_sales_price] [sum,sum] + HashAggregate [d_year,sum,ss_ext_sales_price,sum,i_category,i_category_id] [sum,sum] Project [d_year,ss_ext_sales_price,i_category_id,i_category] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [d_year,ss_item_sk,ss_ext_sales_price] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt index d2b16d01d..b68348e44 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,s_store_id#2 ASC NULLS FIRST,sun_sales#3 ASC NULLS FIRST,mon_sales#4 ASC NULLS FIRST,tue_sales#5 ASC NULLS FIRST,wed_sales#6 ASC NULLS FIRST,thu_sales#7 ASC NULLS FIRST,fri_sales#8 ASC NULLS FIRST,sat_sales#9 ASC NULLS FIRST], output=[s_store_name#1,s_store_id#2,sun_sales#3,mon_sales#4,tue_sales#5,wed_sales#6,thu_sales#7,fri_sales#8,sat_sales#9]) +- *(4) HashAggregate(keys=[s_store_name#1, s_store_id#2], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#10 = Sunday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Monday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Tuesday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Wednesday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Thursday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Friday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Saturday) THEN ss_sales_price#11 ELSE null END))]) - +- Exchange hashpartitioning(s_store_name#1, s_store_id#2, 200) + +- Exchange hashpartitioning(s_store_name#1, s_store_id#2, 5) +- *(3) HashAggregate(keys=[s_store_name#1, s_store_id#2], functions=[partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Sunday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Monday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Tuesday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Wednesday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Thursday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Friday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Saturday) THEN ss_sales_price#11 ELSE null END))]) +- *(3) Project [d_day_name#10, ss_sales_price#11, s_store_id#2, s_store_name#1] +- *(3) BroadcastHashJoin [ss_store_sk#12], [s_store_sk#13], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt index a5e60a60c..d1b3116b7 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt @@ -1,10 +1,10 @@ -TakeOrderedAndProject [sat_sales,fri_sales,sun_sales,s_store_id,tue_sales,mon_sales,s_store_name,thu_sales,wed_sales] +TakeOrderedAndProject [thu_sales,s_store_id,tue_sales,s_store_name,mon_sales,fri_sales,sun_sales,sat_sales,wed_sales] WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,sum,s_store_id,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),s_store_name,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END))] [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sat_sales,fri_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sun_sales,sum,sum,sum,sum,tue_sales,mon_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),thu_sales,wed_sales] + HashAggregate [sum,s_store_id,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),s_store_name,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum,sum,sum] [thu_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),tue_sales,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),mon_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),fri_sales,sum,sum,sun_sales,sat_sales,sum,wed_sales] InputAdapter Exchange [s_store_name,s_store_id] #1 WholeStageCodegen - HashAggregate [sum,sum,sum,d_day_name,sum,sum,sum,sum,sum,sum,s_store_id,ss_sales_price,sum,sum,sum,sum,sum,s_store_name] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + HashAggregate [sum,sum,sum,s_store_id,s_store_name,sum,sum,sum,d_day_name,sum,sum,sum,ss_sales_price,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] Project [d_day_name,ss_sales_price,s_store_id,s_store_name] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [d_day_name,ss_store_sk,ss_sales_price] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt index 91c290e9f..4547d0f60 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt @@ -12,16 +12,16 @@ TakeOrderedAndProject(limit=100, orderBy=[rnk#1 ASC NULLS FIRST], output=[rnk#1, : : : +- *(3) Sort [rank_col#11 ASC NULLS FIRST], false, 0 : : : +- Exchange SinglePartition : : : +- *(2) Project [item_sk#8, rank_col#11] - : : : +- *(2) Filter (isnotnull(avg(ss_net_profit#12)#13) && (cast(avg(ss_net_profit#12)#13 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery7089)), DecimalType(13,7)))) - : : : : +- Subquery subquery7089 + : : : +- *(2) Filter (isnotnull(avg(ss_net_profit#12)#13) && (cast(avg(ss_net_profit#12)#13 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery9329)), DecimalType(13,7)))) + : : : : +- Subquery subquery9329 : : : : +- *(2) HashAggregate(keys=[ss_store_sk#14], functions=[avg(UnscaledValue(ss_net_profit#12))]) - : : : : +- Exchange hashpartitioning(ss_store_sk#14, 200) + : : : : +- Exchange hashpartitioning(ss_store_sk#14, 5) : : : : +- *(1) HashAggregate(keys=[ss_store_sk#14], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) : : : : +- *(1) Project [ss_store_sk#14, ss_net_profit#12] : : : : +- *(1) Filter ((isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) && isnull(ss_addr_sk#15)) : : : : +- *(1) FileScan parquet default.store_sales[ss_addr_sk#15,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)], ReadSchema: struct : : : +- *(2) HashAggregate(keys=[ss_item_sk#16], functions=[avg(UnscaledValue(ss_net_profit#12))]) - : : : +- Exchange hashpartitioning(ss_item_sk#16, 200) + : : : +- Exchange hashpartitioning(ss_item_sk#16, 5) : : : +- *(1) HashAggregate(keys=[ss_item_sk#16], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) : : : +- *(1) Project [ss_item_sk#16, ss_net_profit#12] : : : +- *(1) Filter (isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) @@ -33,16 +33,16 @@ TakeOrderedAndProject(limit=100, orderBy=[rnk#1 ASC NULLS FIRST], output=[rnk#1, : : +- *(6) Sort [rank_col#17 DESC NULLS LAST], false, 0 : : +- Exchange SinglePartition : : +- *(5) Project [item_sk#6, rank_col#17] - : : +- *(5) Filter (isnotnull(avg(ss_net_profit#12)#18) && (cast(avg(ss_net_profit#12)#18 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery7094)), DecimalType(13,7)))) - : : : +- Subquery subquery7094 + : : +- *(5) Filter (isnotnull(avg(ss_net_profit#12)#18) && (cast(avg(ss_net_profit#12)#18 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery9334)), DecimalType(13,7)))) + : : : +- Subquery subquery9334 : : : +- *(2) HashAggregate(keys=[ss_store_sk#14], functions=[avg(UnscaledValue(ss_net_profit#12))]) - : : : +- Exchange hashpartitioning(ss_store_sk#14, 200) + : : : +- Exchange hashpartitioning(ss_store_sk#14, 5) : : : +- *(1) HashAggregate(keys=[ss_store_sk#14], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) : : : +- *(1) Project [ss_store_sk#14, ss_net_profit#12] : : : +- *(1) Filter ((isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) && isnull(ss_addr_sk#15)) : : : +- *(1) FileScan parquet default.store_sales[ss_addr_sk#15,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)], ReadSchema: struct : : +- *(5) HashAggregate(keys=[ss_item_sk#16], functions=[avg(UnscaledValue(ss_net_profit#12))]) - : : +- ReusedExchange [ss_item_sk#16, sum#19, count#20], Exchange hashpartitioning(ss_item_sk#16, 200) + : : +- ReusedExchange [ss_item_sk#16, sum#19, count#20], Exchange hashpartitioning(ss_item_sk#16, 5) : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(8) Project [i_item_sk#9, i_product_name#4] : +- *(8) Filter isnotnull(i_item_sk#9) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt index 55aab5cc2..6704d2835 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt @@ -23,15 +23,15 @@ TakeOrderedAndProject [rnk,best_performing,worst_performing] InputAdapter Exchange [ss_store_sk] #3 WholeStageCodegen - HashAggregate [count,ss_store_sk,sum,sum,count,ss_net_profit] [sum,count,sum,count] + HashAggregate [sum,ss_net_profit,count,count,ss_store_sk,sum] [sum,count,sum,count] Project [ss_store_sk,ss_net_profit] Filter [ss_store_sk,ss_addr_sk] Scan parquet default.store_sales [ss_addr_sk,ss_store_sk,ss_net_profit] [ss_addr_sk,ss_store_sk,ss_net_profit] - HashAggregate [ss_item_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [avg(UnscaledValue(ss_net_profit)),rank_col,sum,avg(ss_net_profit),item_sk,count] + HashAggregate [ss_item_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [avg(UnscaledValue(ss_net_profit)),rank_col,item_sk,avg(ss_net_profit),sum,count] InputAdapter Exchange [ss_item_sk] #2 WholeStageCodegen - HashAggregate [ss_item_sk,sum,sum,count,count,ss_net_profit] [sum,count,sum,count] + HashAggregate [ss_item_sk,sum,sum,count,ss_net_profit,count] [sum,count,sum,count] Project [ss_item_sk,ss_net_profit] Filter [ss_store_sk] Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_profit] [ss_item_sk,ss_store_sk,ss_net_profit] @@ -55,11 +55,11 @@ TakeOrderedAndProject [rnk,best_performing,worst_performing] InputAdapter Exchange [ss_store_sk] #6 WholeStageCodegen - HashAggregate [count,ss_store_sk,sum,count,sum,ss_net_profit] [sum,count,sum,count] + HashAggregate [count,sum,ss_net_profit,count,ss_store_sk,sum] [sum,count,sum,count] Project [ss_store_sk,ss_net_profit] Filter [ss_store_sk,ss_addr_sk] Scan parquet default.store_sales [ss_addr_sk,ss_store_sk,ss_net_profit] [ss_addr_sk,ss_store_sk,ss_net_profit] - HashAggregate [ss_item_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [avg(ss_net_profit),item_sk,count,avg(UnscaledValue(ss_net_profit)),rank_col,sum] + HashAggregate [ss_item_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [item_sk,count,rank_col,sum,avg(UnscaledValue(ss_net_profit)),avg(ss_net_profit)] InputAdapter ReusedExchange [ss_item_sk,sum,count] [ss_item_sk,sum,count] #2 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt index 687085a12..ad97e7d1a 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[ca_zip#1 ASC NULLS FIRST,ca_city#2 ASC NULLS FIRST], output=[ca_zip#1,ca_city#2,sum(ws_sales_price)#3]) +- *(7) HashAggregate(keys=[ca_zip#1, ca_city#2], functions=[sum(UnscaledValue(ws_sales_price#4))]) - +- Exchange hashpartitioning(ca_zip#1, ca_city#2, 200) + +- Exchange hashpartitioning(ca_zip#1, ca_city#2, 5) +- *(6) HashAggregate(keys=[ca_zip#1, ca_city#2], functions=[partial_sum(UnscaledValue(ws_sales_price#4))]) +- *(6) Project [ws_sales_price#4, ca_city#2, ca_zip#1] +- *(6) Filter (substring(ca_zip#1, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) || exists#5) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt index f6d66999d..f4040fa0f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt @@ -12,7 +12,7 @@ TakeOrderedAndProject [ca_zip,ca_city,sum(ws_sales_price)] BroadcastHashJoin [ws_item_sk,i_item_sk] Project [ws_item_sk,ws_sales_price,ca_city,ca_zip] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ca_zip,ca_city,ws_sold_date_sk,ws_sales_price,ws_item_sk] + Project [ca_city,ws_sales_price,ca_zip,ws_item_sk,ws_sold_date_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] Project [ws_sold_date_sk,ws_item_sk,ws_sales_price,c_current_addr_sk] BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt index 0ae77a813..a5d4817de 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt @@ -5,7 +5,7 @@ TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_ :- *(8) Project [ss_ticket_number#5, bought_city#4, amt#6, profit#7, c_current_addr_sk#8, c_first_name#2, c_last_name#1] : +- *(8) BroadcastHashJoin [ss_customer_sk#10], [c_customer_sk#11], Inner, BuildRight : :- *(8) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#10, ss_addr_sk#12, ca_city#3], functions=[sum(UnscaledValue(ss_coupon_amt#13)), sum(UnscaledValue(ss_net_profit#14))]) - : : +- Exchange hashpartitioning(ss_ticket_number#5, ss_customer_sk#10, ss_addr_sk#12, ca_city#3, 200) + : : +- Exchange hashpartitioning(ss_ticket_number#5, ss_customer_sk#10, ss_addr_sk#12, ca_city#3, 5) : : +- *(5) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#10, ss_addr_sk#12, ca_city#3], functions=[partial_sum(UnscaledValue(ss_coupon_amt#13)), partial_sum(UnscaledValue(ss_net_profit#14))]) : : +- *(5) Project [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14, ca_city#3] : : +- *(5) BroadcastHashJoin [ss_addr_sk#12], [ca_address_sk#9], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt index 3010603c0..acac15c84 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt @@ -1,25 +1,25 @@ -TakeOrderedAndProject [bought_city,ss_ticket_number,profit,c_last_name,c_first_name,amt,ca_city] +TakeOrderedAndProject [amt,ss_ticket_number,c_last_name,bought_city,profit,c_first_name,ca_city] WholeStageCodegen - Project [bought_city,ss_ticket_number,profit,c_last_name,c_first_name,amt,ca_city] + Project [amt,ss_ticket_number,c_last_name,bought_city,profit,c_first_name,ca_city] BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] - Project [c_current_addr_sk,profit,amt,c_last_name,c_first_name,ss_ticket_number,bought_city] + Project [profit,c_last_name,amt,c_first_name,bought_city,c_current_addr_sk,ss_ticket_number] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - HashAggregate [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_coupon_amt)),sum,ca_city,ss_customer_sk,sum,ss_ticket_number,ss_addr_sk] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_coupon_amt)),profit,sum,amt,sum,bought_city] + HashAggregate [ss_customer_sk,ca_city,sum,sum(UnscaledValue(ss_coupon_amt)),sum,ss_addr_sk,sum(UnscaledValue(ss_net_profit)),ss_ticket_number] [sum,sum(UnscaledValue(ss_coupon_amt)),profit,sum,amt,sum(UnscaledValue(ss_net_profit)),bought_city] InputAdapter Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 WholeStageCodegen - HashAggregate [sum,ca_city,ss_customer_sk,ss_coupon_amt,sum,sum,sum,ss_ticket_number,ss_addr_sk,ss_net_profit] [sum,sum,sum,sum] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ca_city,ss_net_profit,ss_ticket_number] + HashAggregate [ss_customer_sk,ca_city,sum,ss_coupon_amt,ss_net_profit,sum,sum,ss_addr_sk,sum,ss_ticket_number] [sum,sum,sum,sum] + Project [ca_city,ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_hdemo_sk,ss_ticket_number] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_hdemo_sk,ss_ticket_number] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_store_sk,ss_coupon_amt,ss_net_profit] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] - Filter [ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_addr_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk,ss_coupon_amt,ss_net_profit] + Filter [ss_hdemo_sk,ss_customer_sk,ss_store_sk,ss_sold_date_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk,ss_coupon_amt,ss_net_profit] [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk,ss_coupon_amt,ss_net_profit] InputAdapter BroadcastExchange #2 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt index 914ebe9f5..47228048b 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt @@ -1,51 +1,52 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,s_store_name#3 ASC NULLS FIRST], output=[i_category#4,i_brand#5,s_store_name#3,s_company_name#6,d_year#7,d_moy#8,avg_monthly_sales#2,sum_sales#1,psum#9,nsum#10]) -+- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, avg_monthly_sales#2, sum_sales#1, sum_sales#11 AS psum#9, sum_sales#12 AS nsum#10] - +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, (rn#18 - 1)], Inner, BuildRight - :- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13, sum_sales#11] - : +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, (rn#23 + 1)], Inner, BuildRight - : :- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13] - : : +- *(22) Filter (((isnotnull(avg_monthly_sales#2) && (avg_monthly_sales#2 > 0.000000)) && (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000)) && isnotnull(rn#13)) - : : +- Window [avg(_w0#24) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7] - : : +- *(7) Filter (isnotnull(d_year#7) && (d_year#7 = 1999)) - : : +- Window [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#13], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] - : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#6 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, 200) - : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#25))]) - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 200) - : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#25))]) - : : +- *(4) Project [i_brand#5, i_category#4, ss_sales_price#25, d_year#7, d_moy#8, s_store_name#3, s_company_name#6] - : : +- *(4) BroadcastHashJoin [ss_store_sk#26], [s_store_sk#27], Inner, BuildRight - : : :- *(4) Project [i_brand#5, i_category#4, ss_store_sk#26, ss_sales_price#25, d_year#7, d_moy#8] - : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#28], [d_date_sk#29], Inner, BuildRight - : : : :- *(4) Project [i_brand#5, i_category#4, ss_sold_date_sk#28, ss_store_sk#26, ss_sales_price#25] - : : : : +- *(4) BroadcastHashJoin [i_item_sk#30], [ss_item_sk#31], Inner, BuildRight - : : : : :- *(4) Project [i_item_sk#30, i_brand#5, i_category#4] - : : : : : +- *(4) Filter ((isnotnull(i_item_sk#30) && isnotnull(i_brand#5)) && isnotnull(i_category#4)) - : : : : : +- *(4) FileScan parquet default.item[i_item_sk#30,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : : : +- *(1) Project [ss_sold_date_sk#28, ss_item_sk#31, ss_store_sk#26, ss_sales_price#25] - : : : : +- *(1) Filter ((isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#28)) && isnotnull(ss_store_sk#26)) - : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#28,ss_item_sk#31,ss_store_sk#26,ss_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [d_date_sk#29, d_year#7, d_moy#8] - : : : +- *(2) Filter ((((d_year#7 = 1999) || ((d_year#7 = 1998) && (d_moy#8 = 12))) || ((d_year#7 = 2000) && (d_moy#8 = 1))) && isnotnull(d_date_sk#29)) - : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#29,d_year#7,d_moy#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [s_store_sk#27, s_store_name#3, s_company_name#6] - : : +- *(3) Filter ((isnotnull(s_store_sk#27) && isnotnull(s_store_name#3)) && isnotnull(s_company_name#6)) - : : +- *(3) FileScan parquet default.store[s_store_sk#27,s_store_name#3,s_company_name#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)], ReadSchema: struct ++- *(23) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, avg_monthly_sales#2, sum_sales#1, sum_sales#11 AS psum#9, sum_sales#12 AS nsum#10] + +- *(23) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, (rn#18 - 1)], Inner, BuildRight + :- *(23) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13, sum_sales#11] + : +- *(23) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, (rn#23 + 1)], Inner, BuildRight + : :- *(23) Filter (((((isnotnull(d_year#7) && isnotnull(avg_monthly_sales#2)) && (d_year#7 = 1999)) && (avg_monthly_sales#2 > 0.000000)) && (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000)) && isnotnull(rn#13)) + : : +- Window [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#13], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] + : : +- *(8) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#6 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, 5) + : : +- *(7) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2] + : : +- Window [avg(_w0#24) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7] + : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#6 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST], false, 0 + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, 5) + : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#25))]) + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 5) + : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#25))]) + : : +- *(4) Project [i_brand#5, i_category#4, ss_sales_price#25, d_year#7, d_moy#8, s_store_name#3, s_company_name#6] + : : +- *(4) BroadcastHashJoin [ss_store_sk#26], [s_store_sk#27], Inner, BuildRight + : : :- *(4) Project [i_brand#5, i_category#4, ss_store_sk#26, ss_sales_price#25, d_year#7, d_moy#8] + : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#28], [d_date_sk#29], Inner, BuildRight + : : : :- *(4) Project [i_brand#5, i_category#4, ss_sold_date_sk#28, ss_store_sk#26, ss_sales_price#25] + : : : : +- *(4) BroadcastHashJoin [i_item_sk#30], [ss_item_sk#31], Inner, BuildRight + : : : : :- *(4) Project [i_item_sk#30, i_brand#5, i_category#4] + : : : : : +- *(4) Filter ((isnotnull(i_item_sk#30) && isnotnull(i_category#4)) && isnotnull(i_brand#5)) + : : : : : +- *(4) FileScan parquet default.item[i_item_sk#30,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : : +- *(1) Project [ss_sold_date_sk#28, ss_item_sk#31, ss_store_sk#26, ss_sales_price#25] + : : : : +- *(1) Filter ((isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#28)) && isnotnull(ss_store_sk#26)) + : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#28,ss_item_sk#31,ss_store_sk#26,ss_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [d_date_sk#29, d_year#7, d_moy#8] + : : : +- *(2) Filter ((((d_year#7 = 1999) || ((d_year#7 = 1998) && (d_moy#8 = 12))) || ((d_year#7 = 2000) && (d_moy#8 = 1))) && isnotnull(d_date_sk#29)) + : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#29,d_year#7,d_moy#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [s_store_sk#27, s_store_name#3, s_company_name#6] + : : +- *(3) Filter ((isnotnull(s_store_sk#27) && isnotnull(s_store_name#3)) && isnotnull(s_company_name#6)) + : : +- *(3) FileScan parquet default.store[s_store_sk#27,s_store_name#3,s_company_name#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] + 1))) - : +- *(14) Project [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#11, rn#23] - : +- *(14) Filter isnotnull(rn#23) + : +- *(15) Project [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#11, rn#23] + : +- *(15) Filter isnotnull(rn#23) : +- Window [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#23], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] - : +- *(13) Sort [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, s_store_name#21 ASC NULLS FIRST, s_company_name#22 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 - : +- Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 200) - : +- *(12) HashAggregate(keys=[i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33], functions=[sum(UnscaledValue(ss_sales_price#25))]) - : +- ReusedExchange [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33, sum#34], Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 200) + : +- *(14) Sort [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, s_store_name#21 ASC NULLS FIRST, s_company_name#22 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 5) + : +- *(13) HashAggregate(keys=[i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33], functions=[sum(UnscaledValue(ss_sales_price#25))]) + : +- ReusedExchange [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33, sum#34], Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 5) +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] - 1))) - +- *(21) Project [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, sum_sales#12, rn#18] - +- *(21) Filter isnotnull(rn#18) + +- *(22) Project [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, sum_sales#12, rn#18] + +- *(22) Filter isnotnull(rn#18) +- Window [rank(d_year#35, d_moy#36) windowspecdefinition(i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#18], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17], [d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST] - +- *(20) Sort [i_category#14 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#16 ASC NULLS FIRST, s_company_name#17 ASC NULLS FIRST, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST], false, 0 - +- ReusedExchange [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35, d_moy#36, sum_sales#12], Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 200) \ No newline at end of file + +- *(21) Sort [i_category#14 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#16 ASC NULLS FIRST, s_company_name#17 ASC NULLS FIRST, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST], false, 0 + +- ReusedExchange [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35, d_moy#36, sum_sales#12], Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 5) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt index 656b50091..6d082f8d1 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt @@ -1,77 +1,80 @@ -TakeOrderedAndProject [d_year,d_moy,s_store_name,i_category,psum,i_brand,sum_sales,s_company_name,avg_monthly_sales,nsum] +TakeOrderedAndProject [i_category,d_year,avg_monthly_sales,s_store_name,nsum,sum_sales,psum,i_brand,s_company_name,d_moy] WholeStageCodegen - Project [s_store_name,d_year,avg_monthly_sales,d_moy,sum_sales,sum_sales,i_category,i_brand,s_company_name,sum_sales] - BroadcastHashJoin [s_store_name,s_company_name,i_brand,i_category,i_category,rn,i_brand,s_store_name,rn,s_company_name] - Project [d_year,s_company_name,rn,d_moy,sum_sales,i_brand,sum_sales,i_category,s_store_name,avg_monthly_sales] - BroadcastHashJoin [s_store_name,i_category,i_category,s_company_name,rn,i_brand,rn,s_company_name,s_store_name,i_brand] - Project [d_year,s_company_name,rn,d_moy,i_brand,sum_sales,i_category,s_store_name,avg_monthly_sales] - Filter [avg_monthly_sales,sum_sales,rn] - InputAdapter - Window [d_year,s_company_name,_w0,i_brand,i_category,s_store_name] - WholeStageCodegen - Filter [d_year] - InputAdapter - Window [d_year,s_company_name,d_moy,i_brand,i_category,s_store_name] - WholeStageCodegen - Sort [d_year,s_company_name,d_moy,i_brand,i_category,s_store_name] - InputAdapter - Exchange [i_category,i_brand,s_store_name,s_company_name] #1 - WholeStageCodegen - HashAggregate [d_year,s_company_name,d_moy,sum,sum(UnscaledValue(ss_sales_price)),i_brand,i_category,s_store_name] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] - InputAdapter - Exchange [d_year,s_company_name,d_moy,i_brand,i_category,s_store_name] #2 - WholeStageCodegen - HashAggregate [d_year,s_company_name,d_moy,sum,i_brand,ss_sales_price,i_category,sum,s_store_name] [sum,sum] - Project [s_store_name,s_company_name,d_moy,d_year,i_category,ss_sales_price,i_brand] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [d_moy,d_year,ss_store_sk,i_category,ss_sales_price,i_brand] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_store_sk,i_category,ss_sales_price,ss_sold_date_sk,i_brand] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_item_sk,i_brand,i_category] - Filter [i_item_sk,i_brand,i_category] - Scan parquet default.item [i_item_sk,i_brand,i_category] [i_item_sk,i_brand,i_category] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk,d_year,d_moy] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [s_store_sk,s_store_name,s_company_name] - Filter [s_store_sk,s_store_name,s_company_name] - Scan parquet default.store [s_store_sk,s_store_name,s_company_name] [s_store_sk,s_store_name,s_company_name] + Project [i_category,d_year,sum_sales,sum_sales,avg_monthly_sales,sum_sales,s_store_name,i_brand,s_company_name,d_moy] + BroadcastHashJoin [i_category,s_store_name,i_category,s_company_name,rn,rn,s_store_name,i_brand,i_brand,s_company_name] + Project [s_store_name,rn,d_moy,sum_sales,avg_monthly_sales,sum_sales,d_year,s_company_name,i_category,i_brand] + BroadcastHashJoin [s_store_name,i_category,s_company_name,rn,i_category,i_brand,s_store_name,i_brand,s_company_name,rn] + Filter [d_year,avg_monthly_sales,sum_sales,rn] + InputAdapter + Window [s_store_name,d_moy,d_year,s_company_name,i_category,i_brand] + WholeStageCodegen + Sort [s_store_name,d_moy,d_year,s_company_name,i_category,i_brand] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #1 + WholeStageCodegen + Project [s_store_name,d_moy,sum_sales,avg_monthly_sales,d_year,s_company_name,i_category,i_brand] + InputAdapter + Window [s_store_name,_w0,d_year,s_company_name,i_category,i_brand] + WholeStageCodegen + Sort [s_store_name,d_year,s_company_name,i_category,i_brand] + InputAdapter + Exchange [s_store_name,d_year,s_company_name,i_category,i_brand] #2 + WholeStageCodegen + HashAggregate [sum(UnscaledValue(ss_sales_price)),s_store_name,d_moy,d_year,s_company_name,i_category,i_brand,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [s_store_name,d_moy,d_year,s_company_name,i_category,i_brand] #3 + WholeStageCodegen + HashAggregate [s_store_name,d_moy,d_year,ss_sales_price,s_company_name,i_category,i_brand,sum,sum] [sum,sum] + Project [d_year,ss_sales_price,i_brand,i_category,s_company_name,s_store_name,d_moy] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [d_year,ss_sales_price,i_brand,i_category,ss_store_sk,d_moy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sales_price,i_brand,i_category,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_brand,i_category] + Filter [i_item_sk,i_category,i_brand] + Scan parquet default.item [i_item_sk,i_brand,i_category] [i_item_sk,i_brand,i_category] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk,d_year,d_moy] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [s_store_sk,s_store_name,s_company_name] + Filter [s_store_sk,s_store_name,s_company_name] + Scan parquet default.store [s_store_sk,s_store_name,s_company_name] [s_store_sk,s_store_name,s_company_name] InputAdapter - BroadcastExchange #6 + BroadcastExchange #7 WholeStageCodegen - Project [i_category,rn,s_company_name,s_store_name,sum_sales,i_brand] + Project [i_category,sum_sales,rn,i_brand,s_company_name,s_store_name] Filter [rn] InputAdapter - Window [i_category,s_company_name,s_store_name,d_moy,d_year,i_brand] + Window [d_moy,i_category,d_year,i_brand,s_company_name,s_store_name] WholeStageCodegen - Sort [i_category,s_company_name,s_store_name,d_moy,d_year,i_brand] + Sort [d_moy,i_category,d_year,i_brand,s_company_name,s_store_name] InputAdapter - Exchange [i_category,i_brand,s_store_name,s_company_name] #7 + Exchange [i_category,i_brand,s_store_name,s_company_name] #8 WholeStageCodegen - HashAggregate [i_category,s_company_name,s_store_name,sum(UnscaledValue(ss_sales_price)),d_moy,d_year,sum,i_brand] [sum(UnscaledValue(ss_sales_price)),sum_sales,sum] + HashAggregate [d_moy,i_category,sum(UnscaledValue(ss_sales_price)),sum,d_year,i_brand,s_company_name,s_store_name] [sum(UnscaledValue(ss_sales_price)),sum_sales,sum] InputAdapter - ReusedExchange [i_category,s_company_name,s_store_name,d_moy,d_year,sum,i_brand] [i_category,s_company_name,s_store_name,d_moy,d_year,sum,i_brand] #2 + ReusedExchange [d_moy,i_category,sum,d_year,i_brand,s_company_name,s_store_name] [d_moy,i_category,sum,d_year,i_brand,s_company_name,s_store_name] #3 InputAdapter - BroadcastExchange #8 + BroadcastExchange #9 WholeStageCodegen - Project [rn,i_category,s_store_name,i_brand,s_company_name,sum_sales] + Project [i_brand,rn,s_store_name,sum_sales,i_category,s_company_name] Filter [rn] InputAdapter - Window [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year] + Window [i_brand,d_year,s_store_name,d_moy,i_category,s_company_name] WholeStageCodegen - Sort [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year] + Sort [i_brand,d_year,s_store_name,d_moy,i_category,s_company_name] InputAdapter - ReusedExchange [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year,sum_sales] [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year,sum_sales] #7 + ReusedExchange [i_brand,d_year,s_store_name,d_moy,sum_sales,i_category,s_company_name] [i_brand,d_year,s_store_name,d_moy,sum_sales,i_category,s_company_name] #8 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt index 78fb16b88..2e8b5ecc4 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt @@ -9,12 +9,12 @@ WholeStageCodegen Project [ss_sold_date_sk,ss_quantity] BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] Project [ss_sold_date_sk,ss_addr_sk,ss_quantity,ss_net_profit] - BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk,ss_sales_price,cd_education_status,cd_marital_status] - Project [ss_addr_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] + BroadcastHashJoin [cd_education_status,cd_demo_sk,cd_marital_status,ss_sales_price,ss_cdemo_sk] + Project [ss_sales_price,ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sold_date_sk,ss_net_profit] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_addr_sk,ss_quantity,ss_store_sk,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] + Project [ss_sales_price,ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_net_profit] Filter [ss_store_sk,ss_cdemo_sk,ss_addr_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_quantity,ss_store_sk,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] [ss_addr_sk,ss_quantity,ss_store_sk,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sales_price,ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_net_profit] [ss_sales_price,ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_net_profit] InputAdapter BroadcastExchange #2 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt index bf898b8ba..2b0459139 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,return_rank#2 ASC NULLS FIRST,currency_rank#3 ASC NULLS FIRST], output=[channel#1,item#4,return_ratio#5,return_rank#2,currency_rank#3]) +- *(23) HashAggregate(keys=[channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3], functions=[]) - +- Exchange hashpartitioning(channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3, 200) + +- Exchange hashpartitioning(channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3, 5) +- *(22) HashAggregate(keys=[channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3], functions=[]) +- Union :- *(7) Project [web AS channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3] @@ -12,15 +12,15 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,return_rank# : +- *(5) Sort [return_ratio#5 ASC NULLS FIRST], false, 0 : +- Exchange SinglePartition : +- *(4) HashAggregate(keys=[ws_item_sk#7], functions=[sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), sum(cast(coalesce(ws_quantity#9, 0) as bigint)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) - : +- Exchange hashpartitioning(ws_item_sk#7, 200) + : +- Exchange hashpartitioning(ws_item_sk#7, 5) : +- *(3) HashAggregate(keys=[ws_item_sk#7], functions=[partial_sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), partial_sum(cast(coalesce(ws_quantity#9, 0) as bigint)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) : +- *(3) Project [ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] : +- *(3) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] : : +- *(3) BroadcastHashJoin [cast(ws_order_number#14 as bigint), cast(ws_item_sk#7 as bigint)], [wr_order_number#15, wr_item_sk#16], Inner, BuildRight : : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_order_number#14, ws_quantity#9, ws_net_paid#11] - : : : +- *(3) Filter ((((((((isnotnull(ws_net_profit#17) && isnotnull(ws_quantity#9)) && isnotnull(ws_net_paid#11)) && (ws_net_profit#17 > 1.00)) && (ws_net_paid#11 > 0.00)) && (ws_quantity#9 > 0)) && isnotnull(ws_order_number#14)) && isnotnull(ws_item_sk#7)) && isnotnull(ws_sold_date_sk#12)) - : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#7,ws_order_number#14,ws_quantity#9,ws_net_paid#11,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_quantity), IsNotNull(ws_net_paid), GreaterThan(ws_net_pro..., ReadSchema: struct 1.00)) && (ws_net_paid#11 > 0.00)) && (ws_quantity#9 > 0)) && isnotnull(ws_order_number#14)) && isnotnull(ws_item_sk#7)) && isnotnull(ws_sold_date_sk#12)) + : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#7,ws_order_number#14,ws_quantity#9,ws_net_paid#11,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_net_paid), IsNotNull(ws_quantity), GreaterThan(ws_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(wr_item_sk#16)) && isnotnull(wr_order_number#15)) @@ -31,45 +31,47 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,return_rank# : +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_year#18,d_moy#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)], ReadSchema: struct :- *(14) Project [catalog AS channel#20, item#21, return_ratio#22, return_rank#23, currency_rank#24] : +- *(14) Filter ((return_rank#23 <= 10) || (currency_rank#24 <= 10)) - : +- Window [rank(currency_ratio#25) windowspecdefinition(currency_ratio#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#24], [currency_ratio#25 ASC NULLS FIRST] - : +- *(13) Sort [currency_ratio#25 ASC NULLS FIRST], false, 0 - : +- Window [rank(return_ratio#22) windowspecdefinition(return_ratio#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#23], [return_ratio#22 ASC NULLS FIRST] - : +- *(12) Sort [return_ratio#22 ASC NULLS FIRST], false, 0 - : +- Exchange SinglePartition - : +- *(11) HashAggregate(keys=[cs_item_sk#26], functions=[sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), sum(cast(coalesce(cs_quantity#28, 0) as bigint)), sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) - : +- Exchange hashpartitioning(cs_item_sk#26, 200) - : +- *(10) HashAggregate(keys=[cs_item_sk#26], functions=[partial_sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#28, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) - : +- *(10) Project [cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] - : +- *(10) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#13], Inner, BuildRight - : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] - : : +- *(10) BroadcastHashJoin [cs_order_number#32, cs_item_sk#26], [cr_order_number#33, cr_item_sk#34], Inner, BuildRight - : : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_order_number#32, cs_quantity#28, cs_net_paid#30] - : : : +- *(10) Filter ((((((((isnotnull(cs_net_profit#35) && isnotnull(cs_net_paid#30)) && isnotnull(cs_quantity#28)) && (cs_net_profit#35 > 1.00)) && (cs_net_paid#30 > 0.00)) && (cs_quantity#28 > 0)) && isnotnull(cs_item_sk#26)) && isnotnull(cs_order_number#32)) && isnotnull(cs_sold_date_sk#31)) - : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#26,cs_order_number#32,cs_quantity#28,cs_net_paid#30,cs_net_profit#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_net_profit), IsNotNull(cs_net_paid), IsNotNull(cs_quantity), GreaterThan(cs_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(cr_item_sk#34)) && isnotnull(cr_order_number#33)) - : : +- *(8) FileScan parquet default.catalog_returns[cr_item_sk#34,cr_order_number#33,cr_return_quantity#27,cr_return_amount#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_item_sk), IsNo..., ReadSchema: struct - : +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- Window [rank(return_ratio#22) windowspecdefinition(return_ratio#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#23], [return_ratio#22 ASC NULLS FIRST] + : +- *(13) Sort [return_ratio#22 ASC NULLS FIRST], false, 0 + : +- *(13) Project [item#21, return_ratio#22, currency_rank#24] + : +- Window [rank(currency_ratio#25) windowspecdefinition(currency_ratio#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#24], [currency_ratio#25 ASC NULLS FIRST] + : +- *(12) Sort [currency_ratio#25 ASC NULLS FIRST], false, 0 + : +- Exchange SinglePartition + : +- *(11) HashAggregate(keys=[cs_item_sk#26], functions=[sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), sum(cast(coalesce(cs_quantity#28, 0) as bigint)), sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) + : +- Exchange hashpartitioning(cs_item_sk#26, 5) + : +- *(10) HashAggregate(keys=[cs_item_sk#26], functions=[partial_sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#28, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) + : +- *(10) Project [cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] + : +- *(10) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#13], Inner, BuildRight + : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] + : : +- *(10) BroadcastHashJoin [cs_order_number#32, cs_item_sk#26], [cr_order_number#33, cr_item_sk#34], Inner, BuildRight + : : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_order_number#32, cs_quantity#28, cs_net_paid#30] + : : : +- *(10) Filter ((((((((isnotnull(cs_net_paid#30) && isnotnull(cs_quantity#28)) && isnotnull(cs_net_profit#35)) && (cs_net_profit#35 > 1.00)) && (cs_net_paid#30 > 0.00)) && (cs_quantity#28 > 0)) && isnotnull(cs_item_sk#26)) && isnotnull(cs_order_number#32)) && isnotnull(cs_sold_date_sk#31)) + : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#26,cs_order_number#32,cs_quantity#28,cs_net_paid#30,cs_net_profit#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_net_paid), IsNotNull(cs_quantity), IsNotNull(cs_net_profit), GreaterThan(cs_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(cr_order_number#33)) && isnotnull(cr_item_sk#34)) + : : +- *(8) FileScan parquet default.catalog_returns[cr_item_sk#34,cr_order_number#33,cr_return_quantity#27,cr_return_amount#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number),..., ReadSchema: struct + : +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(21) Project [store AS channel#36, item#37, return_ratio#38, return_rank#39, currency_rank#40] +- *(21) Filter ((return_rank#39 <= 10) || (currency_rank#40 <= 10)) - +- Window [rank(currency_ratio#41) windowspecdefinition(currency_ratio#41 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#40], [currency_ratio#41 ASC NULLS FIRST] - +- *(20) Sort [currency_ratio#41 ASC NULLS FIRST], false, 0 - +- Window [rank(return_ratio#38) windowspecdefinition(return_ratio#38 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#39], [return_ratio#38 ASC NULLS FIRST] - +- *(19) Sort [return_ratio#38 ASC NULLS FIRST], false, 0 - +- Exchange SinglePartition - +- *(18) HashAggregate(keys=[ss_item_sk#42], functions=[sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), sum(cast(coalesce(ss_quantity#44, 0) as bigint)), sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) - +- Exchange hashpartitioning(ss_item_sk#42, 200) - +- *(17) HashAggregate(keys=[ss_item_sk#42], functions=[partial_sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), partial_sum(cast(coalesce(ss_quantity#44, 0) as bigint)), partial_sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) - +- *(17) Project [ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] - +- *(17) BroadcastHashJoin [ss_sold_date_sk#47], [d_date_sk#13], Inner, BuildRight - :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] - : +- *(17) BroadcastHashJoin [cast(ss_ticket_number#48 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#49, sr_item_sk#50], Inner, BuildRight - : :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_ticket_number#48, ss_quantity#44, ss_net_paid#46] - : : +- *(17) Filter ((((((((isnotnull(ss_quantity#44) && isnotnull(ss_net_paid#46)) && isnotnull(ss_net_profit#51)) && (ss_net_profit#51 > 1.00)) && (ss_net_paid#46 > 0.00)) && (ss_quantity#44 > 0)) && isnotnull(ss_ticket_number#48)) && isnotnull(ss_item_sk#42)) && isnotnull(ss_sold_date_sk#47)) - : : +- *(17) FileScan parquet default.store_sales[ss_sold_date_sk#47,ss_item_sk#42,ss_ticket_number#48,ss_quantity#44,ss_net_paid#46,ss_net_profit#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), IsNotNull(ss_net_paid), IsNotNull(ss_net_profit), GreaterThan(ss_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(sr_ticket_number#49)) && isnotnull(sr_item_sk#50)) - : +- *(15) FileScan parquet default.store_returns[sr_item_sk#50,sr_ticket_number#49,sr_return_quantity#43,sr_return_amt#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNo..., ReadSchema: struct - +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file + +- Window [rank(return_ratio#38) windowspecdefinition(return_ratio#38 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#39], [return_ratio#38 ASC NULLS FIRST] + +- *(20) Sort [return_ratio#38 ASC NULLS FIRST], false, 0 + +- *(20) Project [item#37, return_ratio#38, currency_rank#40] + +- Window [rank(currency_ratio#41) windowspecdefinition(currency_ratio#41 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#40], [currency_ratio#41 ASC NULLS FIRST] + +- *(19) Sort [currency_ratio#41 ASC NULLS FIRST], false, 0 + +- Exchange SinglePartition + +- *(18) HashAggregate(keys=[ss_item_sk#42], functions=[sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), sum(cast(coalesce(ss_quantity#44, 0) as bigint)), sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) + +- Exchange hashpartitioning(ss_item_sk#42, 5) + +- *(17) HashAggregate(keys=[ss_item_sk#42], functions=[partial_sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), partial_sum(cast(coalesce(ss_quantity#44, 0) as bigint)), partial_sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) + +- *(17) Project [ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] + +- *(17) BroadcastHashJoin [ss_sold_date_sk#47], [d_date_sk#13], Inner, BuildRight + :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] + : +- *(17) BroadcastHashJoin [cast(ss_ticket_number#48 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#49, sr_item_sk#50], Inner, BuildRight + : :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_ticket_number#48, ss_quantity#44, ss_net_paid#46] + : : +- *(17) Filter ((((((((isnotnull(ss_net_paid#46) && isnotnull(ss_net_profit#51)) && isnotnull(ss_quantity#44)) && (ss_net_profit#51 > 1.00)) && (ss_net_paid#46 > 0.00)) && (ss_quantity#44 > 0)) && isnotnull(ss_ticket_number#48)) && isnotnull(ss_item_sk#42)) && isnotnull(ss_sold_date_sk#47)) + : : +- *(17) FileScan parquet default.store_sales[ss_sold_date_sk#47,ss_item_sk#42,ss_ticket_number#48,ss_quantity#44,ss_net_paid#46,ss_net_profit#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_net_paid), IsNotNull(ss_net_profit), IsNotNull(ss_quantity), GreaterThan(ss_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(sr_item_sk#50)) && isnotnull(sr_ticket_number#49)) + : +- *(15) FileScan parquet default.store_returns[sr_item_sk#50,sr_ticket_number#49,sr_return_quantity#43,sr_return_amt#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_item_sk), IsNotNull(..., ReadSchema: struct + +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt index e0a243a63..809d04622 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt @@ -1,10 +1,10 @@ -TakeOrderedAndProject [channel,currency_rank,return_rank,item,return_ratio] +TakeOrderedAndProject [return_rank,item,currency_rank,channel,return_ratio] WholeStageCodegen - HashAggregate [channel,currency_rank,return_rank,item,return_ratio] + HashAggregate [return_rank,item,currency_rank,channel,return_ratio] InputAdapter - Exchange [channel,currency_rank,return_rank,item,return_ratio] #1 + Exchange [return_rank,item,currency_rank,channel,return_ratio] #1 WholeStageCodegen - HashAggregate [channel,currency_rank,return_rank,item,return_ratio] + HashAggregate [return_rank,item,currency_rank,channel,return_ratio] InputAdapter Union WholeStageCodegen @@ -21,18 +21,18 @@ TakeOrderedAndProject [channel,currency_rank,return_rank,item,return_ratio] InputAdapter Exchange #2 WholeStageCodegen - HashAggregate [sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),ws_item_sk,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum,sum(cast(coalesce(ws_quantity, 0) as bigint))] [currency_ratio,return_ratio,sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),item,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum,sum(cast(coalesce(ws_quantity, 0) as bigint))] + HashAggregate [sum,ws_item_sk,sum,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum,sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),sum] [sum,return_ratio,currency_ratio,sum,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),item,sum,sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),sum] InputAdapter Exchange [ws_item_sk] #3 WholeStageCodegen - HashAggregate [sum,wr_return_amt,sum,ws_item_sk,sum,sum,sum,ws_net_paid,ws_quantity,wr_return_quantity,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [wr_return_amt,ws_net_paid,ws_quantity,wr_return_quantity,ws_item_sk] + HashAggregate [sum,ws_item_sk,sum,sum,wr_return_quantity,sum,sum,ws_net_paid,ws_quantity,sum,wr_return_amt,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [ws_net_paid,wr_return_quantity,wr_return_amt,ws_quantity,ws_item_sk] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [wr_return_amt,ws_net_paid,ws_quantity,ws_sold_date_sk,wr_return_quantity,ws_item_sk] + Project [ws_net_paid,wr_return_quantity,wr_return_amt,ws_quantity,ws_item_sk,ws_sold_date_sk] BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - Project [ws_net_paid,ws_quantity,ws_order_number,ws_sold_date_sk,ws_item_sk] - Filter [ws_net_profit,ws_order_number,ws_item_sk,ws_net_paid,ws_sold_date_sk,ws_quantity] - Scan parquet default.web_sales [ws_net_paid,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_item_sk] [ws_net_paid,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_item_sk] + Project [ws_net_paid,ws_order_number,ws_quantity,ws_item_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_order_number,ws_sold_date_sk,ws_net_paid,ws_quantity,ws_net_profit] + Scan parquet default.web_sales [ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_item_sk,ws_sold_date_sk] [ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_item_sk,ws_sold_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen @@ -49,67 +49,69 @@ TakeOrderedAndProject [channel,currency_rank,return_rank,item,return_ratio] Project [item,return_ratio,return_rank,currency_rank] Filter [return_rank,currency_rank] InputAdapter - Window [currency_ratio] + Window [return_ratio] WholeStageCodegen - Sort [currency_ratio] - InputAdapter - Window [return_ratio] - WholeStageCodegen - Sort [return_ratio] - InputAdapter - Exchange #6 - WholeStageCodegen - HashAggregate [sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum,sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),sum,sum,cs_item_sk,sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(cast(coalesce(cs_quantity, 0) as bigint))] [sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum,item,return_ratio,sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),sum,sum,sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(cast(coalesce(cs_quantity, 0) as bigint)),currency_ratio] - InputAdapter - Exchange [cs_item_sk] #7 - WholeStageCodegen - HashAggregate [sum,cr_return_amount,cr_return_quantity,sum,cs_net_paid,cs_quantity,sum,sum,sum,sum,sum,cs_item_sk,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [cs_net_paid,cs_quantity,cr_return_quantity,cr_return_amount,cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_net_paid,cs_quantity,cr_return_quantity,cs_sold_date_sk,cr_return_amount,cs_item_sk] - BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_net_paid,cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk] - Filter [cs_net_profit,cs_net_paid,cs_quantity,cs_item_sk,cs_sold_date_sk,cs_order_number] - Scan parquet default.catalog_sales [cs_net_paid,cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit] [cs_net_paid,cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] - Filter [cr_return_amount,cr_item_sk,cr_order_number] - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #5 + Sort [return_ratio] + Project [item,return_ratio,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen + Sort [currency_ratio] + InputAdapter + Exchange #6 + WholeStageCodegen + HashAggregate [sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),sum,sum(cast(coalesce(cs_quantity, 0) as bigint)),sum,cs_item_sk,sum,sum] [sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(cast(coalesce(cr_return_quantity, 0) as bigint)),return_ratio,sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),sum,sum(cast(coalesce(cs_quantity, 0) as bigint)),currency_ratio,sum,item,sum,sum] + InputAdapter + Exchange [cs_item_sk] #7 + WholeStageCodegen + HashAggregate [sum,sum,cr_return_amount,cs_net_paid,sum,cr_return_quantity,sum,cs_item_sk,cs_quantity,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [cs_quantity,cs_net_paid,cs_item_sk,cr_return_quantity,cr_return_amount] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_quantity,cs_net_paid,cs_item_sk,cs_sold_date_sk,cr_return_quantity,cr_return_amount] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_quantity,cs_net_paid,cs_item_sk,cs_sold_date_sk,cs_order_number] + Filter [cs_net_profit,cs_order_number,cs_net_paid,cs_item_sk,cs_quantity,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_quantity,cs_net_paid,cs_item_sk,cs_sold_date_sk,cs_net_profit,cs_order_number] [cs_quantity,cs_net_paid,cs_item_sk,cs_sold_date_sk,cs_net_profit,cs_order_number] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + Filter [cr_return_amount,cr_order_number,cr_item_sk] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #5 WholeStageCodegen Project [item,return_ratio,return_rank,currency_rank] Filter [return_rank,currency_rank] InputAdapter - Window [currency_ratio] + Window [return_ratio] WholeStageCodegen - Sort [currency_ratio] - InputAdapter - Window [return_ratio] - WholeStageCodegen - Sort [return_ratio] - InputAdapter - Exchange #9 - WholeStageCodegen - HashAggregate [sum,ss_item_sk,sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum,sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum,sum] [sum,sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),item,sum(cast(coalesce(ss_quantity, 0) as bigint)),sum,sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(cast(coalesce(sr_return_quantity, 0) as bigint)),return_ratio,sum,sum,currency_ratio] - InputAdapter - Exchange [ss_item_sk] #10 - WholeStageCodegen - HashAggregate [ss_net_paid,sum,ss_item_sk,sum,sum,sr_return_amt,sum,sum,ss_quantity,sr_return_quantity,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [ss_quantity,ss_item_sk,ss_net_paid,sr_return_quantity,sr_return_amt] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,ss_net_paid,sr_return_quantity,ss_sold_date_sk,sr_return_amt] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_quantity,ss_item_sk,ss_net_paid,ss_sold_date_sk,ss_ticket_number] - Filter [ss_net_paid,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_net_profit,ss_ticket_number] - Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_net_paid,ss_net_profit,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_net_paid,ss_net_profit,ss_sold_date_sk,ss_ticket_number] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen - Project [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] - Filter [sr_return_amt,sr_ticket_number,sr_item_sk] - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #5 + Sort [return_ratio] + Project [item,return_ratio,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen + Sort [currency_ratio] + InputAdapter + Exchange #9 + WholeStageCodegen + HashAggregate [sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),ss_item_sk,sum,sum(cast(coalesce(ss_quantity, 0) as bigint)),sum] [sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),item,sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum,sum(cast(coalesce(ss_quantity, 0) as bigint)),sum,currency_ratio,return_ratio] + InputAdapter + Exchange [ss_item_sk] #10 + WholeStageCodegen + HashAggregate [sum,sum,sum,sum,sum,ss_quantity,ss_item_sk,sum,sum,sr_return_quantity,sum,sr_return_amt,ss_net_paid] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sr_return_amt,ss_item_sk,ss_quantity,sr_return_quantity,ss_net_paid] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [sr_return_amt,ss_item_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,ss_net_paid] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_net_paid] + Filter [ss_quantity,ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_net_profit,ss_net_paid] + Scan parquet default.store_sales [ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_net_paid,ss_net_profit] [ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_net_paid,ss_net_profit] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + Filter [sr_return_amt,sr_item_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt index 983bb86ee..0bad61dd2 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt @@ -1,12 +1,12 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NULLS FIRST], output=[channel#1,id#2,sales#3,returns#4,profit#5]) +- *(21) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[sum(sales#7), sum(returns#8), sum(profit#9)]) - +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 200) + +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 5) +- *(20) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[partial_sum(sales#7), partial_sum(returns#8), partial_sum(profit#9)]) +- *(20) Expand [List(sales#7, returns#8, profit#9, channel#10, id#11, 0), List(sales#7, returns#8, profit#9, channel#10, null, 1), List(sales#7, returns#8, profit#9, null, null, 3)], [sales#7, returns#8, profit#9, channel#1, id#2, spark_grouping_id#6] +- Union :- *(6) HashAggregate(keys=[s_store_id#12], functions=[sum(UnscaledValue(sales_price#13)), sum(UnscaledValue(return_amt#14)), sum(UnscaledValue(profit#15)), sum(UnscaledValue(net_loss#16))]) - : +- Exchange hashpartitioning(s_store_id#12, 200) + : +- Exchange hashpartitioning(s_store_id#12, 5) : +- *(5) HashAggregate(keys=[s_store_id#12], functions=[partial_sum(UnscaledValue(sales_price#13)), partial_sum(UnscaledValue(return_amt#14)), partial_sum(UnscaledValue(profit#15)), partial_sum(UnscaledValue(net_loss#16))]) : +- *(5) Project [sales_price#13, profit#15, return_amt#14, net_loss#16, s_store_id#12] : +- *(5) BroadcastHashJoin [store_sk#17], [cast(s_store_sk#18 as bigint)], Inner, BuildRight @@ -28,7 +28,7 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL : +- *(4) Filter isnotnull(s_store_sk#18) : +- *(4) FileScan parquet default.store[s_store_sk#18,s_store_id#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct :- *(12) HashAggregate(keys=[cp_catalog_page_id#36], functions=[sum(UnscaledValue(sales_price#37)), sum(UnscaledValue(return_amt#38)), sum(UnscaledValue(profit#39)), sum(UnscaledValue(net_loss#40))]) - : +- Exchange hashpartitioning(cp_catalog_page_id#36, 200) + : +- Exchange hashpartitioning(cp_catalog_page_id#36, 5) : +- *(11) HashAggregate(keys=[cp_catalog_page_id#36], functions=[partial_sum(UnscaledValue(sales_price#37)), partial_sum(UnscaledValue(return_amt#38)), partial_sum(UnscaledValue(profit#39)), partial_sum(UnscaledValue(net_loss#40))]) : +- *(11) Project [sales_price#37, profit#39, return_amt#38, net_loss#40, cp_catalog_page_id#36] : +- *(11) BroadcastHashJoin [page_sk#41], [cp_catalog_page_sk#42], Inner, BuildRight @@ -50,7 +50,7 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL : +- *(10) Filter isnotnull(cp_catalog_page_sk#42) : +- *(10) FileScan parquet default.catalog_page[cp_catalog_page_sk#42,cp_catalog_page_id#36] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_page], PartitionFilters: [], PushedFilters: [IsNotNull(cp_catalog_page_sk)], ReadSchema: struct +- *(19) HashAggregate(keys=[web_site_id#58], functions=[sum(UnscaledValue(sales_price#59)), sum(UnscaledValue(return_amt#60)), sum(UnscaledValue(profit#61)), sum(UnscaledValue(net_loss#62))]) - +- Exchange hashpartitioning(web_site_id#58, 200) + +- Exchange hashpartitioning(web_site_id#58, 5) +- *(18) HashAggregate(keys=[web_site_id#58], functions=[partial_sum(UnscaledValue(sales_price#59)), partial_sum(UnscaledValue(return_amt#60)), partial_sum(UnscaledValue(profit#61)), partial_sum(UnscaledValue(net_loss#62))]) +- *(18) Project [sales_price#59, profit#61, return_amt#60, net_loss#62, web_site_id#58] +- *(18) BroadcastHashJoin [wsr_web_site_sk#63], [web_site_sk#64], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt index a3baee3e1..c30a48e5e 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt @@ -1,22 +1,22 @@ -TakeOrderedAndProject [profit,channel,sales,returns,id] +TakeOrderedAndProject [profit,id,returns,sales,channel] WholeStageCodegen - HashAggregate [sum(returns),channel,spark_grouping_id,sum,sum(profit),sum(sales),sum,sum,id] [sum(returns),profit,sum,sum(profit),sum(sales),sales,sum,sum,returns] + HashAggregate [sum(profit),sum,id,sum,sum,sum(sales),spark_grouping_id,sum(returns),channel] [sum(profit),sum,profit,sum,sum,sum(sales),returns,sum(returns),sales] InputAdapter Exchange [channel,id,spark_grouping_id] #1 WholeStageCodegen - HashAggregate [sum,channel,spark_grouping_id,sum,sum,sum,profit,returns,sum,sum,id,sales] [sum,sum,sum,sum,sum,sum] - Expand [channel,id,profit,returns,sales] + HashAggregate [sum,sum,id,sales,sum,sum,profit,spark_grouping_id,sum,returns,channel,sum] [sum,sum,sum,sum,sum,sum] + Expand [channel,sales,id,profit,returns] InputAdapter Union WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(net_loss)),sum,sum(UnscaledValue(profit)),s_store_id,sum(UnscaledValue(sales_price)),sum,sum] [sales,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(net_loss)),RETURNS,sum,sum(UnscaledValue(profit)),channel,profit,sum(UnscaledValue(sales_price)),id,sum,sum] + HashAggregate [sum(UnscaledValue(net_loss)),sum(UnscaledValue(sales_price)),s_store_id,sum(UnscaledValue(profit)),sum,sum,sum,sum,sum(UnscaledValue(return_amt))] [sum(UnscaledValue(net_loss)),sum(UnscaledValue(sales_price)),sum(UnscaledValue(profit)),sum,channel,RETURNS,id,sum,profit,sum,sum,sum(UnscaledValue(return_amt)),sales] InputAdapter Exchange [s_store_id] #2 WholeStageCodegen - HashAggregate [sum,sum,return_amt,net_loss,sum,sum,s_store_id,sum,sales_price,sum,sum,sum,profit] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [s_store_id,sales_price,net_loss,return_amt,profit] + HashAggregate [s_store_id,profit,return_amt,sum,sum,sum,net_loss,sum,sales_price,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [profit,sales_price,return_amt,s_store_id,net_loss] BroadcastHashJoin [store_sk,s_store_sk] - Project [store_sk,sales_price,net_loss,return_amt,profit] + Project [profit,sales_price,return_amt,store_sk,net_loss] BroadcastHashJoin [date_sk,d_date_sk] InputAdapter Union @@ -41,14 +41,14 @@ TakeOrderedAndProject [profit,channel,sales,returns,id] Filter [s_store_sk] Scan parquet default.store [s_store_sk,s_store_id] [s_store_sk,s_store_id] WholeStageCodegen - HashAggregate [cp_catalog_page_id,sum(UnscaledValue(profit)),sum,sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum,sum,sum(UnscaledValue(net_loss)),sum] [sum(UnscaledValue(profit)),sum,sum(UnscaledValue(sales_price)),channel,sum(UnscaledValue(return_amt)),sales,sum,profit,sum,id,sum(UnscaledValue(net_loss)),sum,RETURNS] + HashAggregate [sum,sum(UnscaledValue(profit)),cp_catalog_page_id,sum,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(net_loss)),sum(UnscaledValue(sales_price)),sum] [sum,RETURNS,sum(UnscaledValue(profit)),sum,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(net_loss)),profit,sales,channel,sum(UnscaledValue(sales_price)),sum,id] InputAdapter Exchange [cp_catalog_page_id] #5 WholeStageCodegen - HashAggregate [cp_catalog_page_id,sum,sum,sum,sales_price,return_amt,net_loss,sum,profit,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,cp_catalog_page_id,profit,net_loss,return_amt] + HashAggregate [sum,sum,sum,sales_price,return_amt,cp_catalog_page_id,sum,net_loss,sum,sum,sum,profit,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [profit,return_amt,net_loss,cp_catalog_page_id,sales_price] BroadcastHashJoin [page_sk,cp_catalog_page_sk] - Project [sales_price,profit,page_sk,net_loss,return_amt] + Project [page_sk,profit,return_amt,net_loss,sales_price] BroadcastHashJoin [date_sk,d_date_sk] InputAdapter Union @@ -73,14 +73,14 @@ TakeOrderedAndProject [profit,channel,sales,returns,id] Filter [cp_catalog_page_sk] Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] [cp_catalog_page_sk,cp_catalog_page_id] WholeStageCodegen - HashAggregate [web_site_id,sum,sum,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price)),sum(UnscaledValue(profit)),sum,sum(UnscaledValue(net_loss))] [sales,profit,sum,sum,RETURNS,channel,id,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price)),sum(UnscaledValue(profit)),sum,sum(UnscaledValue(net_loss))] + HashAggregate [sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum,sum(UnscaledValue(sales_price)),web_site_id,sum(UnscaledValue(net_loss)),sum,sum,sum] [sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),profit,sum,sum(UnscaledValue(sales_price)),RETURNS,sales,id,channel,sum(UnscaledValue(net_loss)),sum,sum,sum] InputAdapter Exchange [web_site_id] #8 WholeStageCodegen - HashAggregate [web_site_id,sum,profit,sum,sum,net_loss,sales_price,sum,sum,sum,sum,sum,return_amt] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [web_site_id,profit,sales_price,return_amt,net_loss] + HashAggregate [sum,sum,sum,sum,net_loss,profit,return_amt,web_site_id,sum,sum,sales_price,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [web_site_id,return_amt,sales_price,profit,net_loss] BroadcastHashJoin [wsr_web_site_sk,web_site_sk] - Project [profit,wsr_web_site_sk,sales_price,return_amt,net_loss] + Project [wsr_web_site_sk,return_amt,sales_price,profit,net_loss] BroadcastHashJoin [date_sk,d_date_sk] InputAdapter Union @@ -91,9 +91,9 @@ TakeOrderedAndProject [profit,channel,sales,returns,id] WholeStageCodegen Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Project [wr_order_number,wr_return_amt,wr_returned_date_sk,wr_net_loss,wr_item_sk] + Project [wr_order_number,wr_returned_date_sk,wr_net_loss,wr_return_amt,wr_item_sk] Filter [wr_returned_date_sk] - Scan parquet default.web_returns [wr_order_number,wr_return_amt,wr_returned_date_sk,wr_net_loss,wr_item_sk] [wr_order_number,wr_return_amt,wr_returned_date_sk,wr_net_loss,wr_item_sk] + Scan parquet default.web_returns [wr_order_number,wr_returned_date_sk,wr_net_loss,wr_return_amt,wr_item_sk] [wr_order_number,wr_returned_date_sk,wr_net_loss,wr_return_amt,wr_item_sk] InputAdapter BroadcastExchange #9 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt index f1df63c4f..32ec9b5f7 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,s_company_id#2 ASC NULLS FIRST,s_street_number#3 ASC NULLS FIRST,s_street_name#4 ASC NULLS FIRST,s_street_type#5 ASC NULLS FIRST,s_suite_number#6 ASC NULLS FIRST,s_city#7 ASC NULLS FIRST,s_county#8 ASC NULLS FIRST,s_state#9 ASC NULLS FIRST,s_zip#10 ASC NULLS FIRST], output=[s_store_name#1,s_company_id#2,s_street_number#3,s_street_name#4,s_street_type#5,s_suite_number#6,s_city#7,s_county#8,s_state#9,s_zip#10,30 days #11,31 - 60 days #12,61 - 90 days #13,91 - 120 days #14,>120 days #15]) +- *(6) HashAggregate(keys=[s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10], functions=[sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 30) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 60) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 90) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))]) - +- Exchange hashpartitioning(s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10, 200) + +- Exchange hashpartitioning(s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10, 5) +- *(5) HashAggregate(keys=[s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10], functions=[partial_sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 30) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 60) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 90) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))]) +- *(5) Project [ss_sold_date_sk#17, sr_returned_date_sk#16, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] +- *(5) BroadcastHashJoin [sr_returned_date_sk#16], [cast(d_date_sk#18 as bigint)], Inner, BuildRight @@ -16,8 +16,8 @@ TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,s_compa : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_item_sk#23,ss_customer_sk#24,ss_store_sk#20,ss_ticket_number#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_stor..., ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[3, bigint, true], input[1, bigint, true], input[2, bigint, true])) : : : +- *(1) Project [sr_returned_date_sk#16, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#25] - : : : +- *(1) Filter (((isnotnull(sr_ticket_number#25) && isnotnull(sr_customer_sk#27)) && isnotnull(sr_item_sk#26)) && isnotnull(sr_returned_date_sk#16)) - : : : +- *(1) FileScan parquet default.store_returns[sr_returned_date_sk#16,sr_item_sk#26,sr_customer_sk#27,sr_ticket_number#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_retu..., ReadSchema: struct + : : : +- *(1) Filter (((isnotnull(sr_ticket_number#25) && isnotnull(sr_item_sk#26)) && isnotnull(sr_customer_sk#27)) && isnotnull(sr_returned_date_sk#16)) + : : : +- *(1) FileScan parquet default.store_returns[sr_returned_date_sk#16,sr_item_sk#26,sr_customer_sk#27,sr_ticket_number#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk), IsNotNull(sr_customer_sk), IsNotNull(sr_retu..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [s_store_sk#21, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] : : +- *(2) Filter isnotnull(s_store_sk#21) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt index fcbf25c1d..6a6c856c3 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt @@ -1,33 +1,33 @@ -TakeOrderedAndProject [s_street_number,s_street_type,s_company_id,31 - 60 days ,61 - 90 days ,s_county,>120 days ,30 days ,91 - 120 days ,s_state,s_street_name,s_city,s_suite_number,s_store_name,s_zip] +TakeOrderedAndProject [s_street_number,s_store_name,s_zip,31 - 60 days ,s_state,s_suite_number,s_company_id,61 - 90 days ,30 days ,s_street_name,s_county,>120 days ,s_city,s_street_type,91 - 120 days ] WholeStageCodegen - HashAggregate [s_street_number,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),s_street_type,sum,s_company_id,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),s_county,sum,s_state,s_street_name,sum,sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),sum,s_city,s_suite_number,s_store_name,s_zip,sum] [sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum,31 - 60 days ,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum,>120 days ,30 days ,91 - 120 days ,sum,sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),sum,sum] + HashAggregate [s_street_number,s_store_name,s_zip,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),s_state,s_suite_number,s_company_id,sum,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum,s_street_name,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),s_county,sum,sum,s_city,s_street_type,sum] [sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),31 - 60 days ,sum,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum,30 days ,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum,>120 days ,sum,91 - 120 days ,sum] InputAdapter - Exchange [s_street_number,s_street_type,s_company_id,s_county,s_state,s_street_name,s_city,s_suite_number,s_store_name,s_zip] #1 + Exchange [s_street_number,s_store_name,s_zip,s_state,s_suite_number,s_company_id,s_street_name,s_county,s_city,s_street_type] #1 WholeStageCodegen - HashAggregate [sum,sum,s_street_number,ss_sold_date_sk,s_street_type,sum,s_company_id,sum,s_county,sum,s_state,s_street_name,sum,sum,sr_returned_date_sk,sum,sum,s_city,s_suite_number,s_store_name,s_zip,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_zip,s_state,s_suite_number,ss_sold_date_sk,s_city,sr_returned_date_sk] + HashAggregate [s_street_number,s_store_name,s_zip,sum,sum,sum,s_state,s_suite_number,s_company_id,sum,sum,ss_sold_date_sk,s_street_name,s_county,sum,sum,sum,sr_returned_date_sk,s_city,sum,s_street_type,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [s_street_type,s_suite_number,s_county,s_street_name,s_zip,s_street_number,ss_sold_date_sk,s_store_name,s_city,sr_returned_date_sk,s_state,s_company_id] BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_zip,s_state,s_suite_number,ss_sold_date_sk,s_city,sr_returned_date_sk] + Project [s_street_type,s_suite_number,s_county,s_street_name,s_zip,s_street_number,ss_sold_date_sk,s_store_name,s_city,sr_returned_date_sk,s_state,s_company_id] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_zip,s_state,s_suite_number,ss_sold_date_sk,s_city,sr_returned_date_sk] + Project [s_street_type,s_suite_number,s_county,s_street_name,s_zip,s_street_number,ss_sold_date_sk,s_store_name,s_city,sr_returned_date_sk,s_state,s_company_id] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_sold_date_sk,ss_store_sk,sr_returned_date_sk] - BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] - Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] - Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + BroadcastHashJoin [ss_customer_sk,sr_item_sk,ss_item_sk,sr_ticket_number,sr_customer_sk,ss_ticket_number] + Project [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_sold_date_sk,ss_store_sk] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_sold_date_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_sold_date_sk,ss_store_sk] [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] - Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] + Filter [sr_ticket_number,sr_item_sk,sr_customer_sk,sr_returned_date_sk] Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_store_sk,s_zip,s_state,s_suite_number,s_city] + Project [s_store_sk,s_street_type,s_suite_number,s_county,s_street_name,s_zip,s_street_number,s_store_name,s_city,s_state,s_company_id] Filter [s_store_sk] - Scan parquet default.store [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_store_sk,s_zip,s_state,s_suite_number,s_city] [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_store_sk,s_zip,s_state,s_suite_number,s_city] + Scan parquet default.store [s_store_sk,s_street_type,s_suite_number,s_county,s_street_name,s_zip,s_street_number,s_store_name,s_city,s_state,s_company_id] [s_store_sk,s_street_type,s_suite_number,s_county,s_street_name,s_zip,s_street_number,s_store_name,s_city,s_state,s_company_id] InputAdapter BroadcastExchange #4 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt index 004057dbc..548263929 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt @@ -3,17 +3,17 @@ TakeOrderedAndProject(limit=100, orderBy=[item_sk#1 ASC NULLS FIRST,d_date#2 ASC +- *(15) Filter ((isnotnull(web_cumulative#5) && isnotnull(store_cumulative#6)) && (web_cumulative#5 > store_cumulative#6)) +- Window [max(web_sales#3) windowspecdefinition(item_sk#1, d_date#2 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#5, max(store_sales#4) windowspecdefinition(item_sk#1, d_date#2 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#6], [item_sk#1], [d_date#2 ASC NULLS FIRST] +- *(14) Sort [item_sk#1 ASC NULLS FIRST, d_date#2 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(item_sk#1, 200) + +- Exchange hashpartitioning(item_sk#1, 5) +- *(13) Project [CASE WHEN isnotnull(item_sk#7) THEN item_sk#7 ELSE item_sk#8 END AS item_sk#1, CASE WHEN isnotnull(d_date#9) THEN d_date#9 ELSE d_date#10 END AS d_date#2, cume_sales#11 AS web_sales#3, cume_sales#12 AS store_sales#4] +- SortMergeJoin [item_sk#7, d_date#9], [item_sk#8, d_date#10], FullOuter :- *(6) Sort [item_sk#7 ASC NULLS FIRST, d_date#9 ASC NULLS FIRST], false, 0 - : +- Exchange hashpartitioning(item_sk#7, d_date#9, 200) + : +- Exchange hashpartitioning(item_sk#7, d_date#9, 5) : +- *(5) Project [item_sk#7, d_date#9, cume_sales#11] : +- Window [sum(_w0#13) windowspecdefinition(ws_item_sk#14, d_date#9 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#11], [ws_item_sk#14], [d_date#9 ASC NULLS FIRST] : +- *(4) Sort [ws_item_sk#14 ASC NULLS FIRST, d_date#9 ASC NULLS FIRST], false, 0 - : +- Exchange hashpartitioning(ws_item_sk#14, 200) + : +- Exchange hashpartitioning(ws_item_sk#14, 5) : +- *(3) HashAggregate(keys=[ws_item_sk#14, d_date#9], functions=[sum(UnscaledValue(ws_sales_price#15))]) - : +- Exchange hashpartitioning(ws_item_sk#14, d_date#9, 200) + : +- Exchange hashpartitioning(ws_item_sk#14, d_date#9, 5) : +- *(2) HashAggregate(keys=[ws_item_sk#14, d_date#9], functions=[partial_sum(UnscaledValue(ws_sales_price#15))]) : +- *(2) Project [ws_item_sk#14, ws_sales_price#15, d_date#9] : +- *(2) BroadcastHashJoin [ws_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight @@ -25,13 +25,13 @@ TakeOrderedAndProject(limit=100, orderBy=[item_sk#1 ASC NULLS FIRST,d_date#2 ASC : +- *(1) Filter (((isnotnull(d_month_seq#18) && (d_month_seq#18 >= 1200)) && (d_month_seq#18 <= 1211)) && isnotnull(d_date_sk#17)) : +- *(1) FileScan parquet default.date_dim[d_date_sk#17,d_date#9,d_month_seq#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct +- *(12) Sort [item_sk#8 ASC NULLS FIRST, d_date#10 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(item_sk#8, d_date#10, 200) + +- Exchange hashpartitioning(item_sk#8, d_date#10, 5) +- *(11) Project [item_sk#8, d_date#10, cume_sales#12] +- Window [sum(_w0#19) windowspecdefinition(ss_item_sk#20, d_date#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#12], [ss_item_sk#20], [d_date#10 ASC NULLS FIRST] +- *(10) Sort [ss_item_sk#20 ASC NULLS FIRST, d_date#10 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(ss_item_sk#20, 200) + +- Exchange hashpartitioning(ss_item_sk#20, 5) +- *(9) HashAggregate(keys=[ss_item_sk#20, d_date#10], functions=[sum(UnscaledValue(ss_sales_price#21))]) - +- Exchange hashpartitioning(ss_item_sk#20, d_date#10, 200) + +- Exchange hashpartitioning(ss_item_sk#20, d_date#10, 5) +- *(8) HashAggregate(keys=[ss_item_sk#20, d_date#10], functions=[partial_sum(UnscaledValue(ss_sales_price#21))]) +- *(8) Project [ss_item_sk#20, ss_sales_price#21, d_date#10] +- *(8) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#23], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt index af45acaf5..69b3733b7 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt @@ -1,4 +1,4 @@ -TakeOrderedAndProject [item_sk,store_cumulative,store_sales,web_sales,d_date,web_cumulative] +TakeOrderedAndProject [store_cumulative,item_sk,web_cumulative,d_date,store_sales,web_sales] WholeStageCodegen Filter [web_cumulative,store_cumulative] InputAdapter @@ -8,7 +8,7 @@ TakeOrderedAndProject [item_sk,store_cumulative,store_sales,web_sales,d_date,web InputAdapter Exchange [item_sk] #1 WholeStageCodegen - Project [d_date,d_date,item_sk,item_sk,cume_sales,cume_sales] + Project [d_date,cume_sales,d_date,item_sk,item_sk,cume_sales] InputAdapter SortMergeJoin [item_sk,d_date,item_sk,d_date] WholeStageCodegen @@ -28,7 +28,7 @@ TakeOrderedAndProject [item_sk,store_cumulative,store_sales,web_sales,d_date,web InputAdapter Exchange [ws_item_sk,d_date] #4 WholeStageCodegen - HashAggregate [sum,ws_item_sk,sum,d_date,ws_sales_price] [sum,sum] + HashAggregate [d_date,sum,sum,ws_sales_price,ws_item_sk] [sum,sum] Project [ws_item_sk,ws_sales_price,d_date] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] Project [ws_sold_date_sk,ws_item_sk,ws_sales_price] @@ -57,7 +57,7 @@ TakeOrderedAndProject [item_sk,store_cumulative,store_sales,web_sales,d_date,web InputAdapter Exchange [ss_item_sk,d_date] #8 WholeStageCodegen - HashAggregate [d_date,sum,ss_sales_price,sum,ss_item_sk] [sum,sum] + HashAggregate [sum,sum,ss_item_sk,d_date,ss_sales_price] [sum,sum] Project [ss_item_sk,ss_sales_price,d_date] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,ss_item_sk,ss_sales_price] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt index aefc40ed5..7a9e19e6c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[d_year#1 ASC NULLS FIRST,ext_price#2 DESC NULLS LAST,brand_id#3 ASC NULLS FIRST], output=[d_year#1,brand_id#3,brand#4,ext_price#2]) +- *(4) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[sum(UnscaledValue(ss_ext_sales_price#7))]) - +- Exchange hashpartitioning(d_year#1, i_brand#5, i_brand_id#6, 200) + +- Exchange hashpartitioning(d_year#1, i_brand#5, i_brand_id#6, 5) +- *(3) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#7))]) +- *(3) Project [d_year#1, ss_ext_sales_price#7, i_brand_id#6, i_brand#5] +- *(3) BroadcastHashJoin [ss_item_sk#8], [i_item_sk#9], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt index 00c2d4b14..19793f4d1 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt @@ -1,10 +1,10 @@ TakeOrderedAndProject [d_year,ext_price,brand_id,brand] WholeStageCodegen - HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),d_year,i_brand,i_brand_id,sum] [brand_id,sum(UnscaledValue(ss_ext_sales_price)),brand,sum,ext_price] + HashAggregate [i_brand_id,d_year,sum,sum(UnscaledValue(ss_ext_sales_price)),i_brand] [brand_id,ext_price,brand,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter Exchange [d_year,i_brand,i_brand_id] #1 WholeStageCodegen - HashAggregate [sum,d_year,i_brand,ss_ext_sales_price,i_brand_id,sum] [sum,sum] + HashAggregate [i_brand_id,sum,d_year,ss_ext_sales_price,sum,i_brand] [sum,sum] Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [d_year,ss_item_sk,ss_ext_sales_price] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt index 80e2ae182..f31d6ec98 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt @@ -4,9 +4,9 @@ TakeOrderedAndProject(limit=100, orderBy=[avg_quarterly_sales#1 ASC NULLS FIRST, +- *(7) Filter (CASE WHEN (avg_quarterly_sales#1 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#2 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#1 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#1 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000) +- Window [avg(_w0#4) windowspecdefinition(i_manufact_id#3, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#1], [i_manufact_id#3] +- *(6) Sort [i_manufact_id#3 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(i_manufact_id#3, 200) + +- Exchange hashpartitioning(i_manufact_id#3, 5) +- *(5) HashAggregate(keys=[i_manufact_id#3, d_qoy#5], functions=[sum(UnscaledValue(ss_sales_price#6))]) - +- Exchange hashpartitioning(i_manufact_id#3, d_qoy#5, 200) + +- Exchange hashpartitioning(i_manufact_id#3, d_qoy#5, 5) +- *(4) HashAggregate(keys=[i_manufact_id#3, d_qoy#5], functions=[partial_sum(UnscaledValue(ss_sales_price#6))]) +- *(4) Project [i_manufact_id#3, ss_sales_price#6, d_qoy#5] +- *(4) BroadcastHashJoin [ss_store_sk#7], [s_store_sk#8], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt index 6a8804ae2..6dc6fb614 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt @@ -13,7 +13,7 @@ TakeOrderedAndProject [avg_quarterly_sales,sum_sales,i_manufact_id] InputAdapter Exchange [i_manufact_id,d_qoy] #2 WholeStageCodegen - HashAggregate [d_qoy,sum,ss_sales_price,sum,i_manufact_id] [sum,sum] + HashAggregate [sum,sum,d_qoy,ss_sales_price,i_manufact_id] [sum,sum] Project [i_manufact_id,ss_sales_price,d_qoy] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [i_manufact_id,ss_store_sk,ss_sales_price,d_qoy] @@ -22,7 +22,7 @@ TakeOrderedAndProject [avg_quarterly_sales,sum_sales,i_manufact_id] BroadcastHashJoin [i_item_sk,ss_item_sk] Project [i_item_sk,i_manufact_id] Filter [i_category,i_class,i_brand,i_item_sk] - Scan parquet default.item [i_class,i_manufact_id,i_item_sk,i_category,i_brand] [i_class,i_manufact_id,i_item_sk,i_category,i_brand] + Scan parquet default.item [i_manufact_id,i_brand,i_category,i_item_sk,i_class] [i_manufact_id,i_brand,i_category,i_item_sk,i_class] InputAdapter BroadcastExchange #3 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt index 142360af3..217fd8f04 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt @@ -1,10 +1,10 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[segment#1 ASC NULLS FIRST,num_customers#2 ASC NULLS FIRST], output=[segment#1,num_customers#2,segment_base#3]) +- *(13) HashAggregate(keys=[segment#1], functions=[count(1)]) - +- Exchange hashpartitioning(segment#1, 200) + +- Exchange hashpartitioning(segment#1, 5) +- *(12) HashAggregate(keys=[segment#1], functions=[partial_count(1)]) +- *(12) HashAggregate(keys=[c_customer_sk#4], functions=[sum(UnscaledValue(ss_ext_sales_price#5))]) - +- Exchange hashpartitioning(c_customer_sk#4, 200) + +- Exchange hashpartitioning(c_customer_sk#4, 5) +- *(11) HashAggregate(keys=[c_customer_sk#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#5))]) +- *(11) Project [c_customer_sk#4, ss_ext_sales_price#5] +- *(11) BroadcastHashJoin [ss_sold_date_sk#6], [d_date_sk#7], Inner, BuildRight @@ -15,7 +15,7 @@ TakeOrderedAndProject(limit=100, orderBy=[segment#1 ASC NULLS FIRST,num_customer : : :- *(11) Project [c_customer_sk#4, c_current_addr_sk#12, ss_sold_date_sk#6, ss_ext_sales_price#5] : : : +- *(11) BroadcastHashJoin [c_customer_sk#4], [ss_customer_sk#14], Inner, BuildRight : : : :- *(11) HashAggregate(keys=[c_customer_sk#4, c_current_addr_sk#12], functions=[]) - : : : : +- Exchange hashpartitioning(c_customer_sk#4, c_current_addr_sk#12, 200) + : : : : +- Exchange hashpartitioning(c_customer_sk#4, c_current_addr_sk#12, 5) : : : : +- *(6) HashAggregate(keys=[c_customer_sk#4, c_current_addr_sk#12], functions=[]) : : : : +- *(6) Project [c_customer_sk#4, c_current_addr_sk#12] : : : : +- *(6) BroadcastHashJoin [customer_sk#15], [c_customer_sk#4], Inner, BuildRight @@ -56,32 +56,32 @@ TakeOrderedAndProject(limit=100, orderBy=[segment#1 ASC NULLS FIRST,num_customer : +- *(9) FileScan parquet default.store[s_county#10,s_state#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), IsNotNull(s_county)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(10) Project [d_date_sk#7] - +- *(10) Filter (((isnotnull(d_month_seq#32) && (d_month_seq#32 >= Subquery subquery10089)) && (d_month_seq#32 <= Subquery subquery10090)) && isnotnull(d_date_sk#7)) - : :- Subquery subquery10089 + +- *(10) Filter (((isnotnull(d_month_seq#32) && (d_month_seq#32 >= Subquery subquery12329)) && (d_month_seq#32 <= Subquery subquery12330)) && isnotnull(d_date_sk#7)) + : :- Subquery subquery12329 : : +- *(2) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) - : : +- Exchange hashpartitioning((d_month_seq + 1)#33, 200) + : : +- Exchange hashpartitioning((d_month_seq + 1)#33, 5) : : +- *(1) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) : : +- *(1) Project [(d_month_seq#32 + 1) AS (d_month_seq + 1)#33] : : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) : : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct - : +- Subquery subquery10090 + : +- Subquery subquery12330 : +- *(2) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) - : +- Exchange hashpartitioning((d_month_seq + 3)#34, 200) + : +- Exchange hashpartitioning((d_month_seq + 3)#34, 5) : +- *(1) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) : +- *(1) Project [(d_month_seq#32 + 3) AS (d_month_seq + 3)#34] : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct +- *(10) FileScan parquet default.date_dim[d_date_sk#7,d_month_seq#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct - :- Subquery subquery10089 + :- Subquery subquery12329 : +- *(2) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) - : +- Exchange hashpartitioning((d_month_seq + 1)#33, 200) + : +- Exchange hashpartitioning((d_month_seq + 1)#33, 5) : +- *(1) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) : +- *(1) Project [(d_month_seq#32 + 1) AS (d_month_seq + 1)#33] : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct - +- Subquery subquery10090 + +- Subquery subquery12330 +- *(2) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) - +- Exchange hashpartitioning((d_month_seq + 3)#34, 200) + +- Exchange hashpartitioning((d_month_seq + 3)#34, 5) +- *(1) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) +- *(1) Project [(d_month_seq#32 + 3) AS (d_month_seq + 3)#34] +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt index 3707aa2f8..b86ab1b6a 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt @@ -14,7 +14,7 @@ TakeOrderedAndProject [segment,num_customers,segment_base] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [c_customer_sk,ss_sold_date_sk,ss_ext_sales_price] BroadcastHashJoin [ca_county,ca_state,s_county,s_state] - Project [ca_state,c_customer_sk,ss_ext_sales_price,ca_county,ss_sold_date_sk] + Project [ca_state,c_customer_sk,ss_sold_date_sk,ca_county,ss_ext_sales_price] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] Project [c_customer_sk,c_current_addr_sk,ss_sold_date_sk,ss_ext_sales_price] BroadcastHashJoin [c_customer_sk,ss_customer_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt index eccd937f1..32402fb5f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[ext_price#1 DESC NULLS LAST,brand_id#2 ASC NULLS FIRST], output=[brand_id#2,brand#3,ext_price#1]) +- *(4) HashAggregate(keys=[i_brand#4, i_brand_id#5], functions=[sum(UnscaledValue(ss_ext_sales_price#6))]) - +- Exchange hashpartitioning(i_brand#4, i_brand_id#5, 200) + +- Exchange hashpartitioning(i_brand#4, i_brand_id#5, 5) +- *(3) HashAggregate(keys=[i_brand#4, i_brand_id#5], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#6))]) +- *(3) Project [ss_ext_sales_price#6, i_brand_id#5, i_brand#4] +- *(3) BroadcastHashJoin [ss_item_sk#7], [i_item_sk#8], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt index e5ff08959..33903ea48 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt @@ -1,10 +1,10 @@ TakeOrderedAndProject [ext_price,brand_id,brand] WholeStageCodegen - HashAggregate [i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,ext_price,sum(UnscaledValue(ss_ext_sales_price)),sum,brand_id] + HashAggregate [i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,sum(UnscaledValue(ss_ext_sales_price)),ext_price,brand_id,sum] InputAdapter Exchange [i_brand,i_brand_id] #1 WholeStageCodegen - HashAggregate [i_brand,sum,sum,ss_ext_sales_price,i_brand_id] [sum,sum] + HashAggregate [i_brand_id,ss_ext_sales_price,sum,sum,i_brand] [sum,sum] Project [ss_ext_sales_price,i_brand_id,i_brand] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ss_item_sk,ss_ext_sales_price] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt index 458642360..eb5871bab 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt @@ -10,9 +10,9 @@ TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast( : : +- *(7) Filter (isnotnull(d_year#6) && (d_year#6 = 1999)) : : +- Window [rank(d_year#6, d_moy#7) windowspecdefinition(i_category#4, i_brand#5, cc_name#3, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#12], [i_category#4, i_brand#5, cc_name#3], [d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST] : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, cc_name#3 ASC NULLS FIRST, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST], false, 0 - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, 200) + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, 5) : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7], functions=[sum(UnscaledValue(cs_sales_price#22))]) - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, 200) + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, 5) : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7], functions=[partial_sum(UnscaledValue(cs_sales_price#22))]) : : +- *(4) Project [i_brand#5, i_category#4, cs_sales_price#22, d_year#6, d_moy#7, cc_name#3] : : +- *(4) BroadcastHashJoin [cs_call_center_sk#23], [cc_call_center_sk#24], Inner, BuildRight @@ -21,8 +21,8 @@ TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast( : : : :- *(4) Project [i_brand#5, i_category#4, cs_sold_date_sk#25, cs_call_center_sk#23, cs_sales_price#22] : : : : +- *(4) BroadcastHashJoin [i_item_sk#27], [cs_item_sk#28], Inner, BuildRight : : : : :- *(4) Project [i_item_sk#27, i_brand#5, i_category#4] - : : : : : +- *(4) Filter ((isnotnull(i_item_sk#27) && isnotnull(i_brand#5)) && isnotnull(i_category#4)) - : : : : : +- *(4) FileScan parquet default.item[i_item_sk#27,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct + : : : : : +- *(4) Filter ((isnotnull(i_item_sk#27) && isnotnull(i_category#4)) && isnotnull(i_brand#5)) + : : : : : +- *(4) FileScan parquet default.item[i_item_sk#27,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) : : : : +- *(1) Project [cs_sold_date_sk#25, cs_call_center_sk#23, cs_item_sk#28, cs_sales_price#22] : : : : +- *(1) Filter ((isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#25)) && isnotnull(cs_call_center_sk#23)) @@ -40,12 +40,12 @@ TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast( : +- *(14) Filter isnotnull(rn#20) : +- Window [rank(d_year#29, d_moy#30) windowspecdefinition(i_category#17, i_brand#18, cc_name#19, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#20], [i_category#17, i_brand#18, cc_name#19], [d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] : +- *(13) Sort [i_category#17 ASC NULLS FIRST, i_brand#18 ASC NULLS FIRST, cc_name#19 ASC NULLS FIRST, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST], false, 0 - : +- Exchange hashpartitioning(i_category#17, i_brand#18, cc_name#19, 200) + : +- Exchange hashpartitioning(i_category#17, i_brand#18, cc_name#19, 5) : +- *(12) HashAggregate(keys=[i_category#17, i_brand#18, cc_name#19, d_year#29, d_moy#30], functions=[sum(UnscaledValue(cs_sales_price#22))]) - : +- ReusedExchange [i_category#17, i_brand#18, cc_name#19, d_year#29, d_moy#30, sum#31], Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, 200) + : +- ReusedExchange [i_category#17, i_brand#18, cc_name#19, d_year#29, d_moy#30, sum#31], Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, 5) +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] - 1))) +- *(21) Project [i_category#13, i_brand#14, cc_name#15, sum_sales#11, rn#16] +- *(21) Filter isnotnull(rn#16) +- Window [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#13, i_brand#14, cc_name#15, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#16], [i_category#13, i_brand#14, cc_name#15], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] +- *(20) Sort [i_category#13 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, cc_name#15 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 - +- ReusedExchange [i_category#13, i_brand#14, cc_name#15, d_year#32, d_moy#33, sum_sales#11], Exchange hashpartitioning(i_category#17, i_brand#18, cc_name#19, 200) \ No newline at end of file + +- ReusedExchange [i_category#13, i_brand#14, cc_name#15, d_year#32, d_moy#33, sum_sales#11], Exchange hashpartitioning(i_category#17, i_brand#18, cc_name#19, 5) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt index d41aaf1e2..b482f3f5f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt @@ -1,35 +1,35 @@ -TakeOrderedAndProject [d_year,avg_monthly_sales,nsum,d_moy,sum_sales,i_category,i_brand,cc_name,psum] +TakeOrderedAndProject [i_category,nsum,cc_name,d_year,psum,sum_sales,avg_monthly_sales,i_brand,d_moy] WholeStageCodegen - Project [sum_sales,d_year,sum_sales,d_moy,sum_sales,i_category,i_brand,cc_name,avg_monthly_sales] - BroadcastHashJoin [cc_name,i_brand,rn,rn,i_category,i_category,i_brand,cc_name] - Project [d_year,d_moy,sum_sales,rn,i_brand,cc_name,sum_sales,i_category,avg_monthly_sales] - BroadcastHashJoin [i_category,cc_name,rn,i_category,rn,i_brand,cc_name,i_brand] - Project [d_year,d_moy,sum_sales,rn,i_brand,cc_name,i_category,avg_monthly_sales] + Project [i_category,d_year,avg_monthly_sales,sum_sales,sum_sales,sum_sales,i_brand,cc_name,d_moy] + BroadcastHashJoin [i_category,i_brand,i_category,rn,cc_name,rn,i_brand,cc_name] + Project [cc_name,d_moy,sum_sales,sum_sales,d_year,rn,avg_monthly_sales,i_category,i_brand] + BroadcastHashJoin [i_category,i_category,i_brand,rn,cc_name,rn,i_brand,cc_name] + Project [cc_name,d_moy,sum_sales,d_year,rn,avg_monthly_sales,i_category,i_brand] Filter [avg_monthly_sales,sum_sales,rn] InputAdapter - Window [d_year,i_brand,cc_name,i_category,_w0] + Window [cc_name,d_year,i_category,i_brand,_w0] WholeStageCodegen Filter [d_year] InputAdapter - Window [d_year,d_moy,i_brand,cc_name,i_category] + Window [cc_name,d_moy,d_year,i_category,i_brand] WholeStageCodegen - Sort [d_year,d_moy,i_brand,cc_name,i_category] + Sort [cc_name,d_moy,d_year,i_category,i_brand] InputAdapter Exchange [i_category,i_brand,cc_name] #1 WholeStageCodegen - HashAggregate [d_year,d_moy,sum(UnscaledValue(cs_sales_price)),i_brand,sum,cc_name,i_category] [sum(UnscaledValue(cs_sales_price)),sum_sales,_w0,sum] + HashAggregate [sum(UnscaledValue(cs_sales_price)),cc_name,d_moy,sum,d_year,i_category,i_brand] [sum(UnscaledValue(cs_sales_price)),sum_sales,_w0,sum] InputAdapter - Exchange [d_year,d_moy,i_brand,cc_name,i_category] #2 + Exchange [cc_name,d_moy,d_year,i_category,i_brand] #2 WholeStageCodegen - HashAggregate [d_year,d_moy,i_brand,sum,sum,cc_name,i_category,cs_sales_price] [sum,sum] - Project [d_moy,d_year,cs_sales_price,i_category,cc_name,i_brand] + HashAggregate [cc_name,d_moy,cs_sales_price,sum,sum,d_year,i_category,i_brand] [sum,sum] + Project [d_year,i_brand,i_category,cc_name,d_moy,cs_sales_price] BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] - Project [d_moy,d_year,cs_sales_price,i_category,i_brand,cs_call_center_sk] + Project [cs_call_center_sk,d_year,i_brand,i_category,d_moy,cs_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sales_price,cs_sold_date_sk,i_category,i_brand,cs_call_center_sk] + Project [cs_call_center_sk,i_brand,i_category,cs_sold_date_sk,cs_sales_price] BroadcastHashJoin [i_item_sk,cs_item_sk] Project [i_item_sk,i_brand,i_category] - Filter [i_item_sk,i_brand,i_category] + Filter [i_item_sk,i_category,i_brand] Scan parquet default.item [i_item_sk,i_brand,i_category] [i_item_sk,i_brand,i_category] InputAdapter BroadcastExchange #3 @@ -52,26 +52,26 @@ TakeOrderedAndProject [d_year,avg_monthly_sales,nsum,d_moy,sum_sales,i_category, InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [cc_name,sum_sales,i_brand,rn,i_category] + Project [i_category,sum_sales,rn,cc_name,i_brand] Filter [rn] InputAdapter - Window [cc_name,i_brand,d_moy,d_year,i_category] + Window [i_category,d_year,d_moy,cc_name,i_brand] WholeStageCodegen - Sort [cc_name,i_brand,d_moy,d_year,i_category] + Sort [i_category,d_year,d_moy,cc_name,i_brand] InputAdapter Exchange [i_category,i_brand,cc_name] #7 WholeStageCodegen - HashAggregate [sum(UnscaledValue(cs_sales_price)),sum,cc_name,i_brand,d_moy,d_year,i_category] [sum(UnscaledValue(cs_sales_price)),sum_sales,sum] + HashAggregate [sum(UnscaledValue(cs_sales_price)),i_category,d_year,d_moy,sum,cc_name,i_brand] [sum(UnscaledValue(cs_sales_price)),sum_sales,sum] InputAdapter - ReusedExchange [cc_name,i_brand,d_moy,sum,d_year,i_category] [cc_name,i_brand,d_moy,sum,d_year,i_category] #2 + ReusedExchange [i_category,sum,d_year,d_moy,cc_name,i_brand] [i_category,sum,d_year,d_moy,cc_name,i_brand] #2 InputAdapter BroadcastExchange #8 WholeStageCodegen - Project [cc_name,rn,i_brand,i_category,sum_sales] + Project [i_brand,i_category,cc_name,sum_sales,rn] Filter [rn] InputAdapter - Window [cc_name,d_year,i_brand,i_category,d_moy] + Window [i_brand,i_category,cc_name,d_moy,d_year] WholeStageCodegen - Sort [cc_name,d_year,i_brand,i_category,d_moy] + Sort [i_brand,i_category,cc_name,d_moy,d_year] InputAdapter - ReusedExchange [cc_name,d_year,i_brand,i_category,d_moy,sum_sales] [cc_name,d_year,i_brand,i_category,d_moy,sum_sales] #7 + ReusedExchange [i_brand,i_category,cc_name,d_moy,sum_sales,d_year] [i_brand,i_category,cc_name,d_moy,sum_sales,d_year] #7 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt index 5b1f75cde..dacb7f6b6 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt @@ -6,7 +6,7 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,ss_item_rev# : +- *(15) BroadcastHashJoin [item_id#1], [item_id#10], Inner, BuildRight, ((((cast(ss_item_rev#2 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#4)), DecimalType(19,3))) && (cast(ss_item_rev#2 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#4)), DecimalType(20,3)))) && (cast(cs_item_rev#4 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#2)), DecimalType(19,3)))) && (cast(cs_item_rev#4 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#2)), DecimalType(20,3)))) : :- *(15) Filter isnotnull(ss_item_rev#2) : : +- *(15) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(ss_ext_sales_price#12))]) - : : +- Exchange hashpartitioning(i_item_id#11, 200) + : : +- Exchange hashpartitioning(i_item_id#11, 5) : : +- *(4) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#12))]) : : +- *(4) Project [ss_ext_sales_price#12, i_item_id#11] : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight @@ -27,20 +27,20 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,ss_item_rev# : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) : : +- *(2) Project [d_date#17 AS d_date#17#18] - : : +- *(2) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12416)) - : : : +- Subquery subquery12416 + : : +- *(2) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery14656)) + : : : +- Subquery subquery14656 : : : +- *(1) Project [d_week_seq#19] : : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) : : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct : : +- *(2) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct - : : +- Subquery subquery12416 + : : +- Subquery subquery14656 : : +- *(1) Project [d_week_seq#19] : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : +- *(9) Filter isnotnull(cs_item_rev#4) : +- *(9) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(cs_ext_sales_price#20))]) - : +- Exchange hashpartitioning(i_item_id#11, 200) + : +- Exchange hashpartitioning(i_item_id#11, 5) : +- *(8) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#20))]) : +- *(8) Project [cs_ext_sales_price#20, i_item_id#11] : +- *(8) BroadcastHashJoin [cs_sold_date_sk#21], [d_date_sk#14], Inner, BuildRight @@ -58,20 +58,20 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,ss_item_rev# : : +- *(7) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) : +- *(6) Project [d_date#17 AS d_date#17#23] - : +- *(6) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12420)) - : : +- Subquery subquery12420 + : +- *(6) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery14660)) + : : +- Subquery subquery14660 : : +- *(1) Project [d_week_seq#19] : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct : +- *(6) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct - : +- Subquery subquery12420 + : +- Subquery subquery14660 : +- *(1) Project [d_week_seq#19] : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) +- *(14) Filter isnotnull(ws_item_rev#6) +- *(14) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(ws_ext_sales_price#24))]) - +- Exchange hashpartitioning(i_item_id#11, 200) + +- Exchange hashpartitioning(i_item_id#11, 5) +- *(13) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#24))]) +- *(13) Project [ws_ext_sales_price#24, i_item_id#11] +- *(13) BroadcastHashJoin [ws_sold_date_sk#25], [d_date_sk#14], Inner, BuildRight @@ -89,13 +89,13 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,ss_item_rev# : +- *(12) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) +- *(11) Project [d_date#17 AS d_date#17#27] - +- *(11) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12424)) - : +- Subquery subquery12424 + +- *(11) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery14664)) + : +- Subquery subquery14664 : +- *(1) Project [d_week_seq#19] : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct +- *(11) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct - +- Subquery subquery12424 + +- Subquery subquery14664 +- *(1) Project [d_week_seq#19] +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt index fd4503647..c6611f36d 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt @@ -1,7 +1,7 @@ -TakeOrderedAndProject [cs_item_rev,item_id,ws_dev,ws_item_rev,average,cs_dev,ss_item_rev,ss_dev] +TakeOrderedAndProject [ws_dev,cs_dev,item_id,ws_item_rev,cs_item_rev,average,ss_dev,ss_item_rev] WholeStageCodegen Project [item_id,ss_item_rev,cs_item_rev,ws_item_rev] - BroadcastHashJoin [cs_item_rev,item_id,ws_item_rev,item_id,ss_item_rev] + BroadcastHashJoin [item_id,ws_item_rev,item_id,cs_item_rev,ss_item_rev] Project [item_id,ss_item_rev,cs_item_rev] BroadcastHashJoin [item_id,item_id,ss_item_rev,cs_item_rev] Filter [ss_item_rev] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt index 56e7c1173..9236b71dd 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt @@ -7,7 +7,7 @@ TakeOrderedAndProject(limit=100, orderBy=[s_store_name1#1 ASC NULLS FIRST,s_stor : :- *(10) Project [d_week_seq#28, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#29, s_store_name#27] : : +- *(10) BroadcastHashJoin [ss_store_sk#38], [s_store_sk#39], Inner, BuildRight : : :- *(10) HashAggregate(keys=[d_week_seq#28, ss_store_sk#38], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#40 = Sunday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Monday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Tuesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Wednesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Thursday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Friday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Saturday) THEN ss_sales_price#41 ELSE null END))]) - : : : +- Exchange hashpartitioning(d_week_seq#28, ss_store_sk#38, 200) + : : : +- Exchange hashpartitioning(d_week_seq#28, ss_store_sk#38, 5) : : : +- *(2) HashAggregate(keys=[d_week_seq#28, ss_store_sk#38], functions=[partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Sunday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Monday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Tuesday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Wednesday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Thursday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Friday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Saturday) THEN ss_sales_price#41 ELSE null END))]) : : : +- *(2) Project [ss_store_sk#38, ss_sales_price#41, d_week_seq#28, d_day_name#40] : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#42], [d_date_sk#43], Inner, BuildRight @@ -32,7 +32,7 @@ TakeOrderedAndProject(limit=100, orderBy=[s_store_name1#1 ASC NULLS FIRST,s_stor :- *(9) Project [d_week_seq#28, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#29] : +- *(9) BroadcastHashJoin [ss_store_sk#38], [s_store_sk#39], Inner, BuildRight : :- *(9) HashAggregate(keys=[d_week_seq#28, ss_store_sk#38], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#40 = Sunday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Monday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Tuesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Wednesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Thursday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Friday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Saturday) THEN ss_sales_price#41 ELSE null END))]) - : : +- ReusedExchange [d_week_seq#28, ss_store_sk#38, sum#46, sum#47, sum#48, sum#49, sum#50, sum#51, sum#52], Exchange hashpartitioning(d_week_seq#28, ss_store_sk#38, 200) + : : +- ReusedExchange [d_week_seq#28, ss_store_sk#38, sum#46, sum#47, sum#48, sum#49, sum#50, sum#51, sum#52], Exchange hashpartitioning(d_week_seq#28, ss_store_sk#38, 5) : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(7) Project [s_store_sk#39, s_store_id#29] : +- *(7) Filter (isnotnull(s_store_sk#39) && isnotnull(s_store_id#29)) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt index 0ffe4eb4e..e7542c50d 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt @@ -1,16 +1,16 @@ -TakeOrderedAndProject [(wed_sales1 / wed_sales2),d_week_seq1,(mon_sales1 / mon_sales2),(thu_sales1 / thu_sales2),(sat_sales1 / sat_sales2),s_store_name1,(sun_sales1 / sun_sales2),s_store_id1,(fri_sales1 / fri_sales2),(tue_sales1 / tue_sales2)] +TakeOrderedAndProject [(sun_sales1 / sun_sales2),s_store_name1,d_week_seq1,(thu_sales1 / thu_sales2),(fri_sales1 / fri_sales2),(tue_sales1 / tue_sales2),(sat_sales1 / sat_sales2),(wed_sales1 / wed_sales2),(mon_sales1 / mon_sales2),s_store_id1] WholeStageCodegen - Project [sat_sales1,sat_sales2,sun_sales2,tue_sales1,d_week_seq1,mon_sales1,fri_sales1,wed_sales1,thu_sales1,mon_sales2,s_store_name1,s_store_id1,wed_sales2,thu_sales2,fri_sales2,tue_sales2,sun_sales1] + Project [sun_sales2,thu_sales1,s_store_name1,sat_sales1,tue_sales2,mon_sales2,d_week_seq1,thu_sales2,wed_sales2,wed_sales1,sat_sales2,mon_sales1,sun_sales1,fri_sales2,fri_sales1,tue_sales1,s_store_id1] BroadcastHashJoin [s_store_id1,d_week_seq1,s_store_id2,d_week_seq2] - Project [fri_sales,tue_sales,sat_sales,s_store_id,sun_sales,d_week_seq,wed_sales,mon_sales,s_store_name,thu_sales] + Project [sat_sales,s_store_id,mon_sales,sun_sales,s_store_name,fri_sales,wed_sales,tue_sales,d_week_seq,thu_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - Project [s_store_id,s_store_name,thu_sales,sun_sales,fri_sales,tue_sales,d_week_seq,wed_sales,sat_sales,mon_sales] + Project [fri_sales,d_week_seq,thu_sales,wed_sales,mon_sales,s_store_name,sat_sales,s_store_id,sun_sales,tue_sales] BroadcastHashJoin [ss_store_sk,s_store_sk] - HashAggregate [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum,sum,ss_store_sk,d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),thu_sales,sun_sales,sum,fri_sales,tue_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,wed_sales,sat_sales,sum,mon_sales,sum] + HashAggregate [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),d_week_seq,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,ss_store_sk,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum,sum] [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),fri_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),thu_sales,sum,wed_sales,mon_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sat_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sun_sales,sum,sum,tue_sales] InputAdapter Exchange [d_week_seq,ss_store_sk] #1 WholeStageCodegen - HashAggregate [sum,sum,d_day_name,sum,sum,sum,sum,sum,sum,ss_store_sk,d_week_seq,ss_sales_price,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + HashAggregate [sum,d_week_seq,sum,sum,d_day_name,sum,sum,sum,sum,sum,sum,ss_store_sk,sum,ss_sales_price,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] Project [ss_store_sk,ss_sales_price,d_week_seq,d_day_name] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,ss_store_sk,ss_sales_price] @@ -37,13 +37,13 @@ TakeOrderedAndProject [(wed_sales1 / wed_sales2),d_week_seq1,(mon_sales1 / mon_s InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [fri_sales,tue_sales,sat_sales,s_store_id,sun_sales,d_week_seq,wed_sales,mon_sales,thu_sales] + Project [sat_sales,s_store_id,mon_sales,sun_sales,fri_sales,wed_sales,tue_sales,d_week_seq,thu_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - Project [s_store_id,thu_sales,sun_sales,fri_sales,tue_sales,d_week_seq,wed_sales,sat_sales,mon_sales] + Project [fri_sales,d_week_seq,thu_sales,wed_sales,mon_sales,sat_sales,s_store_id,sun_sales,tue_sales] BroadcastHashJoin [ss_store_sk,s_store_sk] - HashAggregate [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum,ss_store_sk,d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),thu_sales,sum,sun_sales,fri_sales,tue_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,wed_sales,sat_sales,sum,mon_sales] + HashAggregate [sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),d_week_seq,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),ss_store_sk,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum,sum] [sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),fri_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,thu_sales,wed_sales,mon_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sat_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum,sun_sales,sum,tue_sales] InputAdapter - ReusedExchange [sum,sum,sum,ss_store_sk,d_week_seq,sum,sum,sum,sum] [sum,sum,sum,ss_store_sk,d_week_seq,sum,sum,sum,sum] #1 + ReusedExchange [sum,d_week_seq,sum,sum,sum,ss_store_sk,sum,sum,sum] [sum,d_week_seq,sum,sum,sum,ss_store_sk,sum,sum,sum] #1 InputAdapter BroadcastExchange #6 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt index 410992cf5..c8985e93a 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt @@ -3,7 +3,7 @@ TakeOrderedAndProject(limit=100, orderBy=[cnt#1 ASC NULLS FIRST], output=[state# +- *(8) Project [state#2, cnt#1] +- *(8) Filter (count(1)#3 >= 10) +- *(8) HashAggregate(keys=[ca_state#4], functions=[count(1)]) - +- Exchange hashpartitioning(ca_state#4, 200) + +- Exchange hashpartitioning(ca_state#4, 5) +- *(7) HashAggregate(keys=[ca_state#4], functions=[partial_count(1)]) +- *(7) Project [ca_state#4] +- *(7) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight @@ -26,18 +26,18 @@ TakeOrderedAndProject(limit=100, orderBy=[cnt#1 ASC NULLS FIRST], output=[state# : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#7,ss_item_sk#5,ss_customer_sk#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(3) Project [d_date_sk#8] - : +- *(3) Filter ((isnotnull(d_month_seq#13) && (d_month_seq#13 = Subquery subquery982)) && isnotnull(d_date_sk#8)) - : : +- Subquery subquery982 + : +- *(3) Filter ((isnotnull(d_month_seq#13) && (d_month_seq#13 = Subquery subquery3222)) && isnotnull(d_date_sk#8)) + : : +- Subquery subquery3222 : : +- *(2) HashAggregate(keys=[d_month_seq#13], functions=[]) - : : +- Exchange hashpartitioning(d_month_seq#13, 200) + : : +- Exchange hashpartitioning(d_month_seq#13, 5) : : +- *(1) HashAggregate(keys=[d_month_seq#13], functions=[]) : : +- *(1) Project [d_month_seq#13] : : +- *(1) Filter (((isnotnull(d_year#14) && isnotnull(d_moy#15)) && (d_year#14 = 2000)) && (d_moy#15 = 1)) : : +- *(1) FileScan parquet default.date_dim[d_month_seq#13,d_year#14,d_moy#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)], ReadSchema: struct : +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_month_seq#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct - : +- Subquery subquery982 + : +- Subquery subquery3222 : +- *(2) HashAggregate(keys=[d_month_seq#13], functions=[]) - : +- Exchange hashpartitioning(d_month_seq#13, 200) + : +- Exchange hashpartitioning(d_month_seq#13, 5) : +- *(1) HashAggregate(keys=[d_month_seq#13], functions=[]) : +- *(1) Project [d_month_seq#13] : +- *(1) Filter (((isnotnull(d_year#14) && isnotnull(d_moy#15)) && (d_year#14 = 2000)) && (d_moy#15 = 1)) @@ -51,7 +51,7 @@ TakeOrderedAndProject(limit=100, orderBy=[cnt#1 ASC NULLS FIRST], output=[state# : +- *(6) FileScan parquet default.item[i_item_sk#6,i_current_price#16,i_category#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) +- *(5) HashAggregate(keys=[i_category#18], functions=[avg(UnscaledValue(i_current_price#16))]) - +- Exchange hashpartitioning(i_category#18, 200) + +- Exchange hashpartitioning(i_category#18, 5) +- *(4) HashAggregate(keys=[i_category#18], functions=[partial_avg(UnscaledValue(i_current_price#16))]) +- *(4) Project [i_current_price#16, i_category#18] +- *(4) Filter isnotnull(i_category#18) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt index 59ea8c0d9..72f771ed1 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt @@ -2,7 +2,7 @@ TakeOrderedAndProject [cnt,state] WholeStageCodegen Project [state,cnt] Filter [count(1)] - HashAggregate [ca_state,count,count(1)] [state,cnt,count(1),count,count(1)] + HashAggregate [ca_state,count,count(1)] [cnt,count(1),count(1),state,count] InputAdapter Exchange [ca_state] #1 WholeStageCodegen @@ -68,11 +68,11 @@ TakeOrderedAndProject [cnt,state] InputAdapter BroadcastExchange #7 WholeStageCodegen - HashAggregate [i_category,sum,count,avg(UnscaledValue(i_current_price))] [avg(i_current_price),sum,i_category,count,avg(UnscaledValue(i_current_price))] + HashAggregate [i_category,sum,count,avg(UnscaledValue(i_current_price))] [count,i_category,avg(UnscaledValue(i_current_price)),avg(i_current_price),sum] InputAdapter Exchange [i_category] #8 WholeStageCodegen - HashAggregate [sum,sum,count,i_category,i_current_price,count] [sum,count,sum,count] + HashAggregate [count,count,i_category,i_current_price,sum,sum] [sum,count,sum,count] Project [i_current_price,i_category] Filter [i_category] Scan parquet default.item [i_current_price,i_category] [i_current_price,i_category] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt index 39a31d2ee..7d6d31c1d 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt @@ -19,11 +19,11 @@ TakeOrderedAndProject [promotions,total,(CAST((CAST(CAST(promotions AS DECIMAL(1 BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_ext_sales_price] BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_promo_sk] + Project [ss_customer_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_ext_sales_price] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_promo_sk] - Filter [ss_sold_date_sk,ss_customer_sk,ss_promo_sk,ss_store_sk,ss_item_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_promo_sk] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_promo_sk] + Project [ss_customer_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price] + Filter [ss_customer_sk,ss_item_sk,ss_promo_sk,ss_store_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price] [ss_customer_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #2 WholeStageCodegen @@ -77,9 +77,9 @@ TakeOrderedAndProject [promotions,total,(CAST((CAST(CAST(promotions AS DECIMAL(1 BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_ext_sales_price] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + Project [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price] Filter [ss_store_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price] [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price] InputAdapter ReusedExchange [s_store_sk] [s_store_sk] #2 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt index 40133f982..4e61b82c8 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[substring(w_warehouse_name, 1, 20)#1 ASC NULLS FIRST,sm_type#2 ASC NULLS FIRST,web_name#3 ASC NULLS FIRST], output=[substring(w_warehouse_name, 1, 20)#1,sm_type#2,web_name#3,30 days #4,31 - 60 days #5,61 - 90 days #6,91 - 120 days #7,>120 days #8]) +- *(6) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3], functions=[sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 30) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 60) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 90) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) - +- Exchange hashpartitioning(substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3, 200) + +- Exchange hashpartitioning(substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3, 5) +- *(5) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20) AS substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3], functions=[partial_sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 30) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 60) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 90) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) +- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, w_warehouse_name#9, sm_type#2, web_name#3] +- *(5) BroadcastHashJoin [ws_ship_date_sk#11], [d_date_sk#13], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt index a6a12999a..f487c91aa 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt @@ -1,21 +1,21 @@ -TakeOrderedAndProject [web_name,substring(w_warehouse_name, 1, 20),91 - 120 days ,31 - 60 days ,>120 days ,30 days ,sm_type,61 - 90 days ] +TakeOrderedAndProject [91 - 120 days ,web_name,>120 days ,30 days ,61 - 90 days ,substring(w_warehouse_name, 1, 20),31 - 60 days ,sm_type] WholeStageCodegen - HashAggregate [web_name,sum,sum,sum,sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum,sm_type,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint))] [substring(w_warehouse_name, 1, 20),sum,sum,91 - 120 days ,31 - 60 days ,>120 days ,sum,30 days ,sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint))] + HashAggregate [sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum,sum,web_name,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum,sum,sum,sm_type,sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint))] [91 - 120 days ,sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum,sum,>120 days ,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),30 days ,61 - 90 days ,substring(w_warehouse_name, 1, 20),31 - 60 days ,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum,sum,sum,sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint))] InputAdapter Exchange [substring(w_warehouse_name, 1, 20),sm_type,web_name] #1 WholeStageCodegen - HashAggregate [web_name,sum,sum,ws_sold_date_sk,w_warehouse_name,sum,sum,sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sm_type,sum,sum,ws_ship_date_sk] [sum,sum,sum,sum,sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sum,sum] - Project [web_name,ws_sold_date_sk,sm_type,ws_ship_date_sk,w_warehouse_name] + HashAggregate [sum,sum,w_warehouse_name,sum,sum,web_name,sum,substring(w_warehouse_name, 1, 20),sum,ws_ship_date_sk,ws_sold_date_sk,sum,sum,sum,sum,sm_type] [sum,sum,sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum] + Project [web_name,ws_ship_date_sk,w_warehouse_name,sm_type,ws_sold_date_sk] BroadcastHashJoin [ws_ship_date_sk,d_date_sk] - Project [web_name,ws_sold_date_sk,sm_type,ws_ship_date_sk,w_warehouse_name] + Project [web_name,ws_ship_date_sk,w_warehouse_name,sm_type,ws_sold_date_sk] BroadcastHashJoin [ws_web_site_sk,web_site_sk] - Project [ws_web_site_sk,ws_sold_date_sk,sm_type,ws_ship_date_sk,w_warehouse_name] + Project [ws_ship_date_sk,ws_web_site_sk,w_warehouse_name,sm_type,ws_sold_date_sk] BroadcastHashJoin [ws_ship_mode_sk,sm_ship_mode_sk] - Project [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,w_warehouse_name] + Project [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,w_warehouse_name,ws_sold_date_sk] BroadcastHashJoin [ws_warehouse_sk,w_warehouse_sk] - Project [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,ws_warehouse_sk] + Project [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_sold_date_sk] Filter [ws_warehouse_sk,ws_ship_mode_sk,ws_web_site_sk,ws_ship_date_sk] - Scan parquet default.web_sales [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,ws_warehouse_sk] [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,ws_warehouse_sk] + Scan parquet default.web_sales [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_sold_date_sk] [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt index c1e90579f..98e27704a 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt @@ -4,9 +4,9 @@ TakeOrderedAndProject(limit=100, orderBy=[i_manager_id#1 ASC NULLS FIRST,avg_mon +- *(7) Filter (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#3 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000) +- Window [avg(_w0#4) windowspecdefinition(i_manager_id#1, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_manager_id#1] +- *(6) Sort [i_manager_id#1 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(i_manager_id#1, 200) + +- Exchange hashpartitioning(i_manager_id#1, 5) +- *(5) HashAggregate(keys=[i_manager_id#1, d_moy#5], functions=[sum(UnscaledValue(ss_sales_price#6))]) - +- Exchange hashpartitioning(i_manager_id#1, d_moy#5, 200) + +- Exchange hashpartitioning(i_manager_id#1, d_moy#5, 5) +- *(4) HashAggregate(keys=[i_manager_id#1, d_moy#5], functions=[partial_sum(UnscaledValue(ss_sales_price#6))]) +- *(4) Project [i_manager_id#1, ss_sales_price#6, d_moy#5] +- *(4) BroadcastHashJoin [ss_store_sk#7], [s_store_sk#8], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt index 941d3fe37..1bc6b52a7 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt @@ -13,7 +13,7 @@ TakeOrderedAndProject [i_manager_id,avg_monthly_sales,sum_sales] InputAdapter Exchange [i_manager_id,d_moy] #2 WholeStageCodegen - HashAggregate [d_moy,sum,ss_sales_price,i_manager_id,sum] [sum,sum] + HashAggregate [sum,i_manager_id,d_moy,sum,ss_sales_price] [sum,sum] Project [i_manager_id,ss_sales_price,d_moy] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [i_manager_id,ss_store_sk,ss_sales_price,d_moy] @@ -22,7 +22,7 @@ TakeOrderedAndProject [i_manager_id,avg_monthly_sales,sum_sales] BroadcastHashJoin [i_item_sk,ss_item_sk] Project [i_item_sk,i_manager_id] Filter [i_category,i_class,i_brand,i_item_sk] - Scan parquet default.item [i_class,i_item_sk,i_manager_id,i_category,i_brand] [i_class,i_item_sk,i_manager_id,i_category,i_brand] + Scan parquet default.item [i_manager_id,i_brand,i_category,i_item_sk,i_class] [i_manager_id,i_brand,i_category,i_item_sk,i_class] InputAdapter BroadcastExchange #3 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/explain.txt index fde217dca..9319775c4 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/explain.txt @@ -1,10 +1,10 @@ == Physical Plan == *(43) Sort [product_name#1 ASC NULLS FIRST, store_name#2 ASC NULLS FIRST, cnt#3 ASC NULLS FIRST], true, 0 -+- Exchange rangepartitioning(product_name#1 ASC NULLS FIRST, store_name#2 ASC NULLS FIRST, cnt#3 ASC NULLS FIRST, 200) ++- Exchange rangepartitioning(product_name#1 ASC NULLS FIRST, store_name#2 ASC NULLS FIRST, cnt#3 ASC NULLS FIRST, 5) +- *(42) Project [product_name#1, store_name#2, store_zip#4, b_street_number#5, b_streen_name#6, b_city#7, b_zip#8, c_street_number#9, c_street_name#10, c_city#11, c_zip#12, syear#13, cnt#14, s1#15, s2#16, s3#17, s1#18, s2#19, s3#20, syear#21, cnt#3] +- *(42) BroadcastHashJoin [item_sk#22, store_name#2, store_zip#4], [item_sk#23, store_name#24, store_zip#25], Inner, BuildRight, (cnt#3 <= cnt#14) :- *(42) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[count(1), sum(UnscaledValue(ss_wholesale_cost#41)), sum(UnscaledValue(ss_list_price#42)), sum(UnscaledValue(ss_coupon_amt#43))]) - : +- Exchange hashpartitioning(i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40, 200) + : +- Exchange hashpartitioning(i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40, 5) : +- *(20) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#41)), partial_sum(UnscaledValue(ss_list_price#42)), partial_sum(UnscaledValue(ss_coupon_amt#43))]) : +- *(20) Project [ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, d_year#39, d_year#40, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, i_item_sk#27, i_product_name#26] : +- *(20) BroadcastHashJoin [ss_item_sk#44], [i_item_sk#27], Inner, BuildRight @@ -51,7 +51,7 @@ : : : : : : : : : : : : : : : : +- *(4) Project [cs_item_sk#75] : : : : : : : : : : : : : : : : +- *(4) Filter (isnotnull(sum(cs_ext_list_price#79)#80) && (cast(sum(cs_ext_list_price#79)#80 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))#84)), DecimalType(21,2)))) : : : : : : : : : : : : : : : : +- *(4) HashAggregate(keys=[cs_item_sk#75], functions=[sum(UnscaledValue(cs_ext_list_price#79)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))]) - : : : : : : : : : : : : : : : : +- Exchange hashpartitioning(cs_item_sk#75, 200) + : : : : : : : : : : : : : : : : +- Exchange hashpartitioning(cs_item_sk#75, 5) : : : : : : : : : : : : : : : : +- *(3) HashAggregate(keys=[cs_item_sk#75], functions=[partial_sum(UnscaledValue(cs_ext_list_price#79)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))]) : : : : : : : : : : : : : : : : +- *(3) Project [cs_item_sk#75, cs_ext_list_price#79, cr_refunded_cash#81, cr_reversed_charge#82, cr_store_credit#83] : : : : : : : : : : : : : : : : +- *(3) BroadcastHashJoin [cs_item_sk#75, cs_order_number#85], [cr_item_sk#86, cr_order_number#87], Inner, BuildRight @@ -109,7 +109,7 @@ : +- *(19) FileScan parquet default.item[i_item_sk#27,i_current_price#88,i_color#89,i_product_name#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), In(i_color, [purple,burlywood,indian,spring,floral,medium]), Greater..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, string, true], input[2, string, true])) +- *(41) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[count(1), sum(UnscaledValue(ss_wholesale_cost#41)), sum(UnscaledValue(ss_list_price#42)), sum(UnscaledValue(ss_coupon_amt#43))]) - +- Exchange hashpartitioning(i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40, 200) + +- Exchange hashpartitioning(i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40, 5) +- *(40) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#41)), partial_sum(UnscaledValue(ss_list_price#42)), partial_sum(UnscaledValue(ss_coupon_amt#43))]) +- *(40) Project [ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, d_year#39, d_year#40, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, i_item_sk#27, i_product_name#26] +- *(40) BroadcastHashJoin [ss_item_sk#44], [i_item_sk#27], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt index 7336416f6..9ff14d371 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt @@ -3,50 +3,50 @@ WholeStageCodegen InputAdapter Exchange [product_name,store_name,cnt] #1 WholeStageCodegen - Project [cnt,b_streen_name,product_name,b_city,b_zip,syear,c_street_name,store_zip,c_street_number,s1,s3,c_zip,s3,s1,b_street_number,s2,store_name,cnt,s2,syear,c_city] - BroadcastHashJoin [cnt,store_zip,store_name,store_zip,store_name,item_sk,cnt,item_sk] - HashAggregate [d_year,ca_street_name,count,sum(UnscaledValue(ss_coupon_amt)),sum,ca_street_number,ca_street_name,sum(UnscaledValue(ss_list_price)),ca_zip,count(1),d_year,ca_city,sum,i_item_sk,sum,ca_street_number,d_year,ca_city,s_store_name,s_zip,sum(UnscaledValue(ss_wholesale_cost)),ca_zip,i_product_name] [c_city,b_streen_name,count,b_zip,sum(UnscaledValue(ss_coupon_amt)),sum,b_city,c_street_name,s3,sum(UnscaledValue(ss_list_price)),cnt,b_street_number,c_street_number,item_sk,count(1),store_zip,s2,product_name,sum,sum,store_name,c_zip,sum(UnscaledValue(ss_wholesale_cost)),s1,syear] + Project [b_streen_name,s2,s3,b_street_number,c_street_name,b_city,s1,syear,cnt,s2,b_zip,c_city,c_street_number,s1,syear,product_name,c_zip,store_zip,s3,cnt,store_name] + BroadcastHashJoin [store_zip,item_sk,cnt,item_sk,store_name,store_zip,cnt,store_name] + HashAggregate [ca_street_number,sum(UnscaledValue(ss_list_price)),ca_street_name,ca_city,s_store_name,s_zip,count(1),ca_street_number,ca_zip,d_year,i_item_sk,ca_city,d_year,sum,ca_street_name,sum,ca_zip,sum,d_year,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_wholesale_cost)),i_product_name,count] [store_name,sum(UnscaledValue(ss_list_price)),s3,b_streen_name,c_city,count(1),product_name,b_street_number,c_street_number,b_city,s1,store_zip,cnt,s2,item_sk,b_zip,c_zip,sum,sum,sum,c_street_name,syear,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_wholesale_cost)),count] InputAdapter - Exchange [d_year,ca_street_name,ca_street_number,ca_street_name,ca_zip,d_year,ca_city,i_item_sk,ca_street_number,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] #2 + Exchange [ca_street_number,ca_street_name,ca_city,s_store_name,s_zip,ca_street_number,ca_zip,d_year,i_item_sk,ca_city,d_year,ca_street_name,ca_zip,d_year,i_product_name] #2 WholeStageCodegen - HashAggregate [d_year,ca_street_name,count,sum,sum,ca_street_number,ss_wholesale_cost,ca_street_name,sum,count,ca_zip,ss_coupon_amt,d_year,sum,ca_city,ss_list_price,sum,i_item_sk,sum,ca_street_number,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] [count,sum,sum,sum,count,sum,sum,sum] - Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,d_year,ca_zip,ca_street_number,ca_zip,d_year,i_item_sk,ca_street_name,ca_city,i_product_name,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + HashAggregate [ca_street_number,count,ca_street_name,sum,ca_city,s_store_name,s_zip,ss_wholesale_cost,ss_coupon_amt,ca_street_number,sum,ca_zip,d_year,sum,i_item_sk,ca_city,d_year,sum,ca_street_name,sum,ca_zip,sum,ss_list_price,d_year,i_product_name,count] [count,sum,sum,sum,sum,sum,sum,count] + Project [ca_city,d_year,i_product_name,ca_street_name,ss_list_price,ss_wholesale_cost,ca_city,s_zip,i_item_sk,ca_street_number,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,ca_street_number,ca_zip] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + Project [ca_city,d_year,ca_street_name,ss_list_price,ss_wholesale_cost,ca_city,ss_item_sk,s_zip,ca_street_number,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,ca_street_number,ca_zip] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + Project [ca_city,d_year,ca_street_name,ss_list_price,ss_wholesale_cost,ca_city,ss_item_sk,s_zip,ca_street_number,hd_income_band_sk,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,ca_street_number,ca_zip] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,hd_income_band_sk,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + Project [ca_city,d_year,ca_street_name,ss_list_price,ss_wholesale_cost,ca_city,ss_item_sk,s_zip,ca_street_number,hd_income_band_sk,hd_income_band_sk,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,ca_street_number,ca_zip] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,hd_income_band_sk,d_year,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + Project [d_year,ss_list_price,ss_wholesale_cost,ca_city,ss_item_sk,s_zip,ca_street_number,hd_income_band_sk,hd_income_band_sk,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,c_current_addr_sk] BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,hd_income_band_sk,d_year,s_zip,hd_income_band_sk,d_year,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,ss_addr_sk,ss_item_sk,s_zip,hd_income_band_sk,hd_income_band_sk,s_store_name,d_year,ss_coupon_amt,d_year,c_current_addr_sk] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,hd_income_band_sk,d_year,s_zip,d_year,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,ss_addr_sk,ss_item_sk,s_zip,hd_income_band_sk,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,d_year,c_current_addr_sk] BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_hdemo_sk,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,ss_hdemo_sk,ss_addr_sk,ss_item_sk,s_zip,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,d_year,c_current_addr_sk] BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,ss_hdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,d_year,c_current_addr_sk] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] - Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,cd_marital_status,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,ss_hdemo_sk,c_current_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,cd_marital_status,d_year,ss_coupon_amt,d_year,c_current_addr_sk] BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,ss_hdemo_sk,ss_cdemo_sk,c_current_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,d_year,c_current_addr_sk] BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] - Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,c_first_shipto_date_sk,ss_hdemo_sk,ss_cdemo_sk,c_current_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,c_current_addr_sk] BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] - Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,c_first_sales_date_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,c_first_shipto_date_sk,ss_hdemo_sk,ss_cdemo_sk,c_current_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,c_first_sales_date_sk,ss_coupon_amt,c_current_addr_sk] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ss_customer_sk,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,ss_coupon_amt] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,d_year,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_item_sk,ss_promo_sk,ss_store_sk,ss_coupon_amt] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] BroadcastHashJoin [ss_item_sk,cs_item_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] - Filter [ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_promo_sk,ss_store_sk,ss_ticket_number,ss_addr_sk,ss_item_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] + Project [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_ticket_number,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] + Filter [ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_store_sk,ss_sold_date_sk,ss_addr_sk,ss_cdemo_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_ticket_number,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_ticket_number,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -58,12 +58,12 @@ WholeStageCodegen WholeStageCodegen Project [cs_item_sk] Filter [sum(cs_ext_list_price),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2)))] - HashAggregate [sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),cs_item_sk,sum,sum(UnscaledValue(cs_ext_list_price)),sum] [sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(cs_ext_list_price),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum,sum(UnscaledValue(cs_ext_list_price)),sum] + HashAggregate [sum,sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),cs_item_sk,sum,sum(UnscaledValue(cs_ext_list_price))] [sum,sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(cs_ext_list_price),sum,sum(UnscaledValue(cs_ext_list_price))] InputAdapter Exchange [cs_item_sk] #5 WholeStageCodegen - HashAggregate [sum,sum,cs_item_sk,sum,cr_refunded_cash,cr_reversed_charge,cr_store_credit,sum,cs_ext_list_price] [sum,sum,sum,sum] - Project [cr_store_credit,cs_item_sk,cr_refunded_cash,cs_ext_list_price,cr_reversed_charge] + HashAggregate [sum,cs_ext_list_price,sum,sum,cs_item_sk,cr_store_credit,cr_refunded_cash,sum,cr_reversed_charge] [sum,sum,sum,sum] + Project [cs_ext_list_price,cr_refunded_cash,cr_store_credit,cs_item_sk,cr_reversed_charge] BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] Project [cs_item_sk,cs_order_number,cs_ext_list_price] Filter [cs_item_sk,cs_order_number] @@ -71,9 +71,9 @@ WholeStageCodegen InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [cr_order_number,cr_store_credit,cr_refunded_cash,cr_reversed_charge,cr_item_sk] + Project [cr_order_number,cr_refunded_cash,cr_store_credit,cr_item_sk,cr_reversed_charge] Filter [cr_item_sk,cr_order_number] - Scan parquet default.catalog_returns [cr_order_number,cr_store_credit,cr_refunded_cash,cr_reversed_charge,cr_item_sk] [cr_order_number,cr_store_credit,cr_refunded_cash,cr_reversed_charge,cr_item_sk] + Scan parquet default.catalog_returns [cr_order_number,cr_refunded_cash,cr_store_credit,cr_item_sk,cr_reversed_charge] [cr_order_number,cr_refunded_cash,cr_store_credit,cr_item_sk,cr_reversed_charge] InputAdapter BroadcastExchange #7 WholeStageCodegen @@ -89,9 +89,9 @@ WholeStageCodegen InputAdapter BroadcastExchange #9 WholeStageCodegen - Project [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] - Filter [c_current_cdemo_sk,c_customer_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,c_current_addr_sk] - Scan parquet default.customer [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] + Project [c_first_shipto_date_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_current_addr_sk] + Filter [c_first_shipto_date_sk,c_current_hdemo_sk,c_current_cdemo_sk,c_customer_sk,c_current_addr_sk,c_first_sales_date_sk] + Scan parquet default.customer [c_first_shipto_date_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_current_addr_sk] [c_first_shipto_date_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_current_addr_sk] InputAdapter BroadcastExchange #10 WholeStageCodegen @@ -125,11 +125,11 @@ WholeStageCodegen InputAdapter BroadcastExchange #14 WholeStageCodegen - Project [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] + Project [ca_address_sk,ca_city,ca_street_number,ca_zip,ca_street_name] Filter [ca_address_sk] - Scan parquet default.customer_address [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] + Scan parquet default.customer_address [ca_address_sk,ca_city,ca_street_number,ca_zip,ca_street_name] [ca_address_sk,ca_city,ca_street_number,ca_zip,ca_street_name] InputAdapter - ReusedExchange [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] #14 + ReusedExchange [ca_city,ca_street_name,ca_address_sk,ca_street_number,ca_zip] [ca_city,ca_street_name,ca_address_sk,ca_street_number,ca_zip] #14 InputAdapter BroadcastExchange #15 WholeStageCodegen @@ -147,48 +147,48 @@ WholeStageCodegen InputAdapter BroadcastExchange #17 WholeStageCodegen - HashAggregate [d_year,ca_street_name,count,sum(UnscaledValue(ss_coupon_amt)),ca_street_number,sum,ca_street_name,sum(UnscaledValue(ss_list_price)),ca_zip,count(1),d_year,ca_city,sum,i_item_sk,ca_street_number,sum,d_year,ca_city,s_store_name,s_zip,sum(UnscaledValue(ss_wholesale_cost)),ca_zip,i_product_name] [count,sum(UnscaledValue(ss_coupon_amt)),syear,s3,sum,sum(UnscaledValue(ss_list_price)),count(1),store_name,cnt,sum,s2,item_sk,s1,sum,store_zip,sum(UnscaledValue(ss_wholesale_cost))] + HashAggregate [ca_street_number,sum(UnscaledValue(ss_list_price)),ca_street_name,ca_city,s_store_name,s_zip,count(1),sum,ca_street_number,ca_zip,d_year,sum,sum,i_item_sk,ca_city,d_year,ca_street_name,ca_zip,d_year,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_wholesale_cost)),i_product_name,count] [sum(UnscaledValue(ss_list_price)),s2,s3,count(1),sum,syear,cnt,store_zip,sum,item_sk,sum,store_name,s1,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_wholesale_cost)),count] InputAdapter - Exchange [d_year,ca_street_name,ca_street_number,ca_street_name,ca_zip,d_year,ca_city,i_item_sk,ca_street_number,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] #18 + Exchange [ca_street_number,ca_street_name,ca_city,s_store_name,s_zip,ca_street_number,ca_zip,d_year,i_item_sk,ca_city,d_year,ca_street_name,ca_zip,d_year,i_product_name] #18 WholeStageCodegen - HashAggregate [d_year,ca_street_name,count,ca_street_number,ss_wholesale_cost,sum,sum,ca_street_name,count,ca_zip,ss_coupon_amt,d_year,ca_city,ss_list_price,sum,sum,i_item_sk,sum,ca_street_number,sum,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] [count,sum,sum,count,sum,sum,sum,sum] - Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,d_year,ca_zip,ca_street_number,ca_zip,d_year,i_item_sk,ca_street_name,ca_city,i_product_name,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + HashAggregate [ca_street_number,count,ca_street_name,sum,ca_city,s_store_name,s_zip,sum,ss_wholesale_cost,ss_coupon_amt,ca_street_number,ca_zip,d_year,sum,sum,i_item_sk,ca_city,d_year,ca_street_name,ca_zip,sum,ss_list_price,sum,d_year,i_product_name,count] [count,sum,sum,sum,sum,sum,sum,count] + Project [ca_city,d_year,i_product_name,ca_street_name,ss_list_price,ss_wholesale_cost,ca_city,s_zip,i_item_sk,ca_street_number,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,ca_street_number,ca_zip] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + Project [ca_city,d_year,ca_street_name,ss_list_price,ss_wholesale_cost,ca_city,ss_item_sk,s_zip,ca_street_number,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,ca_street_number,ca_zip] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + Project [ca_city,d_year,ca_street_name,ss_list_price,ss_wholesale_cost,ca_city,ss_item_sk,s_zip,ca_street_number,hd_income_band_sk,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,ca_street_number,ca_zip] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,hd_income_band_sk,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + Project [ca_city,d_year,ca_street_name,ss_list_price,ss_wholesale_cost,ca_city,ss_item_sk,s_zip,ca_street_number,hd_income_band_sk,hd_income_band_sk,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,ca_street_number,ca_zip] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,hd_income_band_sk,d_year,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + Project [d_year,ss_list_price,ss_wholesale_cost,ca_city,ss_item_sk,s_zip,ca_street_number,hd_income_band_sk,hd_income_band_sk,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,c_current_addr_sk] BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,hd_income_band_sk,d_year,s_zip,hd_income_band_sk,d_year,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,ss_addr_sk,ss_item_sk,s_zip,hd_income_band_sk,hd_income_band_sk,s_store_name,d_year,ss_coupon_amt,d_year,c_current_addr_sk] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,hd_income_band_sk,d_year,s_zip,d_year,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,ss_addr_sk,ss_item_sk,s_zip,hd_income_band_sk,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,d_year,c_current_addr_sk] BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_hdemo_sk,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,ss_hdemo_sk,ss_addr_sk,ss_item_sk,s_zip,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,d_year,c_current_addr_sk] BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,ss_hdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,d_year,c_current_addr_sk] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] - Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,cd_marital_status,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,ss_hdemo_sk,c_current_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,cd_marital_status,d_year,ss_coupon_amt,d_year,c_current_addr_sk] BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,ss_hdemo_sk,ss_cdemo_sk,c_current_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,d_year,c_current_addr_sk] BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] - Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,c_first_shipto_date_sk,ss_hdemo_sk,ss_cdemo_sk,c_current_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,c_current_addr_sk] BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] - Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,c_first_sales_date_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,c_first_shipto_date_sk,ss_hdemo_sk,ss_cdemo_sk,c_current_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,c_first_sales_date_sk,ss_coupon_amt,c_current_addr_sk] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ss_customer_sk,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,ss_coupon_amt] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,d_year,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [d_year,ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_item_sk,ss_promo_sk,ss_store_sk,ss_coupon_amt] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] BroadcastHashJoin [ss_item_sk,cs_item_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] - Filter [ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_promo_sk,ss_store_sk,ss_ticket_number,ss_addr_sk,ss_item_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] + Project [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_ticket_number,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] + Filter [ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_store_sk,ss_sold_date_sk,ss_addr_sk,ss_cdemo_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_ticket_number,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_ticket_number,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] InputAdapter ReusedExchange [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] #3 InputAdapter @@ -202,7 +202,7 @@ WholeStageCodegen InputAdapter ReusedExchange [s_store_sk,s_store_name,s_zip] [s_store_sk,s_store_name,s_zip] #8 InputAdapter - ReusedExchange [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] #9 + ReusedExchange [c_first_shipto_date_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_current_addr_sk] [c_first_shipto_date_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_current_addr_sk] #9 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #10 InputAdapter @@ -218,9 +218,9 @@ WholeStageCodegen InputAdapter ReusedExchange [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] #13 InputAdapter - ReusedExchange [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] #14 + ReusedExchange [ca_address_sk,ca_city,ca_street_number,ca_zip,ca_street_name] [ca_address_sk,ca_city,ca_street_number,ca_zip,ca_street_name] #14 InputAdapter - ReusedExchange [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] #14 + ReusedExchange [ca_city,ca_street_name,ca_address_sk,ca_street_number,ca_zip] [ca_city,ca_street_name,ca_address_sk,ca_street_number,ca_zip] #14 InputAdapter ReusedExchange [ib_income_band_sk] [ib_income_band_sk] #15 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt index b06bc7ded..90e6d0221 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt @@ -12,7 +12,7 @@ TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,i_item_ : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(3) Filter isnotnull(revenue#3) : : +- *(3) HashAggregate(keys=[ss_store_sk#7, ss_item_sk#10], functions=[sum(UnscaledValue(ss_sales_price#13))]) - : : +- Exchange hashpartitioning(ss_store_sk#7, ss_item_sk#10, 200) + : : +- Exchange hashpartitioning(ss_store_sk#7, ss_item_sk#10, 5) : : +- *(2) HashAggregate(keys=[ss_store_sk#7, ss_item_sk#10], functions=[partial_sum(UnscaledValue(ss_sales_price#13))]) : : +- *(2) Project [ss_item_sk#10, ss_store_sk#7, ss_sales_price#13] : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight @@ -29,10 +29,10 @@ TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,i_item_ : +- *(4) FileScan parquet default.item[i_item_sk#11,i_item_desc#2,i_current_price#4,i_wholesale_cost#5,i_brand#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct +- *(12) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) - +- Exchange hashpartitioning(w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, 200) + +- Exchange hashpartitioning(w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, 5) +- *(11) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) +- *(11) Project [cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] +- *(11) BroadcastHashJoin [cs_ship_mode_sk#55], [sm_ship_mode_sk#43], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt index f053b48d2..d17316c39 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt @@ -1,35 +1,35 @@ -TakeOrderedAndProject [dec_net,w_city,oct_sales_per_sq_foot,apr_sales_per_sq_foot,mar_sales,jul_sales_per_sq_foot,feb_sales_per_sq_foot,apr_net,ship_carriers,feb_net,may_sales_per_sq_foot,aug_sales,jun_net,w_county,sep_sales_per_sq_foot,feb_sales,mar_sales_per_sq_foot,w_country,year,w_state,oct_net,w_warehouse_sq_ft,dec_sales,may_sales,aug_net,sep_net,mar_net,nov_net,nov_sales_per_sq_foot,oct_sales,jun_sales,w_warehouse_name,may_net,jul_sales,dec_sales_per_sq_foot,jul_net,jan_net,sep_sales,jun_sales_per_sq_foot,jan_sales,nov_sales,apr_sales,jan_sales_per_sq_foot,aug_sales_per_sq_foot] +TakeOrderedAndProject [jan_net,aug_net,jul_sales,sep_net,mar_sales,may_net,feb_net,apr_sales,apr_net,oct_net,nov_net,jun_sales,jul_sales_per_sq_foot,w_warehouse_name,w_warehouse_sq_ft,oct_sales_per_sq_foot,ship_carriers,sep_sales,may_sales_per_sq_foot,mar_net,may_sales,mar_sales_per_sq_foot,w_county,jan_sales,feb_sales,w_country,nov_sales,jun_sales_per_sq_foot,dec_net,jul_net,aug_sales,w_state,dec_sales,year,w_city,oct_sales,jun_net,sep_sales_per_sq_foot,apr_sales_per_sq_foot,feb_sales_per_sq_foot,aug_sales_per_sq_foot,nov_sales_per_sq_foot,jan_sales_per_sq_foot,dec_sales_per_sq_foot] WholeStageCodegen - HashAggregate [sum(may_net),sum,w_city,sum,sum(sep_sales),sum(dec_net),sum(jul_net),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum,sum,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum,sum,sum(oct_sales),sum,sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(mar_sales),sum(sep_net),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum(oct_net),sum,sum(apr_sales),sum(jan_net),ship_carriers,sum(nov_net),sum,sum(jun_sales),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(dec_sales),w_county,sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(may_sales),w_country,year,sum,sum,sum,sum,w_state,sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,w_warehouse_sq_ft,sum,sum,sum,sum(aug_sales),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(nov_sales),sum(aug_net),sum(jul_sales),sum,sum(jan_sales),sum,sum,sum(feb_sales),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum,w_warehouse_name,sum,sum,sum(jun_net),sum(mar_net),sum,sum,sum,sum,sum,sum(apr_net),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(feb_net)] [sum(may_net),dec_net,sum,sum,sum(sep_sales),sum(dec_net),sum(jul_net),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),oct_sales_per_sq_foot,apr_sales_per_sq_foot,sum,sum,sum,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum,mar_sales,sum,sum(oct_sales),sum,sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(mar_sales),sum(sep_net),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum(oct_net),jul_sales_per_sq_foot,feb_sales_per_sq_foot,sum,sum(apr_sales),sum(jan_net),apr_net,feb_net,sum(nov_net),may_sales_per_sq_foot,aug_sales,sum,jun_net,sum(jun_sales),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(dec_sales),sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sep_sales_per_sq_foot,feb_sales,sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(may_sales),mar_sales_per_sq_foot,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),oct_net,sum,dec_sales,sum,sum,may_sales,sum,sum(aug_sales),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(nov_sales),aug_net,sum(aug_net),sum(jul_sales),sum,sep_net,mar_net,sum(jan_sales),nov_net,nov_sales_per_sq_foot,sum,oct_sales,sum,sum(feb_sales),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,jun_sales,sum,may_net,jul_sales,sum,dec_sales_per_sq_foot,jul_net,jan_net,sum,sum(jun_net),sum(mar_net),sum,sum,sep_sales,sum,jun_sales_per_sq_foot,jan_sales,nov_sales,apr_sales,sum,sum,jan_sales_per_sq_foot,aug_sales_per_sq_foot,sum(apr_net),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(feb_net)] + HashAggregate [sum,sum,sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(jan_net),sum,sum,sum(oct_net),sum,sum,sum,sum,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),w_warehouse_name,w_warehouse_sq_ft,sum,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),ship_carriers,sum(jul_sales),sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum,sum(sep_sales),sum(sep_net),sum,sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),w_county,sum(nov_net),sum,sum,sum,sum(mar_sales),sum,sum,w_country,sum,sum(nov_sales),sum(may_net),sum(jun_sales),sum(aug_sales),sum(jul_net),sum(oct_sales),sum,sum(dec_sales),w_state,sum(aug_net),sum,sum(jun_net),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(mar_net),sum,year,w_city,sum,sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(feb_sales),sum,sum(jan_sales),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(feb_net),sum,sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum(dec_net),sum,sum(apr_net),sum,sum(apr_sales),sum(may_sales)] [jan_net,sum,sum,aug_net,jul_sales,sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sep_net,sum(jan_net),sum,mar_sales,sum,sum(oct_net),may_net,feb_net,sum,apr_sales,sum,apr_net,sum,sum,oct_net,nov_net,jun_sales,sum,sum,sum,sum,jul_sales_per_sq_foot,sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),oct_sales_per_sq_foot,sum(jul_sales),sep_sales,sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,may_sales_per_sq_foot,mar_net,sum,sum(sep_sales),sum(sep_net),may_sales,sum,sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),mar_sales_per_sq_foot,sum(nov_net),sum,sum,sum,sum(mar_sales),jan_sales,sum,sum,feb_sales,nov_sales,sum,jun_sales_per_sq_foot,sum(nov_sales),sum(may_net),sum(jun_sales),sum(aug_sales),sum(jul_net),sum(oct_sales),sum,dec_net,jul_net,sum(dec_sales),aug_sales,sum(aug_net),dec_sales,sum,sum(jun_net),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(mar_net),sum,sum,oct_sales,jun_net,sep_sales_per_sq_foot,sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),apr_sales_per_sq_foot,sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(feb_sales),sum,sum(jan_sales),feb_sales_per_sq_foot,sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(feb_net),sum,sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,aug_sales_per_sq_foot,nov_sales_per_sq_foot,sum(dec_net),sum,sum(apr_net),sum,jan_sales_per_sq_foot,sum(apr_sales),dec_sales_per_sq_foot,sum(may_sales)] InputAdapter - Exchange [w_city,ship_carriers,w_county,w_country,year,w_state,w_warehouse_sq_ft,w_warehouse_name] #1 + Exchange [w_warehouse_name,w_warehouse_sq_ft,ship_carriers,w_county,w_country,w_state,year,w_city] #1 WholeStageCodegen - HashAggregate [sum,sum,w_city,sum,may_sales,apr_net,sum,sum,sum,jul_net,sum,sum,jan_net,sum,sum,sep_sales,sum,sum,sum,sum,jan_sales,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,aug_net,dec_sales,jun_sales,sum,sum,sum,feb_net,sum,sep_net,ship_carriers,sum,mar_sales,sum,sum,sum,dec_net,w_county,sum,nov_sales,jul_sales,may_net,w_country,sum,oct_sales,sum,year,sum,sum,sum,sum,sum,w_state,sum,sum,mar_net,sum,jun_net,w_warehouse_sq_ft,sum,sum,sum,aug_sales,sum,sum,sum,sum,sum,feb_sales,sum,oct_net,sum,apr_sales,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_warehouse_name,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,nov_net] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + HashAggregate [sum,sum,apr_net,sum,sum,sum,dec_sales,sum,sum,sep_net,sum,sum,sum,sum,jan_sales,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,jul_sales,w_warehouse_name,sum,w_warehouse_sq_ft,sum,sum,sum,sum,sum,sum,ship_carriers,sum,sum,sum,sum,sum,feb_sales,w_county,sum,sum,sum,sum,sum,sum,nov_net,nov_sales,sum,may_sales,sum,w_country,sum,sum,sum,sep_sales,sum,mar_net,jun_sales,may_net,sum,aug_sales,sum,sum,jun_net,sum,jul_net,sum,w_state,mar_sales,jan_net,sum,apr_sales,sum,feb_net,sum,year,w_city,sum,sum,sum,sum,dec_net,sum,sum,sum,oct_net,sum,sum,sum,sum,aug_net,sum,sum,sum,sum,sum,oct_sales] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] InputAdapter Union WholeStageCodegen - HashAggregate [sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,d_year,sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_warehouse_sq_ft,w_county,sum,sum,sum,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_warehouse_name,sum,sum,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_city,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_state,sum,sum,sum,sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_country] [sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,may_sales,oct_sales,nov_net,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),apr_net,sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,ship_carriers,sum,sum,sum,dec_sales,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,apr_sales,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,nov_sales,sum,sum,jul_net,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jan_sales,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jun_net,sep_net,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,feb_net,aug_sales,jul_sales,sum,sum,sum,sum,year,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sep_sales,jan_net,sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,may_net,mar_net,sum,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),aug_net,mar_sales,oct_net,feb_sales,jun_sales,dec_net] + HashAggregate [sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,w_state,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_warehouse_name,sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum,sum,w_city,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,d_year,sum,sum,sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,w_warehouse_sq_ft,sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_country,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_county,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] [sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,aug_sales,sum,oct_sales,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,nov_sales,may_sales,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),oct_net,sep_net,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),aug_net,sum,sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum,jan_sales,sum,mar_sales,year,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),feb_sales,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jul_sales,sum,sum,nov_net,sep_sales,jul_net,may_net,apr_sales,jun_net,dec_net,jan_net,sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,mar_net,feb_net,sum,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),ship_carriers,apr_net,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jun_sales,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),dec_sales,sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] InputAdapter - Exchange [d_year,w_warehouse_sq_ft,w_county,w_warehouse_name,w_city,w_state,w_country] #2 + Exchange [w_state,w_warehouse_name,w_city,d_year,w_warehouse_sq_ft,w_country,w_county] #2 WholeStageCodegen - HashAggregate [sum,sum,sum,sum,d_year,sum,sum,sum,sum,d_moy,sum,sum,sum,sum,sum,sum,sum,sum,w_warehouse_sq_ft,w_county,sum,sum,sum,sum,sum,sum,sum,w_warehouse_name,sum,sum,sum,sum,ws_ext_sales_price,sum,w_city,sum,ws_net_paid,sum,sum,sum,w_state,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,ws_quantity,sum,w_country,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [d_moy,ws_net_paid,d_year,ws_ext_sales_price,ws_quantity,w_county,w_country,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] + HashAggregate [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,d_moy,sum,sum,w_state,sum,w_warehouse_name,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_city,sum,sum,d_year,sum,sum,sum,sum,w_warehouse_sq_ft,ws_ext_sales_price,ws_quantity,sum,ws_net_paid,sum,sum,sum,sum,w_country,w_county,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ws_net_paid,d_year,w_city,w_warehouse_name,w_county,w_state,w_warehouse_sq_ft,ws_quantity,ws_ext_sales_price,d_moy,w_country] BroadcastHashJoin [ws_ship_mode_sk,sm_ship_mode_sk] - Project [d_moy,ws_net_paid,d_year,ws_ext_sales_price,ws_quantity,w_county,w_country,ws_ship_mode_sk,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] + Project [ws_net_paid,d_year,w_city,ws_ship_mode_sk,w_warehouse_name,w_county,w_state,w_warehouse_sq_ft,ws_quantity,ws_ext_sales_price,d_moy,w_country] BroadcastHashJoin [ws_sold_time_sk,t_time_sk] - Project [d_moy,ws_net_paid,d_year,ws_ext_sales_price,ws_quantity,w_county,w_country,ws_ship_mode_sk,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city,ws_sold_time_sk] + Project [ws_net_paid,d_year,w_city,ws_ship_mode_sk,w_warehouse_name,w_county,ws_sold_time_sk,w_state,w_warehouse_sq_ft,ws_quantity,ws_ext_sales_price,d_moy,w_country] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_net_paid,ws_ext_sales_price,ws_quantity,w_county,w_country,ws_ship_mode_sk,w_state,ws_sold_date_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,ws_sold_time_sk] + Project [ws_net_paid,w_city,ws_ship_mode_sk,w_warehouse_name,w_county,ws_sold_time_sk,w_state,w_warehouse_sq_ft,ws_quantity,ws_ext_sales_price,w_country,ws_sold_date_sk] BroadcastHashJoin [ws_warehouse_sk,w_warehouse_sk] - Project [ws_net_paid,ws_ext_sales_price,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_sold_time_sk] + Project [ws_net_paid,ws_ship_mode_sk,ws_sold_time_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] Filter [ws_warehouse_sk,ws_sold_date_sk,ws_sold_time_sk,ws_ship_mode_sk] - Scan parquet default.web_sales [ws_net_paid,ws_ext_sales_price,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_sold_time_sk] [ws_net_paid,ws_ext_sales_price,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_sold_time_sk] + Scan parquet default.web_sales [ws_net_paid,ws_ship_mode_sk,ws_sold_time_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] [ws_net_paid,ws_ship_mode_sk,ws_sold_time_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [w_county,w_warehouse_sk,w_country,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] + Project [w_city,w_warehouse_name,w_county,w_state,w_warehouse_sk,w_warehouse_sq_ft,w_country] Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_county,w_warehouse_sk,w_country,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] [w_county,w_warehouse_sk,w_country,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] + Scan parquet default.warehouse [w_city,w_warehouse_name,w_county,w_state,w_warehouse_sk,w_warehouse_sq_ft,w_country] [w_city,w_warehouse_name,w_county,w_state,w_warehouse_sk,w_warehouse_sq_ft,w_country] InputAdapter BroadcastExchange #4 WholeStageCodegen @@ -49,24 +49,24 @@ TakeOrderedAndProject [dec_net,w_city,oct_sales_per_sq_foot,apr_sales_per_sq_foo Filter [sm_carrier,sm_ship_mode_sk] Scan parquet default.ship_mode [sm_ship_mode_sk,sm_carrier] [sm_ship_mode_sk,sm_carrier] WholeStageCodegen - HashAggregate [sum,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),d_year,sum,sum,sum,sum,sum,sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,w_warehouse_sq_ft,sum,w_county,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_warehouse_name,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_city,sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_state,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_country] [sum,jan_sales,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),oct_net,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),feb_sales,sum,sum,sum,feb_net,sum,sum,sep_sales,sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,apr_net,sum,ship_carriers,sum,nov_net,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),may_net,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),dec_sales,sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),nov_sales,sum,sum,jun_net,oct_sales,sep_net,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jul_sales,aug_sales,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jul_net,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),dec_net,aug_net,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jan_net,mar_net,mar_sales,jun_sales,sum,sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),may_sales,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,year,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),apr_sales] + HashAggregate [sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_state,w_warehouse_name,sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_city,sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),d_year,sum,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_warehouse_sq_ft,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_country,sum,w_county,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] [apr_net,ship_carriers,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum,aug_sales,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sep_net,sum,sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),may_net,sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,aug_net,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),feb_net,sum,mar_sales,apr_sales,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),nov_sales,oct_net,sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jan_sales,nov_net,year,sum,dec_net,feb_sales,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,jun_sales,sum,sep_sales,jul_net,may_sales,sum,jan_net,sum,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,jun_net,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jul_sales,dec_sales,sum,oct_sales,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),mar_net,sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] InputAdapter - Exchange [d_year,w_warehouse_sq_ft,w_county,w_warehouse_name,w_city,w_state,w_country] #7 + Exchange [w_state,w_warehouse_name,w_city,d_year,w_warehouse_sq_ft,w_country,w_county] #7 WholeStageCodegen - HashAggregate [sum,sum,sum,d_year,sum,sum,sum,sum,sum,sum,sum,sum,d_moy,sum,sum,sum,sum,w_warehouse_sq_ft,sum,sum,w_county,sum,sum,sum,sum,w_warehouse_name,sum,sum,sum,sum,sum,sum,w_city,sum,sum,sum,sum,w_state,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,cs_sales_price,cs_quantity,sum,sum,cs_net_paid_inc_tax,sum,sum,sum,sum,w_country,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [cs_quantity,d_moy,d_year,w_county,w_country,cs_sales_price,w_state,cs_net_paid_inc_tax,w_warehouse_name,w_warehouse_sq_ft,w_city] + HashAggregate [sum,sum,cs_quantity,sum,sum,sum,sum,sum,sum,sum,sum,sum,d_moy,w_state,w_warehouse_name,sum,sum,sum,sum,sum,sum,cs_net_paid_inc_tax,sum,sum,sum,cs_sales_price,sum,sum,sum,sum,w_city,sum,sum,d_year,sum,sum,w_warehouse_sq_ft,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_country,sum,sum,w_county,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_year,cs_net_paid_inc_tax,w_city,cs_quantity,w_warehouse_name,w_county,w_state,w_warehouse_sq_ft,d_moy,cs_sales_price,w_country] BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] - Project [cs_quantity,d_moy,d_year,w_county,w_country,cs_sales_price,w_state,cs_net_paid_inc_tax,cs_ship_mode_sk,w_warehouse_name,w_warehouse_sq_ft,w_city] + Project [d_year,cs_ship_mode_sk,cs_net_paid_inc_tax,w_city,cs_quantity,w_warehouse_name,w_county,w_state,w_warehouse_sq_ft,d_moy,cs_sales_price,w_country] BroadcastHashJoin [cs_sold_time_sk,t_time_sk] - Project [cs_sold_time_sk,cs_quantity,d_moy,d_year,w_county,w_country,cs_sales_price,w_state,cs_net_paid_inc_tax,cs_ship_mode_sk,w_warehouse_name,w_warehouse_sq_ft,w_city] + Project [d_year,cs_ship_mode_sk,cs_net_paid_inc_tax,w_city,cs_quantity,w_warehouse_name,w_county,cs_sold_time_sk,w_state,w_warehouse_sq_ft,d_moy,cs_sales_price,w_country] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_time_sk,cs_quantity,w_county,w_country,cs_sales_price,w_state,cs_sold_date_sk,cs_net_paid_inc_tax,cs_ship_mode_sk,w_warehouse_name,w_warehouse_sq_ft,w_city] + Project [cs_ship_mode_sk,cs_net_paid_inc_tax,w_city,cs_quantity,w_warehouse_name,w_county,cs_sold_date_sk,cs_sold_time_sk,w_state,w_warehouse_sq_ft,cs_sales_price,w_country] BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] - Project [cs_sold_time_sk,cs_quantity,cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_net_paid_inc_tax,cs_ship_mode_sk] + Project [cs_ship_mode_sk,cs_net_paid_inc_tax,cs_warehouse_sk,cs_quantity,cs_sold_date_sk,cs_sold_time_sk,cs_sales_price] Filter [cs_warehouse_sk,cs_sold_date_sk,cs_sold_time_sk,cs_ship_mode_sk] - Scan parquet default.catalog_sales [cs_sold_time_sk,cs_quantity,cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_net_paid_inc_tax,cs_ship_mode_sk] [cs_sold_time_sk,cs_quantity,cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_net_paid_inc_tax,cs_ship_mode_sk] + Scan parquet default.catalog_sales [cs_ship_mode_sk,cs_net_paid_inc_tax,cs_warehouse_sk,cs_quantity,cs_sold_date_sk,cs_sold_time_sk,cs_sales_price] [cs_ship_mode_sk,cs_net_paid_inc_tax,cs_warehouse_sk,cs_quantity,cs_sold_date_sk,cs_sold_time_sk,cs_sales_price] InputAdapter - ReusedExchange [w_county,w_warehouse_sk,w_country,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] [w_county,w_warehouse_sk,w_country,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] #3 + ReusedExchange [w_city,w_warehouse_name,w_county,w_state,w_warehouse_sk,w_warehouse_sq_ft,w_country] [w_city,w_warehouse_name,w_county,w_state,w_warehouse_sk,w_warehouse_sq_ft,w_country] #3 InputAdapter ReusedExchange [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] #4 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt index 7893d6e91..a3ea2a0f2 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt @@ -3,9 +3,9 @@ TakeOrderedAndProject(limit=100, orderBy=[i_category#1 ASC NULLS FIRST,i_class#2 +- *(7) Filter (isnotnull(rk#10) && (rk#10 <= 100)) +- Window [rank(sumsales#9) windowspecdefinition(i_category#1, sumsales#9 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#10], [i_category#1], [sumsales#9 DESC NULLS LAST] +- *(6) Sort [i_category#1 ASC NULLS FIRST, sumsales#9 DESC NULLS LAST], false, 0 - +- Exchange hashpartitioning(i_category#1, 200) + +- Exchange hashpartitioning(i_category#1, 5) +- *(5) HashAggregate(keys=[i_category#1, i_class#2, i_brand#3, i_product_name#4, d_year#5, d_qoy#6, d_moy#7, s_store_id#8, spark_grouping_id#11], functions=[sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#12 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00))]) - +- Exchange hashpartitioning(i_category#1, i_class#2, i_brand#3, i_product_name#4, d_year#5, d_qoy#6, d_moy#7, s_store_id#8, spark_grouping_id#11, 200) + +- Exchange hashpartitioning(i_category#1, i_class#2, i_brand#3, i_product_name#4, d_year#5, d_qoy#6, d_moy#7, s_store_id#8, spark_grouping_id#11, 5) +- *(4) HashAggregate(keys=[i_category#1, i_class#2, i_brand#3, i_product_name#4, d_year#5, d_qoy#6, d_moy#7, s_store_id#8, spark_grouping_id#11], functions=[partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#12 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00))]) +- *(4) Expand [List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, i_brand#16, i_product_name#17, d_year#18, d_qoy#19, d_moy#20, s_store_id#21, 0), List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, i_brand#16, i_product_name#17, d_year#18, d_qoy#19, d_moy#20, null, 1), List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, i_brand#16, i_product_name#17, d_year#18, d_qoy#19, null, null, 3), List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, i_brand#16, i_product_name#17, d_year#18, null, null, null, 7), List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, i_brand#16, i_product_name#17, null, null, null, null, 15), List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, i_brand#16, null, null, null, null, null, 31), List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, null, null, null, null, null, null, 63), List(ss_quantity#13, ss_sales_price#12, i_category#14, null, null, null, null, null, null, null, 127), List(ss_quantity#13, ss_sales_price#12, null, null, null, null, null, null, null, null, 255)], [ss_quantity#13, ss_sales_price#12, i_category#1, i_class#2, i_brand#3, i_product_name#4, d_year#5, d_qoy#6, d_moy#7, s_store_id#8, spark_grouping_id#11] +- *(4) Project [ss_quantity#13, ss_sales_price#12, d_year#18, d_moy#20, d_qoy#19, i_category#22 AS i_category#14, i_class#23 AS i_class#15, i_brand#24 AS i_brand#16, i_product_name#25 AS i_product_name#17, d_year#18, d_qoy#19, d_moy#20, s_store_id#26 AS s_store_id#21] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt index 9381a8892..7858fb88a 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt @@ -1,4 +1,4 @@ -TakeOrderedAndProject [d_qoy,i_category,i_brand,i_class,d_year,d_moy,rk,i_product_name,s_store_id,sumsales] +TakeOrderedAndProject [i_category,d_moy,i_brand,d_qoy,d_year,s_store_id,sumsales,i_product_name,i_class,rk] WholeStageCodegen Filter [rk] InputAdapter @@ -8,27 +8,27 @@ TakeOrderedAndProject [d_qoy,i_category,i_brand,i_class,d_year,d_moy,rk,i_produc InputAdapter Exchange [i_category] #1 WholeStageCodegen - HashAggregate [s_store_id,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00)),i_category,sum,i_brand,i_product_name,spark_grouping_id,d_moy,i_class,d_year,d_qoy] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum] + HashAggregate [sum,spark_grouping_id,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00)),i_class,s_store_id,i_category,i_brand,d_qoy,i_product_name,d_year,d_moy] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum] InputAdapter - Exchange [s_store_id,i_category,i_brand,i_product_name,spark_grouping_id,d_moy,i_class,d_year,d_qoy] #2 + Exchange [spark_grouping_id,i_class,s_store_id,i_category,i_brand,d_qoy,i_product_name,d_year,d_moy] #2 WholeStageCodegen - HashAggregate [s_store_id,i_category,sum,i_brand,ss_sales_price,i_product_name,spark_grouping_id,d_moy,i_class,ss_quantity,d_year,d_qoy,sum] [sum,sum] - Expand [d_year,i_class,d_qoy,d_moy,i_brand,i_category,s_store_id,i_product_name,ss_sales_price,ss_quantity] - Project [d_year,d_qoy,d_moy,i_brand,s_store_id,ss_sales_price,i_category,ss_quantity,i_class,i_product_name] + HashAggregate [sum,spark_grouping_id,sum,ss_quantity,i_class,s_store_id,i_category,i_brand,d_qoy,ss_sales_price,i_product_name,d_year,d_moy] [sum,sum] + Expand [i_class,i_product_name,d_moy,ss_quantity,s_store_id,d_year,i_category,i_brand,d_qoy,ss_sales_price] + Project [s_store_id,d_moy,ss_quantity,d_year,d_qoy,ss_sales_price,i_category,i_brand,i_product_name,i_class] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [s_store_id,d_qoy,ss_quantity,ss_item_sk,d_moy,d_year,ss_sales_price] + Project [d_year,ss_sales_price,ss_item_sk,ss_quantity,d_qoy,s_store_id,d_moy] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [d_qoy,ss_quantity,ss_item_sk,d_moy,d_year,ss_store_sk,ss_sales_price] + Project [d_year,ss_sales_price,ss_item_sk,ss_quantity,d_qoy,ss_store_sk,d_moy] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + Project [ss_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk] - Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] [ss_quantity,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + Scan parquet default.store_sales [ss_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] [ss_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk,d_year,d_moy,d_qoy] Filter [d_month_seq,d_date_sk] - Scan parquet default.date_dim [d_qoy,d_moy,d_year,d_month_seq,d_date_sk] [d_qoy,d_moy,d_year,d_month_seq,d_date_sk] + Scan parquet default.date_dim [d_year,d_date_sk,d_month_seq,d_qoy,d_moy] [d_year,d_date_sk,d_month_seq,d_qoy,d_moy] InputAdapter BroadcastExchange #4 WholeStageCodegen @@ -38,6 +38,6 @@ TakeOrderedAndProject [d_qoy,i_category,i_brand,i_class,d_year,d_moy,rk,i_produc InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [i_class,i_item_sk,i_product_name,i_category,i_brand] + Project [i_product_name,i_brand,i_category,i_item_sk,i_class] Filter [i_item_sk] - Scan parquet default.item [i_class,i_item_sk,i_product_name,i_category,i_brand] [i_class,i_item_sk,i_product_name,i_category,i_brand] + Scan parquet default.item [i_product_name,i_brand,i_category,i_item_sk,i_class] [i_product_name,i_brand,i_category,i_item_sk,i_class] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt index c0d696ed3..3d2fcd498 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt @@ -5,7 +5,7 @@ TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,ss_ticke :- *(8) Project [ss_ticket_number#2, bought_city#5, extended_price#6, list_price#8, extended_tax#7, c_current_addr_sk#9, c_first_name#3, c_last_name#1] : +- *(8) BroadcastHashJoin [ss_customer_sk#11], [c_customer_sk#12], Inner, BuildRight : :- *(8) HashAggregate(keys=[ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4], functions=[sum(UnscaledValue(ss_ext_sales_price#14)), sum(UnscaledValue(ss_ext_list_price#15)), sum(UnscaledValue(ss_ext_tax#16))]) - : : +- Exchange hashpartitioning(ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4, 200) + : : +- Exchange hashpartitioning(ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4, 5) : : +- *(5) HashAggregate(keys=[ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#14)), partial_sum(UnscaledValue(ss_ext_list_price#15)), partial_sum(UnscaledValue(ss_ext_tax#16))]) : : +- *(5) Project [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16, ca_city#4] : : +- *(5) BroadcastHashJoin [ss_addr_sk#13], [ca_address_sk#10], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt index 1e9ce42b5..f9955f4e6 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt @@ -1,25 +1,25 @@ -TakeOrderedAndProject [bought_city,ss_ticket_number,extended_price,list_price,c_last_name,c_first_name,extended_tax,ca_city] +TakeOrderedAndProject [list_price,ss_ticket_number,extended_price,c_last_name,bought_city,extended_tax,c_first_name,ca_city] WholeStageCodegen - Project [bought_city,ss_ticket_number,extended_price,list_price,c_last_name,c_first_name,extended_tax,ca_city] + Project [list_price,ss_ticket_number,extended_price,c_last_name,bought_city,extended_tax,c_first_name,ca_city] BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] - Project [c_current_addr_sk,extended_price,c_last_name,list_price,c_first_name,ss_ticket_number,extended_tax,bought_city] + Project [list_price,extended_tax,c_last_name,c_first_name,bought_city,extended_price,c_current_addr_sk,ss_ticket_number] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - HashAggregate [sum,sum,ca_city,ss_customer_sk,sum(UnscaledValue(ss_ext_tax)),sum(UnscaledValue(ss_ext_sales_price)),ss_ticket_number,ss_addr_sk,sum(UnscaledValue(ss_ext_list_price)),sum] [sum,extended_price,sum,list_price,sum(UnscaledValue(ss_ext_tax)),sum(UnscaledValue(ss_ext_sales_price)),extended_tax,sum(UnscaledValue(ss_ext_list_price)),sum,bought_city] + HashAggregate [ss_customer_sk,sum(UnscaledValue(ss_ext_tax)),ca_city,sum(UnscaledValue(ss_ext_list_price)),sum,sum,sum(UnscaledValue(ss_ext_sales_price)),ss_addr_sk,sum,ss_ticket_number] [list_price,sum(UnscaledValue(ss_ext_tax)),sum(UnscaledValue(ss_ext_list_price)),extended_tax,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),bought_city,extended_price,sum] InputAdapter Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 WholeStageCodegen - HashAggregate [sum,ss_ext_tax,sum,ca_city,ss_customer_sk,sum,sum,sum,ss_ext_sales_price,ss_ext_list_price,ss_ticket_number,ss_addr_sk,sum] [sum,sum,sum,sum,sum,sum] - Project [ss_addr_sk,ss_ext_tax,ss_customer_sk,ca_city,ss_ext_sales_price,ss_ext_list_price,ss_ticket_number] + HashAggregate [ss_customer_sk,ca_city,ss_ext_list_price,sum,ss_ext_sales_price,sum,sum,sum,sum,ss_ext_tax,ss_addr_sk,sum,ss_ticket_number] [sum,sum,sum,sum,sum,sum] + Project [ca_city,ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_tax,ss_ext_sales_price,ss_ext_list_price] BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_addr_sk,ss_ext_tax,ss_customer_sk,ss_ext_sales_price,ss_ext_list_price,ss_ticket_number] + Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_tax,ss_ext_sales_price,ss_ext_list_price] BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_addr_sk,ss_ext_tax,ss_customer_sk,ss_ext_sales_price,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_tax,ss_ext_sales_price,ss_ext_list_price] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_tax,ss_store_sk,ss_ext_sales_price,ss_ext_list_price] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] - Filter [ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_addr_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_tax,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_ext_list_price] + Filter [ss_hdemo_sk,ss_customer_sk,ss_store_sk,ss_sold_date_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_tax,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_ext_list_price] [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_tax,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_ext_list_price] InputAdapter BroadcastExchange #2 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt index 76eb92a0d..ad4a155f0 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[cd_gender#1 ASC NULLS FIRST,cd_marital_status#2 ASC NULLS FIRST,cd_education_status#3 ASC NULLS FIRST,cd_purchase_estimate#4 ASC NULLS FIRST,cd_credit_rating#5 ASC NULLS FIRST], output=[cd_gender#1,cd_marital_status#2,cd_education_status#3,cnt1#6,cd_purchase_estimate#4,cnt2#7,cd_credit_rating#5,cnt3#8]) +- *(10) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5], functions=[count(1)]) - +- Exchange hashpartitioning(cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, 200) + +- Exchange hashpartitioning(cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, 5) +- *(9) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5], functions=[partial_count(1)]) +- *(9) Project [cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5] +- *(9) BroadcastHashJoin [c_current_cdemo_sk#9], [cd_demo_sk#10], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/simplified.txt index d6e6152f7..260f8bf20 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/simplified.txt @@ -1,11 +1,11 @@ -TakeOrderedAndProject [cnt3,cnt1,cd_purchase_estimate,cd_education_status,cd_credit_rating,cd_marital_status,cd_gender,cnt2] +TakeOrderedAndProject [cnt2,cd_gender,cd_education_status,cd_credit_rating,cd_purchase_estimate,cd_marital_status,cnt3,cnt1] WholeStageCodegen - HashAggregate [count(1),cd_purchase_estimate,count,cd_education_status,cd_credit_rating,cd_marital_status,cd_gender] [cnt3,count(1),cnt1,count,cnt2] + HashAggregate [count,cd_gender,cd_education_status,cd_credit_rating,cd_purchase_estimate,cd_marital_status,count(1)] [cnt2,count,cnt3,cnt1,count(1)] InputAdapter - Exchange [cd_purchase_estimate,cd_education_status,cd_credit_rating,cd_marital_status,cd_gender] #1 + Exchange [cd_gender,cd_education_status,cd_credit_rating,cd_purchase_estimate,cd_marital_status] #1 WholeStageCodegen - HashAggregate [count,cd_purchase_estimate,count,cd_education_status,cd_credit_rating,cd_marital_status,cd_gender] [count,count] - Project [cd_marital_status,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] + HashAggregate [count,cd_gender,cd_education_status,cd_credit_rating,cd_purchase_estimate,cd_marital_status,count] [count,count] + Project [cd_gender,cd_education_status,cd_marital_status,cd_credit_rating,cd_purchase_estimate] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] Project [c_current_cdemo_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] @@ -59,6 +59,6 @@ TakeOrderedAndProject [cnt3,cnt1,cd_purchase_estimate,cd_education_status,cd_cre InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [cd_demo_sk,cd_marital_status,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] + Project [cd_gender,cd_education_status,cd_marital_status,cd_credit_rating,cd_demo_sk,cd_purchase_estimate] Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] [cd_demo_sk,cd_marital_status,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] + Scan parquet default.customer_demographics [cd_gender,cd_education_status,cd_marital_status,cd_credit_rating,cd_demo_sk,cd_purchase_estimate] [cd_gender,cd_education_status,cd_marital_status,cd_credit_rating,cd_demo_sk,cd_purchase_estimate] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/explain.txt index fd314ae14..1a0f07f81 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[i_item_id#1,agg1#2,agg2#3,agg3#4,agg4#5]) +- *(6) HashAggregate(keys=[i_item_id#1], functions=[avg(cast(ss_quantity#6 as bigint)), avg(UnscaledValue(ss_list_price#7)), avg(UnscaledValue(ss_coupon_amt#8)), avg(UnscaledValue(ss_sales_price#9))]) - +- Exchange hashpartitioning(i_item_id#1, 200) + +- Exchange hashpartitioning(i_item_id#1, 5) +- *(5) HashAggregate(keys=[i_item_id#1], functions=[partial_avg(cast(ss_quantity#6 as bigint)), partial_avg(UnscaledValue(ss_list_price#7)), partial_avg(UnscaledValue(ss_coupon_amt#8)), partial_avg(UnscaledValue(ss_sales_price#9))]) +- *(5) Project [ss_quantity#6, ss_list_price#7, ss_sales_price#9, ss_coupon_amt#8, i_item_id#1] +- *(5) BroadcastHashJoin [ss_promo_sk#10], [p_promo_sk#11], Inner, BuildRight @@ -16,8 +16,8 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[ : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#14,ss_item_sk#12,ss_cdemo_sk#16,ss_promo_sk#10,ss_quantity#6,ss_list_price#7,ss_sales_price#9,ss_coupon_amt#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)], ReadSchema: struct + : : : +- *(1) Filter ((((((isnotnull(cd_education_status#18) && isnotnull(cd_gender#19)) && isnotnull(cd_marital_status#20)) && (cd_gender#19 = M)) && (cd_marital_status#20 = S)) && (cd_education_status#18 = College)) && isnotnull(cd_demo_sk#17)) + : : : +- *(1) FileScan parquet default.customer_demographics[cd_demo_sk#17,cd_gender#19,cd_marital_status#20,cd_education_status#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_education_status), IsNotNull(cd_gender), IsNotNull(cd_marital_status), EqualTo(cd_g..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [d_date_sk#15] : : +- *(2) Filter ((isnotnull(d_year#21) && (d_year#21 = 2000)) && isnotnull(d_date_sk#15)) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt index b72978a57..81ffc9845 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt @@ -1,26 +1,26 @@ -TakeOrderedAndProject [agg2,i_item_id,agg1,agg4,agg3] +TakeOrderedAndProject [agg3,agg1,agg4,agg2,i_item_id] WholeStageCodegen - HashAggregate [i_item_id,count,sum,count,avg(UnscaledValue(ss_sales_price)),sum,avg(cast(ss_quantity as bigint)),sum,sum,avg(UnscaledValue(ss_coupon_amt)),count,count,avg(UnscaledValue(ss_list_price))] [agg2,agg1,count,sum,count,avg(UnscaledValue(ss_sales_price)),sum,agg4,avg(cast(ss_quantity as bigint)),sum,sum,avg(UnscaledValue(ss_coupon_amt)),count,agg3,count,avg(UnscaledValue(ss_list_price))] + HashAggregate [avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),count,sum,count,sum,avg(cast(ss_quantity as bigint)),count,i_item_id,sum,count,avg(UnscaledValue(ss_list_price)),sum] [avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),agg3,agg1,count,sum,count,sum,agg4,agg2,avg(cast(ss_quantity as bigint)),count,sum,count,avg(UnscaledValue(ss_list_price)),sum] InputAdapter Exchange [i_item_id] #1 WholeStageCodegen - HashAggregate [count,i_item_id,count,count,sum,count,sum,count,sum,sum,sum,ss_coupon_amt,count,ss_sales_price,sum,sum,ss_list_price,sum,count,ss_quantity,count] [count,count,count,sum,count,sum,count,sum,sum,sum,count,sum,sum,sum,count,count] - Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_sales_price,i_item_id] + HashAggregate [count,count,count,sum,count,ss_quantity,ss_coupon_amt,sum,sum,sum,count,count,sum,count,ss_sales_price,i_item_id,sum,count,ss_list_price,sum,sum] [count,count,count,sum,count,sum,sum,sum,count,count,sum,count,sum,count,sum,sum] + Project [ss_list_price,ss_sales_price,ss_quantity,i_item_id,ss_coupon_amt] BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_sales_price,ss_promo_sk,i_item_id] + Project [ss_list_price,ss_sales_price,ss_quantity,ss_promo_sk,i_item_id,ss_coupon_amt] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_promo_sk] + Project [ss_list_price,ss_sales_price,ss_item_sk,ss_quantity,ss_promo_sk,ss_coupon_amt] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_promo_sk] + Project [ss_list_price,ss_sales_price,ss_item_sk,ss_quantity,ss_promo_sk,ss_sold_date_sk,ss_coupon_amt] BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk] + Project [ss_list_price,ss_sales_price,ss_cdemo_sk,ss_item_sk,ss_quantity,ss_promo_sk,ss_sold_date_sk,ss_coupon_amt] Filter [ss_cdemo_sk,ss_sold_date_sk,ss_item_sk,ss_promo_sk] - Scan parquet default.store_sales [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk] [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk] + Scan parquet default.store_sales [ss_list_price,ss_sales_price,ss_cdemo_sk,ss_item_sk,ss_quantity,ss_promo_sk,ss_sold_date_sk,ss_coupon_amt] [ss_list_price,ss_sales_price,ss_cdemo_sk,ss_item_sk,ss_quantity,ss_promo_sk,ss_sold_date_sk,ss_coupon_amt] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [cd_demo_sk] - Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + Filter [cd_education_status,cd_gender,cd_marital_status,cd_demo_sk] Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt index 030bc811b..e0113e54d 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt @@ -3,9 +3,9 @@ TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WH +- *(11) Project [total_sum#4, s_state#2, s_county#5, lochierarchy#1, rank_within_parent#3] +- Window [rank(_w3#6) windowspecdefinition(_w1#7, _w2#8, _w3#6 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#3], [_w1#7, _w2#8], [_w3#6 DESC NULLS LAST] +- *(10) Sort [_w1#7 ASC NULLS FIRST, _w2#8 ASC NULLS FIRST, _w3#6 DESC NULLS LAST], false, 0 - +- Exchange hashpartitioning(_w1#7, _w2#8, 200) + +- Exchange hashpartitioning(_w1#7, _w2#8, 5) +- *(9) HashAggregate(keys=[s_state#2, s_county#5, spark_grouping_id#9], functions=[sum(UnscaledValue(ss_net_profit#10))]) - +- Exchange hashpartitioning(s_state#2, s_county#5, spark_grouping_id#9, 200) + +- Exchange hashpartitioning(s_state#2, s_county#5, spark_grouping_id#9, 5) +- *(8) HashAggregate(keys=[s_state#2, s_county#5, spark_grouping_id#9], functions=[partial_sum(UnscaledValue(ss_net_profit#10))]) +- *(8) Expand [List(ss_net_profit#10, s_state#11, s_county#12, 0), List(ss_net_profit#10, s_state#11, null, 1), List(ss_net_profit#10, null, null, 3)], [ss_net_profit#10, s_state#2, s_county#5, spark_grouping_id#9] +- *(8) Project [ss_net_profit#10, s_state#13 AS s_state#11, s_county#14 AS s_county#12] @@ -30,7 +30,7 @@ TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WH +- Window [rank(_w2#22) windowspecdefinition(s_state#13, _w2#22 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#21], [s_state#13], [_w2#22 DESC NULLS LAST] +- *(5) Sort [s_state#13 ASC NULLS FIRST, _w2#22 DESC NULLS LAST], false, 0 +- *(5) HashAggregate(keys=[s_state#13], functions=[sum(UnscaledValue(ss_net_profit#10))]) - +- Exchange hashpartitioning(s_state#13, 200) + +- Exchange hashpartitioning(s_state#13, 5) +- *(4) HashAggregate(keys=[s_state#13], functions=[partial_sum(UnscaledValue(ss_net_profit#10))]) +- *(4) Project [ss_net_profit#10, s_state#13] +- *(4) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt index 2ef724de4..7808992d6 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt @@ -1,6 +1,6 @@ -TakeOrderedAndProject [total_sum,lochierarchy,s_state,s_county,rank_within_parent] +TakeOrderedAndProject [total_sum,lochierarchy,s_state,rank_within_parent,s_county] WholeStageCodegen - Project [total_sum,lochierarchy,s_state,s_county,rank_within_parent] + Project [total_sum,lochierarchy,s_state,rank_within_parent,s_county] InputAdapter Window [_w3,_w1,_w2] WholeStageCodegen @@ -8,11 +8,11 @@ TakeOrderedAndProject [total_sum,lochierarchy,s_state,s_county,rank_within_paren InputAdapter Exchange [_w1,_w2] #1 WholeStageCodegen - HashAggregate [sum,s_state,s_county,spark_grouping_id,sum(UnscaledValue(ss_net_profit))] [total_sum,sum,lochierarchy,_w3,sum(UnscaledValue(ss_net_profit)),_w2,_w1] + HashAggregate [sum(UnscaledValue(ss_net_profit)),s_state,spark_grouping_id,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,_w1,lochierarchy,_w3,_w2,sum] InputAdapter Exchange [s_state,s_county,spark_grouping_id] #2 WholeStageCodegen - HashAggregate [sum,s_state,s_county,spark_grouping_id,sum,ss_net_profit] [sum,sum] + HashAggregate [s_state,ss_net_profit,sum,spark_grouping_id,s_county,sum] [sum,sum] Expand [ss_net_profit,s_state,s_county] Project [ss_net_profit,s_state,s_county] BroadcastHashJoin [ss_store_sk,s_store_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt index 32f2c64c4..f11328994 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt @@ -1,8 +1,8 @@ == Physical Plan == *(11) Sort [ext_price#1 DESC NULLS LAST, brand_id#2 ASC NULLS FIRST], true, 0 -+- Exchange rangepartitioning(ext_price#1 DESC NULLS LAST, brand_id#2 ASC NULLS FIRST, 200) ++- Exchange rangepartitioning(ext_price#1 DESC NULLS LAST, brand_id#2 ASC NULLS FIRST, 5) +- *(10) HashAggregate(keys=[i_brand#3, i_brand_id#4, t_hour#5, t_minute#6], functions=[sum(UnscaledValue(ext_price#7))]) - +- Exchange hashpartitioning(i_brand#3, i_brand_id#4, t_hour#5, t_minute#6, 200) + +- Exchange hashpartitioning(i_brand#3, i_brand_id#4, t_hour#5, t_minute#6, 5) +- *(9) HashAggregate(keys=[i_brand#3, i_brand_id#4, t_hour#5, t_minute#6], functions=[partial_sum(UnscaledValue(ext_price#7))]) +- *(9) Project [i_brand_id#4, i_brand#3, ext_price#7, t_hour#5, t_minute#6] +- *(9) BroadcastHashJoin [time_sk#8], [t_time_sk#9], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt index 0153205a5..5877ab3c2 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt @@ -3,12 +3,12 @@ WholeStageCodegen InputAdapter Exchange [ext_price,brand_id] #1 WholeStageCodegen - HashAggregate [sum(UnscaledValue(ext_price)),sum,t_minute,t_hour,i_brand,i_brand_id] [sum(UnscaledValue(ext_price)),sum,brand,ext_price,brand_id] + HashAggregate [i_brand_id,sum,t_minute,t_hour,sum(UnscaledValue(ext_price)),i_brand] [ext_price,sum,brand,sum(UnscaledValue(ext_price)),brand_id] InputAdapter Exchange [i_brand,i_brand_id,t_hour,t_minute] #2 WholeStageCodegen - HashAggregate [ext_price,sum,t_minute,t_hour,i_brand,i_brand_id,sum] [sum,sum] - Project [ext_price,t_minute,i_brand_id,i_brand,t_hour] + HashAggregate [i_brand_id,sum,t_minute,t_hour,sum,ext_price,i_brand] [sum,sum] + Project [ext_price,t_minute,t_hour,i_brand_id,i_brand] BroadcastHashJoin [time_sk,t_time_sk] Project [i_brand_id,i_brand,ext_price,time_sk] BroadcastHashJoin [i_item_sk,sold_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt index 5ad7c14ac..40feaedbd 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[total_cnt#1 DESC NULLS LAST,i_item_desc#2 ASC NULLS FIRST,w_warehouse_name#3 ASC NULLS FIRST,d_week_seq#4 ASC NULLS FIRST], output=[i_item_desc#2,w_warehouse_name#3,d_week_seq#4,no_promo#5,promo#6,total_cnt#1]) +- *(12) HashAggregate(keys=[i_item_desc#2, w_warehouse_name#3, d_week_seq#4], functions=[count(1)]) - +- Exchange hashpartitioning(i_item_desc#2, w_warehouse_name#3, d_week_seq#4, 200) + +- Exchange hashpartitioning(i_item_desc#2, w_warehouse_name#3, d_week_seq#4, 5) +- *(11) HashAggregate(keys=[i_item_desc#2, w_warehouse_name#3, d_week_seq#4], functions=[partial_count(1)]) +- *(11) Project [w_warehouse_name#3, i_item_desc#2, d_week_seq#4] +- *(11) BroadcastHashJoin [cs_item_sk#7, cs_order_number#8], [cr_item_sk#9, cr_order_number#10], LeftOuter, BuildRight @@ -56,8 +56,8 @@ TakeOrderedAndProject(limit=100, orderBy=[total_cnt#1 DESC NULLS LAST,i_item_des : : : +- *(7) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(8) Project [d_date_sk#14, d_date#15] - : : +- *(8) Filter (isnotnull(d_date#15) && isnotnull(d_date_sk#14)) - : : +- *(8) FileScan parquet default.date_dim[d_date_sk#14,d_date#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(8) Filter (isnotnull(d_date_sk#14) && isnotnull(d_date#15)) + : : +- *(8) FileScan parquet default.date_dim[d_date_sk#14,d_date#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_date)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(9) Project [p_promo_sk#12] : +- *(9) Filter isnotnull(p_promo_sk#12) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt index ee1925612..f62c47de4 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt @@ -1,33 +1,33 @@ -TakeOrderedAndProject [i_item_desc,w_warehouse_name,promo,no_promo,d_week_seq,total_cnt] +TakeOrderedAndProject [no_promo,w_warehouse_name,promo,i_item_desc,total_cnt,d_week_seq] WholeStageCodegen - HashAggregate [i_item_desc,count(1),w_warehouse_name,d_week_seq,count] [count(1),promo,no_promo,count,total_cnt] + HashAggregate [w_warehouse_name,i_item_desc,count,d_week_seq,count(1)] [no_promo,promo,total_cnt,count,count(1)] InputAdapter Exchange [i_item_desc,w_warehouse_name,d_week_seq] #1 WholeStageCodegen - HashAggregate [i_item_desc,w_warehouse_name,d_week_seq,count,count] [count,count] + HashAggregate [w_warehouse_name,i_item_desc,count,count,d_week_seq] [count,count] Project [w_warehouse_name,i_item_desc,d_week_seq] BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] - Project [d_week_seq,cs_order_number,cs_item_sk,w_warehouse_name,i_item_desc] + Project [cs_item_sk,w_warehouse_name,d_week_seq,i_item_desc,cs_order_number] BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [d_week_seq,cs_promo_sk,cs_order_number,cs_item_sk,w_warehouse_name,i_item_desc] + Project [cs_item_sk,w_warehouse_name,d_week_seq,cs_promo_sk,i_item_desc,cs_order_number] BroadcastHashJoin [cs_ship_date_sk,d_date_sk,d_date,d_date] - Project [d_date,d_week_seq,cs_promo_sk,cs_ship_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,i_item_desc] + Project [cs_item_sk,w_warehouse_name,d_week_seq,cs_promo_sk,cs_ship_date_sk,d_date,i_item_desc,cs_order_number] BroadcastHashJoin [d_week_seq,inv_date_sk,d_week_seq,d_date_sk] - Project [d_date,d_week_seq,cs_promo_sk,cs_ship_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc] + Project [cs_item_sk,w_warehouse_name,d_week_seq,cs_promo_sk,cs_ship_date_sk,inv_date_sk,d_date,i_item_desc,cs_order_number] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc] + Project [cs_item_sk,w_warehouse_name,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,i_item_desc,cs_order_number] BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] - Project [cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc,cs_bill_hdemo_sk] + Project [cs_item_sk,w_warehouse_name,cs_promo_sk,cs_bill_hdemo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,i_item_desc,cs_order_number] BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] - Project [cs_promo_sk,cs_bill_cdemo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc,cs_bill_hdemo_sk] + Project [cs_bill_cdemo_sk,cs_item_sk,w_warehouse_name,cs_promo_sk,cs_bill_hdemo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,i_item_desc,cs_order_number] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_promo_sk,cs_bill_cdemo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,cs_bill_hdemo_sk] + Project [cs_bill_cdemo_sk,cs_item_sk,w_warehouse_name,cs_promo_sk,cs_bill_hdemo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,cs_order_number] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [cs_promo_sk,cs_bill_cdemo_sk,cs_ship_date_sk,inv_warehouse_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,inv_date_sk,cs_bill_hdemo_sk] + Project [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_bill_hdemo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,inv_warehouse_sk,cs_order_number] BroadcastHashJoin [cs_item_sk,inv_item_sk,inv_quantity_on_hand,cs_quantity] - Project [cs_promo_sk,cs_bill_cdemo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_bill_hdemo_sk] - Filter [cs_bill_cdemo_sk,cs_sold_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_quantity,cs_ship_date_sk] - Scan parquet default.catalog_sales [cs_promo_sk,cs_bill_cdemo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_bill_hdemo_sk] [cs_promo_sk,cs_bill_cdemo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_bill_hdemo_sk] + Project [cs_bill_cdemo_sk,cs_quantity,cs_item_sk,cs_promo_sk,cs_bill_hdemo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number] + Filter [cs_quantity,cs_ship_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_sold_date_sk,cs_bill_cdemo_sk] + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_quantity,cs_item_sk,cs_promo_sk,cs_bill_hdemo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number] [cs_bill_cdemo_sk,cs_quantity,cs_item_sk,cs_promo_sk,cs_bill_hdemo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number] InputAdapter BroadcastExchange #2 WholeStageCodegen @@ -68,13 +68,13 @@ TakeOrderedAndProject [i_item_desc,w_warehouse_name,promo,no_promo,d_week_seq,to BroadcastExchange #8 WholeStageCodegen Project [d_date_sk,d_week_seq] - Filter [d_date_sk,d_week_seq] + Filter [d_week_seq,d_date_sk] Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] InputAdapter BroadcastExchange #9 WholeStageCodegen Project [d_date_sk,d_date] - Filter [d_date,d_date_sk] + Filter [d_date_sk,d_date] Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] InputAdapter BroadcastExchange #10 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt index 1f917c14b..898ff3075 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt @@ -1,11 +1,11 @@ == Physical Plan == *(7) Sort [cnt#1 DESC NULLS LAST], true, 0 -+- Exchange rangepartitioning(cnt#1 DESC NULLS LAST, 200) ++- Exchange rangepartitioning(cnt#1 DESC NULLS LAST, 5) +- *(6) Project [c_last_name#2, c_first_name#3, c_salutation#4, c_preferred_cust_flag#5, ss_ticket_number#6, cnt#1] +- *(6) BroadcastHashJoin [ss_customer_sk#7], [c_customer_sk#8], Inner, BuildRight :- *(6) Filter ((cnt#1 >= 1) && (cnt#1 <= 5)) : +- *(6) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#7], functions=[count(1)]) - : +- Exchange hashpartitioning(ss_ticket_number#6, ss_customer_sk#7, 200) + : +- Exchange hashpartitioning(ss_ticket_number#6, ss_customer_sk#7, 5) : +- *(4) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#7], functions=[partial_count(1)]) : +- *(4) Project [ss_customer_sk#7, ss_ticket_number#6] : +- *(4) BroadcastHashJoin [ss_hdemo_sk#9], [hd_demo_sk#10], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt index f94077a9b..e8e43bd18 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt @@ -3,7 +3,7 @@ WholeStageCodegen InputAdapter Exchange [cnt] #1 WholeStageCodegen - Project [ss_ticket_number,c_salutation,cnt,c_last_name,c_preferred_cust_flag,c_first_name] + Project [c_last_name,ss_ticket_number,c_preferred_cust_flag,c_first_name,c_salutation,cnt] BroadcastHashJoin [ss_customer_sk,c_customer_sk] Filter [cnt] HashAggregate [ss_ticket_number,ss_customer_sk,count,count(1)] [count(1),cnt,count] @@ -17,9 +17,9 @@ WholeStageCodegen BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] Filter [ss_sold_date_sk,ss_store_sk,ss_hdemo_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] [ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -41,6 +41,6 @@ WholeStageCodegen InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [c_preferred_cust_flag,c_customer_sk,c_last_name,c_first_name,c_salutation] + Project [c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_salutation] Filter [c_customer_sk] - Scan parquet default.customer [c_preferred_cust_flag,c_customer_sk,c_last_name,c_first_name,c_salutation] [c_preferred_cust_flag,c_customer_sk,c_last_name,c_first_name,c_salutation] + Scan parquet default.customer [c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_salutation] [c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_salutation] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/explain.txt index ad1bfbcca..2aa89a6b7 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/explain.txt @@ -8,7 +8,7 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer : : :- Union : : : :- *(4) Filter (isnotnull(year_total#8) && (year_total#8 > 0.00)) : : : : +- *(4) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ss_net_paid#15))]) - : : : : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 200) + : : : : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 5) : : : : +- *(3) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ss_net_paid#15))]) : : : : +- *(3) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_net_paid#15, d_year#14] : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight @@ -29,7 +29,7 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : : +- Union : : :- *(8) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ss_net_paid#15))]) - : : : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 200) + : : : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 5) : : : +- *(7) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ss_net_paid#15))]) : : : +- *(7) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_net_paid#15, d_year#14] : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight @@ -49,7 +49,7 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer : :- LocalTableScan , [customer_id#10, year_total#6] : +- *(12) Filter (isnotnull(year_total#21) && (year_total#21 > 0.00)) : +- *(12) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ws_net_paid#24))]) - : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 200) + : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 5) : +- *(11) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ws_net_paid#24))]) : +- *(11) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_net_paid#24, d_year#14] : +- *(11) BroadcastHashJoin [ws_sold_date_sk#25], [d_date_sk#17], Inner, BuildRight @@ -67,7 +67,7 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer +- Union :- LocalTableScan , [customer_id#5, year_total#7] +- *(16) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ws_net_paid#24))]) - +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 200) + +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 5) +- *(15) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ws_net_paid#24))]) +- *(15) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_net_paid#24, d_year#14] +- *(15) BroadcastHashJoin [ws_sold_date_sk#25], [d_date_sk#17], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt index c3144e1d9..831e86b6b 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt @@ -1,22 +1,22 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] WholeStageCodegen Project [customer_id,customer_first_name,customer_last_name] - BroadcastHashJoin [year_total,year_total,year_total,year_total,customer_id,customer_id] - Project [customer_first_name,year_total,year_total,customer_last_name,year_total,customer_id,customer_id] + BroadcastHashJoin [customer_id,year_total,year_total,year_total,year_total,customer_id] + Project [year_total,year_total,customer_id,customer_id,customer_last_name,customer_first_name,year_total] BroadcastHashJoin [customer_id,customer_id] BroadcastHashJoin [customer_id,customer_id] InputAdapter Union WholeStageCodegen Filter [year_total] - HashAggregate [d_year,sum(UnscaledValue(ss_net_paid)),c_customer_id,c_last_name,sum,c_first_name] [sum(UnscaledValue(ss_net_paid)),customer_id,year_total,sum] + HashAggregate [sum,c_last_name,sum(UnscaledValue(ss_net_paid)),c_customer_id,d_year,c_first_name] [sum(UnscaledValue(ss_net_paid)),customer_id,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year] #1 WholeStageCodegen - HashAggregate [d_year,sum,ss_net_paid,c_customer_id,c_last_name,sum,c_first_name] [sum,sum] - Project [c_customer_id,d_year,ss_net_paid,c_last_name,c_first_name] + HashAggregate [sum,sum,c_last_name,c_customer_id,d_year,ss_net_paid,c_first_name] [sum,sum] + Project [d_year,c_customer_id,c_last_name,c_first_name,ss_net_paid] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_customer_id,ss_net_paid,c_last_name,c_first_name,ss_sold_date_sk] + Project [c_customer_id,ss_sold_date_sk,c_last_name,c_first_name,ss_net_paid] BroadcastHashJoin [c_customer_sk,ss_customer_sk] Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] Filter [c_customer_sk,c_customer_id] @@ -38,14 +38,14 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] BroadcastExchange #4 Union WholeStageCodegen - HashAggregate [d_year,sum(UnscaledValue(ss_net_paid)),sum,c_customer_id,c_last_name,c_first_name] [sum(UnscaledValue(ss_net_paid)),customer_first_name,sum,customer_last_name,year_total,customer_id] + HashAggregate [c_last_name,sum,sum(UnscaledValue(ss_net_paid)),c_customer_id,d_year,c_first_name] [year_total,sum,sum(UnscaledValue(ss_net_paid)),customer_id,customer_last_name,customer_first_name] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year] #5 WholeStageCodegen - HashAggregate [d_year,ss_net_paid,sum,c_customer_id,c_last_name,c_first_name,sum] [sum,sum] - Project [c_customer_id,d_year,ss_net_paid,c_last_name,c_first_name] + HashAggregate [c_last_name,sum,c_customer_id,d_year,ss_net_paid,sum,c_first_name] [sum,sum] + Project [d_year,c_customer_id,c_last_name,c_first_name,ss_net_paid] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_customer_id,ss_net_paid,c_last_name,c_first_name,ss_sold_date_sk] + Project [c_customer_id,ss_sold_date_sk,c_last_name,c_first_name,ss_net_paid] BroadcastHashJoin [c_customer_sk,ss_customer_sk] Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] Filter [c_customer_sk,c_customer_id] @@ -65,14 +65,14 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen Filter [year_total] - HashAggregate [d_year,sum,sum(UnscaledValue(ws_net_paid)),c_customer_id,c_last_name,c_first_name] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + HashAggregate [c_last_name,sum,c_customer_id,d_year,sum(UnscaledValue(ws_net_paid)),c_first_name] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year] #8 WholeStageCodegen - HashAggregate [d_year,sum,sum,ws_net_paid,c_customer_id,c_last_name,c_first_name] [sum,sum] - Project [c_customer_id,ws_net_paid,d_year,c_last_name,c_first_name] + HashAggregate [c_last_name,sum,sum,c_customer_id,d_year,c_first_name,ws_net_paid] [sum,sum] + Project [ws_net_paid,d_year,c_customer_id,c_last_name,c_first_name] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [c_customer_id,ws_net_paid,c_last_name,ws_sold_date_sk,c_first_name] + Project [ws_net_paid,c_customer_id,c_last_name,c_first_name,ws_sold_date_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] Filter [c_customer_sk,c_customer_id] @@ -90,14 +90,14 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] Union LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen - HashAggregate [d_year,sum(UnscaledValue(ws_net_paid)),c_customer_id,c_last_name,c_first_name,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + HashAggregate [c_last_name,sum,c_customer_id,d_year,sum(UnscaledValue(ws_net_paid)),c_first_name] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year] #11 WholeStageCodegen - HashAggregate [d_year,ws_net_paid,c_customer_id,sum,c_last_name,c_first_name,sum] [sum,sum] - Project [c_customer_id,ws_net_paid,d_year,c_last_name,c_first_name] + HashAggregate [c_last_name,sum,c_customer_id,d_year,sum,c_first_name,ws_net_paid] [sum,sum] + Project [ws_net_paid,d_year,c_customer_id,c_last_name,c_first_name] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [c_customer_id,ws_net_paid,c_last_name,ws_sold_date_sk,c_first_name] + Project [ws_net_paid,c_customer_id,c_last_name,c_first_name,ws_sold_date_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] Filter [c_customer_sk,c_customer_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt index 80af7fa5e..b0e2b8b35 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt @@ -3,14 +3,14 @@ TakeOrderedAndProject(limit=100, orderBy=[sales_cnt_diff#1 ASC NULLS FIRST], out +- *(34) Project [d_year#11 AS prev_year#2, d_year#12 AS year#3, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#13 AS prev_yr_cnt#8, sales_cnt#14 AS curr_yr_cnt#9, (sales_cnt#14 - sales_cnt#13) AS sales_cnt_diff#1, CheckOverflow((promote_precision(cast(sales_amt#15 as decimal(19,2))) - promote_precision(cast(sales_amt#16 as decimal(19,2)))), DecimalType(19,2)) AS sales_amt_diff#10] +- *(34) BroadcastHashJoin [i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7], [i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], Inner, BuildRight, (CheckOverflow((promote_precision(cast(sales_cnt#14 as decimal(17,2))) / promote_precision(cast(sales_cnt#13 as decimal(17,2)))), DecimalType(37,20)) < 0.90000000000000000000) :- *(34) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7], functions=[sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))]) - : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, 200) + : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, 5) : +- *(16) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7], functions=[partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))]) : +- *(16) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) - : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22, 200) + : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22, 5) : +- *(15) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) : +- Union : :- *(10) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) - : : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22, 200) + : : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22, 5) : : +- *(9) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) : : +- Union : : :- *(4) Project [d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, (cs_quantity#23 - coalesce(cr_return_quantity#24, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#25 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#26, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#22] @@ -24,8 +24,8 @@ TakeOrderedAndProject(limit=100, orderBy=[sales_cnt_diff#1 ASC NULLS FIRST], out : : : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#28,cs_order_number#27,cs_quantity#23,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true], input[4, int, true])) +- *(33) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], functions=[sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))]) - +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, 200) + +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, 5) +- *(32) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], functions=[partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))]) +- *(32) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) - +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22, 200) + +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22, 5) +- *(31) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) +- Union :- *(26) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) - : +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22, 200) + : +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22, 5) : +- *(25) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) : +- Union : :- *(20) Project [d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, (cs_quantity#23 - coalesce(cr_return_quantity#24, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#25 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#26, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#22] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt index 656d3c077..606f41e6c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt @@ -1,43 +1,43 @@ -TakeOrderedAndProject [i_brand_id,year,sales_amt_diff,prev_yr_cnt,sales_cnt_diff,curr_yr_cnt,i_category_id,i_class_id,i_manufact_id,prev_year] +TakeOrderedAndProject [i_brand_id,curr_yr_cnt,prev_yr_cnt,sales_amt_diff,sales_cnt_diff,prev_year,i_manufact_id,year,i_class_id,i_category_id] WholeStageCodegen - Project [i_brand_id,sales_amt,d_year,sales_cnt,sales_cnt,d_year,sales_amt,i_category_id,i_class_id,i_manufact_id] - BroadcastHashJoin [i_brand_id,i_class_id,i_brand_id,sales_cnt,i_manufact_id,sales_cnt,i_category_id,i_category_id,i_class_id,i_manufact_id] - HashAggregate [sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint)),i_manufact_id,i_brand_id,i_class_id,i_category_id,sum,sum,d_year] [sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint)),sales_amt,sum,sum,sales_cnt] + Project [i_brand_id,i_manufact_id,sales_amt,d_year,sales_cnt,i_class_id,sales_cnt,i_category_id,d_year,sales_amt] + BroadcastHashJoin [i_brand_id,i_category_id,i_brand_id,i_manufact_id,i_manufact_id,sales_cnt,i_class_id,sales_cnt,i_category_id,i_class_id] + HashAggregate [i_brand_id,sum(UnscaledValue(sales_amt)),sum,i_category_id,i_manufact_id,sum(cast(sales_cnt as bigint)),d_year,i_class_id,sum] [sum(UnscaledValue(sales_amt)),sum,sum(cast(sales_cnt as bigint)),sum,sales_cnt,sales_amt] InputAdapter - Exchange [i_manufact_id,i_brand_id,i_class_id,i_category_id,d_year] #1 + Exchange [i_brand_id,i_category_id,i_manufact_id,d_year,i_class_id] #1 WholeStageCodegen - HashAggregate [sales_amt,i_manufact_id,i_brand_id,i_class_id,i_category_id,sum,sum,sales_cnt,sum,sum,d_year] [sum,sum,sum,sum] - HashAggregate [d_year,i_category_id,i_brand_id,i_manufact_id,sales_cnt,sales_amt,i_class_id] + HashAggregate [sales_cnt,sum,i_brand_id,sum,i_category_id,i_manufact_id,sum,d_year,i_class_id,sum,sales_amt] [sum,sum,sum,sum] + HashAggregate [i_brand_id,sales_amt,d_year,sales_cnt,i_class_id,i_manufact_id,i_category_id] InputAdapter - Exchange [d_year,i_category_id,i_brand_id,i_manufact_id,sales_cnt,sales_amt,i_class_id] #2 + Exchange [i_brand_id,sales_amt,d_year,sales_cnt,i_class_id,i_manufact_id,i_category_id] #2 WholeStageCodegen - HashAggregate [d_year,i_category_id,i_brand_id,i_manufact_id,sales_cnt,sales_amt,i_class_id] + HashAggregate [i_brand_id,sales_amt,d_year,sales_cnt,i_class_id,i_manufact_id,i_category_id] InputAdapter Union WholeStageCodegen - HashAggregate [d_year,i_category_id,i_brand_id,i_manufact_id,sales_cnt,sales_amt,i_class_id] + HashAggregate [i_brand_id,sales_amt,d_year,sales_cnt,i_class_id,i_manufact_id,i_category_id] InputAdapter - Exchange [d_year,i_category_id,i_brand_id,i_manufact_id,sales_cnt,sales_amt,i_class_id] #3 + Exchange [i_brand_id,sales_amt,d_year,sales_cnt,i_class_id,i_manufact_id,i_category_id] #3 WholeStageCodegen - HashAggregate [d_year,i_category_id,i_brand_id,i_manufact_id,sales_cnt,sales_amt,i_class_id] + HashAggregate [i_brand_id,sales_amt,d_year,sales_cnt,i_class_id,i_manufact_id,i_category_id] InputAdapter Union WholeStageCodegen - Project [d_year,cr_return_quantity,cr_return_amount,i_category_id,i_brand_id,i_manufact_id,cs_quantity,i_class_id,cs_ext_sales_price] + Project [cs_quantity,i_brand_id,cs_ext_sales_price,cr_return_quantity,d_year,i_class_id,cr_return_amount,i_manufact_id,i_category_id] BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_quantity,i_manufact_id,d_year,i_category_id,cs_order_number,i_brand_id,cs_item_sk,i_class_id,cs_ext_sales_price] + Project [i_manufact_id,d_year,i_class_id,cs_quantity,i_category_id,cs_item_sk,i_brand_id,cs_ext_sales_price,cs_order_number] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_quantity,i_manufact_id,i_category_id,cs_sold_date_sk,cs_order_number,i_brand_id,cs_item_sk,i_class_id,cs_ext_sales_price] + Project [i_manufact_id,i_class_id,cs_quantity,i_category_id,cs_item_sk,i_brand_id,cs_ext_sales_price,cs_sold_date_sk,cs_order_number] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_ext_sales_price] + Project [cs_quantity,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,cs_order_number] Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_ext_sales_price] [cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_ext_sales_price] + Scan parquet default.catalog_sales [cs_quantity,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,cs_order_number] [cs_quantity,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,cs_order_number] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [i_manufact_id,i_category_id,i_item_sk,i_brand_id,i_class_id] - Filter [i_category,i_category_id,i_item_sk,i_brand_id,i_manufact_id,i_class_id] - Scan parquet default.item [i_manufact_id,i_category_id,i_item_sk,i_category,i_brand_id,i_class_id] [i_manufact_id,i_category_id,i_item_sk,i_category,i_brand_id,i_class_id] + Project [i_manufact_id,i_class_id,i_category_id,i_brand_id,i_item_sk] + Filter [i_brand_id,i_item_sk,i_class_id,i_category,i_manufact_id,i_category_id] + Scan parquet default.item [i_manufact_id,i_class_id,i_category_id,i_brand_id,i_category,i_item_sk] [i_manufact_id,i_class_id,i_category_id,i_brand_id,i_category,i_item_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen @@ -51,17 +51,17 @@ TakeOrderedAndProject [i_brand_id,year,sales_amt_diff,prev_yr_cnt,sales_cnt_diff Filter [cr_item_sk,cr_order_number] Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] WholeStageCodegen - Project [d_year,sr_return_amt,sr_return_quantity,i_category_id,ss_ext_sales_price,i_brand_id,i_manufact_id,ss_quantity,i_class_id] + Project [i_brand_id,sr_return_quantity,ss_quantity,sr_return_amt,d_year,ss_ext_sales_price,i_class_id,i_manufact_id,i_category_id] BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_quantity,ss_item_sk,i_manufact_id,d_year,i_category_id,ss_ext_sales_price,i_brand_id,i_class_id,ss_ticket_number] + Project [i_manufact_id,d_year,i_class_id,i_category_id,i_brand_id,ss_ticket_number,ss_item_sk,ss_quantity,ss_ext_sales_price] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,i_manufact_id,i_category_id,ss_ext_sales_price,i_brand_id,ss_sold_date_sk,i_class_id,ss_ticket_number] + Project [i_manufact_id,i_class_id,i_category_id,i_brand_id,ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ext_sales_price] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_quantity,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ticket_number] + Project [ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ext_sales_price] Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ext_sales_price] [ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ext_sales_price] InputAdapter - ReusedExchange [i_manufact_id,i_category_id,i_item_sk,i_brand_id,i_class_id] [i_manufact_id,i_category_id,i_item_sk,i_brand_id,i_class_id] #4 + ReusedExchange [i_manufact_id,i_class_id,i_category_id,i_brand_id,i_item_sk] [i_manufact_id,i_class_id,i_category_id,i_brand_id,i_item_sk] #4 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #5 InputAdapter @@ -71,17 +71,17 @@ TakeOrderedAndProject [i_brand_id,year,sales_amt_diff,prev_yr_cnt,sales_cnt_diff Filter [sr_ticket_number,sr_item_sk] Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] WholeStageCodegen - Project [wr_return_quantity,d_year,ws_ext_sales_price,wr_return_amt,i_category_id,i_brand_id,i_manufact_id,i_class_id,ws_quantity] + Project [i_brand_id,wr_return_quantity,d_year,ws_ext_sales_price,ws_quantity,i_class_id,wr_return_amt,i_manufact_id,i_category_id] BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - Project [i_manufact_id,d_year,ws_ext_sales_price,ws_quantity,ws_order_number,i_category_id,i_brand_id,i_class_id,ws_item_sk] + Project [i_manufact_id,d_year,i_class_id,i_category_id,i_brand_id,ws_order_number,ws_quantity,ws_ext_sales_price,ws_item_sk] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [i_manufact_id,ws_ext_sales_price,ws_quantity,ws_order_number,i_category_id,ws_sold_date_sk,i_brand_id,i_class_id,ws_item_sk] + Project [i_manufact_id,i_class_id,i_category_id,i_brand_id,ws_order_number,ws_quantity,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_ext_sales_price,ws_quantity,ws_order_number,ws_sold_date_sk,ws_item_sk] + Project [ws_order_number,ws_quantity,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_quantity,ws_order_number,ws_sold_date_sk,ws_item_sk] [ws_ext_sales_price,ws_quantity,ws_order_number,ws_sold_date_sk,ws_item_sk] + Scan parquet default.web_sales [ws_order_number,ws_quantity,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_order_number,ws_quantity,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] InputAdapter - ReusedExchange [i_manufact_id,i_category_id,i_item_sk,i_brand_id,i_class_id] [i_manufact_id,i_category_id,i_item_sk,i_brand_id,i_class_id] #4 + ReusedExchange [i_manufact_id,i_class_id,i_category_id,i_brand_id,i_item_sk] [i_manufact_id,i_class_id,i_category_id,i_brand_id,i_item_sk] #4 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #5 InputAdapter @@ -93,38 +93,38 @@ TakeOrderedAndProject [i_brand_id,year,sales_amt_diff,prev_yr_cnt,sales_cnt_diff InputAdapter BroadcastExchange #9 WholeStageCodegen - HashAggregate [sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint)),d_year,sum,i_class_id,i_category_id,i_manufact_id,i_brand_id,sum] [sum(UnscaledValue(sales_amt)),sales_amt,sum(cast(sales_cnt as bigint)),sales_cnt,sum,sum] + HashAggregate [i_manufact_id,sum(UnscaledValue(sales_amt)),i_class_id,i_brand_id,sum(cast(sales_cnt as bigint)),sum,sum,d_year,i_category_id] [sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint)),sum,sales_cnt,sum,sales_amt] InputAdapter - Exchange [d_year,i_class_id,i_category_id,i_manufact_id,i_brand_id] #10 + Exchange [i_manufact_id,i_class_id,i_brand_id,d_year,i_category_id] #10 WholeStageCodegen - HashAggregate [sum,d_year,sales_amt,sum,sum,i_class_id,sales_cnt,i_category_id,i_manufact_id,i_brand_id,sum] [sum,sum,sum,sum] - HashAggregate [i_manufact_id,d_year,i_category_id,sales_cnt,i_class_id,sales_amt,i_brand_id] + HashAggregate [i_manufact_id,sales_cnt,i_class_id,i_brand_id,sum,sum,sum,d_year,sales_amt,sum,i_category_id] [sum,sum,sum,sum] + HashAggregate [i_category_id,sales_amt,i_manufact_id,i_class_id,i_brand_id,d_year,sales_cnt] InputAdapter - Exchange [i_manufact_id,d_year,i_category_id,sales_cnt,i_class_id,sales_amt,i_brand_id] #11 + Exchange [i_category_id,sales_amt,i_manufact_id,i_class_id,i_brand_id,d_year,sales_cnt] #11 WholeStageCodegen - HashAggregate [i_manufact_id,d_year,i_category_id,sales_cnt,i_class_id,sales_amt,i_brand_id] + HashAggregate [i_category_id,sales_amt,i_manufact_id,i_class_id,i_brand_id,d_year,sales_cnt] InputAdapter Union WholeStageCodegen - HashAggregate [i_manufact_id,d_year,i_category_id,sales_cnt,i_class_id,sales_amt,i_brand_id] + HashAggregate [i_category_id,sales_amt,i_manufact_id,i_class_id,i_brand_id,d_year,sales_cnt] InputAdapter - Exchange [i_manufact_id,d_year,i_category_id,sales_cnt,i_class_id,sales_amt,i_brand_id] #12 + Exchange [i_category_id,sales_amt,i_manufact_id,i_class_id,i_brand_id,d_year,sales_cnt] #12 WholeStageCodegen - HashAggregate [i_manufact_id,d_year,i_category_id,sales_cnt,i_class_id,sales_amt,i_brand_id] + HashAggregate [i_category_id,sales_amt,i_manufact_id,i_class_id,i_brand_id,d_year,sales_cnt] InputAdapter Union WholeStageCodegen - Project [cr_return_quantity,i_manufact_id,cr_return_amount,d_year,i_category_id,i_class_id,cs_quantity,i_brand_id,cs_ext_sales_price] + Project [cs_quantity,i_category_id,i_manufact_id,i_class_id,cs_ext_sales_price,i_brand_id,cr_return_quantity,d_year,cr_return_amount] BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_quantity,i_brand_id,i_manufact_id,i_category_id,d_year,cs_order_number,cs_item_sk,cs_ext_sales_price,i_class_id] + Project [d_year,cs_quantity,i_category_id,cs_item_sk,i_manufact_id,cs_ext_sales_price,i_brand_id,i_class_id,cs_order_number] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_quantity,i_brand_id,i_manufact_id,i_category_id,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_ext_sales_price,i_class_id] + Project [cs_quantity,i_category_id,cs_item_sk,i_manufact_id,cs_ext_sales_price,cs_sold_date_sk,i_brand_id,i_class_id,cs_order_number] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_ext_sales_price] + Project [cs_quantity,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,cs_order_number] Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_ext_sales_price] [cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_ext_sales_price] + Scan parquet default.catalog_sales [cs_quantity,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,cs_order_number] [cs_quantity,cs_item_sk,cs_ext_sales_price,cs_sold_date_sk,cs_order_number] InputAdapter - ReusedExchange [i_brand_id,i_item_sk,i_manufact_id,i_category_id,i_class_id] [i_brand_id,i_item_sk,i_manufact_id,i_category_id,i_class_id] #4 + ReusedExchange [i_item_sk,i_category_id,i_manufact_id,i_brand_id,i_class_id] [i_item_sk,i_category_id,i_manufact_id,i_brand_id,i_class_id] #4 InputAdapter BroadcastExchange #13 WholeStageCodegen @@ -134,33 +134,33 @@ TakeOrderedAndProject [i_brand_id,year,sales_amt_diff,prev_yr_cnt,sales_cnt_diff InputAdapter ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #6 WholeStageCodegen - Project [i_manufact_id,d_year,i_category_id,sr_return_amt,sr_return_quantity,ss_ext_sales_price,ss_quantity,i_class_id,i_brand_id] + Project [sr_return_quantity,ss_quantity,i_category_id,sr_return_amt,i_manufact_id,i_class_id,i_brand_id,ss_ext_sales_price,d_year] BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_quantity,ss_item_sk,i_brand_id,i_manufact_id,i_category_id,d_year,ss_ext_sales_price,i_class_id,ss_ticket_number] + Project [d_year,i_category_id,ss_ticket_number,ss_item_sk,ss_quantity,i_manufact_id,i_brand_id,ss_ext_sales_price,i_class_id] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,i_brand_id,i_manufact_id,i_category_id,ss_ext_sales_price,ss_sold_date_sk,i_class_id,ss_ticket_number] + Project [i_category_id,ss_ticket_number,ss_item_sk,ss_quantity,i_manufact_id,i_brand_id,ss_sold_date_sk,ss_ext_sales_price,i_class_id] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_quantity,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ticket_number] + Project [ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ext_sales_price] Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ext_sales_price] [ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ext_sales_price] InputAdapter - ReusedExchange [i_brand_id,i_item_sk,i_manufact_id,i_category_id,i_class_id] [i_brand_id,i_item_sk,i_manufact_id,i_category_id,i_class_id] #4 + ReusedExchange [i_item_sk,i_category_id,i_manufact_id,i_brand_id,i_class_id] [i_item_sk,i_category_id,i_manufact_id,i_brand_id,i_class_id] #4 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #13 InputAdapter ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #7 WholeStageCodegen - Project [wr_return_quantity,i_manufact_id,d_year,i_category_id,ws_ext_sales_price,wr_return_amt,i_class_id,i_brand_id,ws_quantity] + Project [i_category_id,i_manufact_id,i_class_id,wr_return_quantity,i_brand_id,d_year,ws_ext_sales_price,ws_quantity,wr_return_amt] BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - Project [i_brand_id,ws_ext_sales_price,ws_quantity,i_manufact_id,i_category_id,d_year,ws_order_number,i_class_id,ws_item_sk] + Project [d_year,i_category_id,i_manufact_id,ws_order_number,i_brand_id,ws_quantity,i_class_id,ws_ext_sales_price,ws_item_sk] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [i_brand_id,ws_ext_sales_price,ws_quantity,i_manufact_id,i_category_id,ws_order_number,ws_sold_date_sk,i_class_id,ws_item_sk] + Project [i_category_id,i_manufact_id,ws_order_number,i_brand_id,ws_quantity,i_class_id,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_ext_sales_price,ws_quantity,ws_order_number,ws_sold_date_sk,ws_item_sk] + Project [ws_order_number,ws_quantity,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_quantity,ws_order_number,ws_sold_date_sk,ws_item_sk] [ws_ext_sales_price,ws_quantity,ws_order_number,ws_sold_date_sk,ws_item_sk] + Scan parquet default.web_sales [ws_order_number,ws_quantity,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_order_number,ws_quantity,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] InputAdapter - ReusedExchange [i_brand_id,i_item_sk,i_manufact_id,i_category_id,i_class_id] [i_brand_id,i_item_sk,i_manufact_id,i_category_id,i_class_id] #4 + ReusedExchange [i_item_sk,i_category_id,i_manufact_id,i_brand_id,i_class_id] [i_item_sk,i_category_id,i_manufact_id,i_brand_id,i_class_id] #4 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #13 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/explain.txt index 20cba6cb5..a7e5edfea 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,col_name#2 ASC NULLS FIRST,d_year#3 ASC NULLS FIRST,d_qoy#4 ASC NULLS FIRST,i_category#5 ASC NULLS FIRST], output=[channel#1,col_name#2,d_year#3,d_qoy#4,i_category#5,sales_cnt#6,sales_amt#7]) +- *(11) HashAggregate(keys=[channel#1, col_name#2, d_year#3, d_qoy#4, i_category#5], functions=[count(1), sum(UnscaledValue(ext_sales_price#8))]) - +- Exchange hashpartitioning(channel#1, col_name#2, d_year#3, d_qoy#4, i_category#5, 200) + +- Exchange hashpartitioning(channel#1, col_name#2, d_year#3, d_qoy#4, i_category#5, 5) +- *(10) HashAggregate(keys=[channel#1, col_name#2, d_year#3, d_qoy#4, i_category#5], functions=[partial_count(1), partial_sum(UnscaledValue(ext_sales_price#8))]) +- Union :- *(3) Project [store AS channel#1, ss_store_sk#9 AS col_name#2, d_year#3, d_qoy#4, i_category#5, ss_ext_sales_price#10 AS ext_sales_price#8] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt index e40976f52..7a40d6326 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt @@ -1,14 +1,14 @@ -TakeOrderedAndProject [d_year,channel,sales_cnt,col_name,d_qoy,i_category,sales_amt] +TakeOrderedAndProject [i_category,sales_cnt,d_qoy,channel,sales_amt,d_year,col_name] WholeStageCodegen - HashAggregate [d_year,channel,sum,sum(UnscaledValue(ext_sales_price)),count,count(1),col_name,d_qoy,i_category] [sum,sum(UnscaledValue(ext_sales_price)),sales_cnt,count,count(1),sales_amt] + HashAggregate [i_category,count,count(1),sum,sum(UnscaledValue(ext_sales_price)),d_qoy,channel,d_year,col_name] [count,count(1),sum,sum(UnscaledValue(ext_sales_price)),sales_cnt,sales_amt] InputAdapter - Exchange [d_year,channel,col_name,d_qoy,i_category] #1 + Exchange [i_category,d_qoy,channel,d_year,col_name] #1 WholeStageCodegen - HashAggregate [d_year,count,channel,sum,ext_sales_price,count,col_name,d_qoy,i_category,sum] [count,sum,count,sum] + HashAggregate [i_category,count,sum,ext_sales_price,d_qoy,channel,count,d_year,col_name,sum] [count,sum,count,sum] InputAdapter Union WholeStageCodegen - Project [d_year,d_qoy,ss_store_sk,i_category,ss_ext_sales_price] + Project [d_year,ss_ext_sales_price,ss_store_sk,d_qoy,i_category] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,i_category] BroadcastHashJoin [ss_item_sk,i_item_sk] @@ -28,7 +28,7 @@ TakeOrderedAndProject [d_year,channel,sales_cnt,col_name,d_qoy,i_category,sales_ Filter [d_date_sk] Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] WholeStageCodegen - Project [d_year,d_qoy,ws_ship_customer_sk,ws_ext_sales_price,i_category] + Project [d_year,ws_ext_sales_price,d_qoy,i_category,ws_ship_customer_sk] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] Project [ws_sold_date_sk,ws_ship_customer_sk,ws_ext_sales_price,i_category] BroadcastHashJoin [ws_item_sk,i_item_sk] @@ -40,7 +40,7 @@ TakeOrderedAndProject [d_year,channel,sales_cnt,col_name,d_qoy,i_category,sales_ InputAdapter ReusedExchange [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] #3 WholeStageCodegen - Project [d_year,d_qoy,i_category,cs_ship_addr_sk,cs_ext_sales_price] + Project [cs_ship_addr_sk,cs_ext_sales_price,d_year,d_qoy,i_category] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] Project [cs_sold_date_sk,cs_ship_addr_sk,cs_ext_sales_price,i_category] BroadcastHashJoin [cs_item_sk,i_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt index 0199d72c5..0b9dd6982 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt @@ -1,14 +1,14 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NULLS FIRST], output=[channel#1,id#2,sales#3,returns#4,profit#5]) +- *(25) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[sum(sales#7), sum(returns#8), sum(profit#9)]) - +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 200) + +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 5) +- *(24) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[partial_sum(sales#7), partial_sum(returns#8), partial_sum(profit#9)]) +- *(24) Expand [List(sales#7, returns#8, profit#9, channel#10, id#11, 0), List(sales#7, returns#8, profit#9, channel#10, null, 1), List(sales#7, returns#8, profit#9, null, null, 3)], [sales#7, returns#8, profit#9, channel#1, id#2, spark_grouping_id#6] +- Union :- *(8) Project [sales#7, coalesce(returns#12, 0.00) AS returns#8, CheckOverflow((promote_precision(cast(profit#13 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#14, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#9, store channel AS channel#10, s_store_sk#15 AS id#11] : +- *(8) BroadcastHashJoin [s_store_sk#15], [s_store_sk#16], LeftOuter, BuildRight : :- *(8) HashAggregate(keys=[s_store_sk#15], functions=[sum(UnscaledValue(ss_ext_sales_price#17)), sum(UnscaledValue(ss_net_profit#18))]) - : : +- Exchange hashpartitioning(s_store_sk#15, 200) + : : +- Exchange hashpartitioning(s_store_sk#15, 5) : : +- *(3) HashAggregate(keys=[s_store_sk#15], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#17)), partial_sum(UnscaledValue(ss_net_profit#18))]) : : +- *(3) Project [ss_ext_sales_price#17, ss_net_profit#18, s_store_sk#15] : : +- *(3) BroadcastHashJoin [ss_store_sk#19], [s_store_sk#15], Inner, BuildRight @@ -27,7 +27,7 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL : : +- *(2) FileScan parquet default.store[s_store_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(7) HashAggregate(keys=[s_store_sk#16], functions=[sum(UnscaledValue(sr_return_amt#23)), sum(UnscaledValue(sr_net_loss#24))]) - : +- Exchange hashpartitioning(s_store_sk#16, 200) + : +- Exchange hashpartitioning(s_store_sk#16, 5) : +- *(6) HashAggregate(keys=[s_store_sk#16], functions=[partial_sum(UnscaledValue(sr_return_amt#23)), partial_sum(UnscaledValue(sr_net_loss#24))]) : +- *(6) Project [sr_return_amt#23, sr_net_loss#24, s_store_sk#16] : +- *(6) BroadcastHashJoin [sr_store_sk#25], [cast(s_store_sk#16 as bigint)], Inner, BuildRight @@ -48,7 +48,7 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL : +- BroadcastNestedLoopJoin BuildLeft, Inner : :- BroadcastExchange IdentityBroadcastMode : : +- *(11) HashAggregate(keys=[cs_call_center_sk#33], functions=[sum(UnscaledValue(cs_ext_sales_price#35)), sum(UnscaledValue(cs_net_profit#36))]) - : : +- Exchange hashpartitioning(cs_call_center_sk#33, 200) + : : +- Exchange hashpartitioning(cs_call_center_sk#33, 5) : : +- *(10) HashAggregate(keys=[cs_call_center_sk#33], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#35)), partial_sum(UnscaledValue(cs_net_profit#36))]) : : +- *(10) Project [cs_call_center_sk#33, cs_ext_sales_price#35, cs_net_profit#36] : : +- *(10) BroadcastHashJoin [cs_sold_date_sk#37], [d_date_sk#21], Inner, BuildRight @@ -68,7 +68,7 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL +- *(23) Project [sales#41, coalesce(returns#42, 0.00) AS returns#43, CheckOverflow((promote_precision(cast(profit#44 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#45, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#46, web channel AS channel#47, wp_web_page_sk#48 AS id#49] +- *(23) BroadcastHashJoin [wp_web_page_sk#48], [wp_web_page_sk#50], LeftOuter, BuildRight :- *(23) HashAggregate(keys=[wp_web_page_sk#48], functions=[sum(UnscaledValue(ws_ext_sales_price#51)), sum(UnscaledValue(ws_net_profit#52))]) - : +- Exchange hashpartitioning(wp_web_page_sk#48, 200) + : +- Exchange hashpartitioning(wp_web_page_sk#48, 5) : +- *(18) HashAggregate(keys=[wp_web_page_sk#48], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#51)), partial_sum(UnscaledValue(ws_net_profit#52))]) : +- *(18) Project [ws_ext_sales_price#51, ws_net_profit#52, wp_web_page_sk#48] : +- *(18) BroadcastHashJoin [ws_web_page_sk#53], [wp_web_page_sk#48], Inner, BuildRight @@ -84,7 +84,7 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL : +- *(17) FileScan parquet default.web_page[wp_web_page_sk#48] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_page], PartitionFilters: [], PushedFilters: [IsNotNull(wp_web_page_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(22) HashAggregate(keys=[wp_web_page_sk#50], functions=[sum(UnscaledValue(wr_return_amt#55)), sum(UnscaledValue(wr_net_loss#56))]) - +- Exchange hashpartitioning(wp_web_page_sk#50, 200) + +- Exchange hashpartitioning(wp_web_page_sk#50, 5) +- *(21) HashAggregate(keys=[wp_web_page_sk#50], functions=[partial_sum(UnscaledValue(wr_return_amt#55)), partial_sum(UnscaledValue(wr_net_loss#56))]) +- *(21) Project [wr_return_amt#55, wr_net_loss#56, wp_web_page_sk#50] +- *(21) BroadcastHashJoin [wr_web_page_sk#57], [cast(wp_web_page_sk#50 as bigint)], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt index 1c29af5ef..a633b75ce 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt @@ -1,21 +1,21 @@ -TakeOrderedAndProject [channel,profit,id,sales,returns] +TakeOrderedAndProject [profit,channel,returns,sales,id] WholeStageCodegen - HashAggregate [channel,sum,sum,id,sum(profit),spark_grouping_id,sum(sales),sum,sum(returns)] [sum,profit,sum,sum(profit),sales,sum(sales),sum,returns,sum(returns)] + HashAggregate [sum,spark_grouping_id,sum,sum,sum(profit),channel,sum(sales),sum(returns),id] [sum,profit,sum,sum,sum(profit),returns,sum(sales),sum(returns),sales] InputAdapter Exchange [channel,id,spark_grouping_id] #1 WholeStageCodegen - HashAggregate [profit,channel,sum,sum,sales,sum,sum,id,sum,spark_grouping_id,returns,sum] [sum,sum,sum,sum,sum,sum] - Expand [profit,channel,sales,id,returns] + HashAggregate [sum,spark_grouping_id,profit,sum,sum,channel,sum,sum,id,sum,sales,returns] [sum,sum,sum,sum,sum,sum] + Expand [id,channel,profit,sales,returns] InputAdapter Union WholeStageCodegen - Project [profit,s_store_sk,sales,returns,profit_loss] + Project [profit,s_store_sk,profit_loss,returns,sales] BroadcastHashJoin [s_store_sk,s_store_sk] - HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),sum,sum,s_store_sk,sum(UnscaledValue(ss_net_profit))] [sales,sum(UnscaledValue(ss_ext_sales_price)),sum,sum,sum(UnscaledValue(ss_net_profit)),profit] + HashAggregate [sum,s_store_sk,sum(UnscaledValue(ss_net_profit)),sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_net_profit)),profit,sum,sales,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter Exchange [s_store_sk] #2 WholeStageCodegen - HashAggregate [sum,sum,sum,sum,s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + HashAggregate [sum,sum,sum,s_store_sk,ss_net_profit,sum,ss_ext_sales_price] [sum,sum,sum,sum] Project [ss_ext_sales_price,ss_net_profit,s_store_sk] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_store_sk,ss_ext_sales_price,ss_net_profit] @@ -38,11 +38,11 @@ TakeOrderedAndProject [channel,profit,id,sales,returns] InputAdapter BroadcastExchange #5 WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),s_store_sk,sum] [sum,sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),profit_loss,returns,sum] + HashAggregate [sum,sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),sum,s_store_sk] [profit_loss,sum,sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),sum,returns] InputAdapter Exchange [s_store_sk] #6 WholeStageCodegen - HashAggregate [sum,sum,sr_return_amt,s_store_sk,sum,sum,sr_net_loss] [sum,sum,sum,sum] + HashAggregate [sum,sr_return_amt,sum,sum,sr_net_loss,sum,s_store_sk] [sum,sum,sum,sum] Project [sr_return_amt,sr_net_loss,s_store_sk] BroadcastHashJoin [sr_store_sk,s_store_sk] Project [sr_store_sk,sr_return_amt,sr_net_loss] @@ -63,16 +63,16 @@ TakeOrderedAndProject [channel,profit,id,sales,returns] Filter [s_store_sk] Scan parquet default.store [s_store_sk] [s_store_sk] WholeStageCodegen - Project [sales,returns,profit,cs_call_center_sk,profit_loss] + Project [profit,profit_loss,sales,cs_call_center_sk,returns] InputAdapter BroadcastNestedLoopJoin BroadcastExchange #9 WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(cs_net_profit)),sum,sum(UnscaledValue(cs_ext_sales_price)),cs_call_center_sk] [sales,sum,profit,sum(UnscaledValue(cs_net_profit)),sum,sum(UnscaledValue(cs_ext_sales_price))] + HashAggregate [sum(UnscaledValue(cs_ext_sales_price)),sum,cs_call_center_sk,sum,sum(UnscaledValue(cs_net_profit))] [sum(UnscaledValue(cs_ext_sales_price)),profit,sum,sum,sum(UnscaledValue(cs_net_profit)),sales] InputAdapter Exchange [cs_call_center_sk] #10 WholeStageCodegen - HashAggregate [sum,sum,cs_net_profit,sum,cs_ext_sales_price,cs_call_center_sk,sum] [sum,sum,sum,sum] + HashAggregate [sum,cs_net_profit,sum,cs_ext_sales_price,cs_call_center_sk,sum,sum] [sum,sum,sum,sum] Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] Project [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] @@ -81,11 +81,11 @@ TakeOrderedAndProject [channel,profit,id,sales,returns] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 WholeStageCodegen - HashAggregate [sum,sum,sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss))] [profit_loss,sum,returns,sum(UnscaledValue(cr_return_amount)),sum,sum(UnscaledValue(cr_net_loss))] + HashAggregate [sum,sum,sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss))] [sum(UnscaledValue(cr_return_amount)),sum,returns,sum(UnscaledValue(cr_net_loss)),sum,profit_loss] InputAdapter Exchange #11 WholeStageCodegen - HashAggregate [cr_return_amount,sum,sum,sum,cr_net_loss,sum] [sum,sum,sum,sum] + HashAggregate [sum,sum,sum,cr_net_loss,cr_return_amount,sum] [sum,sum,sum,sum] Project [cr_return_amount,cr_net_loss] BroadcastHashJoin [cr_returned_date_sk,d_date_sk] Project [cr_returned_date_sk,cr_return_amount,cr_net_loss] @@ -94,13 +94,13 @@ TakeOrderedAndProject [channel,profit,id,sales,returns] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 WholeStageCodegen - Project [profit,returns,sales,wp_web_page_sk,profit_loss] + Project [profit,profit_loss,sales,wp_web_page_sk,returns] BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] - HashAggregate [sum(UnscaledValue(ws_ext_sales_price)),sum,sum(UnscaledValue(ws_net_profit)),sum,wp_web_page_sk] [sum(UnscaledValue(ws_ext_sales_price)),profit,sales,sum,sum(UnscaledValue(ws_net_profit)),sum] + HashAggregate [sum,wp_web_page_sk,sum,sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit))] [sum,sum,sales,sum(UnscaledValue(ws_ext_sales_price)),profit,sum(UnscaledValue(ws_net_profit))] InputAdapter Exchange [wp_web_page_sk] #12 WholeStageCodegen - HashAggregate [sum,sum,ws_ext_sales_price,ws_net_profit,sum,sum,wp_web_page_sk] [sum,sum,sum,sum] + HashAggregate [sum,sum,wp_web_page_sk,sum,ws_net_profit,sum,ws_ext_sales_price] [sum,sum,sum,sum] Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] Project [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] @@ -119,11 +119,11 @@ TakeOrderedAndProject [channel,profit,id,sales,returns] InputAdapter BroadcastExchange #14 WholeStageCodegen - HashAggregate [sum,wp_web_page_sk,sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),sum] [sum,returns,sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),profit_loss,sum] + HashAggregate [sum(UnscaledValue(wr_net_loss)),sum,sum(UnscaledValue(wr_return_amt)),wp_web_page_sk,sum] [sum(UnscaledValue(wr_net_loss)),profit_loss,sum,sum(UnscaledValue(wr_return_amt)),sum,returns] InputAdapter Exchange [wp_web_page_sk] #15 WholeStageCodegen - HashAggregate [sum,wp_web_page_sk,wr_return_amt,sum,wr_net_loss,sum,sum] [sum,sum,sum,sum] + HashAggregate [sum,sum,wr_net_loss,wp_web_page_sk,sum,sum,wr_return_amt] [sum,sum,sum,sum] Project [wr_return_amt,wr_net_loss,wp_web_page_sk] BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk] Project [wr_web_page_sk,wr_return_amt,wr_net_loss] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/explain.txt index 70080e59a..05bd1bcc3 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/explain.txt @@ -1,11 +1,11 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[ratio#1 ASC NULLS FIRST,ss_qty#2 DESC NULLS LAST,ss_wc#3 DESC NULLS LAST,ss_sp#4 DESC NULLS LAST,other_chan_qty#5 ASC NULLS FIRST,other_chan_wholesale_cost#6 ASC NULLS FIRST,other_chan_sales_price#7 ASC NULLS FIRST,round((cast(ss_qty#2 as double) / cast(coalesce((ws_qty#8 + cs_qty#9), 1) as double)), 2) ASC NULLS FIRST], output=[ratio#1,store_qty#10,store_wholesale_cost#11,store_sales_price#12,other_chan_qty#5,other_chan_wholesale_cost#6,other_chan_sales_price#7]) -+- *(12) Project [round((cast(ss_qty#2 as double) / cast(coalesce((ws_qty#8 + cs_qty#9), 1) as double)), 2) AS ratio#1, ss_qty#2 AS store_qty#10, ss_wc#3 AS store_wholesale_cost#11, ss_sp#4 AS store_sales_price#12, (coalesce(ws_qty#8, 0) + coalesce(cs_qty#9, 0)) AS other_chan_qty#5, CheckOverflow((promote_precision(cast(coalesce(ws_wc#13, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#14, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_wholesale_cost#6, CheckOverflow((promote_precision(cast(coalesce(ws_sp#15, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#16, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_sales_price#7, cs_qty#9, ws_qty#8, ss_qty#2, ss_sp#4, ss_wc#3] ++- *(12) Project [round((cast(ss_qty#2 as double) / cast(coalesce((ws_qty#8 + cs_qty#9), 1) as double)), 2) AS ratio#1, ss_qty#2 AS store_qty#10, ss_wc#3 AS store_wholesale_cost#11, ss_sp#4 AS store_sales_price#12, (coalesce(ws_qty#8, 0) + coalesce(cs_qty#9, 0)) AS other_chan_qty#5, CheckOverflow((promote_precision(cast(coalesce(ws_wc#13, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#14, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_wholesale_cost#6, CheckOverflow((promote_precision(cast(coalesce(ws_sp#15, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#16, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_sales_price#7, ss_wc#3, cs_qty#9, ss_qty#2, ss_sp#4, ws_qty#8] +- *(12) BroadcastHashJoin [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19], [cs_sold_year#20, cs_item_sk#21, cs_customer_sk#22], Inner, BuildRight :- *(12) Project [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19, ss_qty#2, ss_wc#3, ss_sp#4, ws_qty#8, ws_wc#13, ws_sp#15] : +- *(12) BroadcastHashJoin [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19], [ws_sold_year#23, ws_item_sk#24, ws_customer_sk#25], Inner, BuildRight : :- *(12) HashAggregate(keys=[d_year#26, ss_item_sk#18, ss_customer_sk#19], functions=[sum(cast(ss_quantity#27 as bigint)), sum(UnscaledValue(ss_wholesale_cost#28)), sum(UnscaledValue(ss_sales_price#29))]) - : : +- Exchange hashpartitioning(d_year#26, ss_item_sk#18, ss_customer_sk#19, 200) + : : +- Exchange hashpartitioning(d_year#26, ss_item_sk#18, ss_customer_sk#19, 5) : : +- *(3) HashAggregate(keys=[d_year#26, ss_item_sk#18, ss_customer_sk#19], functions=[partial_sum(cast(ss_quantity#27 as bigint)), partial_sum(UnscaledValue(ss_wholesale_cost#28)), partial_sum(UnscaledValue(ss_sales_price#29))]) : : +- *(3) Project [ss_item_sk#18, ss_customer_sk#19, ss_quantity#27, ss_wholesale_cost#28, ss_sales_price#29, d_year#26] : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#30], [d_date_sk#31], Inner, BuildRight @@ -26,7 +26,7 @@ TakeOrderedAndProject(limit=100, orderBy=[ratio#1 ASC NULLS FIRST,ss_qty#2 DESC : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) : +- *(7) Filter (coalesce(ws_qty#8, 0) > 0) : +- *(7) HashAggregate(keys=[d_year#26, ws_item_sk#24, ws_bill_customer_sk#35], functions=[sum(cast(ws_quantity#36 as bigint)), sum(UnscaledValue(ws_wholesale_cost#37)), sum(UnscaledValue(ws_sales_price#38))]) - : +- Exchange hashpartitioning(d_year#26, ws_item_sk#24, ws_bill_customer_sk#35, 200) + : +- Exchange hashpartitioning(d_year#26, ws_item_sk#24, ws_bill_customer_sk#35, 5) : +- *(6) HashAggregate(keys=[d_year#26, ws_item_sk#24, ws_bill_customer_sk#35], functions=[partial_sum(cast(ws_quantity#36 as bigint)), partial_sum(UnscaledValue(ws_wholesale_cost#37)), partial_sum(UnscaledValue(ws_sales_price#38))]) : +- *(6) Project [ws_item_sk#24, ws_bill_customer_sk#35, ws_quantity#36, ws_wholesale_cost#37, ws_sales_price#38, d_year#26] : +- *(6) BroadcastHashJoin [ws_sold_date_sk#39], [d_date_sk#31], Inner, BuildRight @@ -44,7 +44,7 @@ TakeOrderedAndProject(limit=100, orderBy=[ratio#1 ASC NULLS FIRST,ss_qty#2 DESC +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) +- *(11) Filter (coalesce(cs_qty#9, 0) > 0) +- *(11) HashAggregate(keys=[d_year#26, cs_item_sk#21, cs_bill_customer_sk#43], functions=[sum(cast(cs_quantity#44 as bigint)), sum(UnscaledValue(cs_wholesale_cost#45)), sum(UnscaledValue(cs_sales_price#46))]) - +- Exchange hashpartitioning(d_year#26, cs_item_sk#21, cs_bill_customer_sk#43, 200) + +- Exchange hashpartitioning(d_year#26, cs_item_sk#21, cs_bill_customer_sk#43, 5) +- *(10) HashAggregate(keys=[d_year#26, cs_item_sk#21, cs_bill_customer_sk#43], functions=[partial_sum(cast(cs_quantity#44 as bigint)), partial_sum(UnscaledValue(cs_wholesale_cost#45)), partial_sum(UnscaledValue(cs_sales_price#46))]) +- *(10) Project [cs_bill_customer_sk#43, cs_item_sk#21, cs_quantity#44, cs_wholesale_cost#45, cs_sales_price#46, d_year#26] +- *(10) BroadcastHashJoin [cs_sold_date_sk#47], [d_date_sk#31], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt index 64b375073..9a5a90a44 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt @@ -1,22 +1,22 @@ -TakeOrderedAndProject [cs_qty,store_wholesale_cost,store_sales_price,ws_qty,ss_qty,ss_sp,other_chan_qty,store_qty,ratio,other_chan_sales_price,other_chan_wholesale_cost,ss_wc] +TakeOrderedAndProject [store_wholesale_cost,other_chan_qty,ss_wc,cs_qty,ratio,ss_qty,other_chan_sales_price,ss_sp,other_chan_wholesale_cost,store_qty,store_sales_price,ws_qty] WholeStageCodegen - Project [ws_wc,cs_qty,ws_qty,cs_sp,ss_qty,ss_sp,ws_sp,cs_wc,ss_wc] - BroadcastHashJoin [ss_item_sk,ss_customer_sk,cs_customer_sk,cs_item_sk,cs_sold_year,ss_sold_year] - Project [ss_customer_sk,ws_sp,ws_wc,ss_wc,ss_sold_year,ss_sp,ws_qty,ss_qty,ss_item_sk] - BroadcastHashJoin [ss_item_sk,ss_customer_sk,ws_sold_year,ws_item_sk,ws_customer_sk,ss_sold_year] - HashAggregate [d_year,ss_customer_sk,sum(UnscaledValue(ss_sales_price)),sum,sum,sum(cast(ss_quantity as bigint)),sum,sum(UnscaledValue(ss_wholesale_cost)),ss_item_sk] [sum(UnscaledValue(ss_sales_price)),sum,ss_wc,sum,sum(cast(ss_quantity as bigint)),ss_sold_year,ss_sp,ss_qty,sum,sum(UnscaledValue(ss_wholesale_cost))] + Project [cs_wc,ws_wc,ss_wc,cs_qty,ss_qty,ss_sp,ws_sp,cs_sp,ws_qty] + BroadcastHashJoin [ss_sold_year,cs_sold_year,ss_customer_sk,ss_item_sk,cs_customer_sk,cs_item_sk] + Project [ss_customer_sk,ss_wc,ss_item_sk,ss_sold_year,ws_sp,ws_qty,ss_sp,ss_qty,ws_wc] + BroadcastHashJoin [ws_item_sk,ss_sold_year,ws_sold_year,ss_customer_sk,ws_customer_sk,ss_item_sk] + HashAggregate [ss_customer_sk,sum,sum(cast(ss_quantity as bigint)),sum,sum(UnscaledValue(ss_sales_price)),ss_item_sk,d_year,sum,sum(UnscaledValue(ss_wholesale_cost))] [sum,sum(cast(ss_quantity as bigint)),sum,sum(UnscaledValue(ss_sales_price)),ss_wc,ss_sold_year,ss_sp,ss_qty,sum,sum(UnscaledValue(ss_wholesale_cost))] InputAdapter Exchange [d_year,ss_item_sk,ss_customer_sk] #1 WholeStageCodegen - HashAggregate [d_year,sum,ss_wholesale_cost,ss_customer_sk,sum,sum,ss_sales_price,sum,sum,ss_quantity,sum,ss_item_sk] [sum,sum,sum,sum,sum,sum] - Project [ss_quantity,ss_item_sk,d_year,ss_customer_sk,ss_sales_price,ss_wholesale_cost] + HashAggregate [ss_customer_sk,sum,sum,ss_item_sk,ss_wholesale_cost,ss_quantity,d_year,ss_sales_price,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [d_year,ss_wholesale_cost,ss_sales_price,ss_customer_sk,ss_item_sk,ss_quantity] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost] + Project [ss_wholesale_cost,ss_sales_price,ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk] Filter [sr_ticket_number] BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost,ss_ticket_number] + Project [ss_wholesale_cost,ss_sales_price,ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk] Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost,ss_ticket_number] [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost,ss_ticket_number] + Scan parquet default.store_sales [ss_wholesale_cost,ss_sales_price,ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk] [ss_wholesale_cost,ss_sales_price,ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen @@ -33,19 +33,19 @@ TakeOrderedAndProject [cs_qty,store_wholesale_cost,store_sales_price,ws_qty,ss_q BroadcastExchange #4 WholeStageCodegen Filter [ws_qty] - HashAggregate [d_year,sum(cast(ws_quantity as bigint)),ws_item_sk,sum(UnscaledValue(ws_sales_price)),ws_bill_customer_sk,sum,sum,sum(UnscaledValue(ws_wholesale_cost)),sum] [sum(cast(ws_quantity as bigint)),sum(UnscaledValue(ws_sales_price)),ws_sp,ws_wc,sum,ws_sold_year,sum,sum(UnscaledValue(ws_wholesale_cost)),sum,ws_customer_sk,ws_qty] + HashAggregate [ws_bill_customer_sk,sum(UnscaledValue(ws_wholesale_cost)),sum,sum,d_year,sum,ws_item_sk,sum(cast(ws_quantity as bigint)),sum(UnscaledValue(ws_sales_price))] [sum(UnscaledValue(ws_wholesale_cost)),ws_customer_sk,sum,sum,ws_sp,ws_qty,ws_sold_year,sum,ws_wc,sum(cast(ws_quantity as bigint)),sum(UnscaledValue(ws_sales_price))] InputAdapter Exchange [d_year,ws_item_sk,ws_bill_customer_sk] #5 WholeStageCodegen - HashAggregate [d_year,ws_wholesale_cost,ws_item_sk,sum,ws_bill_customer_sk,sum,sum,sum,sum,sum,ws_sales_price,ws_quantity] [sum,sum,sum,sum,sum,sum] - Project [d_year,ws_quantity,ws_wholesale_cost,ws_bill_customer_sk,ws_sales_price,ws_item_sk] + HashAggregate [ws_wholesale_cost,ws_bill_customer_sk,sum,sum,sum,sum,d_year,sum,sum,ws_sales_price,ws_quantity,ws_item_sk] [sum,sum,sum,sum,sum,sum] + Project [d_year,ws_bill_customer_sk,ws_sales_price,ws_quantity,ws_wholesale_cost,ws_item_sk] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_quantity,ws_wholesale_cost,ws_sold_date_sk,ws_bill_customer_sk,ws_sales_price,ws_item_sk] + Project [ws_bill_customer_sk,ws_sales_price,ws_quantity,ws_wholesale_cost,ws_item_sk,ws_sold_date_sk] Filter [wr_order_number] BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - Project [ws_quantity,ws_wholesale_cost,ws_order_number,ws_sold_date_sk,ws_bill_customer_sk,ws_sales_price,ws_item_sk] + Project [ws_bill_customer_sk,ws_order_number,ws_sales_price,ws_quantity,ws_wholesale_cost,ws_item_sk,ws_sold_date_sk] Filter [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] - Scan parquet default.web_sales [ws_quantity,ws_wholesale_cost,ws_order_number,ws_sold_date_sk,ws_bill_customer_sk,ws_sales_price,ws_item_sk] [ws_quantity,ws_wholesale_cost,ws_order_number,ws_sold_date_sk,ws_bill_customer_sk,ws_sales_price,ws_item_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_order_number,ws_sales_price,ws_quantity,ws_wholesale_cost,ws_item_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_order_number,ws_sales_price,ws_quantity,ws_wholesale_cost,ws_item_sk,ws_sold_date_sk] InputAdapter BroadcastExchange #6 WholeStageCodegen @@ -58,19 +58,19 @@ TakeOrderedAndProject [cs_qty,store_wholesale_cost,store_sales_price,ws_qty,ss_q BroadcastExchange #7 WholeStageCodegen Filter [cs_qty] - HashAggregate [d_year,sum,sum(UnscaledValue(cs_sales_price)),sum(cast(cs_quantity as bigint)),sum(UnscaledValue(cs_wholesale_cost)),cs_item_sk,cs_bill_customer_sk,sum,sum] [cs_qty,cs_wc,sum,sum(UnscaledValue(cs_sales_price)),sum(cast(cs_quantity as bigint)),cs_sold_year,cs_sp,sum(UnscaledValue(cs_wholesale_cost)),sum,cs_customer_sk,sum] + HashAggregate [sum,sum,cs_item_sk,sum(cast(cs_quantity as bigint)),d_year,sum(UnscaledValue(cs_wholesale_cost)),sum(UnscaledValue(cs_sales_price)),cs_bill_customer_sk,sum] [sum,cs_sp,sum,cs_qty,sum(cast(cs_quantity as bigint)),cs_customer_sk,sum(UnscaledValue(cs_wholesale_cost)),sum(UnscaledValue(cs_sales_price)),cs_sold_year,cs_wc,sum] InputAdapter Exchange [d_year,cs_item_sk,cs_bill_customer_sk] #8 WholeStageCodegen - HashAggregate [d_year,sum,sum,sum,cs_item_sk,cs_bill_customer_sk,sum,sum,cs_sales_price,cs_quantity,sum,cs_wholesale_cost] [sum,sum,sum,sum,sum,sum] - Project [cs_wholesale_cost,cs_quantity,d_year,cs_bill_customer_sk,cs_sales_price,cs_item_sk] + HashAggregate [cs_quantity,sum,sum,cs_sales_price,cs_item_sk,d_year,sum,cs_bill_customer_sk,sum,cs_wholesale_cost,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [d_year,cs_quantity,cs_bill_customer_sk,cs_item_sk,cs_wholesale_cost,cs_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_wholesale_cost,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk] + Project [cs_quantity,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk,cs_wholesale_cost,cs_sales_price] Filter [cr_order_number] BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_wholesale_cost,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] + Project [cs_quantity,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk,cs_wholesale_cost,cs_sales_price,cs_order_number] Filter [cs_sold_date_sk,cs_item_sk,cs_bill_customer_sk] - Scan parquet default.catalog_sales [cs_wholesale_cost,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] [cs_wholesale_cost,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] + Scan parquet default.catalog_sales [cs_quantity,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk,cs_wholesale_cost,cs_sales_price,cs_order_number] [cs_quantity,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk,cs_wholesale_cost,cs_sales_price,cs_order_number] InputAdapter BroadcastExchange #9 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt index c0fa08c66..8e43f862c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt @@ -3,7 +3,7 @@ TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_ +- *(6) Project [c_last_name#1, c_first_name#2, substring(s_city#3, 1, 30) AS substring(s_city, 1, 30)#5, ss_ticket_number#6, amt#7, profit#4, s_city#3] +- *(6) BroadcastHashJoin [ss_customer_sk#8], [c_customer_sk#9], Inner, BuildRight :- *(6) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3], functions=[sum(UnscaledValue(ss_coupon_amt#11)), sum(UnscaledValue(ss_net_profit#12))]) - : +- Exchange hashpartitioning(ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3, 200) + : +- Exchange hashpartitioning(ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3, 5) : +- *(4) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3], functions=[partial_sum(UnscaledValue(ss_coupon_amt#11)), partial_sum(UnscaledValue(ss_net_profit#12))]) : +- *(4) Project [ss_customer_sk#8, ss_addr_sk#10, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12, s_city#3] : +- *(4) BroadcastHashJoin [ss_hdemo_sk#13], [hd_demo_sk#14], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt index cee3e4c79..c6d42000c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt @@ -1,21 +1,21 @@ -TakeOrderedAndProject [profit,amt,substring(s_city, 1, 30),s_city,c_last_name,ss_ticket_number,c_first_name] +TakeOrderedAndProject [amt,s_city,c_last_name,profit,c_first_name,ss_ticket_number,substring(s_city, 1, 30)] WholeStageCodegen - Project [profit,amt,s_city,c_last_name,ss_ticket_number,c_first_name] + Project [amt,s_city,c_last_name,profit,c_first_name,ss_ticket_number] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - HashAggregate [sum(UnscaledValue(ss_net_profit)),ss_customer_sk,sum(UnscaledValue(ss_coupon_amt)),sum,sum,s_city,ss_ticket_number,ss_addr_sk] [amt,sum(UnscaledValue(ss_net_profit)),profit,sum(UnscaledValue(ss_coupon_amt)),sum,sum] + HashAggregate [ss_customer_sk,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit)),sum,ss_addr_sk,s_city,ss_ticket_number] [sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit)),profit,sum,amt] InputAdapter Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city] #1 WholeStageCodegen - HashAggregate [sum,ss_customer_sk,ss_coupon_amt,sum,sum,sum,s_city,ss_ticket_number,ss_addr_sk,ss_net_profit] [sum,sum,sum,sum] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,s_city,ss_ticket_number] + HashAggregate [ss_customer_sk,sum,ss_coupon_amt,ss_net_profit,sum,sum,sum,ss_addr_sk,s_city,ss_ticket_number] [sum,sum,sum,sum] + Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,s_city,ss_coupon_amt,ss_net_profit] BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,s_city,ss_hdemo_sk,ss_ticket_number] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,s_city,ss_coupon_amt,ss_net_profit] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_hdemo_sk,ss_ticket_number] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_store_sk,ss_coupon_amt,ss_net_profit] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk,ss_coupon_amt,ss_net_profit] Filter [ss_sold_date_sk,ss_store_sk,ss_hdemo_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk,ss_coupon_amt,ss_net_profit] [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk,ss_coupon_amt,ss_net_profit] InputAdapter BroadcastExchange #2 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt index ec6c9ab88..41613515c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt @@ -1,12 +1,12 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NULLS FIRST], output=[channel#1,id#2,sales#3,returns#4,profit#5]) +- *(23) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[sum(sales#7), sum(returns#8), sum(profit#9)]) - +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 200) + +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 5) +- *(22) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[partial_sum(sales#7), partial_sum(returns#8), partial_sum(profit#9)]) +- *(22) Expand [List(sales#7, returns#8, profit#9, channel#10, id#11, 0), List(sales#7, returns#8, profit#9, channel#10, null, 1), List(sales#7, returns#8, profit#9, null, null, 3)], [sales#7, returns#8, profit#9, channel#1, id#2, spark_grouping_id#6] +- Union :- *(7) HashAggregate(keys=[s_store_id#12], functions=[sum(UnscaledValue(ss_ext_sales_price#13)), sum(coalesce(cast(sr_return_amt#14 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#15 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#16 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) - : +- Exchange hashpartitioning(s_store_id#12, 200) + : +- Exchange hashpartitioning(s_store_id#12, 5) : +- *(6) HashAggregate(keys=[s_store_id#12], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#13)), partial_sum(coalesce(cast(sr_return_amt#14 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#15 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#16 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) : +- *(6) Project [ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16, s_store_id#12] : +- *(6) BroadcastHashJoin [ss_promo_sk#17], [p_promo_sk#18], Inner, BuildRight @@ -42,7 +42,7 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL : +- *(5) Filter ((isnotnull(p_channel_tv#30) && (p_channel_tv#30 = N)) && isnotnull(p_promo_sk#18)) : +- *(5) FileScan parquet default.promotion[p_promo_sk#18,p_channel_tv#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)], ReadSchema: struct :- *(14) HashAggregate(keys=[cp_catalog_page_id#31], functions=[sum(UnscaledValue(cs_ext_sales_price#32)), sum(coalesce(cast(cr_return_amount#33 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#34 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#35 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) - : +- Exchange hashpartitioning(cp_catalog_page_id#31, 200) + : +- Exchange hashpartitioning(cp_catalog_page_id#31, 5) : +- *(13) HashAggregate(keys=[cp_catalog_page_id#31], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#32)), partial_sum(coalesce(cast(cr_return_amount#33 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#34 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#35 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) : +- *(13) Project [cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35, cp_catalog_page_id#31] : +- *(13) BroadcastHashJoin [cs_promo_sk#36], [p_promo_sk#18], Inner, BuildRight @@ -69,7 +69,7 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL : : +- ReusedExchange [i_item_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [p_promo_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(21) HashAggregate(keys=[web_site_id#44], functions=[sum(UnscaledValue(ws_ext_sales_price#45)), sum(coalesce(cast(wr_return_amt#46 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#47 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#48 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) - +- Exchange hashpartitioning(web_site_id#44, 200) + +- Exchange hashpartitioning(web_site_id#44, 5) +- *(20) HashAggregate(keys=[web_site_id#44], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#45)), partial_sum(coalesce(cast(wr_return_amt#46 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#47 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#48 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) +- *(20) Project [ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48, web_site_id#44] +- *(20) BroadcastHashJoin [ws_promo_sk#49], [p_promo_sk#18], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt index e16daec1c..13f8663bf 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt @@ -1,32 +1,32 @@ -TakeOrderedAndProject [profit,sales,id,channel,returns] +TakeOrderedAndProject [sales,profit,channel,id,returns] WholeStageCodegen - HashAggregate [sum,sum(sales),sum(returns),spark_grouping_id,id,channel,sum(profit),sum,sum] [profit,sum,sales,sum(sales),sum(returns),returns,sum(profit),sum,sum] + HashAggregate [sum(profit),sum(returns),sum,channel,sum,sum(sales),sum,spark_grouping_id,id] [sum(profit),sum(returns),sum,sales,profit,sum,sum(sales),sum,returns] InputAdapter Exchange [channel,id,spark_grouping_id] #1 WholeStageCodegen - HashAggregate [profit,sum,sum,returns,spark_grouping_id,sum,id,channel,sales,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Expand [profit,id,returns,sales,channel] + HashAggregate [sum,channel,sum,sum,sales,sum,spark_grouping_id,returns,id,sum,profit,sum] [sum,sum,sum,sum,sum,sum] + Expand [sales,returns,channel,profit,id] InputAdapter Union WholeStageCodegen - HashAggregate [sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),s_store_id,sum,sum(UnscaledValue(ss_ext_sales_price)),sum] [id,sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sales,sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),profit,sum,returns,sum(UnscaledValue(ss_ext_sales_price)),channel,sum] + HashAggregate [sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),s_store_id,sum,sum(UnscaledValue(ss_ext_sales_price)),sum,sum,sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00))] [profit,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum,sum(UnscaledValue(ss_ext_sales_price)),returns,sum,sum,sales,sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),channel,id] InputAdapter Exchange [s_store_id] #2 WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sr_return_amt,s_store_id,ss_ext_sales_price,sum,sr_net_loss,ss_net_profit,sum] [sum,sum,sum,sum,sum,sum] - Project [s_store_id,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + HashAggregate [sum,s_store_id,sum,sr_return_amt,ss_net_profit,sum,sum,ss_ext_sales_price,sr_net_loss,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [sr_return_amt,sr_net_loss,ss_ext_sales_price,s_store_id,ss_net_profit] BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [s_store_id,ss_ext_sales_price,ss_net_profit,ss_promo_sk,sr_return_amt,sr_net_loss] + Project [sr_return_amt,sr_net_loss,ss_promo_sk,ss_ext_sales_price,s_store_id,ss_net_profit] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [s_store_id,ss_item_sk,ss_ext_sales_price,ss_net_profit,ss_promo_sk,sr_return_amt,sr_net_loss] + Project [sr_return_amt,ss_item_sk,sr_net_loss,ss_promo_sk,ss_ext_sales_price,s_store_id,ss_net_profit] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_promo_sk,sr_return_amt,sr_net_loss] + Project [sr_return_amt,ss_item_sk,sr_net_loss,ss_promo_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_promo_sk,sr_return_amt,sr_net_loss] + Project [sr_return_amt,ss_item_sk,sr_net_loss,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_promo_sk,ss_ticket_number] + Project [ss_ticket_number,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk,ss_promo_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_promo_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_promo_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_ticket_number,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] [ss_ticket_number,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -58,24 +58,24 @@ TakeOrderedAndProject [profit,sales,id,channel,returns] Filter [p_channel_tv,p_promo_sk] Scan parquet default.promotion [p_promo_sk,p_channel_tv] [p_promo_sk,p_channel_tv] WholeStageCodegen - HashAggregate [sum(UnscaledValue(cs_ext_sales_price)),cp_catalog_page_id,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00))] [sum(UnscaledValue(cs_ext_sales_price)),sales,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),channel,sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),returns,id,profit] + HashAggregate [sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum,cp_catalog_page_id,sum(UnscaledValue(cs_ext_sales_price)),sum] [sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),returns,sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum,profit,id,channel,sum(UnscaledValue(cs_ext_sales_price)),sales,sum] InputAdapter Exchange [cp_catalog_page_id] #8 WholeStageCodegen - HashAggregate [cp_catalog_page_id,sum,sum,sum,sum,cr_return_amount,sum,cs_net_profit,sum,cr_net_loss,cs_ext_sales_price] [sum,sum,sum,sum,sum,sum] - Project [cr_net_loss,cp_catalog_page_id,cr_return_amount,cs_net_profit,cs_ext_sales_price] + HashAggregate [cs_net_profit,sum,sum,cs_ext_sales_price,cr_net_loss,cp_catalog_page_id,sum,cr_return_amount,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [cr_net_loss,cs_ext_sales_price,cp_catalog_page_id,cs_net_profit,cr_return_amount] BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_promo_sk,cr_net_loss,cp_catalog_page_id,cr_return_amount,cs_net_profit,cs_ext_sales_price] + Project [cr_net_loss,cs_promo_sk,cs_ext_sales_price,cp_catalog_page_id,cs_net_profit,cr_return_amount] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_promo_sk,cr_net_loss,cp_catalog_page_id,cr_return_amount,cs_item_sk,cs_net_profit,cs_ext_sales_price] + Project [cr_net_loss,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cp_catalog_page_id,cs_net_profit,cr_return_amount] BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] - Project [cs_promo_sk,cr_net_loss,cs_catalog_page_sk,cr_return_amount,cs_item_sk,cs_net_profit,cs_ext_sales_price] + Project [cr_net_loss,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_catalog_page_sk,cs_net_profit,cr_return_amount] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_promo_sk,cr_net_loss,cs_catalog_page_sk,cs_sold_date_sk,cr_return_amount,cs_item_sk,cs_net_profit,cs_ext_sales_price] + Project [cr_net_loss,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_catalog_page_sk,cs_sold_date_sk,cs_net_profit,cr_return_amount] BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] - Project [cs_promo_sk,cs_catalog_page_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit,cs_ext_sales_price] + Project [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_catalog_page_sk,cs_sold_date_sk,cs_net_profit,cs_order_number] Filter [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk] - Scan parquet default.catalog_sales [cs_promo_sk,cs_catalog_page_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit,cs_ext_sales_price] [cs_promo_sk,cs_catalog_page_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit,cs_ext_sales_price] + Scan parquet default.catalog_sales [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_catalog_page_sk,cs_sold_date_sk,cs_net_profit,cs_order_number] [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_catalog_page_sk,cs_sold_date_sk,cs_net_profit,cs_order_number] InputAdapter BroadcastExchange #9 WholeStageCodegen @@ -95,24 +95,24 @@ TakeOrderedAndProject [profit,sales,id,channel,returns] InputAdapter ReusedExchange [p_promo_sk] [p_promo_sk] #7 WholeStageCodegen - HashAggregate [web_site_id,sum,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ws_ext_sales_price)),sum,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00))] [sum,sales,id,returns,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ws_ext_sales_price)),profit,sum,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),channel] + HashAggregate [sum(UnscaledValue(ws_ext_sales_price)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum,web_site_id,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),profit,returns,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum,id,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum,sales,channel,sum] InputAdapter Exchange [web_site_id] #11 WholeStageCodegen - HashAggregate [web_site_id,sum,sum,sum,sum,ws_ext_sales_price,ws_net_profit,sum,wr_return_amt,wr_net_loss,sum] [sum,sum,sum,sum,sum,sum] - Project [web_site_id,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss] + HashAggregate [sum,wr_net_loss,ws_net_profit,sum,web_site_id,sum,ws_ext_sales_price,sum,wr_return_amt,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [ws_net_profit,web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price] BroadcastHashJoin [ws_promo_sk,p_promo_sk] - Project [web_site_id,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss,ws_promo_sk] + Project [ws_net_profit,web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_promo_sk] BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [web_site_id,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss,ws_promo_sk,ws_item_sk] + Project [ws_net_profit,web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_promo_sk,ws_item_sk] BroadcastHashJoin [ws_web_site_sk,web_site_sk] - Project [ws_web_site_sk,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss,ws_promo_sk,ws_item_sk] + Project [ws_net_profit,ws_web_site_sk,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_promo_sk,ws_item_sk] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_web_site_sk,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss,ws_sold_date_sk,ws_promo_sk,ws_item_sk] + Project [ws_net_profit,ws_web_site_sk,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_promo_sk,ws_item_sk,ws_sold_date_sk] BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] - Project [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_promo_sk,ws_item_sk] + Project [ws_net_profit,ws_web_site_sk,ws_order_number,ws_ext_sales_price,ws_promo_sk,ws_item_sk,ws_sold_date_sk] Filter [ws_sold_date_sk,ws_web_site_sk,ws_item_sk,ws_promo_sk] - Scan parquet default.web_sales [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_promo_sk,ws_item_sk] [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_promo_sk,ws_item_sk] + Scan parquet default.web_sales [ws_net_profit,ws_web_site_sk,ws_order_number,ws_ext_sales_price,ws_promo_sk,ws_item_sk,ws_sold_date_sk] [ws_net_profit,ws_web_site_sk,ws_order_number,ws_ext_sales_price,ws_promo_sk,ws_item_sk,ws_sold_date_sk] InputAdapter BroadcastExchange #12 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt index 92713c532..93900040c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt @@ -8,7 +8,7 @@ TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST,c_salu : : +- *(11) BroadcastHashJoin [ctr_state#21], [ctr_state#21#22], Inner, BuildRight, (cast(ctr_total_return#16 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#23) : : :- *(11) Filter isnotnull(ctr_total_return#16) : : : +- *(11) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[sum(UnscaledValue(cr_return_amt_inc_tax#25))]) - : : : +- Exchange hashpartitioning(cr_returning_customer_sk#24, ca_state#11, 200) + : : : +- Exchange hashpartitioning(cr_returning_customer_sk#24, ca_state#11, 5) : : : +- *(3) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[partial_sum(UnscaledValue(cr_return_amt_inc_tax#25))]) : : : +- *(3) Project [cr_returning_customer_sk#24, cr_return_amt_inc_tax#25, ca_state#11] : : : +- *(3) BroadcastHashJoin [cr_returning_addr_sk#26], [ca_address_sk#18], Inner, BuildRight @@ -28,10 +28,10 @@ TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST,c_salu : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) : : +- *(8) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#23) : : +- *(8) HashAggregate(keys=[ctr_state#21], functions=[avg(ctr_total_return#16)]) - : : +- Exchange hashpartitioning(ctr_state#21, 200) + : : +- Exchange hashpartitioning(ctr_state#21, 5) : : +- *(7) HashAggregate(keys=[ctr_state#21], functions=[partial_avg(ctr_total_return#16)]) : : +- *(7) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[sum(UnscaledValue(cr_return_amt_inc_tax#25))]) - : : +- Exchange hashpartitioning(cr_returning_customer_sk#24, ca_state#11, 200) + : : +- Exchange hashpartitioning(cr_returning_customer_sk#24, ca_state#11, 5) : : +- *(6) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[partial_sum(UnscaledValue(cr_return_amt_inc_tax#25))]) : : +- *(6) Project [cr_returning_customer_sk#24, cr_return_amt_inc_tax#25, ca_state#11] : : +- *(6) BroadcastHashJoin [cr_returning_addr_sk#26], [ca_address_sk#18], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/simplified.txt index 8c75e6308..c0e198448 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/simplified.txt @@ -1,17 +1,17 @@ -TakeOrderedAndProject [ca_suite_number,ca_street_name,ca_city,ca_county,ctr_total_return,ca_street_number,c_salutation,ca_country,ca_zip,ca_street_type,ca_gmt_offset,c_customer_id,c_last_name,ca_location_type,c_first_name,ca_state] +TakeOrderedAndProject [ca_street_number,ctr_total_return,ca_street_type,ca_country,ca_city,ca_zip,ca_street_name,c_last_name,ca_gmt_offset,c_customer_id,ca_location_type,c_first_name,c_salutation,ca_suite_number,ca_state,ca_county] WholeStageCodegen - Project [ca_suite_number,ca_street_name,ca_city,ca_county,ctr_total_return,ca_street_number,c_salutation,ca_country,ca_zip,ca_street_type,ca_gmt_offset,c_customer_id,c_last_name,ca_location_type,c_first_name,ca_state] + Project [ca_street_number,ctr_total_return,ca_street_type,ca_country,ca_city,ca_zip,ca_street_name,c_last_name,ca_gmt_offset,c_customer_id,ca_location_type,c_first_name,c_salutation,ca_suite_number,ca_state,ca_county] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,c_customer_id,c_last_name,c_first_name,c_salutation,ctr_total_return] + Project [c_customer_id,ctr_total_return,c_last_name,c_first_name,c_salutation,c_current_addr_sk] BroadcastHashJoin [ctr_customer_sk,c_customer_sk] Project [ctr_customer_sk,ctr_total_return] BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] Filter [ctr_total_return] - HashAggregate [cr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] [sum,sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_state,ctr_customer_sk,ctr_total_return] + HashAggregate [cr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] [sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_total_return,sum,ctr_customer_sk,ctr_state] InputAdapter Exchange [cr_returning_customer_sk,ca_state] #1 WholeStageCodegen - HashAggregate [sum,cr_returning_customer_sk,sum,cr_return_amt_inc_tax,ca_state] [sum,sum] + HashAggregate [cr_return_amt_inc_tax,sum,cr_returning_customer_sk,sum,ca_state] [sum,sum] Project [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] BroadcastHashJoin [cr_returning_addr_sk,ca_address_sk] Project [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] @@ -35,16 +35,16 @@ TakeOrderedAndProject [ca_suite_number,ca_street_name,ca_city,ca_county,ctr_tota BroadcastExchange #4 WholeStageCodegen Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] - HashAggregate [ctr_state,sum,count,avg(ctr_total_return)] [count,avg(ctr_total_return),(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),sum,ctr_state] + HashAggregate [ctr_state,sum,count,avg(ctr_total_return)] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),sum,count,avg(ctr_total_return),ctr_state] InputAdapter Exchange [ctr_state] #5 WholeStageCodegen - HashAggregate [count,ctr_state,sum,count,sum,ctr_total_return] [sum,count,sum,count] + HashAggregate [sum,count,ctr_state,ctr_total_return,sum,count] [sum,count,sum,count] HashAggregate [cr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] [sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_state,ctr_total_return,sum] InputAdapter Exchange [cr_returning_customer_sk,ca_state] #6 WholeStageCodegen - HashAggregate [cr_returning_customer_sk,sum,cr_return_amt_inc_tax,sum,ca_state] [sum,sum] + HashAggregate [cr_return_amt_inc_tax,sum,sum,cr_returning_customer_sk,ca_state] [sum,sum] Project [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] BroadcastHashJoin [cr_returning_addr_sk,ca_address_sk] Project [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] @@ -59,12 +59,12 @@ TakeOrderedAndProject [ca_suite_number,ca_street_name,ca_city,ca_county,ctr_tota InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [c_current_addr_sk,c_customer_id,c_customer_sk,c_last_name,c_first_name,c_salutation] + Project [c_customer_id,c_last_name,c_first_name,c_customer_sk,c_salutation,c_current_addr_sk] Filter [c_customer_sk,c_current_addr_sk] - Scan parquet default.customer [c_current_addr_sk,c_customer_id,c_customer_sk,c_last_name,c_first_name,c_salutation] [c_current_addr_sk,c_customer_id,c_customer_sk,c_last_name,c_first_name,c_salutation] + Scan parquet default.customer [c_customer_id,c_last_name,c_first_name,c_customer_sk,c_salutation,c_current_addr_sk] [c_customer_id,c_last_name,c_first_name,c_customer_sk,c_salutation,c_current_addr_sk] InputAdapter BroadcastExchange #8 WholeStageCodegen - Project [ca_state,ca_zip,ca_street_type,ca_location_type,ca_city,ca_address_sk,ca_county,ca_gmt_offset,ca_street_number,ca_country,ca_suite_number,ca_street_name] + Project [ca_address_sk,ca_city,ca_street_type,ca_state,ca_location_type,ca_street_number,ca_gmt_offset,ca_suite_number,ca_zip,ca_street_name,ca_county,ca_country] Filter [ca_state,ca_address_sk] - Scan parquet default.customer_address [ca_state,ca_zip,ca_street_type,ca_location_type,ca_city,ca_address_sk,ca_county,ca_gmt_offset,ca_street_number,ca_country,ca_suite_number,ca_street_name] [ca_state,ca_zip,ca_street_type,ca_location_type,ca_city,ca_address_sk,ca_county,ca_gmt_offset,ca_street_number,ca_country,ca_suite_number,ca_street_name] + Scan parquet default.customer_address [ca_address_sk,ca_city,ca_street_type,ca_state,ca_location_type,ca_street_number,ca_gmt_offset,ca_suite_number,ca_zip,ca_street_name,ca_county,ca_country] [ca_address_sk,ca_city,ca_street_type,ca_state,ca_location_type,ca_street_number,ca_gmt_offset,ca_suite_number,ca_zip,ca_street_name,ca_county,ca_country] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/explain.txt index 676bb654b..7b85bc894 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,i_current_price#3]) +- *(5) HashAggregate(keys=[i_item_id#1, i_item_desc#2, i_current_price#3], functions=[]) - +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, i_current_price#3, 200) + +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, i_current_price#3, 5) +- *(4) HashAggregate(keys=[i_item_id#1, i_item_desc#2, i_current_price#3], functions=[]) +- *(4) Project [i_item_id#1, i_item_desc#2, i_current_price#3] +- *(4) BroadcastHashJoin [i_item_sk#4], [ss_item_sk#5], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt index e7c48e44c..a388cb4ec 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt @@ -9,11 +9,11 @@ TakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] BroadcastHashJoin [i_item_sk,ss_item_sk] Project [i_item_sk,i_item_id,i_item_desc,i_current_price] BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [i_current_price,i_item_sk,inv_date_sk,i_item_desc,i_item_id] + Project [i_current_price,i_item_sk,inv_date_sk,i_item_id,i_item_desc] BroadcastHashJoin [i_item_sk,inv_item_sk] Project [i_item_sk,i_item_id,i_item_desc,i_current_price] Filter [i_current_price,i_manufact_id,i_item_sk] - Scan parquet default.item [i_current_price,i_manufact_id,i_item_sk,i_item_desc,i_item_id] [i_current_price,i_manufact_id,i_item_sk,i_item_desc,i_item_id] + Scan parquet default.item [i_manufact_id,i_current_price,i_item_sk,i_item_id,i_item_desc] [i_manufact_id,i_current_price,i_item_sk,i_item_id,i_item_desc] InputAdapter BroadcastExchange #2 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt index 46662f99a..8b674b50e 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt @@ -5,7 +5,7 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,sr_item_qty# :- *(18) Project [item_id#1, sr_item_qty#2, cr_item_qty#4] : +- *(18) BroadcastHashJoin [item_id#1], [item_id#10], Inner, BuildRight : :- *(18) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(sr_return_quantity#12 as bigint))]) - : : +- Exchange hashpartitioning(i_item_id#11, 200) + : : +- Exchange hashpartitioning(i_item_id#11, 5) : : +- *(5) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(cast(sr_return_quantity#12 as bigint))]) : : +- *(5) Project [sr_return_quantity#12, i_item_id#11] : : +- *(5) BroadcastHashJoin [sr_returned_date_sk#13], [cast(d_date_sk#14 as bigint)], Inner, BuildRight @@ -34,7 +34,7 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,sr_item_qty# : : +- *(2) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : +- *(11) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(cr_return_quantity#21 as bigint))]) - : +- Exchange hashpartitioning(i_item_id#11, 200) + : +- Exchange hashpartitioning(i_item_id#11, 5) : +- *(10) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(cast(cr_return_quantity#21 as bigint))]) : +- *(10) Project [cr_return_quantity#21, i_item_id#11] : +- *(10) BroadcastHashJoin [cr_returned_date_sk#22], [d_date_sk#14], Inner, BuildRight @@ -56,7 +56,7 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,sr_item_qty# : +- ReusedExchange [d_date#17#24], BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) +- *(17) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(wr_return_quantity#25 as bigint))]) - +- Exchange hashpartitioning(i_item_id#11, 200) + +- Exchange hashpartitioning(i_item_id#11, 5) +- *(16) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(cast(wr_return_quantity#25 as bigint))]) +- *(16) Project [wr_return_quantity#25, i_item_id#11] +- *(16) BroadcastHashJoin [wr_returned_date_sk#26], [cast(d_date_sk#14 as bigint)], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt index 027dc649e..3a19ca94c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt @@ -1,4 +1,4 @@ -TakeOrderedAndProject [wr_item_qty,cr_item_qty,wr_dev,sr_dev,sr_item_qty,average,item_id,cr_dev] +TakeOrderedAndProject [wr_item_qty,average,cr_dev,sr_item_qty,item_id,cr_item_qty,sr_dev,wr_dev] WholeStageCodegen Project [item_id,sr_item_qty,cr_item_qty,wr_item_qty] BroadcastHashJoin [item_id,item_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt index 95001b56d..d3a8a9d5f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt @@ -4,15 +4,15 @@ TakeOrderedAndProject [c_customer_id,customer_id,customername] BroadcastHashJoin [cd_demo_sk,sr_cdemo_sk] Project [c_customer_id,c_first_name,c_last_name,cd_demo_sk] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [cd_demo_sk,c_customer_id,hd_income_band_sk,c_last_name,c_first_name] + Project [c_customer_id,hd_income_band_sk,c_last_name,c_first_name,cd_demo_sk] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [cd_demo_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] + Project [c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name,cd_demo_sk] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] - Project [c_current_cdemo_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] + Project [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_last_name,c_first_name] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_cdemo_sk,c_current_addr_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] + Project [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_last_name,c_first_name,c_current_addr_sk] Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] - Scan parquet default.customer [c_current_cdemo_sk,c_current_addr_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] [c_current_cdemo_sk,c_current_addr_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] + Scan parquet default.customer [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_last_name,c_first_name,c_current_addr_sk] [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_last_name,c_first_name,c_current_addr_sk] InputAdapter BroadcastExchange #1 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt index 560b1eba0..86abf1b0a 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[substring(r_reason_desc, 1, 20)#1 ASC NULLS FIRST,aggOrder#2 ASC NULLS FIRST,avg(wr_refunded_cash)#3 ASC NULLS FIRST,avg(wr_fee)#4 ASC NULLS FIRST], output=[substring(r_reason_desc, 1, 20)#1,avg(ws_quantity)#5,avg(wr_refunded_cash)#3,avg(wr_fee)#4]) +- *(9) HashAggregate(keys=[r_reason_desc#6], functions=[avg(cast(ws_quantity#7 as bigint)), avg(UnscaledValue(wr_refunded_cash#8)), avg(UnscaledValue(wr_fee#9))]) - +- Exchange hashpartitioning(r_reason_desc#6, 200) + +- Exchange hashpartitioning(r_reason_desc#6, 5) +- *(8) HashAggregate(keys=[r_reason_desc#6], functions=[partial_avg(cast(ws_quantity#7 as bigint)), partial_avg(UnscaledValue(wr_refunded_cash#8)), partial_avg(UnscaledValue(wr_fee#9))]) +- *(8) Project [ws_quantity#7, wr_fee#9, wr_refunded_cash#8, r_reason_desc#6] +- *(8) BroadcastHashJoin [wr_reason_sk#10], [cast(r_reason_sk#11 as bigint)], Inner, BuildRight @@ -30,12 +30,12 @@ TakeOrderedAndProject(limit=100, orderBy=[substring(r_reason_desc, 1, 20)#1 ASC : : : : : +- *(2) FileScan parquet default.web_page[wp_web_page_sk#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_page], PartitionFilters: [], PushedFilters: [IsNotNull(wp_web_page_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(3) Project [cd_demo_sk#25, cd_marital_status#19, cd_education_status#20] - : : : : +- *(3) Filter ((isnotnull(cd_demo_sk#25) && isnotnull(cd_education_status#20)) && isnotnull(cd_marital_status#19)) - : : : : +- *(3) FileScan parquet default.customer_demographics[cd_demo_sk#25,cd_marital_status#19,cd_education_status#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_education_status), IsNotNull(cd_marital_status)], ReadSchema: struct + : : : : +- *(3) Filter ((isnotnull(cd_demo_sk#25) && isnotnull(cd_marital_status#19)) && isnotnull(cd_education_status#20)) + : : : : +- *(3) FileScan parquet default.customer_demographics[cd_demo_sk#25,cd_marital_status#19,cd_education_status#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint), input[1, string, true], input[2, string, true])) : : : +- *(4) Project [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] - : : : +- *(4) Filter ((isnotnull(cd_education_status#23) && isnotnull(cd_marital_status#22)) && isnotnull(cd_demo_sk#21)) - : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#21,cd_marital_status#22,cd_education_status#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_education_status), IsNotNull(cd_marital_status), IsNotNull(cd_demo_sk)], ReadSchema: struct + : : : +- *(4) Filter ((isnotnull(cd_marital_status#22) && isnotnull(cd_demo_sk#21)) && isnotnull(cd_education_status#23)) + : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#21,cd_marital_status#22,cd_education_status#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_marital_status), IsNotNull(cd_demo_sk), IsNotNull(cd_education_status)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(5) Project [ca_address_sk#15, ca_state#16] : : +- *(5) Filter ((isnotnull(ca_country#33) && (ca_country#33 = United States)) && isnotnull(ca_address_sk#15)) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt index b9fdfc26f..c4cb39c7a 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt @@ -1,33 +1,33 @@ -TakeOrderedAndProject [avg(wr_fee),aggOrder,avg(ws_quantity),substring(r_reason_desc, 1, 20),avg(wr_refunded_cash)] +TakeOrderedAndProject [avg(wr_refunded_cash),avg(wr_fee),avg(ws_quantity),substring(r_reason_desc, 1, 20),aggOrder] WholeStageCodegen - HashAggregate [r_reason_desc,count,sum,avg(UnscaledValue(wr_fee)),avg(UnscaledValue(wr_refunded_cash)),sum,count,count,avg(cast(ws_quantity as bigint)),sum] [count,avg(wr_fee),aggOrder,avg(ws_quantity),substring(r_reason_desc, 1, 20),sum,avg(UnscaledValue(wr_fee)),avg(wr_refunded_cash),avg(UnscaledValue(wr_refunded_cash)),sum,count,count,avg(cast(ws_quantity as bigint)),sum] + HashAggregate [sum,avg(cast(ws_quantity as bigint)),count,r_reason_desc,count,sum,count,avg(UnscaledValue(wr_refunded_cash)),avg(UnscaledValue(wr_fee)),sum] [sum,avg(wr_refunded_cash),avg(cast(ws_quantity as bigint)),avg(wr_fee),count,count,sum,count,avg(UnscaledValue(wr_refunded_cash)),avg(UnscaledValue(wr_fee)),avg(ws_quantity),substring(r_reason_desc, 1, 20),sum,aggOrder] InputAdapter Exchange [r_reason_desc] #1 WholeStageCodegen - HashAggregate [r_reason_desc,count,wr_refunded_cash,sum,count,wr_fee,sum,sum,sum,count,count,count,count,sum,ws_quantity,sum] [count,sum,count,sum,sum,sum,count,count,count,count,sum,sum] + HashAggregate [sum,sum,count,wr_refunded_cash,count,r_reason_desc,wr_fee,count,sum,sum,count,count,sum,sum,count,ws_quantity] [sum,sum,count,count,count,sum,sum,count,count,sum,sum,count] Project [ws_quantity,wr_fee,wr_refunded_cash,r_reason_desc] BroadcastHashJoin [wr_reason_sk,r_reason_sk] Project [ws_quantity,wr_reason_sk,wr_fee,wr_refunded_cash] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [wr_reason_sk,ws_quantity,wr_refunded_cash,ws_sold_date_sk,wr_fee] + Project [wr_fee,wr_reason_sk,ws_quantity,wr_refunded_cash,ws_sold_date_sk] BroadcastHashJoin [wr_refunded_addr_sk,ca_address_sk,ca_state,ws_net_profit] - Project [wr_refunded_addr_sk,wr_reason_sk,ws_quantity,ws_net_profit,wr_refunded_cash,ws_sold_date_sk,wr_fee] - BroadcastHashJoin [cd_education_status,cd_demo_sk,wr_returning_cdemo_sk,cd_education_status,cd_marital_status,cd_marital_status] - Project [wr_refunded_addr_sk,wr_reason_sk,ws_quantity,ws_net_profit,cd_marital_status,wr_refunded_cash,ws_sold_date_sk,wr_fee,wr_returning_cdemo_sk,cd_education_status] - BroadcastHashJoin [cd_education_status,ws_sales_price,cd_demo_sk,cd_marital_status,wr_refunded_cdemo_sk] - Project [wr_refunded_addr_sk,wr_reason_sk,ws_quantity,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_cash,ws_sold_date_sk,wr_fee,wr_returning_cdemo_sk,ws_sales_price] + Project [wr_fee,ws_net_profit,wr_refunded_addr_sk,wr_reason_sk,ws_quantity,wr_refunded_cash,ws_sold_date_sk] + BroadcastHashJoin [cd_marital_status,cd_education_status,cd_marital_status,cd_education_status,cd_demo_sk,wr_returning_cdemo_sk] + Project [wr_fee,ws_net_profit,wr_returning_cdemo_sk,wr_refunded_addr_sk,cd_education_status,wr_reason_sk,cd_marital_status,ws_quantity,wr_refunded_cash,ws_sold_date_sk] + BroadcastHashJoin [wr_refunded_cdemo_sk,cd_education_status,cd_demo_sk,cd_marital_status,ws_sales_price] + Project [wr_fee,ws_net_profit,wr_returning_cdemo_sk,wr_refunded_addr_sk,ws_sales_price,wr_reason_sk,ws_quantity,wr_refunded_cash,ws_sold_date_sk,wr_refunded_cdemo_sk] BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] - Project [wr_refunded_addr_sk,wr_reason_sk,ws_web_page_sk,ws_quantity,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_cash,ws_sold_date_sk,wr_fee,wr_returning_cdemo_sk,ws_sales_price] + Project [wr_fee,ws_net_profit,wr_returning_cdemo_sk,wr_refunded_addr_sk,ws_web_page_sk,ws_sales_price,wr_reason_sk,ws_quantity,wr_refunded_cash,ws_sold_date_sk,wr_refunded_cdemo_sk] BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] - Project [ws_web_page_sk,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_sales_price,ws_item_sk] + Project [ws_net_profit,ws_order_number,ws_web_page_sk,ws_sales_price,ws_quantity,ws_item_sk,ws_sold_date_sk] Filter [ws_item_sk,ws_order_number,ws_web_page_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_web_page_sk,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_sales_price,ws_item_sk] [ws_web_page_sk,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_sales_price,ws_item_sk] + Scan parquet default.web_sales [ws_net_profit,ws_order_number,ws_web_page_sk,ws_sales_price,ws_quantity,ws_item_sk,ws_sold_date_sk] [ws_net_profit,ws_order_number,ws_web_page_sk,ws_sales_price,ws_quantity,ws_item_sk,ws_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [wr_order_number,wr_refunded_addr_sk,wr_reason_sk,wr_refunded_cdemo_sk,wr_refunded_cash,wr_fee,wr_returning_cdemo_sk,wr_item_sk] - Filter [wr_item_sk,wr_reason_sk,wr_order_number,wr_returning_cdemo_sk,wr_refunded_addr_sk,wr_refunded_cdemo_sk] - Scan parquet default.web_returns [wr_order_number,wr_refunded_addr_sk,wr_reason_sk,wr_refunded_cdemo_sk,wr_refunded_cash,wr_fee,wr_returning_cdemo_sk,wr_item_sk] [wr_order_number,wr_refunded_addr_sk,wr_reason_sk,wr_refunded_cdemo_sk,wr_refunded_cash,wr_fee,wr_returning_cdemo_sk,wr_item_sk] + Project [wr_fee,wr_order_number,wr_returning_cdemo_sk,wr_refunded_addr_sk,wr_reason_sk,wr_item_sk,wr_refunded_cash,wr_refunded_cdemo_sk] + Filter [wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_reason_sk,wr_item_sk,wr_order_number,wr_returning_cdemo_sk] + Scan parquet default.web_returns [wr_fee,wr_order_number,wr_returning_cdemo_sk,wr_refunded_addr_sk,wr_reason_sk,wr_item_sk,wr_refunded_cash,wr_refunded_cdemo_sk] [wr_fee,wr_order_number,wr_returning_cdemo_sk,wr_refunded_addr_sk,wr_reason_sk,wr_item_sk,wr_refunded_cash,wr_refunded_cdemo_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -38,13 +38,13 @@ TakeOrderedAndProject [avg(wr_fee),aggOrder,avg(ws_quantity),substring(r_reason_ BroadcastExchange #4 WholeStageCodegen Project [cd_demo_sk,cd_marital_status,cd_education_status] - Filter [cd_demo_sk,cd_education_status,cd_marital_status] + Filter [cd_demo_sk,cd_marital_status,cd_education_status] Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [cd_demo_sk,cd_marital_status,cd_education_status] - Filter [cd_education_status,cd_demo_sk,cd_marital_status] + Filter [cd_marital_status,cd_education_status,cd_demo_sk] Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #6 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt index 9a562efa3..61285df23 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt @@ -3,9 +3,9 @@ TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WH +- *(6) Project [total_sum#4, i_category#2, i_class#5, lochierarchy#1, rank_within_parent#3] +- Window [rank(_w3#6) windowspecdefinition(_w1#7, _w2#8, _w3#6 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#3], [_w1#7, _w2#8], [_w3#6 DESC NULLS LAST] +- *(5) Sort [_w1#7 ASC NULLS FIRST, _w2#8 ASC NULLS FIRST, _w3#6 DESC NULLS LAST], false, 0 - +- Exchange hashpartitioning(_w1#7, _w2#8, 200) + +- Exchange hashpartitioning(_w1#7, _w2#8, 5) +- *(4) HashAggregate(keys=[i_category#2, i_class#5, spark_grouping_id#9], functions=[sum(UnscaledValue(ws_net_paid#10))]) - +- Exchange hashpartitioning(i_category#2, i_class#5, spark_grouping_id#9, 200) + +- Exchange hashpartitioning(i_category#2, i_class#5, spark_grouping_id#9, 5) +- *(3) HashAggregate(keys=[i_category#2, i_class#5, spark_grouping_id#9], functions=[partial_sum(UnscaledValue(ws_net_paid#10))]) +- *(3) Expand [List(ws_net_paid#10, i_category#11, i_class#12, 0), List(ws_net_paid#10, i_category#11, null, 1), List(ws_net_paid#10, null, null, 3)], [ws_net_paid#10, i_category#2, i_class#5, spark_grouping_id#9] +- *(3) Project [ws_net_paid#10, i_category#13 AS i_category#11, i_class#14 AS i_class#12] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt index 71aae564f..7ee3cf5f4 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt @@ -1,6 +1,6 @@ -TakeOrderedAndProject [i_category,total_sum,rank_within_parent,lochierarchy,i_class] +TakeOrderedAndProject [rank_within_parent,i_class,total_sum,lochierarchy,i_category] WholeStageCodegen - Project [i_category,total_sum,rank_within_parent,lochierarchy,i_class] + Project [rank_within_parent,i_class,total_sum,lochierarchy,i_category] InputAdapter Window [_w3,_w1,_w2] WholeStageCodegen @@ -8,11 +8,11 @@ TakeOrderedAndProject [i_category,total_sum,rank_within_parent,lochierarchy,i_cl InputAdapter Exchange [_w1,_w2] #1 WholeStageCodegen - HashAggregate [i_category,sum(UnscaledValue(ws_net_paid)),sum,i_class,spark_grouping_id] [_w1,total_sum,sum(UnscaledValue(ws_net_paid)),sum,_w2,lochierarchy,_w3] + HashAggregate [i_class,sum,sum(UnscaledValue(ws_net_paid)),i_category,spark_grouping_id] [sum,sum(UnscaledValue(ws_net_paid)),_w1,_w3,total_sum,lochierarchy,_w2] InputAdapter Exchange [i_category,i_class,spark_grouping_id] #2 WholeStageCodegen - HashAggregate [i_category,sum,ws_net_paid,i_class,sum,spark_grouping_id] [sum,sum] + HashAggregate [i_class,sum,i_category,ws_net_paid,spark_grouping_id,sum] [sum,sum] Expand [ws_net_paid,i_category,i_class] Project [ws_net_paid,i_category,i_class] BroadcastHashJoin [ws_item_sk,i_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt index 2868a7ac8..c47ee7616 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt @@ -9,7 +9,7 @@ : +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) : +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#7, ), coalesce(c_first_name#8, ), coalesce(d_date#9, 0)], LeftAnti, BuildRight, (((c_last_name#1 <=> c_last_name#7) && (c_first_name#2 <=> c_first_name#8)) && (d_date#3 <=> d_date#9)) : :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - : : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, d_date#3, 200) + : : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, d_date#3, 5) : : +- *(3) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) : : +- *(3) Project [c_last_name#1, c_first_name#2, d_date#3] : : +- *(3) BroadcastHashJoin [ss_customer_sk#10], [c_customer_sk#11], Inner, BuildRight @@ -28,7 +28,7 @@ : : +- *(2) FileScan parquet default.customer[c_customer_sk#11,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) : +- *(7) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) - : +- Exchange hashpartitioning(c_last_name#7, c_first_name#8, d_date#9, 200) + : +- Exchange hashpartitioning(c_last_name#7, c_first_name#8, d_date#9, 5) : +- *(6) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) : +- *(6) Project [c_last_name#7, c_first_name#8, d_date#9] : +- *(6) BroadcastHashJoin [cs_bill_customer_sk#15], [c_customer_sk#16], Inner, BuildRight @@ -41,7 +41,7 @@ : +- ReusedExchange [c_customer_sk#16, c_first_name#8, c_last_name#7], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) +- *(11) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) - +- Exchange hashpartitioning(c_last_name#4, c_first_name#5, d_date#6, 200) + +- Exchange hashpartitioning(c_last_name#4, c_first_name#5, d_date#6, 5) +- *(10) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) +- *(10) Project [c_last_name#4, c_first_name#5, d_date#6] +- *(10) BroadcastHashJoin [ws_bill_customer_sk#19], [c_customer_sk#20], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt index c5008bdd8..6fc2010b4 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt @@ -6,10 +6,10 @@ WholeStageCodegen HashAggregate [count,count] [count,count] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_first_name,d_date,d_date,c_last_name,c_last_name,c_first_name] + BroadcastHashJoin [d_date,c_last_name,d_date,c_last_name,c_first_name,c_first_name] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - BroadcastHashJoin [d_date,c_last_name,c_first_name,c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_first_name,d_date,c_last_name,d_date,c_first_name,c_last_name] HashAggregate [c_last_name,c_first_name,d_date] InputAdapter Exchange [c_last_name,c_first_name,d_date] #2 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt index 4a69feebe..fb0d68dce 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt @@ -4,9 +4,9 @@ TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast( +- *(7) Filter (CASE WHEN NOT (avg_monthly_sales#2 = 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000) +- Window [avg(_w0#9) windowspecdefinition(i_category#4, i_brand#6, s_store_name#3, s_company_name#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#6, s_store_name#3, s_company_name#7] +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#6 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#7 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(i_category#4, i_brand#6, s_store_name#3, s_company_name#7, 200) + +- Exchange hashpartitioning(i_category#4, i_brand#6, s_store_name#3, s_company_name#7, 5) +- *(5) HashAggregate(keys=[i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#10))]) - +- Exchange hashpartitioning(i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8, 200) + +- Exchange hashpartitioning(i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8, 5) +- *(4) HashAggregate(keys=[i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#10))]) +- *(4) Project [i_brand#6, i_class#5, i_category#4, ss_sales_price#10, d_moy#8, s_store_name#3, s_company_name#7] +- *(4) BroadcastHashJoin [ss_store_sk#11], [s_store_sk#12], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt index 31fd0675f..98d2e6837 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt @@ -1,24 +1,24 @@ -TakeOrderedAndProject [s_company_name,d_moy,sum_sales,i_brand,i_category,avg_monthly_sales,s_store_name,i_class] +TakeOrderedAndProject [avg_monthly_sales,d_moy,sum_sales,s_store_name,s_company_name,i_category,i_brand,i_class] WholeStageCodegen - Project [s_company_name,d_moy,sum_sales,i_brand,avg_monthly_sales,i_category,i_class,s_store_name] + Project [sum_sales,s_store_name,d_moy,s_company_name,i_category,avg_monthly_sales,i_brand,i_class] Filter [avg_monthly_sales,sum_sales] InputAdapter - Window [s_company_name,i_brand,i_category,s_store_name,_w0] + Window [s_store_name,_w0,s_company_name,i_category,i_brand] WholeStageCodegen Sort [i_category,i_brand,s_store_name,s_company_name] InputAdapter Exchange [i_category,i_brand,s_store_name,s_company_name] #1 WholeStageCodegen - HashAggregate [s_company_name,d_moy,i_brand,sum(UnscaledValue(ss_sales_price)),i_category,sum,i_class,s_store_name] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + HashAggregate [sum,sum(UnscaledValue(ss_sales_price)),s_store_name,d_moy,s_company_name,i_category,i_brand,i_class] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] InputAdapter - Exchange [s_company_name,d_moy,i_brand,i_category,i_class,s_store_name] #2 + Exchange [s_store_name,d_moy,s_company_name,i_category,i_brand,i_class] #2 WholeStageCodegen - HashAggregate [s_company_name,d_moy,i_brand,ss_sales_price,i_category,sum,sum,i_class,s_store_name] [sum,sum] - Project [i_class,s_store_name,s_company_name,d_moy,i_category,ss_sales_price,i_brand] + HashAggregate [sum,sum,s_store_name,d_moy,ss_sales_price,s_company_name,i_category,i_brand,i_class] [sum,sum] + Project [ss_sales_price,i_brand,i_category,s_company_name,s_store_name,i_class,d_moy] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [i_class,d_moy,ss_store_sk,i_category,ss_sales_price,i_brand] + Project [ss_sales_price,i_brand,i_category,ss_store_sk,i_class,d_moy] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [i_class,ss_store_sk,i_category,ss_sales_price,ss_sold_date_sk,i_brand] + Project [ss_sales_price,i_brand,i_category,ss_sold_date_sk,ss_store_sk,i_class] BroadcastHashJoin [i_item_sk,ss_item_sk] Project [i_item_sk,i_brand,i_class,i_category] Filter [i_category,i_class,i_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt index 0ed6b1727..f0259e956 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt @@ -1,104 +1,104 @@ == Physical Plan == -*(1) Project [CASE WHEN (Subquery subquery1150 > 62316685) THEN Subquery subquery1151 ELSE Subquery subquery1152 END AS bucket1#1, CASE WHEN (Subquery subquery1154 > 19045798) THEN Subquery subquery1155 ELSE Subquery subquery1156 END AS bucket2#2, CASE WHEN (Subquery subquery1158 > 365541424) THEN Subquery subquery1159 ELSE Subquery subquery1160 END AS bucket3#3, CASE WHEN (Subquery subquery1162 > 216357808) THEN Subquery subquery1163 ELSE Subquery subquery1164 END AS bucket4#4, CASE WHEN (Subquery subquery1166 > 184483884) THEN Subquery subquery1167 ELSE Subquery subquery1168 END AS bucket5#5] -: :- Subquery subquery1150 +*(1) Project [CASE WHEN (Subquery subquery3390 > 62316685) THEN Subquery subquery3391 ELSE Subquery subquery3392 END AS bucket1#1, CASE WHEN (Subquery subquery3394 > 19045798) THEN Subquery subquery3395 ELSE Subquery subquery3396 END AS bucket2#2, CASE WHEN (Subquery subquery3398 > 365541424) THEN Subquery subquery3399 ELSE Subquery subquery3400 END AS bucket3#3, CASE WHEN (Subquery subquery3402 > 216357808) THEN Subquery subquery3403 ELSE Subquery subquery3404 END AS bucket4#4, CASE WHEN (Subquery subquery3406 > 184483884) THEN Subquery subquery3407 ELSE Subquery subquery3408 END AS bucket5#5] +: :- Subquery subquery3390 : : +- *(2) HashAggregate(keys=[], functions=[count(1)]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) : : +- *(1) Project : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct -: :- Subquery subquery1151 +: :- Subquery subquery3391 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- *(1) Project [ss_ext_discount_amt#7] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct -: :- Subquery subquery1152 +: :- Subquery subquery3392 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) : : +- *(1) Project [ss_net_paid#8] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct -: :- Subquery subquery1154 +: :- Subquery subquery3394 : : +- *(2) HashAggregate(keys=[], functions=[count(1)]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) : : +- *(1) Project : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct -: :- Subquery subquery1155 +: :- Subquery subquery3395 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- *(1) Project [ss_ext_discount_amt#7] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct -: :- Subquery subquery1156 +: :- Subquery subquery3396 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) : : +- *(1) Project [ss_net_paid#8] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct -: :- Subquery subquery1158 +: :- Subquery subquery3398 : : +- *(2) HashAggregate(keys=[], functions=[count(1)]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) : : +- *(1) Project : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct -: :- Subquery subquery1159 +: :- Subquery subquery3399 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- *(1) Project [ss_ext_discount_amt#7] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct -: :- Subquery subquery1160 +: :- Subquery subquery3400 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) : : +- *(1) Project [ss_net_paid#8] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct -: :- Subquery subquery1162 +: :- Subquery subquery3402 : : +- *(2) HashAggregate(keys=[], functions=[count(1)]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) : : +- *(1) Project : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct -: :- Subquery subquery1163 +: :- Subquery subquery3403 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- *(1) Project [ss_ext_discount_amt#7] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct -: :- Subquery subquery1164 +: :- Subquery subquery3404 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) : : +- *(1) Project [ss_net_paid#8] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct -: :- Subquery subquery1166 +: :- Subquery subquery3406 : : +- *(2) HashAggregate(keys=[], functions=[count(1)]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) : : +- *(1) Project : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct -: :- Subquery subquery1167 +: :- Subquery subquery3407 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- *(1) Project [ss_ext_discount_amt#7] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct -: +- Subquery subquery1168 +: +- Subquery subquery3408 : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) : +- Exchange SinglePartition : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt index 1851df752..c55f7a6a8 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt @@ -16,7 +16,7 @@ WholeStageCodegen InputAdapter Exchange #2 WholeStageCodegen - HashAggregate [sum,ss_ext_discount_amt,sum,count,count] [sum,count,sum,count] + HashAggregate [count,sum,count,sum,ss_ext_discount_amt] [sum,count,sum,count] Project [ss_ext_discount_amt] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] @@ -26,7 +26,7 @@ WholeStageCodegen InputAdapter Exchange #3 WholeStageCodegen - HashAggregate [sum,count,ss_net_paid,sum,count] [sum,count,sum,count] + HashAggregate [count,sum,ss_net_paid,sum,count] [sum,count,sum,count] Project [ss_net_paid] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] @@ -46,7 +46,7 @@ WholeStageCodegen InputAdapter Exchange #5 WholeStageCodegen - HashAggregate [count,sum,count,ss_ext_discount_amt,sum] [sum,count,sum,count] + HashAggregate [sum,count,count,ss_ext_discount_amt,sum] [sum,count,sum,count] Project [ss_ext_discount_amt] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] @@ -56,7 +56,7 @@ WholeStageCodegen InputAdapter Exchange #6 WholeStageCodegen - HashAggregate [sum,sum,ss_net_paid,count,count] [sum,count,sum,count] + HashAggregate [count,sum,sum,ss_net_paid,count] [sum,count,sum,count] Project [ss_net_paid] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] @@ -76,7 +76,7 @@ WholeStageCodegen InputAdapter Exchange #8 WholeStageCodegen - HashAggregate [sum,ss_ext_discount_amt,count,count,sum] [sum,count,sum,count] + HashAggregate [sum,count,count,sum,ss_ext_discount_amt] [sum,count,sum,count] Project [ss_ext_discount_amt] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] @@ -86,7 +86,7 @@ WholeStageCodegen InputAdapter Exchange #9 WholeStageCodegen - HashAggregate [sum,count,count,sum,ss_net_paid] [sum,count,sum,count] + HashAggregate [count,sum,sum,ss_net_paid,count] [sum,count,sum,count] Project [ss_net_paid] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] @@ -106,7 +106,7 @@ WholeStageCodegen InputAdapter Exchange #11 WholeStageCodegen - HashAggregate [ss_ext_discount_amt,sum,count,count,sum] [sum,count,sum,count] + HashAggregate [count,sum,count,ss_ext_discount_amt,sum] [sum,count,sum,count] Project [ss_ext_discount_amt] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] @@ -116,7 +116,7 @@ WholeStageCodegen InputAdapter Exchange #12 WholeStageCodegen - HashAggregate [count,sum,ss_net_paid,count,sum] [sum,count,sum,count] + HashAggregate [count,sum,count,ss_net_paid,sum] [sum,count,sum,count] Project [ss_net_paid] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] @@ -136,7 +136,7 @@ WholeStageCodegen InputAdapter Exchange #14 WholeStageCodegen - HashAggregate [sum,ss_ext_discount_amt,sum,count,count] [sum,count,sum,count] + HashAggregate [count,count,sum,sum,ss_ext_discount_amt] [sum,count,sum,count] Project [ss_ext_discount_amt] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] @@ -146,7 +146,7 @@ WholeStageCodegen InputAdapter Exchange #15 WholeStageCodegen - HashAggregate [count,ss_net_paid,sum,count,sum] [sum,count,sum,count] + HashAggregate [sum,count,ss_net_paid,count,sum] [sum,count,sum,count] Project [ss_net_paid] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt index 90c7f4cff..fd5605d2e 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt @@ -1,8 +1,8 @@ == Physical Plan == *(9) Sort [Returns_Loss#1 DESC NULLS LAST], true, 0 -+- Exchange rangepartitioning(Returns_Loss#1 DESC NULLS LAST, 200) ++- Exchange rangepartitioning(Returns_Loss#1 DESC NULLS LAST, 5) +- *(8) HashAggregate(keys=[cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6], functions=[sum(UnscaledValue(cr_net_loss#7))]) - +- Exchange hashpartitioning(cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6, 200) + +- Exchange hashpartitioning(cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6, 5) +- *(7) HashAggregate(keys=[cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6], functions=[partial_sum(UnscaledValue(cr_net_loss#7))]) +- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#5, cd_education_status#6] +- *(7) BroadcastHashJoin [c_current_hdemo_sk#8], [hd_demo_sk#9], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt index 4b2c97c9f..06086853c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt @@ -3,22 +3,22 @@ WholeStageCodegen InputAdapter Exchange [Returns_Loss] #1 WholeStageCodegen - HashAggregate [cc_call_center_id,sum(UnscaledValue(cr_net_loss)),sum,cc_name,cd_education_status,cc_manager,cd_marital_status] [sum(UnscaledValue(cr_net_loss)),sum,Manager,Call_Center_Name,Returns_Loss,Call_Center] + HashAggregate [cc_name,sum(UnscaledValue(cr_net_loss)),cc_manager,cd_education_status,cd_marital_status,sum,cc_call_center_id] [Manager,sum(UnscaledValue(cr_net_loss)),Call_Center_Name,Returns_Loss,sum,Call_Center] InputAdapter - Exchange [cc_call_center_id,cc_name,cd_education_status,cc_manager,cd_marital_status] #2 + Exchange [cc_name,cc_manager,cd_education_status,cd_marital_status,cc_call_center_id] #2 WholeStageCodegen - HashAggregate [cc_call_center_id,sum,cc_name,cd_education_status,cc_manager,cr_net_loss,sum,cd_marital_status] [sum,sum] - Project [cr_net_loss,cd_marital_status,cc_call_center_id,cc_name,cc_manager,cd_education_status] + HashAggregate [cc_name,cc_manager,cd_education_status,cd_marital_status,cr_net_loss,sum,cc_call_center_id,sum] [sum,sum] + Project [cr_net_loss,cc_call_center_id,cc_manager,cd_education_status,cc_name,cd_marital_status] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [cr_net_loss,c_current_hdemo_sk,cd_marital_status,cc_call_center_id,cc_name,cc_manager,cd_education_status] + Project [cr_net_loss,cc_call_center_id,cc_manager,cd_education_status,cc_name,c_current_hdemo_sk,cd_marital_status] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] - Project [c_current_cdemo_sk,cr_net_loss,c_current_hdemo_sk,cc_call_center_id,cc_name,cc_manager] + Project [cr_net_loss,cc_call_center_id,cc_manager,c_current_cdemo_sk,cc_name,c_current_hdemo_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_cdemo_sk,c_current_addr_sk,cr_net_loss,c_current_hdemo_sk,cc_call_center_id,cc_name,cc_manager] + Project [cr_net_loss,cc_call_center_id,cc_manager,c_current_cdemo_sk,cc_name,c_current_hdemo_sk,c_current_addr_sk] BroadcastHashJoin [cr_returning_customer_sk,c_customer_sk] - Project [cr_returning_customer_sk,cr_net_loss,cc_call_center_id,cc_name,cc_manager] + Project [cr_net_loss,cc_call_center_id,cc_manager,cr_returning_customer_sk,cc_name] BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Project [cr_returning_customer_sk,cr_net_loss,cc_call_center_id,cr_returned_date_sk,cc_name,cc_manager] + Project [cr_net_loss,cc_call_center_id,cc_manager,cr_returning_customer_sk,cc_name,cr_returned_date_sk] BroadcastHashJoin [cc_call_center_sk,cr_call_center_sk] Project [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] Filter [cc_call_center_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt index 4a18c2b2d..c27934561 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt @@ -19,7 +19,7 @@ TakeOrderedAndProject(limit=100, orderBy=[Excess Discount Amount #1 ASC NULLS FI : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) : +- *(4) Filter isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#8) : +- *(4) HashAggregate(keys=[ws_item_sk#6], functions=[avg(UnscaledValue(ws_ext_discount_amt#2))]) - : +- Exchange hashpartitioning(ws_item_sk#6, 200) + : +- Exchange hashpartitioning(ws_item_sk#6, 5) : +- *(3) HashAggregate(keys=[ws_item_sk#6], functions=[partial_avg(UnscaledValue(ws_ext_discount_amt#2))]) : +- *(3) Project [ws_item_sk#6, ws_ext_discount_amt#2] : +- *(3) BroadcastHashJoin [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt index 6137607f6..09820658d 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt @@ -24,11 +24,11 @@ TakeOrderedAndProject [Excess Discount Amount ] BroadcastExchange #3 WholeStageCodegen Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))] - HashAggregate [ws_item_sk,sum,count,avg(UnscaledValue(ws_ext_discount_amt))] [sum,(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),avg(UnscaledValue(ws_ext_discount_amt)),count,ws_item_sk] + HashAggregate [ws_item_sk,sum,count,avg(UnscaledValue(ws_ext_discount_amt))] [sum,ws_item_sk,count,avg(UnscaledValue(ws_ext_discount_amt)),(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))] InputAdapter Exchange [ws_item_sk] #4 WholeStageCodegen - HashAggregate [ws_ext_discount_amt,ws_item_sk,sum,sum,count,count] [sum,count,sum,count] + HashAggregate [sum,ws_ext_discount_amt,count,sum,ws_item_sk,count] [sum,count,sum,count] Project [ws_item_sk,ws_ext_discount_amt] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] Project [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt index 4788a00a3..5cbc9a6c1 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[sumsales#1 ASC NULLS FIRST,ss_customer_sk#2 ASC NULLS FIRST], output=[ss_customer_sk#2,sumsales#1]) +- *(4) HashAggregate(keys=[ss_customer_sk#2], functions=[sum(act_sales#3)]) - +- Exchange hashpartitioning(ss_customer_sk#2, 200) + +- Exchange hashpartitioning(ss_customer_sk#2, 5) +- *(3) HashAggregate(keys=[ss_customer_sk#2], functions=[partial_sum(act_sales#3)]) +- *(3) Project [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#4) THEN CheckOverflow((promote_precision(cast(cast((ss_quantity#5 - sr_return_quantity#4) as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#6 as decimal(12,2)))), DecimalType(18,2)) ELSE CheckOverflow((promote_precision(cast(cast(ss_quantity#5 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#6 as decimal(12,2)))), DecimalType(18,2)) END AS act_sales#3] +- *(3) BroadcastHashJoin [sr_reason_sk#7], [cast(r_reason_sk#8 as bigint)], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt index 5c7f07a1b..4fe1e3a08 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt @@ -7,9 +7,9 @@ TakeOrderedAndProject [sumsales,ss_customer_sk] HashAggregate [ss_customer_sk,act_sales,sum,sum] [sum,sum] Project [ss_customer_sk,sr_return_quantity,ss_quantity,ss_sales_price] BroadcastHashJoin [sr_reason_sk,r_reason_sk] - Project [ss_quantity,sr_return_quantity,ss_customer_sk,sr_reason_sk,ss_sales_price] + Project [ss_sales_price,ss_customer_sk,ss_quantity,sr_return_quantity,sr_reason_sk] BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_ticket_number] [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_ticket_number] + Scan parquet default.store_sales [ss_sales_price,ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity] [ss_sales_price,ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity] InputAdapter BroadcastExchange #2 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt index 8034c8beb..2e6006488 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt @@ -4,7 +4,7 @@ TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], outpu +- Exchange SinglePartition +- *(7) HashAggregate(keys=[], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5)), partial_count(distinct ws_order_number#6)]) +- *(7) HashAggregate(keys=[ws_order_number#6], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5))]) - +- Exchange hashpartitioning(ws_order_number#6, 200) + +- Exchange hashpartitioning(ws_order_number#6, 5) +- *(6) HashAggregate(keys=[ws_order_number#6], functions=[partial_sum(UnscaledValue(ws_ext_ship_cost#4)), partial_sum(UnscaledValue(ws_net_profit#5))]) +- *(6) Project [ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] +- *(6) BroadcastHashJoin [ws_web_site_sk#7], [web_site_sk#8], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt index cc709ac3c..02cf6314c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt @@ -1,27 +1,27 @@ TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),count,sum] [sum,sum(UnscaledValue(ws_net_profit)),order count ,total net profit ,count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),count,sum,total shipping cost ] + HashAggregate [sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),count(ws_order_number),count,sum(UnscaledValue(ws_net_profit))] [order count ,total shipping cost ,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),total net profit ,count(ws_order_number),count,sum(UnscaledValue(ws_net_profit))] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [sum,count,sum,sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),ws_order_number,sum,sum] [sum,count,sum,sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),sum,count,sum] - HashAggregate [sum,sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),ws_order_number,sum,sum] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),sum,sum] + HashAggregate [ws_order_number,sum,sum,count,sum(UnscaledValue(ws_ext_ship_cost)),count(ws_order_number),sum,sum,sum(UnscaledValue(ws_net_profit))] [sum,sum,count,sum(UnscaledValue(ws_ext_ship_cost)),count(ws_order_number),count,sum,sum,sum(UnscaledValue(ws_net_profit))] + HashAggregate [ws_order_number,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum,sum,sum(UnscaledValue(ws_net_profit))] [sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum,sum,sum(UnscaledValue(ws_net_profit))] InputAdapter Exchange [ws_order_number] #2 WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),ws_order_number,sum,ws_net_profit,ws_ext_ship_cost] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),sum,sum] + HashAggregate [ws_order_number,ws_ext_ship_cost,sum(UnscaledValue(ws_ext_ship_cost)),ws_net_profit,sum,sum,sum(UnscaledValue(ws_net_profit))] [sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum,sum,sum(UnscaledValue(ws_net_profit))] Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] BroadcastHashJoin [ws_web_site_sk,web_site_sk] Project [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] - Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number] + Project [ws_net_profit,ws_ext_ship_cost,ws_web_site_sk,ws_ship_addr_sk,ws_order_number] BroadcastHashJoin [ws_ship_date_sk,d_date_sk] BroadcastHashJoin [ws_order_number,wr_order_number] - Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] + Project [ws_net_profit,ws_ext_ship_cost,ws_ship_date_sk,ws_web_site_sk,ws_ship_addr_sk,ws_order_number] BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] - Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk,ws_warehouse_sk] + Project [ws_net_profit,ws_ext_ship_cost,ws_ship_date_sk,ws_web_site_sk,ws_ship_addr_sk,ws_order_number,ws_warehouse_sk] Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] - Scan parquet default.web_sales [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk,ws_warehouse_sk] [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk,ws_warehouse_sk] + Scan parquet default.web_sales [ws_net_profit,ws_ext_ship_cost,ws_ship_date_sk,ws_web_site_sk,ws_ship_addr_sk,ws_order_number,ws_warehouse_sk] [ws_net_profit,ws_ext_ship_cost,ws_ship_date_sk,ws_web_site_sk,ws_ship_addr_sk,ws_order_number,ws_warehouse_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt index a3de90e6d..2d497b75e 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt @@ -4,7 +4,7 @@ TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], outpu +- Exchange SinglePartition +- *(10) HashAggregate(keys=[], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5)), partial_count(distinct ws_order_number#6)]) +- *(10) HashAggregate(keys=[ws_order_number#6], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5))]) - +- Exchange hashpartitioning(ws_order_number#6, 200) + +- Exchange hashpartitioning(ws_order_number#6, 5) +- *(9) HashAggregate(keys=[ws_order_number#6], functions=[partial_sum(UnscaledValue(ws_ext_ship_cost#4)), partial_sum(UnscaledValue(ws_net_profit#5))]) +- *(9) Project [ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] +- *(9) BroadcastHashJoin [ws_web_site_sk#7], [web_site_sk#8], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt index 5b6f33ebd..03adb7e35 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt @@ -1,26 +1,26 @@ TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] WholeStageCodegen - HashAggregate [count(ws_order_number),sum(UnscaledValue(ws_net_profit)),count,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] [order count ,total shipping cost ,count(ws_order_number),sum(UnscaledValue(ws_net_profit)),count,sum,total net profit ,sum(UnscaledValue(ws_ext_ship_cost)),sum] + HashAggregate [sum,count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),count,sum,sum(UnscaledValue(ws_net_profit))] [sum,count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),total shipping cost ,order count ,count,total net profit ,sum,sum(UnscaledValue(ws_net_profit))] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [sum,sum,count,count(ws_order_number),sum(UnscaledValue(ws_net_profit)),sum,ws_order_number,sum(UnscaledValue(ws_ext_ship_cost)),sum] [sum,sum,count,count(ws_order_number),sum(UnscaledValue(ws_net_profit)),count,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] - HashAggregate [sum,sum,sum(UnscaledValue(ws_net_profit)),sum,ws_order_number,sum(UnscaledValue(ws_ext_ship_cost)),sum] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] + HashAggregate [sum,ws_order_number,sum,count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),count,sum,sum,sum(UnscaledValue(ws_net_profit))] [sum,sum,count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),count,count,sum,sum,sum(UnscaledValue(ws_net_profit))] + HashAggregate [sum,ws_order_number,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum,sum,sum(UnscaledValue(ws_net_profit))] [sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum,sum,sum(UnscaledValue(ws_net_profit))] InputAdapter Exchange [ws_order_number] #2 WholeStageCodegen - HashAggregate [sum,sum,sum(UnscaledValue(ws_net_profit)),ws_order_number,sum(UnscaledValue(ws_ext_ship_cost)),ws_net_profit,ws_ext_ship_cost] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] + HashAggregate [ws_order_number,sum,sum(UnscaledValue(ws_ext_ship_cost)),ws_ext_ship_cost,sum,ws_net_profit,sum(UnscaledValue(ws_net_profit))] [sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum,sum,sum(UnscaledValue(ws_net_profit))] Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] BroadcastHashJoin [ws_web_site_sk,web_site_sk] Project [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] - Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number] + Project [ws_net_profit,ws_ext_ship_cost,ws_web_site_sk,ws_ship_addr_sk,ws_order_number] BroadcastHashJoin [ws_ship_date_sk,d_date_sk] BroadcastHashJoin [ws_order_number,wr_order_number] BroadcastHashJoin [ws_order_number,ws_order_number] - Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] + Project [ws_net_profit,ws_ext_ship_cost,ws_ship_date_sk,ws_web_site_sk,ws_ship_addr_sk,ws_order_number] Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] - Scan parquet default.web_sales [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] + Scan parquet default.web_sales [ws_net_profit,ws_ext_ship_cost,ws_ship_date_sk,ws_web_site_sk,ws_ship_addr_sk,ws_order_number] [ws_net_profit,ws_ext_ship_cost,ws_ship_date_sk,ws_web_site_sk,ws_ship_addr_sk,ws_order_number] InputAdapter BroadcastExchange #3 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt index 136c5a89f..0d528d336 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt @@ -6,9 +6,9 @@ CollectLimit 100 +- *(9) Project [customer_sk#1, customer_sk#2] +- SortMergeJoin [customer_sk#1, item_sk#3], [customer_sk#2, item_sk#4], FullOuter :- *(4) Sort [customer_sk#1 ASC NULLS FIRST, item_sk#3 ASC NULLS FIRST], false, 0 - : +- Exchange hashpartitioning(customer_sk#1, item_sk#3, 200) + : +- Exchange hashpartitioning(customer_sk#1, item_sk#3, 5) : +- *(3) HashAggregate(keys=[ss_customer_sk#5, ss_item_sk#6], functions=[]) - : +- Exchange hashpartitioning(ss_customer_sk#5, ss_item_sk#6, 200) + : +- Exchange hashpartitioning(ss_customer_sk#5, ss_item_sk#6, 5) : +- *(2) HashAggregate(keys=[ss_customer_sk#5, ss_item_sk#6], functions=[]) : +- *(2) Project [ss_item_sk#6, ss_customer_sk#5] : +- *(2) BroadcastHashJoin [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight @@ -20,9 +20,9 @@ CollectLimit 100 : +- *(1) Filter (((isnotnull(d_month_seq#9) && (d_month_seq#9 >= 1200)) && (d_month_seq#9 <= 1211)) && isnotnull(d_date_sk#8)) : +- *(1) FileScan parquet default.date_dim[d_date_sk#8,d_month_seq#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct +- *(8) Sort [customer_sk#2 ASC NULLS FIRST, item_sk#4 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(customer_sk#2, item_sk#4, 200) + +- Exchange hashpartitioning(customer_sk#2, item_sk#4, 5) +- *(7) HashAggregate(keys=[cs_bill_customer_sk#10, cs_item_sk#11], functions=[]) - +- Exchange hashpartitioning(cs_bill_customer_sk#10, cs_item_sk#11, 200) + +- Exchange hashpartitioning(cs_bill_customer_sk#10, cs_item_sk#11, 5) +- *(6) HashAggregate(keys=[cs_bill_customer_sk#10, cs_item_sk#11], functions=[]) +- *(6) Project [cs_bill_customer_sk#10, cs_item_sk#11] +- *(6) BroadcastHashJoin [cs_sold_date_sk#12], [d_date_sk#8], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt index a3cc25d23..b2bfbd555 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt @@ -1,10 +1,10 @@ CollectLimit WholeStageCodegen - HashAggregate [sum,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] [store_only,sum,catalog_only,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_and_catalog] + HashAggregate [sum,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] [store_and_catalog,sum,store_only,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),catalog_only,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [sum,sum,sum,customer_sk,sum,customer_sk,sum,sum] [sum,sum,sum,sum,sum,sum] + HashAggregate [sum,sum,sum,customer_sk,customer_sk,sum,sum,sum] [sum,sum,sum,sum,sum,sum] Project [customer_sk,customer_sk] InputAdapter SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt index eee60f541..2ffc242b5 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt @@ -1,13 +1,13 @@ == Physical Plan == *(7) Project [i_item_desc#1, i_category#2, i_class#3, i_current_price#4, itemrevenue#5, revenueratio#6] +- *(7) Sort [i_category#2 ASC NULLS FIRST, i_class#3 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#1 ASC NULLS FIRST, revenueratio#6 ASC NULLS FIRST], true, 0 - +- Exchange rangepartitioning(i_category#2 ASC NULLS FIRST, i_class#3 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#1 ASC NULLS FIRST, revenueratio#6 ASC NULLS FIRST, 200) + +- Exchange rangepartitioning(i_category#2 ASC NULLS FIRST, i_class#3 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#1 ASC NULLS FIRST, revenueratio#6 ASC NULLS FIRST, 5) +- *(6) Project [i_item_desc#1, i_category#2, i_class#3, i_current_price#4, itemrevenue#5, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#8) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#9)), DecimalType(38,17)) AS revenueratio#6, i_item_id#7] +- Window [sum(_w1#10) windowspecdefinition(i_class#3, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#9], [i_class#3] +- *(5) Sort [i_class#3 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(i_class#3, 200) + +- Exchange hashpartitioning(i_class#3, 5) +- *(4) HashAggregate(keys=[i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4], functions=[sum(UnscaledValue(ss_ext_sales_price#11))]) - +- Exchange hashpartitioning(i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4, 200) + +- Exchange hashpartitioning(i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4, 5) +- *(3) HashAggregate(keys=[i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#11))]) +- *(3) Project [ss_ext_sales_price#11, i_item_id#7, i_item_desc#1, i_current_price#4, i_class#3, i_category#2] +- *(3) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt index be26ab7ec..b6a936a1c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt @@ -1,10 +1,10 @@ WholeStageCodegen - Project [revenueratio,i_item_desc,itemrevenue,i_category,i_current_price,i_class] - Sort [revenueratio,i_item_id,i_item_desc,i_category,i_class] + Project [i_current_price,itemrevenue,revenueratio,i_item_desc,i_category,i_class] + Sort [revenueratio,i_item_desc,i_item_id,i_category,i_class] InputAdapter - Exchange [revenueratio,i_item_id,i_item_desc,i_category,i_class] #1 + Exchange [revenueratio,i_item_desc,i_item_id,i_category,i_class] #1 WholeStageCodegen - Project [i_item_id,i_item_desc,_w0,itemrevenue,i_category,_we0,i_current_price,i_class] + Project [i_current_price,itemrevenue,i_item_desc,_w0,i_item_id,i_category,_we0,i_class] InputAdapter Window [_w1,i_class] WholeStageCodegen @@ -12,14 +12,14 @@ WholeStageCodegen InputAdapter Exchange [i_class] #2 WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,sum,sum(UnscaledValue(ss_ext_sales_price)),i_category,i_current_price,i_class] [sum,sum(UnscaledValue(ss_ext_sales_price)),_w0,itemrevenue,_w1] + HashAggregate [i_current_price,sum(UnscaledValue(ss_ext_sales_price)),i_item_desc,i_item_id,i_category,sum,i_class] [itemrevenue,_w1,sum(UnscaledValue(ss_ext_sales_price)),_w0,sum] InputAdapter - Exchange [i_item_id,i_item_desc,i_category,i_current_price,i_class] #3 + Exchange [i_current_price,i_item_desc,i_item_id,i_category,i_class] #3 WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,sum,i_category,ss_ext_sales_price,sum,i_current_price,i_class] [sum,sum] - Project [i_class,i_current_price,ss_ext_sales_price,i_category,i_item_desc,i_item_id] + HashAggregate [i_current_price,sum,i_item_desc,ss_ext_sales_price,i_item_id,i_category,sum,i_class] [sum,sum] + Project [i_current_price,i_category,i_item_id,ss_ext_sales_price,i_class,i_item_desc] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [i_class,i_current_price,ss_ext_sales_price,i_category,ss_sold_date_sk,i_item_desc,i_item_id] + Project [i_current_price,i_category,ss_sold_date_sk,i_item_id,ss_ext_sales_price,i_class,i_item_desc] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] Filter [ss_item_sk,ss_sold_date_sk] @@ -27,9 +27,9 @@ WholeStageCodegen InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + Project [i_current_price,i_category,i_item_sk,i_item_id,i_class,i_item_desc] Filter [i_category,i_item_sk] - Scan parquet default.item [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + Scan parquet default.item [i_current_price,i_category,i_item_sk,i_item_id,i_class,i_item_desc] [i_current_price,i_category,i_item_sk,i_item_id,i_class,i_item_desc] InputAdapter BroadcastExchange #5 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt index fbd4c2dc5..2106af4de 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[substring(w_warehouse_name, 1, 20)#1 ASC NULLS FIRST,sm_type#2 ASC NULLS FIRST,cc_name#3 ASC NULLS FIRST], output=[substring(w_warehouse_name, 1, 20)#1,sm_type#2,cc_name#3,30 days #4,31 - 60 days #5,61 - 90 days #6,91 - 120 days #7,>120 days #8]) +- *(6) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3], functions=[sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 30) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 60) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 90) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) - +- Exchange hashpartitioning(substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3, 200) + +- Exchange hashpartitioning(substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3, 5) +- *(5) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20) AS substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3], functions=[partial_sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 30) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 60) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 90) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) +- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, w_warehouse_name#9, sm_type#2, cc_name#3] +- *(5) BroadcastHashJoin [cs_ship_date_sk#11], [d_date_sk#13], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt index 934bf3e47..9b05c64d9 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt @@ -1,21 +1,21 @@ -TakeOrderedAndProject [substring(w_warehouse_name, 1, 20),61 - 90 days ,cc_name,>120 days ,91 - 120 days ,30 days ,sm_type,31 - 60 days ] +TakeOrderedAndProject [30 days ,cc_name,91 - 120 days ,61 - 90 days ,substring(w_warehouse_name, 1, 20),31 - 60 days ,sm_type,>120 days ] WholeStageCodegen - HashAggregate [sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum,cc_name,sum,sm_type,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum] [sum,substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,sum,>120 days ,sum,91 - 120 days ,30 days ,31 - 60 days ,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum] + HashAggregate [sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),cc_name,sum,sum,sum,sum,substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sm_type] [sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),30 days ,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum,sum,91 - 120 days ,sum,sum,sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,substring(w_warehouse_name, 1, 20),31 - 60 days ,sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),>120 days ] InputAdapter Exchange [substring(w_warehouse_name, 1, 20),sm_type,cc_name] #1 WholeStageCodegen - HashAggregate [sum,sum,sum,substring(w_warehouse_name, 1, 20),w_warehouse_name,sum,sum,cs_sold_date_sk,sum,cc_name,sum,sum,sm_type,cs_ship_date_sk,sum,sum] [sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum] - Project [cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name,cc_name] + HashAggregate [cc_name,sum,sum,cs_ship_date_sk,w_warehouse_name,sum,sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sum,cs_sold_date_sk,sum,sum,sm_type] [sum,sum,sum,sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum] + Project [w_warehouse_name,cs_ship_date_sk,sm_type,cs_sold_date_sk,cc_name] BroadcastHashJoin [cs_ship_date_sk,d_date_sk] - Project [cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name,cc_name] + Project [w_warehouse_name,cs_ship_date_sk,sm_type,cs_sold_date_sk,cc_name] BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] - Project [cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name,cs_call_center_sk] + Project [cs_call_center_sk,w_warehouse_name,cs_ship_date_sk,sm_type,cs_sold_date_sk] BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] - Project [cs_ship_date_sk,cs_sold_date_sk,cs_ship_mode_sk,w_warehouse_name,cs_call_center_sk] + Project [cs_call_center_sk,cs_ship_mode_sk,w_warehouse_name,cs_ship_date_sk,cs_sold_date_sk] BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] - Project [cs_ship_date_sk,cs_warehouse_sk,cs_sold_date_sk,cs_ship_mode_sk,cs_call_center_sk] + Project [cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_ship_date_sk,cs_sold_date_sk] Filter [cs_warehouse_sk,cs_ship_mode_sk,cs_call_center_sk,cs_ship_date_sk] - Scan parquet default.catalog_sales [cs_ship_date_sk,cs_warehouse_sk,cs_sold_date_sk,cs_ship_mode_sk,cs_call_center_sk] [cs_ship_date_sk,cs_warehouse_sk,cs_sold_date_sk,cs_ship_mode_sk,cs_call_center_sk] + Scan parquet default.catalog_sales [cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_ship_date_sk,cs_sold_date_sk] [cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_ship_date_sk,cs_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen From 8b58b6bc85c09b40c68f8cf0da263fa6cda16c4a Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Wed, 10 Mar 2021 15:53:58 -0800 Subject: [PATCH 08/31] update plan --- .../approved-plans-v1_4/q1/simplified.txt | 10 +- .../approved-plans-v1_4/q10/simplified.txt | 14 +- .../approved-plans-v1_4/q11/simplified.txt | 60 ++++---- .../approved-plans-v1_4/q12/simplified.txt | 18 +-- .../approved-plans-v1_4/q13/simplified.txt | 20 +-- .../approved-plans-v1_4/q14a/explain.txt | 20 +-- .../approved-plans-v1_4/q14a/simplified.txt | 56 +++---- .../approved-plans-v1_4/q14b/explain.txt | 48 +++--- .../approved-plans-v1_4/q14b/simplified.txt | 48 +++--- .../approved-plans-v1_4/q16/simplified.txt | 16 +- .../approved-plans-v1_4/q17/explain.txt | 8 +- .../approved-plans-v1_4/q17/simplified.txt | 36 ++--- .../approved-plans-v1_4/q18/simplified.txt | 28 ++-- .../approved-plans-v1_4/q19/simplified.txt | 22 +-- .../approved-plans-v1_4/q2/simplified.txt | 14 +- .../approved-plans-v1_4/q20/simplified.txt | 18 +-- .../approved-plans-v1_4/q21/simplified.txt | 4 +- .../approved-plans-v1_4/q22/simplified.txt | 18 +-- .../approved-plans-v1_4/q23a/explain.txt | 4 +- .../approved-plans-v1_4/q23a/simplified.txt | 16 +- .../approved-plans-v1_4/q23b/explain.txt | 4 +- .../approved-plans-v1_4/q23b/simplified.txt | 24 +-- .../approved-plans-v1_4/q24a/explain.txt | 4 +- .../approved-plans-v1_4/q24a/simplified.txt | 54 +++---- .../approved-plans-v1_4/q24b/explain.txt | 4 +- .../approved-plans-v1_4/q24b/simplified.txt | 54 +++---- .../approved-plans-v1_4/q25/explain.txt | 12 +- .../approved-plans-v1_4/q25/simplified.txt | 38 ++--- .../approved-plans-v1_4/q26/explain.txt | 4 +- .../approved-plans-v1_4/q26/simplified.txt | 20 +-- .../approved-plans-v1_4/q27/explain.txt | 4 +- .../approved-plans-v1_4/q27/simplified.txt | 22 +-- .../approved-plans-v1_4/q28/simplified.txt | 48 +++--- .../approved-plans-v1_4/q29/explain.txt | 8 +- .../approved-plans-v1_4/q29/simplified.txt | 36 ++--- .../approved-plans-v1_4/q3/simplified.txt | 4 +- .../approved-plans-v1_4/q30/simplified.txt | 20 +-- .../approved-plans-v1_4/q31/simplified.txt | 34 ++--- .../approved-plans-v1_4/q32/simplified.txt | 4 +- .../approved-plans-v1_4/q34/simplified.txt | 10 +- .../approved-plans-v1_4/q35/simplified.txt | 14 +- .../approved-plans-v1_4/q36/simplified.txt | 14 +- .../approved-plans-v1_4/q37/simplified.txt | 4 +- .../approved-plans-v1_4/q38/simplified.txt | 4 +- .../approved-plans-v1_4/q39a/simplified.txt | 24 +-- .../approved-plans-v1_4/q39b/simplified.txt | 24 +-- .../approved-plans-v1_4/q4/simplified.txt | 120 +++++++-------- .../approved-plans-v1_4/q40/simplified.txt | 16 +- .../approved-plans-v1_4/q41/simplified.txt | 6 +- .../approved-plans-v1_4/q42/simplified.txt | 4 +- .../approved-plans-v1_4/q43/simplified.txt | 6 +- .../approved-plans-v1_4/q44/explain.txt | 8 +- .../approved-plans-v1_4/q44/simplified.txt | 10 +- .../approved-plans-v1_4/q45/simplified.txt | 2 +- .../approved-plans-v1_4/q46/simplified.txt | 24 +-- .../approved-plans-v1_4/q47/explain.txt | 85 ++++++----- .../approved-plans-v1_4/q47/simplified.txt | 127 ++++++++-------- .../approved-plans-v1_4/q48/simplified.txt | 8 +- .../approved-plans-v1_4/q49/explain.txt | 86 ++++++----- .../approved-plans-v1_4/q49/simplified.txt | 140 +++++++++--------- .../approved-plans-v1_4/q5/simplified.txt | 36 ++--- .../approved-plans-v1_4/q50/explain.txt | 4 +- .../approved-plans-v1_4/q50/simplified.txt | 28 ++-- .../approved-plans-v1_4/q51/simplified.txt | 8 +- .../approved-plans-v1_4/q52/simplified.txt | 4 +- .../approved-plans-v1_4/q53/simplified.txt | 4 +- .../approved-plans-v1_4/q54/explain.txt | 10 +- .../approved-plans-v1_4/q54/simplified.txt | 2 +- .../approved-plans-v1_4/q55/simplified.txt | 4 +- .../approved-plans-v1_4/q57/explain.txt | 4 +- .../approved-plans-v1_4/q57/simplified.txt | 50 +++---- .../approved-plans-v1_4/q58/explain.txt | 18 +-- .../approved-plans-v1_4/q58/simplified.txt | 4 +- .../approved-plans-v1_4/q59/simplified.txt | 20 +-- .../approved-plans-v1_4/q6/explain.txt | 6 +- .../approved-plans-v1_4/q6/simplified.txt | 6 +- .../approved-plans-v1_4/q61/simplified.txt | 12 +- .../approved-plans-v1_4/q62/simplified.txt | 18 +-- .../approved-plans-v1_4/q63/simplified.txt | 4 +- .../approved-plans-v1_4/q64/simplified.txt | 124 ++++++++-------- .../approved-plans-v1_4/q65/simplified.txt | 16 +- .../approved-plans-v1_4/q66/simplified.txt | 50 +++---- .../approved-plans-v1_4/q67/simplified.txt | 26 ++-- .../approved-plans-v1_4/q68/simplified.txt | 24 +-- .../approved-plans-v1_4/q69/simplified.txt | 14 +- .../approved-plans-v1_4/q7/explain.txt | 4 +- .../approved-plans-v1_4/q7/simplified.txt | 20 +-- .../approved-plans-v1_4/q70/simplified.txt | 8 +- .../approved-plans-v1_4/q71/simplified.txt | 6 +- .../approved-plans-v1_4/q72/explain.txt | 4 +- .../approved-plans-v1_4/q72/simplified.txt | 34 ++--- .../approved-plans-v1_4/q73/simplified.txt | 10 +- .../approved-plans-v1_4/q74/simplified.txt | 36 ++--- .../approved-plans-v1_4/q75/explain.txt | 4 +- .../approved-plans-v1_4/q75/simplified.txt | 118 +++++++-------- .../approved-plans-v1_4/q76/simplified.txt | 14 +- .../approved-plans-v1_4/q77/simplified.txt | 38 ++--- .../approved-plans-v1_4/q78/explain.txt | 2 +- .../approved-plans-v1_4/q78/simplified.txt | 46 +++--- .../approved-plans-v1_4/q79/simplified.txt | 18 +-- .../approved-plans-v1_4/q80/simplified.txt | 62 ++++---- .../approved-plans-v1_4/q81/simplified.txt | 24 +-- .../approved-plans-v1_4/q82/simplified.txt | 4 +- .../approved-plans-v1_4/q83/simplified.txt | 2 +- .../approved-plans-v1_4/q84/simplified.txt | 10 +- .../approved-plans-v1_4/q85/explain.txt | 8 +- .../approved-plans-v1_4/q85/simplified.txt | 34 ++--- .../approved-plans-v1_4/q86/simplified.txt | 8 +- .../approved-plans-v1_4/q87/simplified.txt | 4 +- .../approved-plans-v1_4/q89/simplified.txt | 18 +-- .../approved-plans-v1_4/q9/explain.txt | 32 ++-- .../approved-plans-v1_4/q9/simplified.txt | 20 +-- .../approved-plans-v1_4/q91/simplified.txt | 18 +-- .../approved-plans-v1_4/q92/simplified.txt | 4 +- .../approved-plans-v1_4/q93/simplified.txt | 4 +- .../approved-plans-v1_4/q94/simplified.txt | 16 +- .../approved-plans-v1_4/q95/simplified.txt | 14 +- .../approved-plans-v1_4/q97/simplified.txt | 4 +- .../approved-plans-v1_4/q98/simplified.txt | 22 +-- .../approved-plans-v1_4/q99/simplified.txt | 18 +-- 120 files changed, 1394 insertions(+), 1402 deletions(-) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt index a27c58fe2..6104e2ac9 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt @@ -7,11 +7,11 @@ TakeOrderedAndProject [c_customer_id] Project [ctr_customer_sk,ctr_store_sk] BroadcastHashJoin [ctr_store_sk,ctr_store_skL,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] Filter [ctr_total_return] - HashAggregate [sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] [ctr_customer_sk,ctr_total_return,sum,ctr_store_sk,sum(UnscaledValue(sr_return_amt))] + HashAggregate [sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] [ctr_customer_sk,ctr_total_return,ctr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] InputAdapter Exchange [sr_customer_sk,sr_store_sk] #1 WholeStageCodegen - HashAggregate [sum,sr_return_amt,sr_customer_sk,sr_store_sk,sum] [sum,sum] + HashAggregate [sr_customer_sk,sum,sr_return_amt,sum,sr_store_sk] [sum,sum] Project [sr_customer_sk,sr_store_sk,sr_return_amt] BroadcastHashJoin [sr_returned_date_sk,d_date_sk] Project [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] @@ -27,16 +27,16 @@ TakeOrderedAndProject [c_customer_id] BroadcastExchange #3 WholeStageCodegen Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] - HashAggregate [ctr_store_sk,sum,count,avg(ctr_total_return)] [sum,ctr_store_skL,avg(ctr_total_return),(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),count] + HashAggregate [ctr_store_sk,sum,count,avg(ctr_total_return)] [avg(ctr_total_return),ctr_store_skL,sum,count,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] InputAdapter Exchange [ctr_store_sk] #4 WholeStageCodegen - HashAggregate [sum,ctr_total_return,ctr_store_sk,count,sum,count] [sum,count,sum,count] + HashAggregate [ctr_store_sk,ctr_total_return,sum,count,sum,count] [sum,count,sum,count] HashAggregate [sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] [sum(UnscaledValue(sr_return_amt)),ctr_store_sk,ctr_total_return,sum] InputAdapter Exchange [sr_customer_sk,sr_store_sk] #5 WholeStageCodegen - HashAggregate [sr_return_amt,sr_customer_sk,sum,sr_store_sk,sum] [sum,sum] + HashAggregate [sum,sr_customer_sk,sum,sr_return_amt,sr_store_sk] [sum,sum] Project [sr_customer_sk,sr_store_sk,sr_return_amt] BroadcastHashJoin [sr_returned_date_sk,d_date_sk] Project [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/simplified.txt index 78aec4ebf..876a5eab0 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/simplified.txt @@ -1,11 +1,11 @@ -TakeOrderedAndProject [cnt5,cd_dep_employed_count,cd_gender,cd_dep_college_count,cd_education_status,cd_dep_count,cnt6,cd_credit_rating,cnt4,cd_purchase_estimate,cd_marital_status,cnt2,cnt1,cnt3] +TakeOrderedAndProject [cnt5,cnt6,cnt1,cnt4,cd_purchase_estimate,cnt2,cnt3,cd_education_status,cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_marital_status,cd_gender,cd_dep_employed_count] WholeStageCodegen - HashAggregate [cd_dep_employed_count,cd_gender,cd_dep_college_count,cd_education_status,cd_dep_count,cd_credit_rating,cd_purchase_estimate,cd_marital_status,count,count(1)] [cnt5,cnt6,cnt4,count,count(1),cnt2,cnt1,cnt3] + HashAggregate [count,count(1),cd_purchase_estimate,cd_education_status,cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_marital_status,cd_gender,cd_dep_employed_count] [cnt5,cnt6,cnt1,count,cnt4,count(1),cnt2,cnt3] InputAdapter - Exchange [cd_dep_employed_count,cd_gender,cd_dep_college_count,cd_education_status,cd_dep_count,cd_credit_rating,cd_purchase_estimate,cd_marital_status] #1 + Exchange [cd_purchase_estimate,cd_education_status,cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_marital_status,cd_gender,cd_dep_employed_count] #1 WholeStageCodegen - HashAggregate [count,cd_dep_employed_count,cd_gender,cd_dep_college_count,cd_education_status,cd_dep_count,cd_credit_rating,cd_purchase_estimate,cd_marital_status,count] [count,count] - Project [cd_gender,cd_dep_count,cd_education_status,cd_dep_college_count,cd_marital_status,cd_credit_rating,cd_purchase_estimate,cd_dep_employed_count] + HashAggregate [count,cd_purchase_estimate,count,cd_education_status,cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_marital_status,cd_gender,cd_dep_employed_count] [count,count] + Project [cd_dep_college_count,cd_dep_employed_count,cd_marital_status,cd_dep_count,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] Project [c_current_cdemo_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] @@ -60,6 +60,6 @@ TakeOrderedAndProject [cnt5,cd_dep_employed_count,cd_gender,cd_dep_college_count InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [cd_gender,cd_dep_count,cd_education_status,cd_dep_college_count,cd_marital_status,cd_credit_rating,cd_demo_sk,cd_purchase_estimate,cd_dep_employed_count] + Project [cd_dep_college_count,cd_dep_employed_count,cd_demo_sk,cd_marital_status,cd_dep_count,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_gender,cd_dep_count,cd_education_status,cd_dep_college_count,cd_marital_status,cd_credit_rating,cd_demo_sk,cd_purchase_estimate,cd_dep_employed_count] [cd_gender,cd_dep_count,cd_education_status,cd_dep_college_count,cd_marital_status,cd_credit_rating,cd_demo_sk,cd_purchase_estimate,cd_dep_employed_count] + Scan parquet default.customer_demographics [cd_dep_college_count,cd_dep_employed_count,cd_demo_sk,cd_marital_status,cd_dep_count,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] [cd_dep_college_count,cd_dep_employed_count,cd_demo_sk,cd_marital_status,cd_dep_count,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt index dd6939bc5..75734357c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt @@ -1,8 +1,8 @@ TakeOrderedAndProject [customer_preferred_cust_flag] WholeStageCodegen Project [customer_preferred_cust_flag] - BroadcastHashJoin [year_total,customer_id,year_total,customer_id,year_total,year_total] - Project [year_total,year_total,customer_id,year_total,customer_preferred_cust_flag] + BroadcastHashJoin [year_total,year_total,customer_id,year_total,year_total,customer_id] + Project [customer_preferred_cust_flag,year_total,year_total,customer_id,year_total] BroadcastHashJoin [customer_id,customer_id] Project [customer_id,year_total,customer_preferred_cust_flag,year_total] BroadcastHashJoin [customer_id,customer_id] @@ -10,18 +10,18 @@ TakeOrderedAndProject [customer_preferred_cust_flag] Union WholeStageCodegen Filter [year_total] - HashAggregate [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),c_email_address,sum,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] + HashAggregate [d_year,sum,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),c_first_name,c_email_address] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] InputAdapter - Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #1 + Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #1 WholeStageCodegen - HashAggregate [c_email_address,sum,ss_ext_list_price,c_login,c_last_name,c_customer_id,d_year,c_birth_country,sum,c_preferred_cust_flag,c_first_name,ss_ext_discount_amt] [sum,sum] - Project [d_year,c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ss_ext_discount_amt,ss_ext_list_price] + HashAggregate [d_year,sum,ss_ext_discount_amt,c_login,sum,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,ss_ext_list_price,c_first_name,c_email_address] [sum,sum] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,d_year,c_login,c_last_name,c_first_name,c_birth_country,ss_ext_list_price] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_customer_id,c_birth_country,c_login,ss_sold_date_sk,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ss_ext_discount_amt,ss_ext_list_price] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,c_login,c_last_name,c_first_name,ss_sold_date_sk,c_birth_country,ss_ext_list_price] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] InputAdapter BroadcastExchange #2 WholeStageCodegen @@ -39,18 +39,18 @@ TakeOrderedAndProject [customer_preferred_cust_flag] BroadcastExchange #4 Union WholeStageCodegen - HashAggregate [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),c_email_address,c_login,c_last_name,sum,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),year_total,sum,customer_id,customer_preferred_cust_flag] + HashAggregate [d_year,sum,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),c_first_name,c_email_address] [customer_preferred_cust_flag,sum,year_total,customer_id,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2))))] InputAdapter - Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #5 + Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #5 WholeStageCodegen - HashAggregate [c_email_address,ss_ext_list_price,c_login,c_last_name,sum,c_customer_id,d_year,c_birth_country,sum,c_preferred_cust_flag,c_first_name,ss_ext_discount_amt] [sum,sum] - Project [d_year,c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ss_ext_discount_amt,ss_ext_list_price] + HashAggregate [d_year,sum,ss_ext_discount_amt,c_login,sum,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,ss_ext_list_price,c_first_name,c_email_address] [sum,sum] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,d_year,c_login,c_last_name,c_first_name,c_birth_country,ss_ext_list_price] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_customer_id,c_birth_country,c_login,ss_sold_date_sk,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ss_ext_discount_amt,ss_ext_list_price] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,c_login,c_last_name,c_first_name,ss_sold_date_sk,c_birth_country,ss_ext_list_price] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] InputAdapter ReusedExchange [ss_sold_date_sk,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price] [ss_sold_date_sk,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price] #2 InputAdapter @@ -66,18 +66,18 @@ TakeOrderedAndProject [customer_preferred_cust_flag] LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen Filter [year_total] - HashAggregate [c_email_address,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),c_login,c_last_name,c_customer_id,d_year,c_birth_country,sum,c_preferred_cust_flag,c_first_name] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] + HashAggregate [d_year,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),sum,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] InputAdapter - Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #8 + Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #8 WholeStageCodegen - HashAggregate [sum,c_email_address,ws_ext_list_price,c_login,c_last_name,ws_ext_discount_amt,c_customer_id,d_year,c_birth_country,sum,c_preferred_cust_flag,c_first_name] [sum,sum] - Project [d_year,c_customer_id,c_birth_country,c_login,ws_ext_list_price,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ws_ext_discount_amt] + HashAggregate [d_year,ws_ext_discount_amt,sum,c_login,sum,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,ws_ext_list_price,c_first_name,c_email_address] [sum,sum] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,d_year,ws_ext_discount_amt,c_login,c_last_name,ws_ext_list_price,c_first_name,c_birth_country] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [c_customer_id,c_birth_country,c_login,ws_ext_list_price,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ws_ext_discount_amt,ws_sold_date_sk] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,ws_ext_discount_amt,c_login,c_last_name,ws_sold_date_sk,ws_ext_list_price,c_first_name,c_birth_country] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] InputAdapter BroadcastExchange #9 WholeStageCodegen @@ -91,18 +91,18 @@ TakeOrderedAndProject [customer_preferred_cust_flag] Union LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen - HashAggregate [c_email_address,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),c_login,c_last_name,sum,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] + HashAggregate [d_year,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] InputAdapter - Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #11 + Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #11 WholeStageCodegen - HashAggregate [c_email_address,ws_ext_list_price,c_login,c_last_name,sum,ws_ext_discount_amt,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name,sum] [sum,sum] - Project [d_year,c_customer_id,c_birth_country,c_login,ws_ext_list_price,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ws_ext_discount_amt] + HashAggregate [d_year,ws_ext_discount_amt,sum,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,ws_ext_list_price,c_first_name,c_email_address,sum] [sum,sum] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,d_year,ws_ext_discount_amt,c_login,c_last_name,ws_ext_list_price,c_first_name,c_birth_country] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [c_customer_id,c_birth_country,c_login,ws_ext_list_price,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ws_ext_discount_amt,ws_sold_date_sk] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,ws_ext_discount_amt,c_login,c_last_name,ws_sold_date_sk,ws_ext_list_price,c_first_name,c_birth_country] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] InputAdapter ReusedExchange [ws_sold_date_sk,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price] [ws_sold_date_sk,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price] #9 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt index 6bd7cd99e..497db4cde 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt @@ -1,6 +1,6 @@ -TakeOrderedAndProject [i_current_price,revenueratio,i_item_desc,i_item_id,i_category,itemrevenue,i_class] +TakeOrderedAndProject [i_item_id,i_item_desc,itemrevenue,i_category,revenueratio,i_current_price,i_class] WholeStageCodegen - Project [i_current_price,_we0,_w0,i_item_desc,i_item_id,i_category,itemrevenue,i_class] + Project [i_item_id,i_item_desc,itemrevenue,_we0,i_category,i_current_price,i_class,_w0] InputAdapter Window [_w1,i_class] WholeStageCodegen @@ -8,14 +8,14 @@ TakeOrderedAndProject [i_current_price,revenueratio,i_item_desc,i_item_id,i_cate InputAdapter Exchange [i_class] #1 WholeStageCodegen - HashAggregate [i_current_price,sum,i_item_desc,sum(UnscaledValue(ws_ext_sales_price)),i_item_id,i_category,i_class] [_w0,sum,_w1,sum(UnscaledValue(ws_ext_sales_price)),itemrevenue] + HashAggregate [i_item_id,i_item_desc,i_category,sum,i_current_price,sum(UnscaledValue(ws_ext_sales_price)),i_class] [itemrevenue,_w1,sum,sum(UnscaledValue(ws_ext_sales_price)),_w0] InputAdapter - Exchange [i_current_price,i_item_desc,i_item_id,i_category,i_class] #2 + Exchange [i_item_id,i_item_desc,i_category,i_current_price,i_class] #2 WholeStageCodegen - HashAggregate [i_current_price,sum,sum,i_item_desc,ws_ext_sales_price,i_item_id,i_category,i_class] [sum,sum] - Project [i_current_price,i_category,i_item_id,ws_ext_sales_price,i_class,i_item_desc] + HashAggregate [i_item_id,i_item_desc,ws_ext_sales_price,i_category,sum,i_current_price,i_class,sum] [sum,sum] + Project [i_class,i_current_price,ws_ext_sales_price,i_category,i_item_desc,i_item_id] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [i_current_price,i_category,i_item_id,ws_ext_sales_price,i_class,i_item_desc,ws_sold_date_sk] + Project [i_class,i_current_price,ws_ext_sales_price,ws_sold_date_sk,i_category,i_item_desc,i_item_id] BroadcastHashJoin [ws_item_sk,i_item_sk] Project [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] Filter [ws_item_sk,ws_sold_date_sk] @@ -23,9 +23,9 @@ TakeOrderedAndProject [i_current_price,revenueratio,i_item_desc,i_item_id,i_cate InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [i_current_price,i_category,i_item_sk,i_item_id,i_class,i_item_desc] + Project [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] Filter [i_category,i_item_sk] - Scan parquet default.item [i_current_price,i_category,i_item_sk,i_item_id,i_class,i_item_desc] [i_current_price,i_category,i_item_sk,i_item_id,i_class,i_item_desc] + Scan parquet default.item [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] InputAdapter BroadcastExchange #4 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt index a5dc71c58..0b02236c0 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt @@ -1,22 +1,22 @@ WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_ext_wholesale_cost)),count,sum,sum,avg(cast(ss_quantity as bigint)),sum,sum,sum(UnscaledValue(ss_ext_wholesale_cost)),avg(UnscaledValue(ss_ext_sales_price)),count,count] [avg(UnscaledValue(ss_ext_wholesale_cost)),count,sum,sum(ss_ext_wholesale_cost),sum,avg(cast(ss_quantity as bigint)),avg(ss_ext_sales_price),sum,sum,avg(ss_ext_wholesale_cost),avg(ss_quantity),sum(UnscaledValue(ss_ext_wholesale_cost)),avg(UnscaledValue(ss_ext_sales_price)),count,count] + HashAggregate [sum,sum,sum(UnscaledValue(ss_ext_wholesale_cost)),sum,avg(cast(ss_quantity as bigint)),count,sum,avg(UnscaledValue(ss_ext_sales_price)),count,avg(UnscaledValue(ss_ext_wholesale_cost)),count] [sum,avg(ss_ext_sales_price),sum,sum(UnscaledValue(ss_ext_wholesale_cost)),sum,sum(ss_ext_wholesale_cost),avg(ss_ext_wholesale_cost),avg(cast(ss_quantity as bigint)),avg(ss_quantity),count,sum,avg(UnscaledValue(ss_ext_sales_price)),count,avg(UnscaledValue(ss_ext_wholesale_cost)),count] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [sum,count,ss_quantity,count,sum,sum,count,ss_ext_sales_price,sum,sum,sum,sum,ss_ext_wholesale_cost,count,count,count,sum] [sum,count,count,sum,sum,count,sum,sum,sum,sum,count,count,count,sum] + HashAggregate [ss_ext_wholesale_cost,count,sum,sum,count,sum,count,sum,sum,count,sum,sum,sum,ss_ext_sales_price,count,ss_quantity,count] [count,sum,sum,count,sum,count,sum,sum,count,sum,sum,sum,count,count] Project [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] - BroadcastHashJoin [ss_hdemo_sk,cd_education_status,hd_demo_sk,hd_dep_count,cd_marital_status,ss_sales_price] - Project [ss_sales_price,ss_hdemo_sk,cd_education_status,ss_quantity,cd_marital_status,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [hd_dep_count,ss_hdemo_sk,ss_sales_price,cd_education_status,hd_demo_sk,cd_marital_status] + Project [ss_quantity,cd_marital_status,ss_ext_sales_price,ss_sales_price,ss_hdemo_sk,cd_education_status,ss_ext_wholesale_cost] BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [ss_sales_price,ss_hdemo_sk,ss_cdemo_sk,ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] + Project [ss_quantity,ss_ext_sales_price,ss_sales_price,ss_cdemo_sk,ss_hdemo_sk,ss_ext_wholesale_cost] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sales_price,ss_hdemo_sk,ss_cdemo_sk,ss_quantity,ss_sold_date_sk,ss_ext_sales_price,ss_ext_wholesale_cost] + Project [ss_quantity,ss_ext_sales_price,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] - Project [ss_sales_price,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit,ss_ext_wholesale_cost] + Project [ss_addr_sk,ss_quantity,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_sales_price,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_ext_wholesale_cost] - Filter [ss_hdemo_sk,ss_store_sk,ss_sold_date_sk,ss_addr_sk,ss_cdemo_sk] - Scan parquet default.store_sales [ss_sales_price,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_ext_wholesale_cost] [ss_sales_price,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_ext_wholesale_cost] + Project [ss_addr_sk,ss_quantity,ss_store_sk,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] + Filter [ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_quantity,ss_store_sk,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] [ss_addr_sk,ss_quantity,ss_store_sk,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] InputAdapter BroadcastExchange #2 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt index cb9881765..0acde64df 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt @@ -6,8 +6,8 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 +- *(79) Expand [List(sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, i_category_id#13, 0), List(sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, null, 1), List(sales#8, number_sales#9, channel#10, i_brand_id#11, null, null, 3), List(sales#8, number_sales#9, channel#10, null, null, null, 7), List(sales#8, number_sales#9, null, null, null, null, 15)], [sales#8, number_sales#9, channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7] +- Union :- *(26) Project [sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, i_category_id#13] - : +- *(26) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2)))#16) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2)))#16 as decimal(32,6)) > cast(Subquery subquery3902 as decimal(32,6)))) - : : +- Subquery subquery3902 + : +- *(26) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2)))#16) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2)))#16 as decimal(32,6)) > cast(Subquery subquery1662 as decimal(32,6)))) + : : +- Subquery subquery1662 : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) : : +- Exchange SinglePartition : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) @@ -48,8 +48,8 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : +- *(11) Project [i_item_sk#33 AS ss_item_sk#34] : : : +- *(11) BroadcastHashJoin [i_brand_id#11, i_class_id#12, i_category_id#13], [brand_id#35, class_id#36, category_id#37], Inner, BuildRight : : : :- *(11) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] - : : : : +- *(11) Filter ((isnotnull(i_category_id#13) && isnotnull(i_brand_id#11)) && isnotnull(i_class_id#12)) - : : : : +- *(11) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category_id), IsNotNull(i_brand_id), IsNotNull(i_class_id)], ReadSchema: struct + : : : : +- *(11) Filter ((isnotnull(i_class_id#12) && isnotnull(i_brand_id#11)) && isnotnull(i_category_id#13)) + : : : : +- *(11) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) : : : +- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) : : : +- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) @@ -67,8 +67,8 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : +- *(1) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] - : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#33) && isnotnull(i_category_id#13)) && isnotnull(i_brand_id#11)) && isnotnull(i_class_id#12)) - : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category_id), IsNotNull(i_brand_id), IsNotNull(i_class_id)], ReadSchema: struct + : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#33) && isnotnull(i_class_id#12)) && isnotnull(i_brand_id#11)) && isnotnull(i_category_id#13)) + : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- *(2) Project [d_date_sk#20] : : : : : +- *(2) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) @@ -107,8 +107,8 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : +- *(24) Filter ((((isnotnull(d_year#21) && isnotnull(d_moy#40)) && (d_year#21 = 2001)) && (d_moy#40 = 11)) && isnotnull(d_date_sk#20)) : +- *(24) FileScan parquet default.date_dim[d_date_sk#20,d_year#21,d_moy#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct :- *(52) Project [sales#41, number_sales#42, channel#43, i_brand_id#11, i_class_id#12, i_category_id#13] - : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44) && (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44 as decimal(32,6)) > cast(Subquery subquery3907 as decimal(32,6)))) - : : +- Subquery subquery3907 + : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44) && (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44 as decimal(32,6)) > cast(Subquery subquery1667 as decimal(32,6)))) + : : +- Subquery subquery1667 : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) : : +- Exchange SinglePartition : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) @@ -149,8 +149,8 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(78) Project [sales#45, number_sales#46, channel#47, i_brand_id#11, i_class_id#12, i_category_id#13] - +- *(78) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2)))#48) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2)))#48 as decimal(32,6)) > cast(Subquery subquery3912 as decimal(32,6)))) - : +- Subquery subquery3912 + +- *(78) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2)))#48) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2)))#48 as decimal(32,6)) > cast(Subquery subquery1672 as decimal(32,6)))) + : +- Subquery subquery1672 : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) : +- Exchange SinglePartition : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt index a35247783..93bae0792 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt @@ -1,15 +1,15 @@ -TakeOrderedAndProject [i_category_id,sum(sales),channel,sum(number_sales),i_brand_id,i_class_id] +TakeOrderedAndProject [sum(sales),i_brand_id,sum(number_sales),channel,i_category_id,i_class_id] WholeStageCodegen - HashAggregate [i_category_id,channel,sum,sum(number_salesL),sum(sales),i_brand_id,i_class_id,spark_grouping_id,sum] [sum(sales),sum(number_sales),sum,sum(number_salesL),sum(sales),sum] + HashAggregate [sum(sales),spark_grouping_id,i_brand_id,sum,sum,channel,i_category_id,i_class_id,sum(number_salesL)] [sum(sales),sum(sales),sum(number_sales),sum,sum,sum(number_salesL)] InputAdapter - Exchange [i_category_id,channel,i_brand_id,i_class_id,spark_grouping_id] #1 + Exchange [spark_grouping_id,i_brand_id,channel,i_category_id,i_class_id] #1 WholeStageCodegen - HashAggregate [sum,sum,i_category_id,channel,sum,sales,number_sales,i_brand_id,i_class_id,spark_grouping_id,sum] [sum,sum,sum,sum] - Expand [sales,i_brand_id,number_sales,channel,i_category_id,i_class_id] + HashAggregate [sales,spark_grouping_id,sum,i_brand_id,sum,number_sales,sum,sum,channel,i_category_id,i_class_id] [sum,sum,sum,sum] + Expand [channel,sales,i_category_id,number_sales,i_class_id,i_brand_id] InputAdapter Union WholeStageCodegen - Project [sales,i_brand_id,number_sales,channel,i_category_id,i_class_id] + Project [channel,sales,i_category_id,number_sales,i_class_id,i_brand_id] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #1 WholeStageCodegen @@ -17,7 +17,7 @@ TakeOrderedAndProject [i_category_id,sum(sales),channel,sum(number_sales),i_bran InputAdapter Exchange #13 WholeStageCodegen - HashAggregate [sum,list_price,sum,quantity,count,count] [sum,count,sum,count] + HashAggregate [count,list_price,sum,quantity,sum,count] [sum,count,sum,count] InputAdapter Union WholeStageCodegen @@ -48,14 +48,14 @@ TakeOrderedAndProject [i_category_id,sum(sales),channel,sum(number_sales),i_bran Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #14 - HashAggregate [i_brand_id,count(1),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count,sum,i_class_id,i_category_id] [count(1),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),channel,sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count,sum,number_sales] + HashAggregate [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,i_category_id,i_brand_id,sum,i_class_id] [sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,number_sales,channel,sum] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #2 WholeStageCodegen - HashAggregate [i_brand_id,ss_quantity,count,count,sum,i_class_id,ss_list_price,i_category_id,sum] [sum,count,sum,count] - Project [ss_list_price,i_class_id,i_category_id,i_brand_id,ss_quantity] + HashAggregate [count,count,sum,ss_list_price,i_category_id,i_brand_id,ss_quantity,sum,i_class_id] [sum,count,sum,count] + Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,i_class_id] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_list_price,i_class_id,i_category_id,i_brand_id,ss_quantity,ss_sold_date_sk] + Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,ss_sold_date_sk,i_class_id] BroadcastHashJoin [ss_item_sk,i_item_sk] BroadcastHashJoin [ss_item_sk,ss_item_sk] Project [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] @@ -65,22 +65,22 @@ TakeOrderedAndProject [i_category_id,sum(sales),channel,sum(number_sales),i_bran BroadcastExchange #3 WholeStageCodegen Project [i_item_sk] - BroadcastHashJoin [i_brand_id,category_id,class_id,i_class_id,brand_id,i_category_id] + BroadcastHashJoin [category_id,class_id,i_category_id,i_brand_id,i_class_id,brand_id] Project [i_item_sk,i_brand_id,i_class_id,i_category_id] - Filter [i_category_id,i_brand_id,i_class_id] + Filter [i_class_id,i_brand_id,i_category_id] Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter BroadcastExchange #4 WholeStageCodegen HashAggregate [brand_id,class_id,category_id] HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [i_category_id,class_id,brand_id,i_brand_id,i_class_id,category_id] + BroadcastHashJoin [class_id,i_class_id,i_category_id,brand_id,i_brand_id,category_id] HashAggregate [brand_id,class_id,category_id] InputAdapter Exchange [brand_id,class_id,category_id] #5 WholeStageCodegen HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [class_id,i_category_id,i_class_id,brand_id,category_id,i_brand_id] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,class_id,brand_id,category_id] Project [i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] @@ -92,7 +92,7 @@ TakeOrderedAndProject [i_category_id,sum(sales),channel,sum(number_sales),i_bran BroadcastExchange #6 WholeStageCodegen Project [i_item_sk,i_brand_id,i_class_id,i_category_id] - Filter [i_item_sk,i_category_id,i_brand_id,i_class_id] + Filter [i_item_sk,i_class_id,i_brand_id,i_category_id] Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter BroadcastExchange #7 @@ -148,7 +148,7 @@ TakeOrderedAndProject [i_category_id,sum(sales),channel,sum(number_sales),i_bran Filter [d_year,d_moy,d_date_sk] Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] WholeStageCodegen - Project [i_brand_id,number_sales,i_class_id,channel,i_category_id,sales] + Project [sales,channel,i_category_id,i_brand_id,number_sales,i_class_id] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #2 WholeStageCodegen @@ -156,7 +156,7 @@ TakeOrderedAndProject [i_category_id,sum(sales),channel,sum(number_sales),i_bran InputAdapter Exchange #13 WholeStageCodegen - HashAggregate [sum,list_price,sum,quantity,count,count] [sum,count,sum,count] + HashAggregate [count,list_price,sum,quantity,sum,count] [sum,count,sum,count] InputAdapter Union WholeStageCodegen @@ -187,14 +187,14 @@ TakeOrderedAndProject [i_category_id,sum(sales),channel,sum(number_sales),i_bran Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #14 - HashAggregate [i_brand_id,count(1),sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),count,i_class_id,i_category_id] [count(1),number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),count,channel,sales] + HashAggregate [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sum,i_category_id,i_brand_id,count,count(1),i_class_id] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sales,channel,sum,number_sales,count,count(1)] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #15 WholeStageCodegen - HashAggregate [cs_quantity,i_brand_id,sum,sum,cs_list_price,count,i_class_id,count,i_category_id] [sum,count,sum,count] - Project [i_class_id,cs_quantity,i_category_id,cs_list_price,i_brand_id] + HashAggregate [sum,cs_list_price,sum,i_category_id,count,i_brand_id,cs_quantity,count,i_class_id] [sum,count,sum,count] + Project [cs_quantity,i_category_id,i_brand_id,i_class_id,cs_list_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [i_class_id,cs_quantity,i_category_id,cs_list_price,i_brand_id,cs_sold_date_sk] + Project [cs_quantity,i_category_id,cs_sold_date_sk,i_brand_id,i_class_id,cs_list_price] BroadcastHashJoin [cs_item_sk,i_item_sk] BroadcastHashJoin [cs_item_sk,ss_item_sk] Project [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] @@ -207,7 +207,7 @@ TakeOrderedAndProject [i_category_id,sum(sales),channel,sum(number_sales),i_bran InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #12 WholeStageCodegen - Project [i_brand_id,sales,i_class_id,i_category_id,number_sales,channel] + Project [number_sales,i_category_id,i_brand_id,i_class_id,sales,channel] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #3 WholeStageCodegen @@ -215,7 +215,7 @@ TakeOrderedAndProject [i_category_id,sum(sales),channel,sum(number_sales),i_bran InputAdapter Exchange #13 WholeStageCodegen - HashAggregate [sum,list_price,sum,quantity,count,count] [sum,count,sum,count] + HashAggregate [count,list_price,sum,quantity,sum,count] [sum,count,sum,count] InputAdapter Union WholeStageCodegen @@ -246,14 +246,14 @@ TakeOrderedAndProject [i_category_id,sum(sales),channel,sum(number_sales),i_bran Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #14 - HashAggregate [i_brand_id,sum,count,count(1),i_class_id,i_category_id,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum,count,sales,count(1),number_sales,channel,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] + HashAggregate [count(1),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum,count,i_category_id,i_brand_id,i_class_id] [count(1),number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum,count,sales,channel] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #16 WholeStageCodegen - HashAggregate [i_brand_id,sum,count,count,sum,ws_quantity,i_class_id,i_category_id,ws_list_price] [sum,count,sum,count] - Project [i_class_id,i_category_id,i_brand_id,ws_list_price,ws_quantity] + HashAggregate [count,sum,sum,count,i_category_id,i_brand_id,i_class_id,ws_list_price,ws_quantity] [sum,count,sum,count] + Project [ws_quantity,i_category_id,ws_list_price,i_brand_id,i_class_id] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [i_class_id,i_category_id,i_brand_id,ws_list_price,ws_quantity,ws_sold_date_sk] + Project [ws_quantity,i_category_id,ws_list_price,ws_sold_date_sk,i_brand_id,i_class_id] BroadcastHashJoin [ws_item_sk,i_item_sk] BroadcastHashJoin [ws_item_sk,ss_item_sk] Project [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt index 25a3c636b..858eda8d6 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt @@ -2,8 +2,8 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 ASC NULLS FIRST,i_class_id#3 ASC NULLS FIRST,i_category_id#4 ASC NULLS FIRST], output=[channel#1,i_brand_id#2,i_class_id#3,i_category_id#4,sales#5,number_sales#6,channel#7,i_brand_id#8,i_class_id#9,i_category_id#10,sales#11,number_sales#12]) +- *(52) BroadcastHashJoin [i_brand_id#2, i_class_id#3, i_category_id#4], [i_brand_id#8, i_class_id#9, i_category_id#10], Inner, BuildRight :- *(52) Project [channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, sales#5, number_sales#6] - : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#15) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#15 as decimal(32,6)) > cast(Subquery subquery4124 as decimal(32,6)))) - : : +- Subquery subquery4124 + : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#15) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#15 as decimal(32,6)) > cast(Subquery subquery1884 as decimal(32,6)))) + : : +- Subquery subquery1884 : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) : : +- Exchange SinglePartition : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) @@ -44,8 +44,8 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : +- *(11) Project [i_item_sk#32 AS ss_item_sk#33] : : : +- *(11) BroadcastHashJoin [i_brand_id#2, i_class_id#3, i_category_id#4], [brand_id#34, class_id#35, category_id#36], Inner, BuildRight : : : :- *(11) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : : +- *(11) Filter ((isnotnull(i_category_id#4) && isnotnull(i_brand_id#2)) && isnotnull(i_class_id#3)) - : : : : +- *(11) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category_id), IsNotNull(i_brand_id), IsNotNull(i_class_id)], ReadSchema: struct + : : : : +- *(11) Filter ((isnotnull(i_class_id#3) && isnotnull(i_brand_id#2)) && isnotnull(i_category_id#4)) + : : : : +- *(11) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) : : : +- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) : : : +- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) @@ -63,8 +63,8 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : +- *(1) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_category_id#4)) && isnotnull(i_brand_id#2)) && isnotnull(i_class_id#3)) - : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category_id), IsNotNull(i_brand_id), IsNotNull(i_class_id)], ReadSchema: struct + : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_class_id#3)) && isnotnull(i_brand_id#2)) && isnotnull(i_category_id#4)) + : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- *(2) Project [d_date_sk#19] : : : : : +- *(2) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) @@ -95,25 +95,25 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(23) BroadcastHashJoin [i_item_sk#32], [ss_item_sk#33], LeftSemi, BuildRight : : :- *(23) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : +- *(23) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_category_id#4)) && isnotnull(i_class_id#3)) && isnotnull(i_brand_id#2)) - : : : +- *(23) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category_id), IsNotNull(i_class_id), IsNotNull(i_brand_id)], ReadSchema: struct + : : : +- *(23) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_class_id#3)) && isnotnull(i_category_id#4)) && isnotnull(i_brand_id#2)) + : : : +- *(23) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_brand_id)], ReadSchema: struct : : +- ReusedExchange [ss_item_sk#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(24) Project [d_date_sk#19] - : +- *(24) Filter ((isnotnull(d_week_seq#39) && (d_week_seq#39 = Subquery subquery4123)) && isnotnull(d_date_sk#19)) - : : +- Subquery subquery4123 + : +- *(24) Filter ((isnotnull(d_week_seq#39) && (d_week_seq#39 = Subquery subquery1883)) && isnotnull(d_date_sk#19)) + : : +- Subquery subquery1883 : : +- *(1) Project [d_week_seq#39] - : : +- *(1) Filter (((((isnotnull(d_dom#40) && isnotnull(d_year#20)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - : : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct + : : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + : : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct : +- *(24) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct - : +- Subquery subquery4123 + : +- Subquery subquery1883 : +- *(1) Project [d_week_seq#39] - : +- *(1) Filter (((((isnotnull(d_dom#40) && isnotnull(d_year#20)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct + : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true])) +- *(51) Project [channel#7, i_brand_id#8, i_class_id#9, i_category_id#10, sales#11, number_sales#12] - +- *(51) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42 as decimal(32,6)) > cast(Subquery subquery4130 as decimal(32,6)))) - : +- Subquery subquery4130 + +- *(51) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42 as decimal(32,6)) > cast(Subquery subquery1890 as decimal(32,6)))) + : +- Subquery subquery1890 : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) : +- Exchange SinglePartition : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) @@ -154,13 +154,13 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : +- ReusedExchange [i_item_sk#43, i_brand_id#8, i_class_id#9, i_category_id#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(49) Project [d_date_sk#19] - +- *(49) Filter ((isnotnull(d_week_seq#39) && (d_week_seq#39 = Subquery subquery4129)) && isnotnull(d_date_sk#19)) - : +- Subquery subquery4129 + +- *(49) Filter ((isnotnull(d_week_seq#39) && (d_week_seq#39 = Subquery subquery1889)) && isnotnull(d_date_sk#19)) + : +- Subquery subquery1889 : +- *(1) Project [d_week_seq#39] - : +- *(1) Filter (((((isnotnull(d_dom#40) && isnotnull(d_year#20)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct + : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct +- *(49) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct - +- Subquery subquery4129 + +- Subquery subquery1889 +- *(1) Project [d_week_seq#39] - +- *(1) Filter (((((isnotnull(d_dom#40) && isnotnull(d_year#20)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct \ No newline at end of file + +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt index bc05af485..d0ac00bfe 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt @@ -1,7 +1,7 @@ -TakeOrderedAndProject [number_sales,sales,i_category_id,i_class_id,channel,channel,i_class_id,sales,i_brand_id,i_brand_id,i_category_id,number_sales] +TakeOrderedAndProject [i_brand_id,i_category_id,number_sales,i_class_id,i_brand_id,sales,sales,number_sales,i_category_id,channel,channel,i_class_id] WholeStageCodegen - BroadcastHashJoin [i_category_id,i_brand_id,i_category_id,i_class_id,i_class_id,i_brand_id] - Project [number_sales,i_brand_id,sales,channel,i_class_id,i_category_id] + BroadcastHashJoin [i_category_id,i_category_id,i_brand_id,i_class_id,i_brand_id,i_class_id] + Project [number_sales,i_category_id,channel,sales,i_brand_id,i_class_id] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #2 WholeStageCodegen @@ -9,7 +9,7 @@ TakeOrderedAndProject [number_sales,sales,i_category_id,i_class_id,channel,chann InputAdapter Exchange #12 WholeStageCodegen - HashAggregate [list_price,sum,count,count,quantity,sum] [sum,count,sum,count] + HashAggregate [quantity,sum,count,list_price,count,sum] [sum,count,sum,count] InputAdapter Union WholeStageCodegen @@ -40,14 +40,14 @@ TakeOrderedAndProject [number_sales,sales,i_category_id,i_class_id,channel,chann Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #13 - HashAggregate [i_brand_id,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum,count(1),count,i_class_id,i_category_id] [number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sales,count(1),channel,count] + HashAggregate [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),sum,i_category_id,i_brand_id,count,i_class_id] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),number_sales,count(1),sum,channel,sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 WholeStageCodegen - HashAggregate [i_brand_id,sum,sum,ss_quantity,count,count,i_class_id,ss_list_price,i_category_id] [sum,count,sum,count] - Project [ss_list_price,i_class_id,i_category_id,i_brand_id,ss_quantity] + HashAggregate [sum,sum,ss_list_price,i_category_id,i_brand_id,ss_quantity,count,i_class_id,count] [sum,count,sum,count] + Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,i_class_id] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_list_price,i_class_id,i_category_id,i_brand_id,ss_quantity,ss_sold_date_sk] + Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,ss_sold_date_sk,i_class_id] BroadcastHashJoin [ss_item_sk,i_item_sk] BroadcastHashJoin [ss_item_sk,ss_item_sk] Project [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] @@ -57,22 +57,22 @@ TakeOrderedAndProject [number_sales,sales,i_category_id,i_class_id,channel,chann BroadcastExchange #2 WholeStageCodegen Project [i_item_sk] - BroadcastHashJoin [i_brand_id,category_id,brand_id,class_id,i_class_id,i_category_id] + BroadcastHashJoin [brand_id,category_id,i_category_id,i_brand_id,class_id,i_class_id] Project [i_item_sk,i_brand_id,i_class_id,i_category_id] - Filter [i_category_id,i_brand_id,i_class_id] + Filter [i_class_id,i_brand_id,i_category_id] Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter BroadcastExchange #3 WholeStageCodegen HashAggregate [brand_id,class_id,category_id] HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [brand_id,category_id,i_category_id,i_brand_id,i_class_id,class_id] + BroadcastHashJoin [i_class_id,i_category_id,class_id,category_id,i_brand_id,brand_id] HashAggregate [brand_id,class_id,category_id] InputAdapter Exchange [brand_id,class_id,category_id] #4 WholeStageCodegen HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [brand_id,category_id,i_category_id,i_class_id,class_id,i_brand_id] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,class_id,category_id,brand_id] Project [i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] @@ -84,7 +84,7 @@ TakeOrderedAndProject [number_sales,sales,i_category_id,i_class_id,channel,chann BroadcastExchange #5 WholeStageCodegen Project [i_item_sk,i_brand_id,i_class_id,i_category_id] - Filter [i_item_sk,i_category_id,i_brand_id,i_class_id] + Filter [i_item_sk,i_class_id,i_brand_id,i_category_id] Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter BroadcastExchange #6 @@ -129,7 +129,7 @@ TakeOrderedAndProject [number_sales,sales,i_category_id,i_class_id,channel,chann WholeStageCodegen BroadcastHashJoin [i_item_sk,ss_item_sk] Project [i_item_sk,i_brand_id,i_class_id,i_category_id] - Filter [i_item_sk,i_category_id,i_class_id,i_brand_id] + Filter [i_item_sk,i_class_id,i_category_id,i_brand_id] Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter ReusedExchange [ss_item_sk] [ss_item_sk] #2 @@ -141,18 +141,18 @@ TakeOrderedAndProject [number_sales,sales,i_category_id,i_class_id,channel,chann Subquery #1 WholeStageCodegen Project [d_week_seq] - Filter [d_dom,d_year,d_moy] + Filter [d_year,d_dom,d_moy] Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] Subquery #1 WholeStageCodegen Project [d_week_seq] - Filter [d_dom,d_year,d_moy] + Filter [d_year,d_dom,d_moy] Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] InputAdapter BroadcastExchange #14 WholeStageCodegen - Project [sales,i_class_id,channel,i_brand_id,i_category_id,number_sales] + Project [i_brand_id,i_class_id,sales,number_sales,i_category_id,channel] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #4 WholeStageCodegen @@ -160,7 +160,7 @@ TakeOrderedAndProject [number_sales,sales,i_category_id,i_class_id,channel,chann InputAdapter Exchange #12 WholeStageCodegen - HashAggregate [list_price,sum,count,count,quantity,sum] [sum,count,sum,count] + HashAggregate [quantity,sum,count,list_price,count,sum] [sum,count,sum,count] InputAdapter Union WholeStageCodegen @@ -191,14 +191,14 @@ TakeOrderedAndProject [number_sales,sales,i_category_id,i_class_id,channel,chann Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #13 - HashAggregate [i_class_id,count(1),i_brand_id,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),i_category_id,sum,count] [sales,channel,count(1),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum,count,number_sales] + HashAggregate [i_brand_id,sum,i_class_id,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,i_category_id] [sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),channel] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #15 WholeStageCodegen - HashAggregate [i_class_id,sum,ss_quantity,i_brand_id,i_category_id,sum,count,count,ss_list_price] [sum,count,sum,count] - Project [ss_list_price,ss_quantity,i_brand_id,i_class_id,i_category_id] + HashAggregate [i_brand_id,sum,sum,i_class_id,count,count,ss_list_price,i_category_id,ss_quantity] [sum,count,sum,count] + Project [ss_quantity,i_category_id,ss_list_price,i_brand_id,i_class_id] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_list_price,ss_quantity,i_brand_id,ss_sold_date_sk,i_class_id,i_category_id] + Project [ss_quantity,i_category_id,ss_list_price,i_brand_id,ss_sold_date_sk,i_class_id] BroadcastHashJoin [ss_item_sk,i_item_sk] BroadcastHashJoin [ss_item_sk,ss_item_sk] Project [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] @@ -216,11 +216,11 @@ TakeOrderedAndProject [number_sales,sales,i_category_id,i_class_id,channel,chann Subquery #3 WholeStageCodegen Project [d_week_seq] - Filter [d_dom,d_year,d_moy] + Filter [d_year,d_dom,d_moy] Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] Subquery #3 WholeStageCodegen Project [d_week_seq] - Filter [d_dom,d_year,d_moy] + Filter [d_year,d_dom,d_moy] Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt index 4f13dbdf2..57b72893e 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt @@ -1,27 +1,27 @@ TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] WholeStageCodegen - HashAggregate [sum,count,sum(UnscaledValue(cs_ext_ship_cost)),count(cs_order_number),sum(UnscaledValue(cs_net_profit)),sum] [sum,count,sum(UnscaledValue(cs_ext_ship_cost)),total net profit ,total shipping cost ,count(cs_order_number),order count ,sum(UnscaledValue(cs_net_profit)),sum] + HashAggregate [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),total net profit ,order count ,count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),total shipping cost ] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(cs_ext_ship_cost)),count(cs_order_number),sum,sum(UnscaledValue(cs_net_profit)),sum,count,cs_order_number,sum] [sum,count,sum(UnscaledValue(cs_ext_ship_cost)),count(cs_order_number),sum,sum(UnscaledValue(cs_net_profit)),sum,count,sum] - HashAggregate [sum,sum(UnscaledValue(cs_ext_ship_cost)),sum,sum(UnscaledValue(cs_net_profit)),sum,cs_order_number,sum] [sum,sum(UnscaledValue(cs_ext_ship_cost)),sum,sum(UnscaledValue(cs_net_profit)),sum,sum] + HashAggregate [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),cs_order_number] [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] + HashAggregate [sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),cs_order_number] [sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] InputAdapter Exchange [cs_order_number] #2 WholeStageCodegen - HashAggregate [cs_net_profit,sum(UnscaledValue(cs_ext_ship_cost)),sum,sum(UnscaledValue(cs_net_profit)),cs_order_number,cs_ext_ship_cost,sum] [sum,sum(UnscaledValue(cs_ext_ship_cost)),sum,sum(UnscaledValue(cs_net_profit)),sum,sum] + HashAggregate [cs_ext_ship_cost,sum(UnscaledValue(cs_net_profit)),sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),cs_order_number,cs_net_profit] [sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] Project [cs_order_number,cs_ext_ship_cost,cs_net_profit] BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] Project [cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] BroadcastHashJoin [cs_ship_addr_sk,ca_address_sk] - Project [cs_call_center_sk,cs_ship_addr_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number] + Project [cs_ship_addr_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] BroadcastHashJoin [cs_ship_date_sk,d_date_sk] BroadcastHashJoin [cs_order_number,cr_order_number] - Project [cs_call_center_sk,cs_ship_date_sk,cs_ship_addr_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number] + Project [cs_ship_addr_sk,cs_ship_date_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] BroadcastHashJoin [cs_order_number,cs_order_number,cs_warehouse_sk,cs_warehouse_sk] - Project [cs_call_center_sk,cs_warehouse_sk,cs_ship_date_sk,cs_ship_addr_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number] + Project [cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] Filter [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk] - Scan parquet default.catalog_sales [cs_call_center_sk,cs_warehouse_sk,cs_ship_date_sk,cs_ship_addr_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number] [cs_call_center_sk,cs_warehouse_sk,cs_ship_date_sk,cs_ship_addr_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number] + Scan parquet default.catalog_sales [cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] [cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt index 898520b2a..5d0ff39f9 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt @@ -22,12 +22,12 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_des : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#27,ss_item_sk#19,ss_customer_sk#33,ss_store_sk#21,ss_ticket_number#34,ss_quantity#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct + : : : : : +- *(2) Filter ((isnotnull(cs_bill_customer_sk#31) && isnotnull(cs_item_sk#32)) && isnotnull(cs_sold_date_sk#23)) + : : : : : +- *(2) FileScan parquet default.catalog_sales[cs_sold_date_sk#23,cs_bill_customer_sk#31,cs_item_sk#32,cs_quantity#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(3) Project [d_date_sk#28] : : : : +- *(3) Filter ((isnotnull(d_quarter_name#36) && (d_quarter_name#36 = 2001Q1)) && isnotnull(d_date_sk#28)) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt index 6db08032d..454790ddb 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt @@ -1,38 +1,38 @@ -TakeOrderedAndProject [as_store_returns_quantitystdev,catalog_sales_quantitycov,store_sales_quantitycount,catalog_sales_quantityave,as_store_returns_quantitycount,catalog_sales_quantitycount,store_sales_quantitycov,s_state,store_sales_quantityave,as_store_returns_quantityave,i_item_desc,store_sales_quantitystdev,i_item_id,store_returns_quantitycov,catalog_sales_quantitystdev] +TakeOrderedAndProject [as_store_returns_quantityave,catalog_sales_quantitycount,i_item_id,i_item_desc,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitystdev,catalog_sales_quantitycov,s_state,store_returns_quantitycov,catalog_sales_quantityave,store_sales_quantitycov,as_store_returns_quantitystdev,as_store_returns_quantitycount,catalog_sales_quantitystdev] WholeStageCodegen - HashAggregate [count,sum,stddev_samp(cast(sr_return_quantity as double)),count,avg(cast(sr_return_quantity as bigint)),sum,m2,count,count,n,n,n,s_state,m2,count,avg,sum,stddev_samp(cast(ss_quantity as double)),i_item_desc,count(ss_quantity),avg(cast(ss_quantity as bigint)),count(sr_return_quantity),stddev_samp(cast(cs_quantity as double)),avg,i_item_id,m2,avg(cast(cs_quantity as bigint)),count,avg,count(cs_quantity)] [as_store_returns_quantitystdev,catalog_sales_quantitycov,store_sales_quantitycount,catalog_sales_quantityave,count,sum,stddev_samp(cast(sr_return_quantity as double)),count,avg(cast(sr_return_quantity as bigint)),as_store_returns_quantitycount,sum,m2,catalog_sales_quantitycount,count,count,n,n,n,store_sales_quantitycov,store_sales_quantityave,m2,count,avg,sum,as_store_returns_quantityave,stddev_samp(cast(ss_quantity as double)),count(ss_quantity),store_sales_quantitystdev,avg(cast(ss_quantity as bigint)),count(sr_return_quantity),stddev_samp(cast(cs_quantity as double)),avg,m2,avg(cast(cs_quantity as bigint)),store_returns_quantitycov,catalog_sales_quantitystdev,count,avg,count(cs_quantity)] + HashAggregate [count,avg(cast(ss_quantity as bigint)),i_item_id,stddev_samp(cast(ss_quantity as double)),i_item_desc,stddev_samp(cast(sr_return_quantity as double)),m2,avg(cast(sr_return_quantity as bigint)),n,count,m2,avg,avg,s_state,count(sr_return_quantity),sum,avg,n,count,m2,count(cs_quantity),sum,stddev_samp(cast(cs_quantity as double)),avg(cast(cs_quantity as bigint)),count,n,sum,count,count(ss_quantity),count] [count,avg(cast(ss_quantity as bigint)),as_store_returns_quantityave,catalog_sales_quantitycount,stddev_samp(cast(ss_quantity as double)),stddev_samp(cast(sr_return_quantity as double)),m2,avg(cast(sr_return_quantity as bigint)),n,count,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitystdev,m2,avg,avg,catalog_sales_quantitycov,count(sr_return_quantity),store_returns_quantitycov,sum,catalog_sales_quantityave,store_sales_quantitycov,avg,n,count,as_store_returns_quantitystdev,m2,count(cs_quantity),sum,stddev_samp(cast(cs_quantity as double)),avg(cast(cs_quantity as bigint)),count,as_store_returns_quantitycount,n,sum,count,count(ss_quantity),count,catalog_sales_quantitystdev] InputAdapter Exchange [i_item_id,i_item_desc,s_state] #1 WholeStageCodegen - HashAggregate [count,cs_quantity,avg,count,sum,n,sr_return_quantity,count,count,avg,sum,m2,ss_quantity,count,count,n,n,n,s_state,sum,m2,count,count,avg,sum,i_item_desc,m2,n,avg,count,n,avg,i_item_id,m2,m2,m2,sum,count,count,avg,sum,count] [count,avg,count,sum,n,count,count,avg,sum,m2,count,count,n,n,n,sum,m2,count,count,avg,sum,m2,n,avg,count,n,avg,m2,m2,m2,sum,count,count,avg,sum,count] - Project [cs_quantity,ss_quantity,i_item_id,sr_return_quantity,i_item_desc,s_state] + HashAggregate [count,m2,i_item_id,m2,i_item_desc,m2,count,avg,avg,m2,count,n,count,sum,n,sum,n,count,m2,avg,avg,s_state,avg,sum,sr_return_quantity,avg,count,count,count,n,count,m2,sum,count,ss_quantity,cs_quantity,n,sum,count,n,sum,count] [count,m2,m2,m2,count,avg,avg,m2,count,n,count,sum,n,sum,n,count,m2,avg,avg,avg,sum,avg,count,count,count,n,count,m2,sum,count,n,sum,count,n,sum,count] + Project [ss_quantity,cs_quantity,sr_return_quantity,s_state,i_item_desc,i_item_id] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [cs_quantity,ss_item_sk,ss_quantity,sr_return_quantity,s_state] + Project [ss_quantity,ss_item_sk,cs_quantity,sr_return_quantity,s_state] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [cs_quantity,ss_item_sk,ss_quantity,sr_return_quantity,ss_store_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_quantity,ss_item_sk,ss_quantity,cs_sold_date_sk,sr_return_quantity,ss_store_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk] BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [cs_quantity,ss_item_sk,ss_quantity,cs_sold_date_sk,sr_return_quantity,ss_store_sk,sr_returned_date_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,sr_returned_date_sk] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [cs_quantity,ss_item_sk,ss_quantity,cs_sold_date_sk,ss_sold_date_sk,sr_return_quantity,ss_store_sk,sr_returned_date_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,ss_sold_date_sk,sr_returned_date_sk] BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] - Project [sr_item_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,ss_store_sk,sr_customer_sk,sr_returned_date_sk] - BroadcastHashJoin [ss_customer_sk,sr_item_sk,ss_item_sk,sr_ticket_number,sr_customer_sk,ss_ticket_number] - Project [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] - Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_sold_date_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] + Project [ss_quantity,ss_item_sk,ss_store_sk,sr_return_quantity,sr_customer_sk,sr_item_sk,ss_sold_date_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] + Project [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] + Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [sr_ticket_number,sr_item_sk,sr_return_quantity,sr_customer_sk,sr_returned_date_sk] - Filter [sr_ticket_number,sr_item_sk,sr_customer_sk,sr_returned_date_sk] - Scan parquet default.store_returns [sr_ticket_number,sr_item_sk,sr_return_quantity,sr_customer_sk,sr_returned_date_sk] [sr_ticket_number,sr_item_sk,sr_return_quantity,sr_customer_sk,sr_returned_date_sk] + Project [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] + Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] + Scan parquet default.store_returns [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] - Filter [cs_item_sk,cs_bill_customer_sk,cs_sold_date_sk] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] InputAdapter BroadcastExchange #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt index 37081499f..d045bd95e 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt @@ -1,26 +1,26 @@ -TakeOrderedAndProject [ca_country,agg6,agg4,ca_state,agg7,agg2,i_item_id,agg1,agg3,agg5,ca_county] +TakeOrderedAndProject [ca_country,agg7,agg6,ca_county,agg2,agg1,agg4,agg3,ca_state,agg5,i_item_id] WholeStageCodegen - HashAggregate [avg(cast(cs_net_profit as decimal(12,2))),ca_country,count,count,count,sum,avg(cast(cs_list_price as decimal(12,2))),sum,avg(cast(cs_quantity as decimal(12,2))),count,ca_state,sum,count,sum,i_item_id,avg(cast(cd_dep_count as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),sum,sum,count,spark_grouping_id,sum,avg(cast(c_birth_year as decimal(12,2))),count,ca_county,avg(cast(cs_sales_price as decimal(12,2)))] [avg(cast(cs_net_profit as decimal(12,2))),count,count,count,agg6,sum,avg(cast(cs_list_price as decimal(12,2))),sum,avg(cast(cs_quantity as decimal(12,2))),agg4,count,sum,count,sum,agg7,agg2,avg(cast(cd_dep_count as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),sum,sum,count,agg1,agg3,sum,avg(cast(c_birth_year as decimal(12,2))),count,agg5,avg(cast(cs_sales_price as decimal(12,2)))] + HashAggregate [ca_country,avg(cast(cs_quantity as decimal(12,2))),count,sum,avg(cast(cs_coupon_amt as decimal(12,2))),sum,sum,ca_county,avg(cast(cs_list_price as decimal(12,2))),sum,sum,count,sum,count,avg(cast(cs_net_profit as decimal(12,2))),spark_grouping_id,sum,count,ca_state,avg(cast(cd_dep_count as decimal(12,2))),count,avg(cast(cs_sales_price as decimal(12,2))),count,count,avg(cast(c_birth_year as decimal(12,2))),i_item_id] [avg(cast(cs_quantity as decimal(12,2))),count,agg7,sum,avg(cast(cs_coupon_amt as decimal(12,2))),sum,sum,agg6,avg(cast(cs_list_price as decimal(12,2))),agg2,agg1,sum,sum,count,sum,count,agg4,agg3,avg(cast(cs_net_profit as decimal(12,2))),sum,count,agg5,avg(cast(cd_dep_count as decimal(12,2))),count,avg(cast(cs_sales_price as decimal(12,2))),count,count,avg(cast(c_birth_year as decimal(12,2)))] InputAdapter - Exchange [ca_country,ca_state,i_item_id,spark_grouping_id,ca_county] #1 + Exchange [ca_country,ca_county,spark_grouping_id,ca_state,i_item_id] #1 WholeStageCodegen - HashAggregate [cs_quantity,ca_country,count,count,count,count,sum,sum,cs_net_profit,cd_dep_count,count,sum,sum,sum,cs_sales_price,count,ca_state,sum,count,cs_coupon_amt,sum,sum,count,sum,cs_list_price,i_item_id,count,count,sum,sum,count,c_birth_year,spark_grouping_id,sum,sum,sum,count,count,count,ca_county] [count,count,count,count,sum,sum,count,sum,sum,sum,count,sum,count,sum,sum,count,sum,count,count,sum,sum,count,sum,sum,sum,count,count,count] - Expand [cs_quantity,i_item_id,ca_country,cs_net_profit,cd_dep_count,cs_sales_price,cs_coupon_amt,ca_county,cs_list_price,c_birth_year,ca_state] - Project [cs_quantity,ca_country,cs_net_profit,cd_dep_count,cs_sales_price,cs_coupon_amt,cs_list_price,c_birth_year,i_item_id,ca_state,ca_county] + HashAggregate [ca_country,cs_list_price,cs_coupon_amt,sum,count,sum,count,count,sum,sum,count,sum,sum,count,sum,count,ca_county,sum,sum,count,sum,count,count,sum,sum,sum,spark_grouping_id,cs_net_profit,sum,count,ca_state,cs_sales_price,cs_quantity,cd_dep_count,count,count,c_birth_year,count,count,i_item_id] [sum,count,sum,count,count,sum,sum,count,sum,sum,count,sum,count,sum,sum,count,sum,count,count,sum,sum,sum,sum,count,count,count,count,count] + Expand [cs_list_price,cs_coupon_amt,ca_country,i_item_id,ca_county,cs_net_profit,cs_sales_price,cs_quantity,cd_dep_count,ca_state,c_birth_year] + Project [cs_list_price,cs_coupon_amt,i_item_id,ca_county,ca_country,cs_net_profit,cs_sales_price,cs_quantity,cd_dep_count,c_birth_year,ca_state] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_quantity,cd_dep_count,cs_list_price,cs_item_sk,ca_state,cs_coupon_amt,ca_county,c_birth_year,ca_country,cs_net_profit,cs_sales_price] + Project [ca_state,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cd_dep_count,ca_county,cs_item_sk,cs_net_profit,ca_country,cs_list_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_quantity,cd_dep_count,cs_list_price,cs_item_sk,ca_state,cs_sold_date_sk,cs_coupon_amt,ca_county,c_birth_year,ca_country,cs_net_profit,cs_sales_price] + Project [ca_state,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cs_sold_date_sk,cd_dep_count,ca_county,cs_item_sk,cs_net_profit,ca_country,cs_list_price] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [cs_quantity,cd_dep_count,cs_list_price,cs_item_sk,cs_sold_date_sk,cs_coupon_amt,c_birth_year,cs_net_profit,c_current_addr_sk,cs_sales_price] + Project [c_current_addr_sk,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cs_sold_date_sk,cd_dep_count,cs_item_sk,cs_net_profit,cs_list_price] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] - Project [cs_quantity,cd_dep_count,cs_list_price,cs_item_sk,c_current_cdemo_sk,cs_sold_date_sk,cs_coupon_amt,c_birth_year,cs_net_profit,c_current_addr_sk,cs_sales_price] + Project [c_current_cdemo_sk,c_current_addr_sk,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cs_sold_date_sk,cd_dep_count,cs_item_sk,cs_net_profit,cs_list_price] BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_quantity,cd_dep_count,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk,cs_coupon_amt,cs_net_profit,cs_sales_price] + Project [cs_coupon_amt,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cd_dep_count,cs_item_sk,cs_net_profit,cs_list_price] BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] - Project [cs_bill_cdemo_sk,cs_quantity,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk,cs_coupon_amt,cs_net_profit,cs_sales_price] + Project [cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_net_profit,cs_list_price] Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk] - Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_quantity,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk,cs_coupon_amt,cs_net_profit,cs_sales_price] [cs_bill_cdemo_sk,cs_quantity,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk,cs_coupon_amt,cs_net_profit,cs_sales_price] + Scan parquet default.catalog_sales [cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_net_profit,cs_list_price] [cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_net_profit,cs_list_price] InputAdapter BroadcastExchange #2 WholeStageCodegen @@ -32,7 +32,7 @@ TakeOrderedAndProject [ca_country,agg6,agg4,ca_state,agg7,agg2,i_item_id,agg1,ag WholeStageCodegen Project [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] Filter [c_birth_month,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] - Scan parquet default.customer [c_current_cdemo_sk,c_birth_year,c_customer_sk,c_current_addr_sk,c_birth_month] [c_current_cdemo_sk,c_birth_year,c_customer_sk,c_current_addr_sk,c_birth_month] + Scan parquet default.customer [c_current_cdemo_sk,c_current_addr_sk,c_birth_year,c_customer_sk,c_birth_month] [c_current_cdemo_sk,c_current_addr_sk,c_birth_year,c_customer_sk,c_birth_month] InputAdapter BroadcastExchange #4 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt index 810d37bad..c008da8ae 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt @@ -1,17 +1,17 @@ -TakeOrderedAndProject [brand_id,brand,i_manufact,ext_price,i_manufact_id] +TakeOrderedAndProject [ext_price,brand_id,i_manufact,i_manufact_id,brand] WholeStageCodegen - HashAggregate [i_brand_id,i_manufact,sum(UnscaledValue(ss_ext_sales_price)),sum,i_manufact_id,i_brand] [brand_id,brand,sum(UnscaledValue(ss_ext_sales_price)),ext_price,sum] + HashAggregate [i_manufact,sum(UnscaledValue(ss_ext_sales_price)),i_brand,i_brand_id,i_manufact_id,sum] [ext_price,brand_id,sum(UnscaledValue(ss_ext_sales_price)),brand,sum] InputAdapter Exchange [i_brand,i_brand_id,i_manufact_id,i_manufact] #1 WholeStageCodegen - HashAggregate [i_brand_id,sum,i_manufact,ss_ext_sales_price,sum,i_manufact_id,i_brand] [sum,sum] - Project [i_manufact_id,i_manufact,i_brand_id,i_brand,ss_ext_sales_price] + HashAggregate [i_manufact,i_brand,sum,ss_ext_sales_price,i_brand_id,i_manufact_id,sum] [sum,sum] + Project [i_manufact,i_manufact_id,ss_ext_sales_price,i_brand_id,i_brand] BroadcastHashJoin [ss_store_sk,s_store_sk,ca_zip,s_zip] - Project [i_manufact_id,i_manufact,i_brand_id,i_brand,ca_zip,ss_store_sk,ss_ext_sales_price] + Project [i_manufact,i_manufact_id,ca_zip,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [i_manufact_id,i_manufact,i_brand_id,i_brand,ss_store_sk,ss_ext_sales_price,c_current_addr_sk] + Project [c_current_addr_sk,i_manufact,i_manufact_id,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [i_manufact_id,i_manufact,ss_customer_sk,i_brand_id,i_brand,ss_store_sk,ss_ext_sales_price] + Project [i_manufact,i_manufact_id,ss_store_sk,ss_customer_sk,ss_ext_sales_price,i_brand_id,i_brand] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] @@ -21,15 +21,15 @@ TakeOrderedAndProject [brand_id,brand,i_manufact,ext_price,i_manufact_id] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price] + Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price] [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [i_manufact_id,i_manufact,i_brand_id,i_brand,i_item_sk] + Project [i_manufact,i_manufact_id,i_item_sk,i_brand_id,i_brand] Filter [i_manager_id,i_item_sk] - Scan parquet default.item [i_manufact_id,i_manager_id,i_manufact,i_brand_id,i_brand,i_item_sk] [i_manufact_id,i_manager_id,i_manufact,i_brand_id,i_brand,i_item_sk] + Scan parquet default.item [i_manufact,i_manufact_id,i_item_sk,i_manager_id,i_brand_id,i_brand] [i_manufact,i_manufact_id,i_item_sk,i_manager_id,i_brand_id,i_brand] InputAdapter BroadcastExchange #4 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt index 6f69879cb..a3121688c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt @@ -3,15 +3,15 @@ WholeStageCodegen InputAdapter Exchange [d_week_seq1] #1 WholeStageCodegen - Project [mon_sales1,fri_sales1,thu_sales1,wed_sales1,sun_sales2,mon_sales2,fri_sales2,thu_sales2,sat_sales2,tue_sales2,d_week_seq1,wed_sales2,sun_sales1,tue_sales1,sat_sales1] + Project [wed_sales2,mon_sales1,sat_sales2,d_week_seq1,fri_sales2,thu_sales2,sat_sales1,tue_sales1,sun_sales2,sun_sales1,tue_sales2,wed_sales1,mon_sales2,thu_sales1,fri_sales1] BroadcastHashJoin [d_week_seq1,d_week_seq2] - Project [sat_sales,wed_sales,sun_sales,d_week_seq,mon_sales,fri_sales,tue_sales,thu_sales] + Project [sun_sales,thu_sales,sat_sales,wed_sales,fri_sales,tue_sales,d_week_seq,mon_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - HashAggregate [sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum,sum,sum,d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END))] [sun_sales,thu_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),fri_sales,mon_sales,sum,sum,sum,wed_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum,sum,sum,tue_sales,sat_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END))] + HashAggregate [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),mon_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum,thu_sales,sum,fri_sales,tue_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sun_sales,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum,sum,sat_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum,wed_sales] InputAdapter Exchange [d_week_seq] #2 WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sum,sum,d_week_seq,d_day_name,sum,sum,sales_price,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + HashAggregate [d_day_name,sum,sum,sum,sum,sum,sum,sum,d_week_seq,sum,sum,sum,sum,sales_price,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,d_week_seq,d_day_name] BroadcastHashJoin [sold_date_sk,d_date_sk] InputAdapter @@ -39,11 +39,11 @@ WholeStageCodegen InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [sat_sales,wed_sales,sun_sales,d_week_seq,mon_sales,fri_sales,tue_sales,thu_sales] + Project [sun_sales,thu_sales,sat_sales,wed_sales,fri_sales,tue_sales,d_week_seq,mon_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - HashAggregate [sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum] [sum,sun_sales,thu_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),fri_sales,mon_sales,wed_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum,sum,sum,sum,tue_sales,sat_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum] + HashAggregate [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum] [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),mon_sales,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),thu_sales,fri_sales,tue_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum,sun_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sat_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum,wed_sales] InputAdapter - ReusedExchange [sum,sum,d_week_seq,sum,sum,sum,sum,sum] [sum,sum,d_week_seq,sum,sum,sum,sum,sum] #2 + ReusedExchange [sum,sum,sum,sum,d_week_seq,sum,sum,sum] [sum,sum,sum,sum,d_week_seq,sum,sum,sum] #2 InputAdapter BroadcastExchange #6 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt index bca0f3ec5..ebfef4216 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt @@ -1,6 +1,6 @@ -TakeOrderedAndProject [i_current_price,revenueratio,i_item_desc,i_item_id,itemrevenue,i_category,i_class] +TakeOrderedAndProject [i_item_id,i_item_desc,itemrevenue,revenueratio,i_category,i_current_price,i_class] WholeStageCodegen - Project [i_current_price,i_item_desc,i_item_id,itemrevenue,i_category,_we0,i_class,_w0] + Project [i_item_id,i_item_desc,itemrevenue,_we0,i_category,_w0,i_current_price,i_class] InputAdapter Window [_w1,i_class] WholeStageCodegen @@ -8,14 +8,14 @@ TakeOrderedAndProject [i_current_price,revenueratio,i_item_desc,i_item_id,itemre InputAdapter Exchange [i_class] #1 WholeStageCodegen - HashAggregate [i_current_price,sum(UnscaledValue(cs_ext_sales_price)),sum,i_item_desc,i_item_id,i_category,i_class] [sum(UnscaledValue(cs_ext_sales_price)),sum,itemrevenue,_w1,_w0] + HashAggregate [i_item_id,i_item_desc,sum(UnscaledValue(cs_ext_sales_price)),i_category,sum,i_current_price,i_class] [itemrevenue,sum(UnscaledValue(cs_ext_sales_price)),_w1,sum,_w0] InputAdapter - Exchange [i_current_price,i_item_desc,i_item_id,i_category,i_class] #2 + Exchange [i_item_id,i_item_desc,i_category,i_current_price,i_class] #2 WholeStageCodegen - HashAggregate [i_current_price,sum,sum,cs_ext_sales_price,i_item_desc,i_item_id,i_category,i_class] [sum,sum] - Project [i_current_price,i_category,cs_ext_sales_price,i_item_id,i_class,i_item_desc] + HashAggregate [i_item_id,i_item_desc,sum,i_category,sum,i_current_price,cs_ext_sales_price,i_class] [sum,sum] + Project [i_class,i_current_price,i_category,i_item_desc,cs_ext_sales_price,i_item_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [i_current_price,i_category,cs_ext_sales_price,cs_sold_date_sk,i_item_id,i_class,i_item_desc] + Project [i_class,i_current_price,cs_sold_date_sk,i_category,i_item_desc,cs_ext_sales_price,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] Project [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] Filter [cs_item_sk,cs_sold_date_sk] @@ -23,9 +23,9 @@ TakeOrderedAndProject [i_current_price,revenueratio,i_item_desc,i_item_id,itemre InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [i_current_price,i_category,i_item_sk,i_item_id,i_class,i_item_desc] + Project [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] Filter [i_category,i_item_sk] - Scan parquet default.item [i_current_price,i_category,i_item_sk,i_item_id,i_class,i_item_desc] [i_current_price,i_category,i_item_sk,i_item_id,i_class,i_item_desc] + Scan parquet default.item [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] InputAdapter BroadcastExchange #4 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt index f3c16ce10..b74976c2f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt @@ -1,11 +1,11 @@ TakeOrderedAndProject [w_warehouse_name,i_item_id,inv_before,inv_after] WholeStageCodegen Filter [inv_before,inv_after] - HashAggregate [sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),w_warehouse_name,sum,sum,i_item_id,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint))] [inv_before,sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum,inv_after,sum,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint))] + HashAggregate [i_item_id,sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum,w_warehouse_name,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum] [sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum,inv_before,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum,inv_after] InputAdapter Exchange [w_warehouse_name,i_item_id] #1 WholeStageCodegen - HashAggregate [sum,inv_quantity_on_hand,w_warehouse_name,sum,d_date,sum,i_item_id,sum] [sum,sum,sum,sum] + HashAggregate [i_item_id,sum,d_date,w_warehouse_name,sum,inv_quantity_on_hand,sum,sum] [sum,sum,sum,sum] Project [inv_quantity_on_hand,w_warehouse_name,i_item_id,d_date] BroadcastHashJoin [inv_date_sk,d_date_sk] Project [inv_date_sk,inv_quantity_on_hand,w_warehouse_name,i_item_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt index 156a01dd0..49b575687 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt @@ -1,14 +1,14 @@ -TakeOrderedAndProject [i_category,qoh,i_brand,i_class,i_product_name] +TakeOrderedAndProject [i_category,i_class,i_product_name,i_brand,qoh] WholeStageCodegen - HashAggregate [i_category,avg(cast(inv_quantity_on_hand as bigint)),i_brand,sum,i_class,count,spark_grouping_id,i_product_name] [avg(cast(inv_quantity_on_hand as bigint)),qoh,sum,count] + HashAggregate [avg(cast(inv_quantity_on_hand as bigint)),i_category,count,spark_grouping_id,sum,i_class,i_product_name,i_brand] [avg(cast(inv_quantity_on_hand as bigint)),qoh,sum,count] InputAdapter - Exchange [i_category,i_brand,i_class,spark_grouping_id,i_product_name] #1 + Exchange [i_category,spark_grouping_id,i_class,i_product_name,i_brand] #1 WholeStageCodegen - HashAggregate [i_category,inv_quantity_on_hand,count,i_brand,sum,sum,i_class,count,spark_grouping_id,i_product_name] [sum,count,sum,count] - Expand [i_brand,i_product_name,inv_quantity_on_hand,i_category,i_class] - Project [inv_quantity_on_hand,i_category,i_brand,i_product_name,i_class] + HashAggregate [sum,i_category,count,count,spark_grouping_id,sum,i_class,i_product_name,inv_quantity_on_hand,i_brand] [sum,count,sum,count] + Expand [i_product_name,i_class,i_brand,i_category,inv_quantity_on_hand] + Project [i_brand,i_category,inv_quantity_on_hand,i_class,i_product_name] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [i_product_name,inv_quantity_on_hand,i_brand,i_category,inv_warehouse_sk,i_class] + Project [i_class,inv_quantity_on_hand,inv_warehouse_sk,i_product_name,i_category,i_brand] BroadcastHashJoin [inv_item_sk,i_item_sk] Project [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] BroadcastHashJoin [inv_date_sk,d_date_sk] @@ -24,9 +24,9 @@ TakeOrderedAndProject [i_category,qoh,i_brand,i_class,i_product_name] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [i_product_name,i_brand,i_category,i_item_sk,i_class] + Project [i_class,i_item_sk,i_product_name,i_category,i_brand] Filter [i_item_sk] - Scan parquet default.item [i_product_name,i_brand,i_category,i_item_sk,i_class] [i_product_name,i_brand,i_category,i_item_sk,i_class] + Scan parquet default.item [i_class,i_item_sk,i_product_name,i_category,i_brand] [i_class,i_item_sk,i_product_name,i_category,i_brand] InputAdapter BroadcastExchange #4 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt index 9e3d90e72..cd1cae198 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt @@ -36,8 +36,8 @@ CollectLimit 100 : : : +- *(2) FileScan parquet default.item[i_item_sk#13,i_item_desc#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(7) Project [c_customer_sk#7] - : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery5292 as decimal(32,6)))), DecimalType(38,8)))) - : : : +- Subquery subquery5292 + : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery3052 as decimal(32,6)))), DecimalType(38,8)))) + : : : +- Subquery subquery3052 : : : +- *(5) HashAggregate(keys=[], functions=[max(csales#21)]) : : : +- Exchange SinglePartition : : : +- *(4) HashAggregate(keys=[], functions=[partial_max(csales#21)]) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt index 3686a996b..e3c9d92e2 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt @@ -14,19 +14,19 @@ CollectLimit BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] BroadcastHashJoin [cs_item_sk,item_sk] - Project [cs_quantity,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Project [cs_quantity,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk,cs_list_price] Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_quantity,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] [cs_quantity,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_quantity,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk,cs_list_price] [cs_quantity,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk,cs_list_price] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [item_sk] Filter [count(1)] - HashAggregate [d_date,count,i_item_sk,count(1),substring(i_item_desc, 1, 30)] [count(1),item_sk,count(1),count] + HashAggregate [count(1),d_date,i_item_sk,substring(i_item_desc, 1, 30),count] [count(1),item_sk,count(1),count] InputAdapter Exchange [substring(i_item_desc, 1, 30),i_item_sk,d_date] #3 WholeStageCodegen - HashAggregate [d_date,count,i_item_desc,i_item_sk,count,substring(i_item_desc, 1, 30)] [count,substring(i_item_desc, 1, 30),count] + HashAggregate [i_item_desc,count,d_date,i_item_sk,substring(i_item_desc, 1, 30),count] [count,substring(i_item_desc, 1, 30),count] Project [d_date,i_item_sk,i_item_desc] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ss_item_sk,d_date] @@ -62,7 +62,7 @@ CollectLimit InputAdapter Exchange [c_customer_sk] #10 WholeStageCodegen - HashAggregate [sum,ss_quantity,c_customer_sk,ss_sales_price,sum] [sum,sum] + HashAggregate [sum,c_customer_sk,ss_sales_price,sum,ss_quantity] [sum,sum] Project [ss_quantity,ss_sales_price,c_customer_sk] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,ss_quantity,ss_sales_price,c_customer_sk] @@ -86,7 +86,7 @@ CollectLimit InputAdapter Exchange [c_customer_sk] #7 WholeStageCodegen - HashAggregate [ss_quantity,sum,c_customer_sk,ss_sales_price,sum] [sum,sum] + HashAggregate [sum,sum,c_customer_sk,ss_sales_price,ss_quantity] [sum,sum] Project [ss_quantity,ss_sales_price,c_customer_sk] BroadcastHashJoin [ss_customer_sk,c_customer_sk] Project [ss_customer_sk,ss_quantity,ss_sales_price] @@ -111,9 +111,9 @@ CollectLimit BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] BroadcastHashJoin [ws_item_sk,item_sk] - Project [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_item_sk,ws_sold_date_sk] + Project [ws_quantity,ws_list_price,ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_item_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] [ws_quantity,ws_list_price,ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] InputAdapter ReusedExchange [item_sk] [item_sk] #2 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt index 134e9552e..7a9af1be4 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt @@ -37,8 +37,8 @@ TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_ : : : : +- *(2) FileScan parquet default.item[i_item_sk#16,i_item_desc#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(7) Project [c_customer_sk#9 AS c_customer_sk#9#10] - : : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery5384 as decimal(32,6)))), DecimalType(38,8)))) - : : : : +- Subquery subquery5384 + : : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery3144 as decimal(32,6)))), DecimalType(38,8)))) + : : : : +- Subquery subquery3144 : : : : +- *(5) HashAggregate(keys=[], functions=[max(csales#24)]) : : : : +- Exchange SinglePartition : : : : +- *(4) HashAggregate(keys=[], functions=[partial_max(csales#24)]) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt index b8e3c71f2..ead663422 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt @@ -5,27 +5,27 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] InputAdapter Exchange [c_last_name,c_first_name] #1 WholeStageCodegen - HashAggregate [cs_quantity,sum,c_last_name,cs_list_price,sum,c_first_name] [sum,sum] + HashAggregate [cs_list_price,sum,sum,c_last_name,cs_quantity,c_first_name] [sum,sum] Project [cs_quantity,cs_list_price,c_first_name,c_last_name] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_quantity,cs_list_price,cs_sold_date_sk,c_last_name,c_first_name] + Project [cs_quantity,c_last_name,cs_sold_date_sk,c_first_name,cs_list_price] BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] BroadcastHashJoin [cs_item_sk,item_sk] - Project [cs_quantity,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Project [cs_quantity,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk,cs_list_price] Filter [cs_bill_customer_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_quantity,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] [cs_quantity,cs_list_price,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_quantity,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk,cs_list_price] [cs_quantity,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk,cs_list_price] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [item_sk] Filter [count(1)] - HashAggregate [count,d_date,substring(i_item_desc, 1, 30),i_item_sk,count(1)] [count(1),item_sk,count(1),count] + HashAggregate [d_date,count,i_item_sk,count(1),substring(i_item_desc, 1, 30)] [count(1),item_sk,count(1),count] InputAdapter Exchange [substring(i_item_desc, 1, 30),i_item_sk,d_date] #3 WholeStageCodegen - HashAggregate [count,d_date,count,i_item_desc,substring(i_item_desc, 1, 30),i_item_sk] [count,substring(i_item_desc, 1, 30),count] + HashAggregate [i_item_desc,d_date,count,count,i_item_sk,substring(i_item_desc, 1, 30)] [count,substring(i_item_desc, 1, 30),count] Project [d_date,i_item_sk,i_item_desc] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ss_item_sk,d_date] @@ -61,7 +61,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] InputAdapter Exchange [c_customer_sk] #10 WholeStageCodegen - HashAggregate [sum,ss_quantity,c_customer_sk,sum,ss_sales_price] [sum,sum] + HashAggregate [sum,sum,c_customer_sk,ss_sales_price,ss_quantity] [sum,sum] Project [ss_quantity,ss_sales_price,c_customer_sk] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,ss_quantity,ss_sales_price,c_customer_sk] @@ -85,7 +85,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] InputAdapter Exchange [c_customer_sk] #7 WholeStageCodegen - HashAggregate [ss_quantity,c_customer_sk,sum,ss_sales_price,sum] [sum,sum] + HashAggregate [sum,c_customer_sk,ss_sales_price,sum,ss_quantity] [sum,sum] Project [ss_quantity,ss_sales_price,c_customer_sk] BroadcastHashJoin [ss_customer_sk,c_customer_sk] Project [ss_customer_sk,ss_quantity,ss_sales_price] @@ -117,17 +117,17 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] InputAdapter Exchange [c_last_name,c_first_name] #14 WholeStageCodegen - HashAggregate [sum,c_last_name,ws_quantity,sum,c_first_name,ws_list_price] [sum,sum] + HashAggregate [sum,c_last_name,ws_list_price,c_first_name,sum,ws_quantity] [sum,sum] Project [ws_quantity,ws_list_price,c_first_name,c_last_name] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_list_price,c_last_name,c_first_name,ws_quantity,ws_sold_date_sk] + Project [ws_quantity,c_last_name,ws_list_price,ws_sold_date_sk,c_first_name] BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] BroadcastHashJoin [ws_item_sk,item_sk] - Project [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_item_sk,ws_sold_date_sk] + Project [ws_quantity,ws_list_price,ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] Filter [ws_bill_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_item_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] [ws_quantity,ws_list_price,ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] InputAdapter ReusedExchange [item_sk] [item_sk] #2 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt index 1103d6e02..4d115faf3 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == *(8) Project [c_last_name#1, c_first_name#2, s_store_name#3, paid#4] -+- *(8) Filter (isnotnull(sum(netpaid#5)#6) && (cast(sum(netpaid#5)#6 as decimal(33,8)) > cast(Subquery subquery5486 as decimal(33,8)))) - : +- Subquery subquery5486 ++- *(8) Filter (isnotnull(sum(netpaid#5)#6) && (cast(sum(netpaid#5)#6 as decimal(33,8)) > cast(Subquery subquery3246 as decimal(33,8)))) + : +- Subquery subquery3246 : +- *(8) HashAggregate(keys=[], functions=[avg(netpaid#5)]) : +- Exchange SinglePartition : +- *(7) HashAggregate(keys=[], functions=[partial_avg(netpaid#5)]) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt index 71544ba9b..08aae40a6 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt @@ -7,25 +7,25 @@ WholeStageCodegen InputAdapter Exchange #8 WholeStageCodegen - HashAggregate [count,sum,netpaid,count,sum] [sum,count,sum,count] - HashAggregate [i_current_price,i_manager_id,s_store_name,c_last_name,s_state,i_size,sum,sum(UnscaledValue(ss_net_paid)),i_color,c_first_name,i_units,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + HashAggregate [sum,count,count,netpaid,sum] [sum,count,sum,count] + HashAggregate [i_units,i_color,i_size,s_state,sum(UnscaledValue(ss_net_paid)),sum,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] InputAdapter - Exchange [i_current_price,i_manager_id,s_store_name,c_last_name,s_state,i_size,i_color,c_first_name,i_units,ca_state] #9 + Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #9 WholeStageCodegen - HashAggregate [i_current_price,sum,i_manager_id,s_store_name,c_last_name,s_state,i_size,ss_net_paid,sum,i_color,c_first_name,i_units,ca_state] [sum,sum] - Project [i_manager_id,i_units,ca_state,i_current_price,i_size,s_store_name,c_last_name,c_first_name,i_color,ss_net_paid,s_state] + HashAggregate [i_units,i_color,i_size,ss_net_paid,s_state,sum,i_manager_id,c_last_name,sum,i_current_price,c_first_name,s_store_name,ca_state] [sum,sum] + Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] - Project [i_manager_id,c_birth_country,i_units,i_current_price,s_zip,i_size,s_store_name,c_last_name,c_first_name,i_color,ss_net_paid,s_state] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [i_manager_id,ss_customer_sk,i_units,i_current_price,s_zip,i_size,s_store_name,i_color,ss_net_paid,s_state] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_customer_sk,ss_item_sk,s_zip,s_store_name,ss_net_paid,s_state] + Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] + Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -37,13 +37,13 @@ WholeStageCodegen WholeStageCodegen Project [s_store_sk,s_store_name,s_state,s_zip] Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_store_sk,s_zip,s_store_name,s_market_id,s_state] [s_store_sk,s_zip,s_store_name,s_market_id,s_state] + Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] InputAdapter BroadcastExchange #10 WholeStageCodegen - Project [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] + Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] Filter [i_item_sk] - Scan parquet default.item [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] + Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] InputAdapter BroadcastExchange #6 WholeStageCodegen @@ -56,29 +56,29 @@ WholeStageCodegen Project [ca_state,ca_zip,ca_country] Filter [ca_zip] Scan parquet default.customer_address [ca_state,ca_zip,ca_country] [ca_state,ca_zip,ca_country] - HashAggregate [c_last_name,c_first_name,s_store_name,sum,sum(netpaid)] [sum(netpaid),paid,sum(netpaid),sum] + HashAggregate [c_first_name,s_store_name,c_last_name,sum(netpaid),sum] [sum(netpaid),paid,sum(netpaid),sum] InputAdapter Exchange [c_last_name,c_first_name,s_store_name] #1 WholeStageCodegen - HashAggregate [c_last_name,c_first_name,sum,netpaid,s_store_name,sum] [sum,sum] - HashAggregate [i_current_price,i_manager_id,s_store_name,c_last_name,s_state,i_size,sum(UnscaledValue(ss_net_paid)),i_color,c_first_name,sum,i_units,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + HashAggregate [c_first_name,s_store_name,c_last_name,sum,netpaid,sum] [sum,sum] + HashAggregate [i_units,i_color,i_size,sum,s_state,sum(UnscaledValue(ss_net_paid)),i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] InputAdapter - Exchange [i_current_price,i_manager_id,s_store_name,c_last_name,s_state,i_size,i_color,c_first_name,i_units,ca_state] #2 + Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #2 WholeStageCodegen - HashAggregate [i_current_price,i_manager_id,s_store_name,c_last_name,s_state,i_size,sum,ss_net_paid,i_color,c_first_name,sum,i_units,ca_state] [sum,sum] - Project [i_manager_id,i_units,ca_state,i_current_price,i_size,s_store_name,c_last_name,c_first_name,i_color,ss_net_paid,s_state] + HashAggregate [i_units,sum,i_color,i_size,ss_net_paid,sum,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] [sum,sum] + Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] - Project [i_manager_id,c_birth_country,i_units,i_current_price,s_zip,i_size,s_store_name,c_last_name,c_first_name,i_color,ss_net_paid,s_state] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [i_manager_id,ss_customer_sk,i_units,i_current_price,s_zip,i_size,s_store_name,i_color,ss_net_paid,s_state] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_customer_sk,ss_item_sk,s_zip,s_store_name,ss_net_paid,s_state] + Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] + Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -90,13 +90,13 @@ WholeStageCodegen WholeStageCodegen Project [s_store_sk,s_store_name,s_state,s_zip] Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_store_sk,s_zip,s_store_name,s_market_id,s_state] [s_store_sk,s_zip,s_store_name,s_market_id,s_state] + Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] + Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] Filter [i_color,i_item_sk] - Scan parquet default.item [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] + Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] InputAdapter BroadcastExchange #6 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt index 8afda776c..35e443a34 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == *(8) Project [c_last_name#1, c_first_name#2, s_store_name#3, paid#4] -+- *(8) Filter (isnotnull(sum(netpaid#5)#6) && (cast(sum(netpaid#5)#6 as decimal(33,8)) > cast(Subquery subquery5530 as decimal(33,8)))) - : +- Subquery subquery5530 ++- *(8) Filter (isnotnull(sum(netpaid#5)#6) && (cast(sum(netpaid#5)#6 as decimal(33,8)) > cast(Subquery subquery3290 as decimal(33,8)))) + : +- Subquery subquery3290 : +- *(8) HashAggregate(keys=[], functions=[avg(netpaid#5)]) : +- Exchange SinglePartition : +- *(7) HashAggregate(keys=[], functions=[partial_avg(netpaid#5)]) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt index 3eca25bd7..87db312c3 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt @@ -7,25 +7,25 @@ WholeStageCodegen InputAdapter Exchange #8 WholeStageCodegen - HashAggregate [netpaid,count,sum,count,sum] [sum,count,sum,count] - HashAggregate [i_current_price,sum,sum(UnscaledValue(ss_net_paid)),i_manager_id,s_store_name,c_last_name,s_state,i_size,i_color,c_first_name,i_units,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + HashAggregate [count,sum,count,netpaid,sum] [sum,count,sum,count] + HashAggregate [i_units,sum,i_color,i_size,s_state,i_manager_id,c_last_name,sum(UnscaledValue(ss_net_paid)),i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] InputAdapter - Exchange [i_current_price,i_manager_id,s_store_name,c_last_name,s_state,i_size,i_color,c_first_name,i_units,ca_state] #9 + Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #9 WholeStageCodegen - HashAggregate [i_current_price,sum,i_manager_id,s_store_name,c_last_name,s_state,i_size,ss_net_paid,i_color,c_first_name,i_units,ca_state,sum] [sum,sum] - Project [i_manager_id,i_units,ca_state,i_current_price,i_size,s_store_name,c_last_name,c_first_name,i_color,ss_net_paid,s_state] + HashAggregate [i_units,sum,i_color,i_size,ss_net_paid,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,sum,s_store_name,ca_state] [sum,sum] + Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] - Project [i_manager_id,c_birth_country,i_units,i_current_price,s_zip,i_size,s_store_name,c_last_name,c_first_name,i_color,ss_net_paid,s_state] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [i_manager_id,ss_customer_sk,i_units,i_current_price,s_zip,i_size,s_store_name,i_color,ss_net_paid,s_state] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_customer_sk,ss_item_sk,s_zip,s_store_name,ss_net_paid,s_state] + Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] + Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -37,13 +37,13 @@ WholeStageCodegen WholeStageCodegen Project [s_store_sk,s_store_name,s_state,s_zip] Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_store_sk,s_zip,s_store_name,s_market_id,s_state] [s_store_sk,s_zip,s_store_name,s_market_id,s_state] + Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] InputAdapter BroadcastExchange #10 WholeStageCodegen - Project [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] + Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] Filter [i_item_sk] - Scan parquet default.item [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] + Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] InputAdapter BroadcastExchange #6 WholeStageCodegen @@ -56,29 +56,29 @@ WholeStageCodegen Project [ca_state,ca_zip,ca_country] Filter [ca_zip] Scan parquet default.customer_address [ca_state,ca_zip,ca_country] [ca_state,ca_zip,ca_country] - HashAggregate [c_last_name,c_first_name,sum,s_store_name,sum(netpaid)] [sum(netpaid),paid,sum(netpaid),sum] + HashAggregate [c_first_name,s_store_name,c_last_name,sum(netpaid),sum] [sum(netpaid),paid,sum(netpaid),sum] InputAdapter Exchange [c_last_name,c_first_name,s_store_name] #1 WholeStageCodegen - HashAggregate [c_last_name,c_first_name,netpaid,sum,s_store_name,sum] [sum,sum] - HashAggregate [i_current_price,sum(UnscaledValue(ss_net_paid)),i_manager_id,s_store_name,sum,c_last_name,s_state,i_size,i_color,c_first_name,i_units,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + HashAggregate [c_first_name,s_store_name,c_last_name,netpaid,sum,sum] [sum,sum] + HashAggregate [i_units,sum,i_color,i_size,s_state,i_manager_id,c_last_name,sum(UnscaledValue(ss_net_paid)),i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] InputAdapter - Exchange [i_current_price,i_manager_id,s_store_name,c_last_name,s_state,i_size,i_color,c_first_name,i_units,ca_state] #2 + Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #2 WholeStageCodegen - HashAggregate [i_current_price,i_manager_id,s_store_name,sum,c_last_name,s_state,i_size,ss_net_paid,sum,i_color,c_first_name,i_units,ca_state] [sum,sum] - Project [i_manager_id,i_units,ca_state,i_current_price,i_size,s_store_name,c_last_name,c_first_name,i_color,ss_net_paid,s_state] + HashAggregate [i_units,sum,i_color,i_size,ss_net_paid,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,sum,s_store_name,ca_state] [sum,sum] + Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] - Project [i_manager_id,c_birth_country,i_units,i_current_price,s_zip,i_size,s_store_name,c_last_name,c_first_name,i_color,ss_net_paid,s_state] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [i_manager_id,ss_customer_sk,i_units,i_current_price,s_zip,i_size,s_store_name,i_color,ss_net_paid,s_state] + Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_customer_sk,ss_item_sk,s_zip,s_store_name,ss_net_paid,s_state] + Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] + Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_store_sk,ss_net_paid] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -90,13 +90,13 @@ WholeStageCodegen WholeStageCodegen Project [s_store_sk,s_store_name,s_state,s_zip] Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_store_sk,s_zip,s_store_name,s_market_id,s_state] [s_store_sk,s_zip,s_store_name,s_market_id,s_state] + Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] + Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] Filter [i_color,i_item_sk] - Scan parquet default.item [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] [i_manager_id,i_units,i_current_price,i_item_sk,i_size,i_color] + Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] InputAdapter BroadcastExchange #6 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt index afab62a0f..c1f44f753 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt @@ -22,12 +22,12 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_des : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#11,ss_customer_sk#25,ss_store_sk#13,ss_ticket_number#26,ss_net_profit#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct + : : : : : +- *(2) Filter ((isnotnull(cs_bill_customer_sk#23) && isnotnull(cs_item_sk#24)) && isnotnull(cs_sold_date_sk#15)) + : : : : : +- *(2) FileScan parquet default.catalog_sales[cs_sold_date_sk#15,cs_bill_customer_sk#23,cs_item_sk#24,cs_net_profit#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(3) Project [d_date_sk#20] : : : : +- *(3) Filter ((((isnotnull(d_moy#28) && isnotnull(d_year#29)) && (d_moy#28 = 4)) && (d_year#29 = 2001)) && isnotnull(d_date_sk#20)) @@ -38,8 +38,8 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_des : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#30,d_moy#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(5) Project [d_date_sk#16] - : : +- *(5) Filter (((((isnotnull(d_moy#32) && isnotnull(d_year#33)) && (d_moy#32 >= 4)) && (d_moy#32 <= 10)) && (d_year#33 = 2001)) && isnotnull(d_date_sk#16)) - : : +- *(5) FileScan parquet default.date_dim[d_date_sk#16,d_year#33,d_moy#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct + : : +- *(5) Filter (((((isnotnull(d_year#32) && isnotnull(d_moy#33)) && (d_moy#33 >= 4)) && (d_moy#33 <= 10)) && (d_year#32 = 2001)) && isnotnull(d_date_sk#16)) + : : +- *(5) FileScan parquet default.date_dim[d_date_sk#16,d_year#32,d_moy#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(6) Project [s_store_sk#14, s_store_id#3, s_store_name#4] : +- *(6) Filter isnotnull(s_store_sk#14) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt index 4d8bbb2d8..20dec06b1 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt @@ -1,38 +1,38 @@ -TakeOrderedAndProject [s_store_id,s_store_name,store_sales_profit,i_item_desc,catalog_sales_profit,i_item_id,store_returns_loss] +TakeOrderedAndProject [i_item_id,i_item_desc,store_sales_profit,catalog_sales_profit,s_store_id,store_returns_loss,s_store_name] WholeStageCodegen - HashAggregate [sum(UnscaledValue(ss_net_profit)),s_store_id,sum(UnscaledValue(cs_net_profit)),s_store_name,sum(UnscaledValue(sr_net_loss)),sum,sum,i_item_desc,i_item_id,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),store_sales_profit,sum,sum,catalog_sales_profit,sum,store_returns_loss] + HashAggregate [i_item_id,i_item_desc,sum,sum,s_store_id,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(ss_net_profit)),s_store_name,sum] [sum,store_sales_profit,sum,catalog_sales_profit,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),store_returns_loss,sum(UnscaledValue(ss_net_profit)),sum] InputAdapter Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 WholeStageCodegen - HashAggregate [s_store_id,s_store_name,sum,sum,cs_net_profit,ss_net_profit,sum,sum,sum,i_item_desc,sr_net_loss,i_item_id,sum] [sum,sum,sum,sum,sum,sum] - Project [sr_net_loss,s_store_name,i_item_id,s_store_id,ss_net_profit,cs_net_profit,i_item_desc] + HashAggregate [i_item_id,i_item_desc,sum,sum,s_store_id,cs_net_profit,sum,sum,sum,s_store_name,sum,sr_net_loss,ss_net_profit] [sum,sum,sum,sum,sum,sum] + Project [s_store_id,s_store_name,ss_net_profit,cs_net_profit,i_item_desc,sr_net_loss,i_item_id] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,sr_net_loss,s_store_name,s_store_id,ss_net_profit,cs_net_profit] + Project [s_store_id,s_store_name,ss_item_sk,ss_net_profit,cs_net_profit,sr_net_loss] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,sr_net_loss,ss_store_sk,ss_net_profit,cs_net_profit] + Project [ss_item_sk,ss_store_sk,ss_net_profit,cs_net_profit,sr_net_loss] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [ss_item_sk,sr_net_loss,cs_sold_date_sk,ss_store_sk,ss_net_profit,cs_net_profit] + Project [ss_item_sk,ss_store_sk,cs_sold_date_sk,ss_net_profit,cs_net_profit,sr_net_loss] BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [ss_item_sk,sr_net_loss,cs_sold_date_sk,ss_store_sk,sr_returned_date_sk,ss_net_profit,cs_net_profit] + Project [ss_item_sk,ss_store_sk,cs_sold_date_sk,ss_net_profit,cs_net_profit,sr_returned_date_sk,sr_net_loss] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_item_sk,sr_net_loss,cs_sold_date_sk,ss_sold_date_sk,ss_store_sk,sr_returned_date_sk,ss_net_profit,cs_net_profit] + Project [ss_item_sk,ss_store_sk,cs_sold_date_sk,ss_net_profit,cs_net_profit,ss_sold_date_sk,sr_returned_date_sk,sr_net_loss] BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] - Project [sr_item_sk,ss_item_sk,sr_net_loss,ss_sold_date_sk,ss_store_sk,sr_customer_sk,sr_returned_date_sk,ss_net_profit] - BroadcastHashJoin [ss_customer_sk,sr_item_sk,ss_item_sk,sr_ticket_number,sr_customer_sk,ss_ticket_number] - Project [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_net_profit] - Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_sold_date_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_net_profit] [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_net_profit] + Project [ss_item_sk,ss_store_sk,sr_customer_sk,sr_item_sk,ss_net_profit,ss_sold_date_sk,sr_returned_date_sk,sr_net_loss] + BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] + Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_ticket_number] + Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [sr_ticket_number,sr_item_sk,sr_net_loss,sr_customer_sk,sr_returned_date_sk] - Filter [sr_ticket_number,sr_item_sk,sr_customer_sk,sr_returned_date_sk] - Scan parquet default.store_returns [sr_ticket_number,sr_item_sk,sr_net_loss,sr_customer_sk,sr_returned_date_sk] [sr_ticket_number,sr_item_sk,sr_net_loss,sr_customer_sk,sr_returned_date_sk] + Project [sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk,sr_net_loss] + Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] + Scan parquet default.store_returns [sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk,sr_net_loss] [sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk,sr_net_loss] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] - Filter [cs_item_sk,cs_bill_customer_sk,cs_sold_date_sk] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] InputAdapter BroadcastExchange #4 @@ -50,7 +50,7 @@ TakeOrderedAndProject [s_store_id,s_store_name,store_sales_profit,i_item_desc,ca BroadcastExchange #6 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] + Filter [d_moy,d_year,d_date_sk] Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #7 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt index 5937dee3e..c5c7da628 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt @@ -16,8 +16,8 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[ : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#14,cs_bill_cdemo_sk#16,cs_item_sk#12,cs_promo_sk#10,cs_quantity#6,cs_list_price#7,cs_sales_price#9,cs_coupon_amt#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_pro..., ReadSchema: struct + : : : +- *(1) Filter ((((((isnotnull(cd_gender#18) && isnotnull(cd_marital_status#19)) && isnotnull(cd_education_status#20)) && (cd_gender#18 = M)) && (cd_marital_status#19 = S)) && (cd_education_status#20 = College)) && isnotnull(cd_demo_sk#17)) + : : : +- *(1) FileScan parquet default.customer_demographics[cd_demo_sk#17,cd_gender#18,cd_marital_status#19,cd_education_status#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_g..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [d_date_sk#15] : : +- *(2) Filter ((isnotnull(d_year#21) && (d_year#21 = 2000)) && isnotnull(d_date_sk#15)) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt index 667950cad..da9dc6cdf 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt @@ -1,26 +1,26 @@ -TakeOrderedAndProject [agg2,agg1,agg3,i_item_id,agg4] +TakeOrderedAndProject [i_item_id,agg3,agg1,agg2,agg4] WholeStageCodegen - HashAggregate [count,avg(UnscaledValue(cs_coupon_amt)),count,sum,sum,count,avg(cast(cs_quantity as bigint)),sum,i_item_id,sum,count,avg(UnscaledValue(cs_sales_price)),avg(UnscaledValue(cs_list_price))] [count,avg(UnscaledValue(cs_coupon_amt)),count,sum,agg2,sum,count,agg1,avg(cast(cs_quantity as bigint)),sum,agg3,sum,count,avg(UnscaledValue(cs_sales_price)),agg4,avg(UnscaledValue(cs_list_price))] + HashAggregate [i_item_id,count,avg(UnscaledValue(cs_list_price)),sum,sum,count,sum,avg(UnscaledValue(cs_sales_price)),avg(UnscaledValue(cs_coupon_amt)),count,avg(cast(cs_quantity as bigint)),sum,count] [count,avg(UnscaledValue(cs_list_price)),sum,sum,count,sum,avg(UnscaledValue(cs_sales_price)),avg(UnscaledValue(cs_coupon_amt)),agg3,count,agg1,agg2,agg4,avg(cast(cs_quantity as bigint)),sum,count] InputAdapter Exchange [i_item_id] #1 WholeStageCodegen - HashAggregate [count,cs_quantity,count,count,count,sum,cs_sales_price,sum,cs_coupon_amt,count,count,cs_list_price,count,sum,sum,i_item_id,sum,count,sum,sum,sum] [count,count,count,count,sum,sum,count,count,count,sum,sum,sum,count,sum,sum,sum] - Project [cs_quantity,cs_list_price,cs_coupon_amt,i_item_id,cs_sales_price] + HashAggregate [cs_list_price,cs_coupon_amt,i_item_id,count,count,sum,sum,count,sum,sum,count,count,count,sum,sum,sum,count,cs_sales_price,cs_quantity,sum,count] [count,count,sum,sum,count,sum,sum,count,count,count,sum,sum,sum,count,sum,count] + Project [cs_coupon_amt,cs_quantity,cs_sales_price,cs_list_price,i_item_id] BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_quantity,cs_list_price,cs_promo_sk,cs_coupon_amt,i_item_id,cs_sales_price] + Project [cs_promo_sk,cs_coupon_amt,cs_quantity,cs_sales_price,cs_list_price,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_quantity,cs_list_price,cs_item_sk,cs_promo_sk,cs_coupon_amt,cs_sales_price] + Project [cs_promo_sk,cs_coupon_amt,cs_quantity,cs_sales_price,cs_item_sk,cs_list_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_quantity,cs_list_price,cs_item_sk,cs_promo_sk,cs_sold_date_sk,cs_coupon_amt,cs_sales_price] + Project [cs_promo_sk,cs_coupon_amt,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] - Project [cs_bill_cdemo_sk,cs_quantity,cs_list_price,cs_item_sk,cs_promo_sk,cs_sold_date_sk,cs_coupon_amt,cs_sales_price] + Project [cs_promo_sk,cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] Filter [cs_bill_cdemo_sk,cs_sold_date_sk,cs_item_sk,cs_promo_sk] - Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_quantity,cs_list_price,cs_item_sk,cs_promo_sk,cs_sold_date_sk,cs_coupon_amt,cs_sales_price] [cs_bill_cdemo_sk,cs_quantity,cs_list_price,cs_item_sk,cs_promo_sk,cs_sold_date_sk,cs_coupon_amt,cs_sales_price] + Scan parquet default.catalog_sales [cs_promo_sk,cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] [cs_promo_sk,cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [cd_demo_sk] - Filter [cd_education_status,cd_gender,cd_marital_status,cd_demo_sk] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt index 5f523556c..c166032cd 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt @@ -17,8 +17,8 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,s_state#2 : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#21,ss_item_sk#17,ss_cdemo_sk#23,ss_store_sk#19,ss_quantity#9,ss_list_price#10,ss_sales_price#12,ss_coupon_amt#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : +- *(1) Filter ((((((isnotnull(cd_gender#25) && isnotnull(cd_marital_status#26)) && isnotnull(cd_education_status#27)) && (cd_gender#25 = M)) && (cd_marital_status#26 = S)) && (cd_education_status#27 = College)) && isnotnull(cd_demo_sk#24)) + : : : +- *(1) FileScan parquet default.customer_demographics[cd_demo_sk#24,cd_gender#25,cd_marital_status#26,cd_education_status#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_g..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [d_date_sk#22] : : +- *(2) Filter ((isnotnull(d_year#28) && (d_year#28 = 2002)) && isnotnull(d_date_sk#22)) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt index a4169a745..5eead43da 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt @@ -1,27 +1,27 @@ -TakeOrderedAndProject [g_state,i_item_id,s_state,agg4,agg1,agg3,agg2] +TakeOrderedAndProject [agg4,i_item_id,s_state,agg3,g_state,agg1,agg2] WholeStageCodegen - HashAggregate [avg(cast(ss_quantity as bigint)),sum,spark_grouping_id,i_item_id,count,s_state,count,sum,avg(UnscaledValue(ss_list_price)),sum,count,avg(UnscaledValue(ss_sales_price)),sum,count,avg(UnscaledValue(ss_coupon_amt))] [g_state,avg(cast(ss_quantity as bigint)),sum,count,count,agg4,agg1,agg3,sum,avg(UnscaledValue(ss_list_price)),sum,count,avg(UnscaledValue(ss_sales_price)),sum,agg2,count,avg(UnscaledValue(ss_coupon_amt))] + HashAggregate [i_item_id,sum,count,count,avg(UnscaledValue(ss_list_price)),sum,count,avg(cast(ss_quantity as bigint)),sum,s_state,count,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),sum,spark_grouping_id] [agg4,sum,count,count,avg(UnscaledValue(ss_list_price)),sum,count,avg(cast(ss_quantity as bigint)),sum,agg3,count,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),sum,g_state,agg1,agg2] InputAdapter Exchange [i_item_id,s_state,spark_grouping_id] #1 WholeStageCodegen - HashAggregate [sum,sum,spark_grouping_id,i_item_id,count,s_state,ss_quantity,count,count,ss_coupon_amt,count,sum,sum,sum,count,count,sum,sum,sum,ss_sales_price,count,ss_list_price,count] [sum,sum,count,count,count,count,sum,sum,sum,count,count,sum,sum,sum,count,count] - Expand [ss_quantity,ss_coupon_amt,i_item_id,s_state,ss_sales_price,ss_list_price] - Project [ss_quantity,ss_coupon_amt,s_state,ss_sales_price,i_item_id,ss_list_price] + HashAggregate [i_item_id,sum,sum,count,sum,count,count,sum,sum,count,sum,ss_coupon_amt,count,ss_sales_price,sum,ss_list_price,count,s_state,count,count,ss_quantity,sum,spark_grouping_id] [sum,sum,count,sum,count,count,sum,sum,count,sum,count,sum,count,count,count,sum] + Expand [i_item_id,ss_coupon_amt,ss_sales_price,ss_list_price,ss_quantity,s_state] + Project [i_item_id,ss_coupon_amt,s_state,ss_sales_price,ss_list_price,ss_quantity] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_list_price,ss_sales_price,ss_item_sk,ss_quantity,ss_coupon_amt,s_state] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,s_state,ss_sales_price] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_list_price,ss_sales_price,ss_item_sk,ss_quantity,ss_store_sk,ss_coupon_amt] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_list_price,ss_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [ss_list_price,ss_sales_price,ss_cdemo_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk] Filter [ss_cdemo_sk,ss_sold_date_sk,ss_store_sk,ss_item_sk] - Scan parquet default.store_sales [ss_list_price,ss_sales_price,ss_cdemo_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] [ss_list_price,ss_sales_price,ss_cdemo_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] + Scan parquet default.store_sales [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk] [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [cd_demo_sk] - Filter [cd_education_status,cd_gender,cd_marital_status,cd_demo_sk] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt index a64d8fda6..2b1d35702 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt @@ -5,91 +5,91 @@ CollectLimit BroadcastNestedLoopJoin BroadcastNestedLoopJoin WholeStageCodegen - HashAggregate [count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count,count(ss_list_price),count] [B1_CNTD,B1_CNT,count,count(ss_list_price),B1_LP,avg(UnscaledValue(ss_list_price)),sum,count,count(ss_list_price),count] + HashAggregate [count(ss_list_price),sum,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count] [count(ss_list_price),sum,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),B1_CNTD,B1_LP,B1_CNT,count,count] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count,sum,count(ss_list_price),count,ss_list_price,count] [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count,count,sum,count(ss_list_price),count,count] - HashAggregate [count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count,sum,count,ss_list_price,count] [count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count,sum,count,count] + HashAggregate [count(ss_list_price),count,sum,sum,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),ss_list_price,count,count] [count(ss_list_price),count,sum,sum,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count,count] + HashAggregate [count(ss_list_price),sum,sum,count,count,avg(UnscaledValue(ss_list_price)),ss_list_price,count,count] [count(ss_list_price),sum,sum,count,count,avg(UnscaledValue(ss_list_price)),count,count] InputAdapter Exchange [ss_list_price] #2 WholeStageCodegen - HashAggregate [count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,sum,ss_list_price,count] [count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count,sum,count,count] + HashAggregate [count(ss_list_price),sum,count,avg(UnscaledValue(ss_list_price)),ss_list_price,count] [count(ss_list_price),sum,sum,count,count,avg(UnscaledValue(ss_list_price)),count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] BroadcastExchange #3 WholeStageCodegen - HashAggregate [count,avg(UnscaledValue(ss_list_price)),count,count(ss_list_price),count(ss_list_price),count,sum] [B2_LP,count,avg(UnscaledValue(ss_list_price)),count,count(ss_list_price),B2_CNT,count(ss_list_price),count,B2_CNTD,sum] + HashAggregate [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,count(ss_list_price),sum] [count,B2_CNT,count,count(ss_list_price),B2_LP,avg(UnscaledValue(ss_list_price)),count,B2_CNTD,count(ss_list_price),sum] InputAdapter Exchange #4 WholeStageCodegen - HashAggregate [count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),count,sum,sum,ss_list_price,count,count] [count,count,avg(UnscaledValue(ss_list_price)),count,count(ss_list_price),count(ss_list_price),count,sum,sum,count,count] - HashAggregate [count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,sum,sum,ss_list_price,count] [count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,sum,sum,count] + HashAggregate [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,ss_list_price,sum,count,count,count(ss_list_price),sum] [count,count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,sum,count,count,count(ss_list_price),sum] + HashAggregate [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,ss_list_price,sum,count,sum] [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,sum,count,sum] InputAdapter Exchange [ss_list_price] #5 WholeStageCodegen - HashAggregate [count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,ss_list_price,count] [count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,sum,sum,count] + HashAggregate [count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),ss_list_price,sum,count] [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,sum,count,sum] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] BroadcastExchange #6 WholeStageCodegen - HashAggregate [count,avg(UnscaledValue(ss_list_price)),sum,count,count(ss_list_price),count,count(ss_list_price)] [B3_CNTD,count,avg(UnscaledValue(ss_list_price)),sum,count,B3_CNT,B3_LP,count(ss_list_price),count,count(ss_list_price)] + HashAggregate [count(ss_list_price),count,count,count,count(ss_list_price),sum,avg(UnscaledValue(ss_list_price))] [count(ss_list_price),B3_CNTD,B3_CNT,count,B3_LP,count,count,count(ss_list_price),sum,avg(UnscaledValue(ss_list_price))] InputAdapter Exchange #7 WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),sum,count,count,count,ss_list_price,count(ss_list_price),count,count,count(ss_list_price),sum] [count,avg(UnscaledValue(ss_list_price)),sum,count,count,count,count(ss_list_price),count,count,count(ss_list_price),sum] - HashAggregate [avg(UnscaledValue(ss_list_price)),sum,count,count,count,ss_list_price,count,count(ss_list_price),sum] [avg(UnscaledValue(ss_list_price)),sum,count,count,count,count,count(ss_list_price),sum] + HashAggregate [count(ss_list_price),sum,count,count,ss_list_price,count,count(ss_list_price),count,count,sum,avg(UnscaledValue(ss_list_price))] [count(ss_list_price),sum,count,count,count,count,count(ss_list_price),count,count,sum,avg(UnscaledValue(ss_list_price))] + HashAggregate [sum,count,ss_list_price,count,count(ss_list_price),count,count,sum,avg(UnscaledValue(ss_list_price))] [sum,count,count,count(ss_list_price),count,count,sum,avg(UnscaledValue(ss_list_price))] InputAdapter Exchange [ss_list_price] #8 WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,ss_list_price,count(ss_list_price),sum] [avg(UnscaledValue(ss_list_price)),sum,count,count,count,count,count(ss_list_price),sum] + HashAggregate [sum,ss_list_price,count(ss_list_price),count,count,avg(UnscaledValue(ss_list_price))] [sum,count,count,count(ss_list_price),count,count,sum,avg(UnscaledValue(ss_list_price))] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] BroadcastExchange #9 WholeStageCodegen - HashAggregate [count,sum,count,avg(UnscaledValue(ss_list_price)),count,count(ss_list_price),count(ss_list_price)] [B4_CNT,B4_CNTD,count,sum,count,avg(UnscaledValue(ss_list_price)),count,B4_LP,count(ss_list_price),count(ss_list_price)] + HashAggregate [count,count,count(ss_list_price),count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum] [count,count,count(ss_list_price),count,avg(UnscaledValue(ss_list_price)),B4_LP,count(ss_list_price),B4_CNTD,sum,B4_CNT] InputAdapter Exchange #10 WholeStageCodegen - HashAggregate [sum,count,count,sum,count,avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),count(ss_list_price),ss_list_price] [sum,count,count,count,sum,count,avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),count(ss_list_price)] - HashAggregate [sum,count,sum,count,avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),ss_list_price] [sum,count,sum,count,avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price)] + HashAggregate [count,count,count(ss_list_price),count,count,count,avg(UnscaledValue(ss_list_price)),ss_list_price,count(ss_list_price),sum,sum] [count,count,count,count(ss_list_price),count,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,sum] + HashAggregate [count,count,count(ss_list_price),count,count,avg(UnscaledValue(ss_list_price)),ss_list_price,sum,sum] [count,count,count(ss_list_price),count,count,avg(UnscaledValue(ss_list_price)),sum,sum] InputAdapter Exchange [ss_list_price] #11 WholeStageCodegen - HashAggregate [sum,count,avg(UnscaledValue(ss_list_price)),count,count(ss_list_price),ss_list_price] [sum,count,sum,count,avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price)] + HashAggregate [count,count(ss_list_price),count,avg(UnscaledValue(ss_list_price)),ss_list_price,sum] [count,count,count(ss_list_price),count,count,avg(UnscaledValue(ss_list_price)),sum,sum] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] BroadcastExchange #12 WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,sum,count(ss_list_price),count(ss_list_price),count] [avg(UnscaledValue(ss_list_price)),count,B5_CNT,B5_CNTD,count,sum,count(ss_list_price),count(ss_list_price),count,B5_LP] + HashAggregate [count(ss_list_price),count,sum,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count] [count(ss_list_price),count,B5_LP,sum,avg(UnscaledValue(ss_list_price)),B5_CNT,count(ss_list_price),count,B5_CNTD,count] InputAdapter Exchange #13 WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),sum,count,sum,count(ss_list_price),count,count(ss_list_price),count,ss_list_price,count,count] [avg(UnscaledValue(ss_list_price)),count,sum,count,sum,count(ss_list_price),count,count(ss_list_price),count,count,count] - HashAggregate [avg(UnscaledValue(ss_list_price)),sum,count,sum,count(ss_list_price),count,ss_list_price,count,count] [avg(UnscaledValue(ss_list_price)),sum,count,sum,count(ss_list_price),count,count,count] + HashAggregate [count(ss_list_price),sum,avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),count,count,ss_list_price,count,count,count] [count(ss_list_price),count,sum,avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),count,count,count,count,count] + HashAggregate [sum,avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),count,ss_list_price,count,count,count] [sum,avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),count,count,count,count] InputAdapter Exchange [ss_list_price] #14 WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),ss_list_price,count,count] [avg(UnscaledValue(ss_list_price)),sum,count,sum,count(ss_list_price),count,count,count] + HashAggregate [avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),count,ss_list_price,count] [sum,avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),count,count,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] BroadcastExchange #15 WholeStageCodegen - HashAggregate [count,sum,count(ss_list_price),count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count] [count,B6_CNTD,B6_LP,sum,count(ss_list_price),count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),B6_CNT,count] + HashAggregate [count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count,count(ss_list_price),sum] [count,B6_CNT,B6_LP,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count,B6_CNTD,count(ss_list_price),sum] InputAdapter Exchange #16 WholeStageCodegen - HashAggregate [count,count,count,sum,count(ss_list_price),count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count,count,ss_list_price] [count,count,count,sum,count(ss_list_price),count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count,count] - HashAggregate [count,count,count,sum,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count,ss_list_price] [count,count,count,sum,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count] + HashAggregate [sum,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count,ss_list_price,count(ss_list_price),count,count,count,sum] [count,sum,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count,count(ss_list_price),count,count,count,sum] + HashAggregate [sum,avg(UnscaledValue(ss_list_price)),count,count,ss_list_price,count(ss_list_price),count,count,sum] [sum,avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),count,count,sum] InputAdapter Exchange [ss_list_price] #17 WholeStageCodegen - HashAggregate [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,ss_list_price] [count,count,count,sum,count(ss_list_price),avg(UnscaledValue(ss_list_price)),sum,count] + HashAggregate [sum,avg(UnscaledValue(ss_list_price)),ss_list_price,count(ss_list_price),count,count] [sum,avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),count,count,sum] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/explain.txt index f4df69036..6bc3dd939 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/explain.txt @@ -22,12 +22,12 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_des : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#11,ss_customer_sk#25,ss_store_sk#13,ss_ticket_number#26,ss_quantity#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct + : : : : : +- *(2) Filter ((isnotnull(cs_bill_customer_sk#23) && isnotnull(cs_item_sk#24)) && isnotnull(cs_sold_date_sk#15)) + : : : : : +- *(2) FileScan parquet default.catalog_sales[cs_sold_date_sk#15,cs_bill_customer_sk#23,cs_item_sk#24,cs_quantity#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(3) Project [d_date_sk#20] : : : : +- *(3) Filter ((((isnotnull(d_moy#28) && isnotnull(d_year#29)) && (d_moy#28 = 9)) && (d_year#29 = 1999)) && isnotnull(d_date_sk#20)) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt index 12dc662c5..64fa8afa9 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt @@ -1,38 +1,38 @@ -TakeOrderedAndProject [s_store_id,store_returns_quantity,s_store_name,catalog_sales_quantity,i_item_desc,i_item_id,store_sales_quantity] +TakeOrderedAndProject [store_returns_quantity,i_item_id,i_item_desc,store_sales_quantity,s_store_id,catalog_sales_quantity,s_store_name] WholeStageCodegen - HashAggregate [s_store_id,s_store_name,sum,i_item_desc,sum(cast(ss_quantity as bigint)),sum(cast(sr_return_quantity as bigint)),sum,sum(cast(cs_quantity as bigint)),i_item_id,sum] [store_returns_quantity,sum,catalog_sales_quantity,sum(cast(ss_quantity as bigint)),sum(cast(sr_return_quantity as bigint)),sum,sum(cast(cs_quantity as bigint)),sum,store_sales_quantity] + HashAggregate [sum(cast(cs_quantity as bigint)),i_item_id,i_item_desc,sum,sum(cast(ss_quantity as bigint)),sum,sum(cast(sr_return_quantity as bigint)),s_store_id,sum,s_store_name] [sum(cast(cs_quantity as bigint)),store_returns_quantity,sum,sum(cast(ss_quantity as bigint)),store_sales_quantity,sum,sum(cast(sr_return_quantity as bigint)),sum,catalog_sales_quantity] InputAdapter Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 WholeStageCodegen - HashAggregate [cs_quantity,s_store_id,sr_return_quantity,s_store_name,sum,ss_quantity,sum,sum,i_item_desc,sum,sum,i_item_id,sum] [sum,sum,sum,sum,sum,sum] - Project [cs_quantity,ss_quantity,s_store_name,i_item_id,sr_return_quantity,s_store_id,i_item_desc] + HashAggregate [i_item_id,sum,i_item_desc,sum,sum,sum,s_store_id,sr_return_quantity,sum,sum,ss_quantity,cs_quantity,s_store_name] [sum,sum,sum,sum,sum,sum] + Project [s_store_id,s_store_name,ss_quantity,cs_quantity,sr_return_quantity,i_item_desc,i_item_id] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [cs_quantity,ss_item_sk,ss_quantity,s_store_name,sr_return_quantity,s_store_id] + Project [s_store_id,s_store_name,ss_quantity,ss_item_sk,cs_quantity,sr_return_quantity] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [cs_quantity,ss_item_sk,ss_quantity,sr_return_quantity,ss_store_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_quantity,ss_item_sk,ss_quantity,cs_sold_date_sk,sr_return_quantity,ss_store_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk] BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [cs_quantity,ss_item_sk,ss_quantity,cs_sold_date_sk,sr_return_quantity,ss_store_sk,sr_returned_date_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,sr_returned_date_sk] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [cs_quantity,ss_item_sk,ss_quantity,cs_sold_date_sk,ss_sold_date_sk,sr_return_quantity,ss_store_sk,sr_returned_date_sk] + Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,ss_sold_date_sk,sr_returned_date_sk] BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] - Project [sr_item_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,ss_store_sk,sr_customer_sk,sr_returned_date_sk] - BroadcastHashJoin [ss_customer_sk,sr_item_sk,ss_item_sk,sr_ticket_number,sr_customer_sk,ss_ticket_number] - Project [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] - Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_sold_date_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] + Project [ss_quantity,ss_item_sk,ss_store_sk,sr_return_quantity,sr_customer_sk,sr_item_sk,ss_sold_date_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] + Project [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] + Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [sr_ticket_number,sr_item_sk,sr_return_quantity,sr_customer_sk,sr_returned_date_sk] - Filter [sr_ticket_number,sr_item_sk,sr_customer_sk,sr_returned_date_sk] - Scan parquet default.store_returns [sr_ticket_number,sr_item_sk,sr_return_quantity,sr_customer_sk,sr_returned_date_sk] [sr_ticket_number,sr_item_sk,sr_return_quantity,sr_customer_sk,sr_returned_date_sk] + Project [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] + Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] + Scan parquet default.store_returns [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] - Filter [cs_item_sk,cs_bill_customer_sk,cs_sold_date_sk] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] InputAdapter BroadcastExchange #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt index 752b64b70..a8432c52f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt @@ -1,10 +1,10 @@ TakeOrderedAndProject [d_year,sum_agg,brand_id,brand] WholeStageCodegen - HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),i_brand_id,sum,d_year,i_brand] [sum(UnscaledValue(ss_ext_sales_price)),brand,sum,sum_agg,brand_id] + HashAggregate [d_year,i_brand,sum(UnscaledValue(ss_ext_sales_price)),i_brand_id,sum] [brand_id,sum(UnscaledValue(ss_ext_sales_price)),sum_agg,sum,brand] InputAdapter Exchange [d_year,i_brand,i_brand_id] #1 WholeStageCodegen - HashAggregate [i_brand_id,sum,sum,d_year,ss_ext_sales_price,i_brand] [sum,sum] + HashAggregate [sum,d_year,i_brand,ss_ext_sales_price,i_brand_id,sum] [sum,sum] Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [d_year,ss_item_sk,ss_ext_sales_price] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt index 877ba4e42..1be056d32 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt @@ -1,17 +1,17 @@ -TakeOrderedAndProject [c_last_review_date,ctr_total_return,c_email_address,c_birth_month,c_login,c_last_name,c_customer_id,c_birth_country,c_birth_day,c_birth_year,c_preferred_cust_flag,c_first_name,c_salutation] +TakeOrderedAndProject [ctr_total_return,c_birth_day,c_login,c_salutation,c_birth_month,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,c_birth_year,c_last_review_date] WholeStageCodegen - Project [c_last_review_date,ctr_total_return,c_email_address,c_birth_month,c_login,c_last_name,c_customer_id,c_birth_country,c_birth_day,c_birth_year,c_preferred_cust_flag,c_first_name,c_salutation] + Project [ctr_total_return,c_birth_day,c_login,c_salutation,c_birth_month,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,c_birth_year,c_last_review_date] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_birth_day,c_customer_id,c_birth_country,c_login,ctr_total_return,c_birth_year,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,c_salutation,c_current_addr_sk,c_last_review_date,c_birth_month] + Project [c_preferred_cust_flag,c_current_addr_sk,ctr_total_return,c_email_address,c_customer_id,c_birth_year,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] BroadcastHashJoin [ctr_customer_sk,c_customer_sk] Project [ctr_customer_sk,ctr_total_return] BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] Filter [ctr_total_return] - HashAggregate [wr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(wr_return_amt))] [sum(UnscaledValue(wr_return_amt)),ctr_state,sum,ctr_total_return,ctr_customer_sk] + HashAggregate [wr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(wr_return_amt))] [ctr_total_return,ctr_state,sum(UnscaledValue(wr_return_amt)),sum,ctr_customer_sk] InputAdapter Exchange [wr_returning_customer_sk,ca_state] #1 WholeStageCodegen - HashAggregate [sum,sum,wr_returning_customer_sk,wr_return_amt,ca_state] [sum,sum] + HashAggregate [wr_return_amt,sum,wr_returning_customer_sk,sum,ca_state] [sum,sum] Project [wr_returning_customer_sk,wr_return_amt,ca_state] BroadcastHashJoin [wr_returning_addr_sk,ca_address_sk] Project [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] @@ -35,16 +35,16 @@ TakeOrderedAndProject [c_last_review_date,ctr_total_return,c_email_address,c_bir BroadcastExchange #4 WholeStageCodegen Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] - HashAggregate [ctr_state,sum,count,avg(ctr_total_return)] [sum,avg(ctr_total_return),count,ctr_state,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + HashAggregate [ctr_state,sum,count,avg(ctr_total_return)] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),sum,ctr_state,avg(ctr_total_return),count] InputAdapter Exchange [ctr_state] #5 WholeStageCodegen - HashAggregate [sum,ctr_state,sum,ctr_total_return,count,count] [sum,count,sum,count] + HashAggregate [ctr_state,count,sum,ctr_total_return,count,sum] [sum,count,sum,count] HashAggregate [wr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(wr_return_amt))] [sum(UnscaledValue(wr_return_amt)),ctr_state,ctr_total_return,sum] InputAdapter Exchange [wr_returning_customer_sk,ca_state] #6 WholeStageCodegen - HashAggregate [sum,sum,wr_returning_customer_sk,wr_return_amt,ca_state] [sum,sum] + HashAggregate [sum,wr_return_amt,sum,wr_returning_customer_sk,ca_state] [sum,sum] Project [wr_returning_customer_sk,wr_return_amt,ca_state] BroadcastHashJoin [wr_returning_addr_sk,ca_address_sk] Project [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] @@ -59,9 +59,9 @@ TakeOrderedAndProject [c_last_review_date,ctr_total_return,c_email_address,c_bir InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [c_birth_day,c_customer_id,c_birth_country,c_login,c_birth_year,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address,c_salutation,c_current_addr_sk,c_last_review_date,c_birth_month] + Project [c_preferred_cust_flag,c_current_addr_sk,c_email_address,c_customer_id,c_birth_year,c_customer_sk,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] Filter [c_customer_sk,c_current_addr_sk] - Scan parquet default.customer [c_birth_day,c_customer_id,c_birth_country,c_login,c_birth_year,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address,c_salutation,c_current_addr_sk,c_last_review_date,c_birth_month] [c_birth_day,c_customer_id,c_birth_country,c_login,c_birth_year,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address,c_salutation,c_current_addr_sk,c_last_review_date,c_birth_month] + Scan parquet default.customer [c_preferred_cust_flag,c_current_addr_sk,c_email_address,c_customer_id,c_birth_year,c_customer_sk,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] [c_preferred_cust_flag,c_current_addr_sk,c_email_address,c_customer_id,c_birth_year,c_customer_sk,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] InputAdapter BroadcastExchange #8 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt index ce6e8c51c..78c6383a4 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt @@ -3,19 +3,19 @@ WholeStageCodegen InputAdapter Exchange [ca_county] #1 WholeStageCodegen - Project [d_year,store_sales,web_sales,web_sales,store_sales,ca_county,store_sales,web_sales] - BroadcastHashJoin [web_sales,ca_county,store_sales,store_sales,ca_county,web_sales] - Project [web_sales,ca_county,store_sales,web_sales,d_year,store_sales,store_sales,ca_county] - BroadcastHashJoin [store_sales,web_sales,store_sales,ca_county,ca_county,web_sales] + Project [web_sales,store_sales,store_sales,web_sales,d_year,store_sales,web_sales,ca_county] + BroadcastHashJoin [web_sales,ca_county,store_sales,ca_county,store_sales,web_sales] + Project [d_year,store_sales,store_sales,ca_county,web_sales,ca_county,store_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county,store_sales,web_sales,store_sales,web_sales] BroadcastHashJoin [ca_county,ca_county] - Project [store_sales,d_year,store_sales,store_sales,ca_county] + Project [d_year,store_sales,store_sales,ca_county,store_sales] BroadcastHashJoin [ca_county,ca_county] BroadcastHashJoin [ca_county,ca_county] - HashAggregate [sum,sum(UnscaledValue(ss_ext_sales_price)),d_year,d_qoy,ca_county] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + HashAggregate [d_year,d_qoy,ca_county,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] InputAdapter Exchange [ca_county,d_qoy,d_year] #2 WholeStageCodegen - HashAggregate [sum,sum,d_year,ss_ext_sales_price,d_qoy,ca_county] [sum,sum] + HashAggregate [d_year,d_qoy,sum,ca_county,sum,ss_ext_sales_price] [sum,sum] Project [ss_ext_sales_price,d_year,d_qoy,ca_county] BroadcastHashJoin [ss_addr_sk,ca_address_sk] Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] @@ -38,11 +38,11 @@ WholeStageCodegen InputAdapter BroadcastExchange #5 WholeStageCodegen - HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),ca_county,d_year,d_qoy,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + HashAggregate [d_qoy,d_year,sum(UnscaledValue(ss_ext_sales_price)),sum,ca_county] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] InputAdapter Exchange [ca_county,d_qoy,d_year] #6 WholeStageCodegen - HashAggregate [ca_county,d_year,d_qoy,ss_ext_sales_price,sum,sum] [sum,sum] + HashAggregate [sum,d_qoy,d_year,sum,ss_ext_sales_price,ca_county] [sum,sum] Project [ss_ext_sales_price,d_year,d_qoy,ca_county] BroadcastHashJoin [ss_addr_sk,ca_address_sk] Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] @@ -61,11 +61,11 @@ WholeStageCodegen InputAdapter BroadcastExchange #8 WholeStageCodegen - HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),ca_county,d_year,d_qoy,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + HashAggregate [sum,sum(UnscaledValue(ss_ext_sales_price)),d_year,d_qoy,ca_county] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] InputAdapter Exchange [ca_county,d_qoy,d_year] #9 WholeStageCodegen - HashAggregate [sum,ca_county,ss_ext_sales_price,d_year,d_qoy,sum] [sum,sum] + HashAggregate [sum,d_year,d_qoy,sum,ss_ext_sales_price,ca_county] [sum,sum] Project [ss_ext_sales_price,d_year,d_qoy,ca_county] BroadcastHashJoin [ss_addr_sk,ca_address_sk] Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] @@ -84,11 +84,11 @@ WholeStageCodegen InputAdapter BroadcastExchange #11 WholeStageCodegen - HashAggregate [ca_county,d_year,sum(UnscaledValue(ws_ext_sales_price)),sum,d_qoy] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + HashAggregate [sum,ca_county,d_qoy,d_year,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] InputAdapter Exchange [ca_county,d_qoy,d_year] #12 WholeStageCodegen - HashAggregate [ca_county,d_year,sum,d_qoy,ws_ext_sales_price,sum] [sum,sum] + HashAggregate [sum,ws_ext_sales_price,ca_county,d_qoy,d_year,sum] [sum,sum] Project [ws_ext_sales_price,d_year,d_qoy,ca_county] BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] @@ -103,11 +103,11 @@ WholeStageCodegen InputAdapter BroadcastExchange #13 WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(ws_ext_sales_price)),ca_county,d_qoy,d_year] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + HashAggregate [d_qoy,ca_county,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] InputAdapter Exchange [ca_county,d_qoy,d_year] #14 WholeStageCodegen - HashAggregate [sum,sum,ws_ext_sales_price,ca_county,d_qoy,d_year] [sum,sum] + HashAggregate [d_qoy,ca_county,d_year,ws_ext_sales_price,sum,sum] [sum,sum] Project [ws_ext_sales_price,d_year,d_qoy,ca_county] BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] @@ -122,11 +122,11 @@ WholeStageCodegen InputAdapter BroadcastExchange #15 WholeStageCodegen - HashAggregate [sum,d_year,d_qoy,sum(UnscaledValue(ws_ext_sales_price)),ca_county] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + HashAggregate [sum,d_qoy,ca_county,d_year,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] InputAdapter Exchange [ca_county,d_qoy,d_year] #16 WholeStageCodegen - HashAggregate [sum,d_year,d_qoy,ws_ext_sales_price,sum,ca_county] [sum,sum] + HashAggregate [sum,sum,d_qoy,ca_county,ws_ext_sales_price,d_year] [sum,sum] Project [ws_ext_sales_price,d_year,d_qoy,ca_county] BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt index cd99b716a..1da0b46e1 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt @@ -19,11 +19,11 @@ CollectLimit BroadcastExchange #2 WholeStageCodegen Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))] - HashAggregate [cs_item_sk,sum,count,avg(UnscaledValue(cs_ext_discount_amt))] [count,avg(UnscaledValue(cs_ext_discount_amt)),sum,(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),cs_item_sk] + HashAggregate [cs_item_sk,sum,count,avg(UnscaledValue(cs_ext_discount_amt))] [avg(UnscaledValue(cs_ext_discount_amt)),(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),sum,count,cs_item_sk] InputAdapter Exchange [cs_item_sk] #3 WholeStageCodegen - HashAggregate [count,sum,cs_item_sk,sum,cs_ext_discount_amt,count] [sum,count,sum,count] + HashAggregate [cs_ext_discount_amt,count,sum,cs_item_sk,sum,count] [sum,count,sum,count] Project [cs_item_sk,cs_ext_discount_amt] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] Project [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt index 8518531e3..70572bb58 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt @@ -3,7 +3,7 @@ WholeStageCodegen InputAdapter Exchange [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag] #1 WholeStageCodegen - Project [cnt,ss_ticket_number,c_last_name,c_preferred_cust_flag,c_first_name,c_salutation] + Project [ss_ticket_number,c_salutation,cnt,c_last_name,c_preferred_cust_flag,c_first_name] BroadcastHashJoin [ss_customer_sk,c_customer_sk] Filter [cnt] HashAggregate [ss_ticket_number,ss_customer_sk,count,count(1)] [count(1),cnt,count] @@ -17,9 +17,9 @@ WholeStageCodegen BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] + Project [ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] Filter [ss_sold_date_sk,ss_store_sk,ss_hdemo_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] [ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -41,6 +41,6 @@ WholeStageCodegen InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_salutation] + Project [c_preferred_cust_flag,c_customer_sk,c_last_name,c_first_name,c_salutation] Filter [c_customer_sk] - Scan parquet default.customer [c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_salutation] [c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_salutation] + Scan parquet default.customer [c_preferred_cust_flag,c_customer_sk,c_last_name,c_first_name,c_salutation] [c_preferred_cust_flag,c_customer_sk,c_last_name,c_first_name,c_salutation] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/simplified.txt index 26426d01b..5fd7a355f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/simplified.txt @@ -1,11 +1,11 @@ -TakeOrderedAndProject [cd_dep_employed_count,max(cd_dep_employed_count),cnt2,max(cd_dep_count),cd_gender,cd_dep_college_count,avg(cd_dep_count),avg(cd_dep_college_count),max(cd_dep_college_count),cnt3,cd_marital_status,ca_state,aggOrder,avg(cd_dep_employed_count),min(cd_dep_employed_count),cnt1,min(cd_dep_count),min(cd_dep_college_count)] +TakeOrderedAndProject [cnt3,max(cd_dep_employed_count),max(cd_dep_count),min(cd_dep_count),min(cd_dep_college_count),min(cd_dep_employed_count),max(cd_dep_college_count),ca_state,avg(cd_dep_count),avg(cd_dep_college_count),avg(cd_dep_employed_count),aggOrder,cd_dep_college_count,cnt2,cd_marital_status,cd_gender,cd_dep_employed_count,cnt1] WholeStageCodegen - HashAggregate [count,min,min(cd_dep_college_count),cd_dep_employed_count,min(cd_dep_employed_count),max,cd_gender,cd_dep_college_count,count,cd_dep_count,min(cd_dep_count),count,max,avg(cast(cd_dep_employed_count as bigint)),cd_marital_status,max(cd_dep_college_count),min,max(cd_dep_count),sum,ca_state,max(cd_dep_employed_count),sum,avg(cast(cd_dep_college_count as bigint)),min,count(1),sum,max,count,avg(cast(cd_dep_count as bigint))] [count,min,min(cd_dep_college_count),max(cd_dep_employed_count),min(cd_dep_employed_count),cnt2,max(cd_dep_count),max,count,min(cd_dep_count),avg(cd_dep_count),count,max,avg(cd_dep_college_count),avg(cast(cd_dep_employed_count as bigint)),max(cd_dep_college_count),cnt3,max(cd_dep_college_count),min,max(cd_dep_count),sum,aggOrder,avg(cd_dep_employed_count),max(cd_dep_employed_count),sum,avg(cast(cd_dep_college_count as bigint)),min,count(1),sum,min(cd_dep_employed_count),max,cnt1,min(cd_dep_count),min(cd_dep_college_count),count,avg(cast(cd_dep_count as bigint))] + HashAggregate [avg(cast(cd_dep_employed_count as bigint)),min,min(cd_dep_college_count),sum,max(cd_dep_college_count),sum,sum,ca_state,count,max,count,min,avg(cast(cd_dep_college_count as bigint)),max(cd_dep_count),max,avg(cast(cd_dep_count as bigint)),min(cd_dep_employed_count),cd_dep_college_count,count(1),cd_dep_count,min(cd_dep_count),cd_marital_status,cd_gender,count,max(cd_dep_employed_count),count,max,cd_dep_employed_count,min] [cnt3,avg(cast(cd_dep_employed_count as bigint)),min,max(cd_dep_employed_count),min(cd_dep_college_count),sum,max(cd_dep_college_count),max(cd_dep_count),sum,min(cd_dep_count),sum,min(cd_dep_college_count),min(cd_dep_employed_count),max(cd_dep_college_count),count,avg(cd_dep_count),max,count,min,avg(cast(cd_dep_college_count as bigint)),max(cd_dep_count),avg(cd_dep_college_count),max,avg(cd_dep_employed_count),avg(cast(cd_dep_count as bigint)),min(cd_dep_employed_count),aggOrder,count(1),cnt2,min(cd_dep_count),count,max(cd_dep_employed_count),count,max,min,cnt1] InputAdapter - Exchange [cd_dep_employed_count,cd_gender,cd_dep_college_count,cd_dep_count,cd_marital_status,ca_state] #1 + Exchange [ca_state,cd_dep_college_count,cd_dep_count,cd_marital_status,cd_gender,cd_dep_employed_count] #1 WholeStageCodegen - HashAggregate [count,min,max,min,cd_dep_employed_count,count,max,cd_gender,cd_dep_college_count,count,cd_dep_count,count,max,count,max,cd_marital_status,min,sum,ca_state,max,sum,min,sum,sum,min,sum,max,min,count,sum,count,count] [count,min,max,min,count,max,count,count,max,count,max,min,sum,max,sum,min,sum,sum,min,sum,max,min,count,sum,count,count] - Project [cd_gender,cd_dep_count,ca_state,cd_dep_college_count,cd_marital_status,cd_dep_employed_count] + HashAggregate [sum,min,sum,sum,min,sum,sum,sum,max,ca_state,max,count,count,max,count,count,min,count,min,max,max,cd_dep_college_count,cd_dep_count,cd_marital_status,cd_gender,count,count,min,count,max,cd_dep_employed_count,min] [sum,min,sum,sum,min,sum,sum,sum,max,max,count,count,max,count,count,min,count,min,max,max,count,count,min,count,max,min] + Project [cd_dep_college_count,ca_state,cd_dep_employed_count,cd_marital_status,cd_dep_count,cd_gender] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] Project [c_current_cdemo_sk,ca_state] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] @@ -60,6 +60,6 @@ TakeOrderedAndProject [cd_dep_employed_count,max(cd_dep_employed_count),cnt2,max InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [cd_gender,cd_dep_count,cd_dep_college_count,cd_marital_status,cd_demo_sk,cd_dep_employed_count] + Project [cd_dep_college_count,cd_dep_employed_count,cd_demo_sk,cd_marital_status,cd_dep_count,cd_gender] Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_gender,cd_dep_count,cd_dep_college_count,cd_marital_status,cd_demo_sk,cd_dep_employed_count] [cd_gender,cd_dep_count,cd_dep_college_count,cd_marital_status,cd_demo_sk,cd_dep_employed_count] + Scan parquet default.customer_demographics [cd_dep_college_count,cd_dep_employed_count,cd_demo_sk,cd_marital_status,cd_dep_count,cd_gender] [cd_dep_college_count,cd_dep_employed_count,cd_demo_sk,cd_marital_status,cd_dep_count,cd_gender] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt index 271b19e1b..ac9f32c0a 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt @@ -1,6 +1,6 @@ -TakeOrderedAndProject [gross_margin,i_category,rank_within_parent,lochierarchy,i_class] +TakeOrderedAndProject [i_class,i_category,lochierarchy,gross_margin,rank_within_parent] WholeStageCodegen - Project [gross_margin,i_category,rank_within_parent,lochierarchy,i_class] + Project [i_class,i_category,lochierarchy,gross_margin,rank_within_parent] InputAdapter Window [_w3,_w1,_w2] WholeStageCodegen @@ -8,21 +8,21 @@ TakeOrderedAndProject [gross_margin,i_category,rank_within_parent,lochierarchy,i InputAdapter Exchange [_w1,_w2] #1 WholeStageCodegen - HashAggregate [i_category,sum,sum,sum(UnscaledValue(ss_net_profit)),i_class,sum(UnscaledValue(ss_ext_sales_price)),spark_grouping_id] [_w2,gross_margin,_w3,sum,_w1,sum,lochierarchy,sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price))] + HashAggregate [sum,i_class,i_category,sum,sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),spark_grouping_id] [_w2,sum,_w1,sum,lochierarchy,sum(UnscaledValue(ss_net_profit)),_w3,sum(UnscaledValue(ss_ext_sales_price)),gross_margin] InputAdapter Exchange [i_category,i_class,spark_grouping_id] #2 WholeStageCodegen - HashAggregate [i_category,sum,ss_net_profit,sum,ss_ext_sales_price,sum,i_class,sum,spark_grouping_id] [sum,sum,sum,sum] + HashAggregate [sum,sum,sum,i_class,i_category,sum,ss_ext_sales_price,spark_grouping_id,ss_net_profit] [sum,sum,sum,sum] Expand [ss_ext_sales_price,ss_net_profit,i_category,i_class] Project [ss_ext_sales_price,ss_net_profit,i_category,i_class] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [i_category,ss_store_sk,ss_ext_sales_price,i_class,ss_net_profit] + Project [i_class,ss_store_sk,ss_ext_sales_price,i_category,ss_net_profit] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] Filter [ss_sold_date_sk,ss_item_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] [ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt index 919606c5d..afe830f9d 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt @@ -9,11 +9,11 @@ TakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] BroadcastHashJoin [i_item_sk,cs_item_sk] Project [i_item_sk,i_item_id,i_item_desc,i_current_price] BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [i_current_price,i_item_sk,inv_date_sk,i_item_id,i_item_desc] + Project [i_current_price,i_item_sk,inv_date_sk,i_item_desc,i_item_id] BroadcastHashJoin [i_item_sk,inv_item_sk] Project [i_item_sk,i_item_id,i_item_desc,i_current_price] Filter [i_current_price,i_manufact_id,i_item_sk] - Scan parquet default.item [i_manufact_id,i_current_price,i_item_sk,i_item_id,i_item_desc] [i_manufact_id,i_current_price,i_item_sk,i_item_id,i_item_desc] + Scan parquet default.item [i_current_price,i_manufact_id,i_item_sk,i_item_desc,i_item_id] [i_current_price,i_manufact_id,i_item_sk,i_item_desc,i_item_id] InputAdapter BroadcastExchange #2 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt index 33d987376..caec9c677 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt @@ -7,10 +7,10 @@ CollectLimit HashAggregate [count,count] [count,count] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_first_name,d_date,c_last_name,d_date,c_last_name,c_first_name] + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,d_date,c_first_name] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - BroadcastHashJoin [d_date,d_date,c_last_name,c_first_name,c_first_name,c_last_name] + BroadcastHashJoin [c_first_name,d_date,d_date,c_last_name,c_last_name,c_first_name] HashAggregate [c_last_name,c_first_name,d_date] InputAdapter Exchange [c_last_name,c_first_name,d_date] #2 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt index 1618f51b2..0fadf4ea4 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt @@ -1,19 +1,19 @@ WholeStageCodegen - Sort [mean,i_item_sk,mean,cov,d_moy,d_moy,cov,w_warehouse_sk] + Sort [d_moy,mean,i_item_sk,w_warehouse_sk,d_moy,mean,cov,cov] InputAdapter - Exchange [mean,i_item_sk,mean,cov,d_moy,d_moy,cov,w_warehouse_sk] #1 + Exchange [d_moy,mean,i_item_sk,w_warehouse_sk,d_moy,mean,cov,cov] #1 WholeStageCodegen BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] - Project [w_warehouse_sk,i_item_sk,stdev,mean,d_moy] + Project [d_moy,mean,w_warehouse_sk,i_item_sk,stdev] Filter [mean,stdev] - HashAggregate [avg(cast(inv_quantity_on_hand as bigint)),w_warehouse_sk,count,d_moy,w_warehouse_name,avg,n,m2,sum,i_item_sk,stddev_samp(cast(inv_quantity_on_hand as double))] [avg(cast(inv_quantity_on_hand as bigint)),count,avg,n,mean,m2,sum,stdev,stddev_samp(cast(inv_quantity_on_hand as double))] + HashAggregate [m2,w_warehouse_sk,d_moy,sum,count,n,w_warehouse_name,avg,stddev_samp(cast(inv_quantity_on_hand as double)),i_item_sk,avg(cast(inv_quantity_on_hand as bigint))] [stdev,mean,m2,sum,count,n,avg,stddev_samp(cast(inv_quantity_on_hand as double)),avg(cast(inv_quantity_on_hand as bigint))] InputAdapter Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 WholeStageCodegen - HashAggregate [sum,w_warehouse_sk,count,d_moy,inv_quantity_on_hand,w_warehouse_name,avg,n,m2,n,m2,sum,count,avg,i_item_sk] [sum,count,avg,n,m2,n,m2,sum,count,avg] - Project [w_warehouse_name,inv_quantity_on_hand,i_item_sk,w_warehouse_sk,d_moy] + HashAggregate [m2,w_warehouse_sk,d_moy,m2,sum,count,n,w_warehouse_name,sum,avg,i_item_sk,inv_quantity_on_hand,avg,count,n] [m2,m2,sum,count,n,sum,avg,avg,count,n] + Project [d_moy,inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name] BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [w_warehouse_name,inv_quantity_on_hand,i_item_sk,inv_date_sk,w_warehouse_sk] + Project [inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name,inv_date_sk] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] BroadcastHashJoin [inv_item_sk,i_item_sk] @@ -41,16 +41,16 @@ WholeStageCodegen InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [d_moy,mean,i_item_sk,stdev,w_warehouse_sk] + Project [i_item_sk,d_moy,mean,stdev,w_warehouse_sk] Filter [mean,stdev] - HashAggregate [w_warehouse_name,avg(cast(inv_quantity_on_hand as bigint)),count,d_moy,i_item_sk,avg,n,w_warehouse_sk,m2,sum,stddev_samp(cast(inv_quantity_on_hand as double))] [avg(cast(inv_quantity_on_hand as bigint)),mean,count,avg,n,m2,sum,stdev,stddev_samp(cast(inv_quantity_on_hand as double))] + HashAggregate [m2,i_item_sk,sum,count,w_warehouse_name,n,d_moy,avg,stddev_samp(cast(inv_quantity_on_hand as double)),w_warehouse_sk,avg(cast(inv_quantity_on_hand as bigint))] [m2,sum,count,n,mean,avg,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,avg(cast(inv_quantity_on_hand as bigint))] InputAdapter Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 WholeStageCodegen - HashAggregate [w_warehouse_name,sum,count,d_moy,i_item_sk,inv_quantity_on_hand,avg,n,w_warehouse_sk,m2,n,m2,sum,count,avg] [sum,count,avg,n,m2,n,m2,sum,count,avg] - Project [i_item_sk,w_warehouse_name,inv_quantity_on_hand,w_warehouse_sk,d_moy] + HashAggregate [m2,m2,i_item_sk,sum,count,w_warehouse_name,n,d_moy,sum,avg,w_warehouse_sk,inv_quantity_on_hand,avg,count,n] [m2,m2,sum,count,n,sum,avg,avg,count,n] + Project [w_warehouse_sk,inv_quantity_on_hand,w_warehouse_name,i_item_sk,d_moy] BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [i_item_sk,w_warehouse_name,inv_quantity_on_hand,w_warehouse_sk,inv_date_sk] + Project [w_warehouse_sk,inv_quantity_on_hand,w_warehouse_name,i_item_sk,inv_date_sk] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] BroadcastHashJoin [inv_item_sk,i_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt index 83d28dd06..846b7cca1 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt @@ -1,19 +1,19 @@ WholeStageCodegen - Sort [cov,mean,i_item_sk,d_moy,cov,w_warehouse_sk,d_moy,mean] + Sort [i_item_sk,mean,w_warehouse_sk,d_moy,mean,cov,d_moy,cov] InputAdapter - Exchange [cov,mean,i_item_sk,d_moy,cov,w_warehouse_sk,d_moy,mean] #1 + Exchange [i_item_sk,mean,w_warehouse_sk,d_moy,mean,cov,d_moy,cov] #1 WholeStageCodegen BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] - Project [stdev,mean,w_warehouse_sk,i_item_sk,d_moy] + Project [d_moy,w_warehouse_sk,i_item_sk,stdev,mean] Filter [mean,stdev] - HashAggregate [avg(cast(inv_quantity_on_hand as bigint)),m2,count,w_warehouse_sk,avg,d_moy,w_warehouse_name,sum,stddev_samp(cast(inv_quantity_on_hand as double)),i_item_sk,n] [avg(cast(inv_quantity_on_hand as bigint)),m2,count,avg,mean,stdev,sum,stddev_samp(cast(inv_quantity_on_hand as double)),n] + HashAggregate [avg,w_warehouse_sk,d_moy,w_warehouse_name,m2,avg(cast(inv_quantity_on_hand as bigint)),i_item_sk,n,count,sum,stddev_samp(cast(inv_quantity_on_hand as double))] [avg,m2,stdev,avg(cast(inv_quantity_on_hand as bigint)),n,count,mean,sum,stddev_samp(cast(inv_quantity_on_hand as double))] InputAdapter Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 WholeStageCodegen - HashAggregate [m2,sum,count,w_warehouse_sk,avg,d_moy,inv_quantity_on_hand,w_warehouse_name,m2,avg,n,sum,i_item_sk,n,count] [m2,sum,count,avg,m2,avg,n,sum,n,count] - Project [w_warehouse_name,inv_quantity_on_hand,i_item_sk,w_warehouse_sk,d_moy] + HashAggregate [avg,n,w_warehouse_sk,d_moy,count,w_warehouse_name,m2,sum,i_item_sk,inv_quantity_on_hand,m2,n,avg,count,sum] [avg,n,count,m2,sum,m2,n,avg,count,sum] + Project [d_moy,inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name] BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [w_warehouse_name,inv_quantity_on_hand,i_item_sk,inv_date_sk,w_warehouse_sk] + Project [inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name,inv_date_sk] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] BroadcastHashJoin [inv_item_sk,i_item_sk] @@ -41,16 +41,16 @@ WholeStageCodegen InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [i_item_sk,d_moy,stdev,w_warehouse_sk,mean] + Project [mean,stdev,d_moy,w_warehouse_sk,i_item_sk] Filter [mean,stdev] - HashAggregate [avg(cast(inv_quantity_on_hand as bigint)),m2,count,avg,w_warehouse_name,sum,i_item_sk,stddev_samp(cast(inv_quantity_on_hand as double)),n,d_moy,w_warehouse_sk] [avg(cast(inv_quantity_on_hand as bigint)),m2,count,avg,mean,sum,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,n] + HashAggregate [avg,m2,avg(cast(inv_quantity_on_hand as bigint)),d_moy,w_warehouse_sk,n,i_item_sk,count,w_warehouse_name,sum,stddev_samp(cast(inv_quantity_on_hand as double))] [avg,stdev,m2,avg(cast(inv_quantity_on_hand as bigint)),n,count,sum,mean,stddev_samp(cast(inv_quantity_on_hand as double))] InputAdapter Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 WholeStageCodegen - HashAggregate [m2,sum,count,avg,inv_quantity_on_hand,m2,avg,w_warehouse_name,n,sum,i_item_sk,n,d_moy,w_warehouse_sk,count] [m2,sum,count,avg,m2,avg,n,sum,n,count] - Project [inv_quantity_on_hand,w_warehouse_sk,d_moy,i_item_sk,w_warehouse_name] + HashAggregate [avg,n,count,m2,sum,d_moy,w_warehouse_sk,inv_quantity_on_hand,m2,n,i_item_sk,avg,count,w_warehouse_name,sum] [avg,n,count,m2,sum,m2,n,avg,count,sum] + Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,d_moy,w_warehouse_name] BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [inv_quantity_on_hand,w_warehouse_sk,inv_date_sk,i_item_sk,w_warehouse_name] + Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,inv_date_sk,w_warehouse_name] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] BroadcastHashJoin [inv_item_sk,i_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt index c03b8aee2..b055f5dbc 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt @@ -1,36 +1,36 @@ -TakeOrderedAndProject [customer_birth_country,customer_login,customer_last_name,customer_email_address,customer_first_name,customer_preferred_cust_flag,customer_id] +TakeOrderedAndProject [customer_email_address,customer_login,customer_first_name,customer_last_name,customer_birth_country,customer_id,customer_preferred_cust_flag] WholeStageCodegen - Project [customer_birth_country,customer_login,customer_last_name,customer_email_address,customer_first_name,customer_preferred_cust_flag,customer_id] - BroadcastHashJoin [year_total,customer_id,year_total,year_total,customer_id,year_total] - Project [year_total,customer_last_name,customer_first_name,customer_id,customer_login,customer_email_address,customer_id,customer_preferred_cust_flag,customer_birth_country,year_total,year_total] + Project [customer_email_address,customer_login,customer_first_name,customer_last_name,customer_birth_country,customer_id,customer_preferred_cust_flag] + BroadcastHashJoin [year_total,year_total,customer_id,year_total,year_total,customer_id] + Project [year_total,customer_birth_country,customer_preferred_cust_flag,year_total,customer_email_address,customer_first_name,year_total,customer_login,customer_id,customer_id,customer_last_name] BroadcastHashJoin [customer_id,customer_id] - Project [year_total,customer_last_name,customer_first_name,customer_id,customer_login,customer_email_address,customer_id,customer_preferred_cust_flag,customer_birth_country,year_total] - BroadcastHashJoin [year_total,year_total,customer_id,customer_id,year_total,year_total] - Project [year_total,year_total,customer_last_name,customer_first_name,customer_id,customer_login,year_total,customer_email_address,customer_id,customer_preferred_cust_flag,customer_birth_country] + Project [year_total,customer_birth_country,customer_preferred_cust_flag,customer_email_address,customer_first_name,year_total,customer_login,customer_id,customer_id,customer_last_name] + BroadcastHashJoin [year_total,year_total,year_total,year_total,customer_id,customer_id] + Project [customer_birth_country,customer_preferred_cust_flag,customer_email_address,customer_first_name,year_total,customer_login,customer_id,customer_id,customer_last_name,year_total,year_total] BroadcastHashJoin [customer_id,customer_id] BroadcastHashJoin [customer_id,customer_id] InputAdapter Union WholeStageCodegen Filter [year_total] - HashAggregate [c_email_address,c_login,c_last_name,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name,sum] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] + HashAggregate [d_year,c_login,c_birth_country,c_customer_id,c_last_name,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_preferred_cust_flag,sum,c_first_name,c_email_address] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] InputAdapter - Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #1 + Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #1 WholeStageCodegen - HashAggregate [c_email_address,ss_ext_list_price,c_login,c_last_name,c_customer_id,d_year,ss_ext_sales_price,c_birth_country,ss_ext_wholesale_cost,c_preferred_cust_flag,sum,c_first_name,sum,ss_ext_discount_amt] [sum,sum] - Project [d_year,c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ss_ext_sales_price,ss_ext_discount_amt,ss_ext_wholesale_cost,ss_ext_list_price] + HashAggregate [ss_ext_wholesale_cost,d_year,ss_ext_discount_amt,c_login,sum,c_birth_country,c_customer_id,c_last_name,ss_ext_sales_price,c_preferred_cust_flag,sum,ss_ext_list_price,c_first_name,c_email_address] [sum,sum] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,d_year,c_login,ss_ext_sales_price,c_last_name,c_first_name,c_birth_country,ss_ext_list_price,ss_ext_wholesale_cost] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_customer_id,c_birth_country,c_login,ss_sold_date_sk,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ss_ext_sales_price,ss_ext_discount_amt,ss_ext_wholesale_cost,ss_ext_list_price] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,c_login,ss_ext_sales_price,c_last_name,c_first_name,ss_sold_date_sk,c_birth_country,ss_ext_list_price,ss_ext_wholesale_cost] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [ss_customer_sk,ss_sold_date_sk,ss_ext_sales_price,ss_ext_discount_amt,ss_ext_wholesale_cost,ss_ext_list_price] + Project [ss_ext_discount_amt,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ext_list_price,ss_ext_wholesale_cost] Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk,ss_ext_sales_price,ss_ext_discount_amt,ss_ext_wholesale_cost,ss_ext_list_price] [ss_customer_sk,ss_sold_date_sk,ss_ext_sales_price,ss_ext_discount_amt,ss_ext_wholesale_cost,ss_ext_list_price] + Scan parquet default.store_sales [ss_ext_discount_amt,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ext_list_price,ss_ext_wholesale_cost] [ss_ext_discount_amt,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ext_list_price,ss_ext_wholesale_cost] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -43,52 +43,52 @@ TakeOrderedAndProject [customer_birth_country,customer_login,customer_last_name, BroadcastExchange #4 Union WholeStageCodegen - HashAggregate [sum,c_email_address,c_login,c_last_name,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] [sum,year_total,customer_last_name,customer_first_name,customer_id,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_login,customer_email_address,customer_preferred_cust_flag,customer_birth_country] + HashAggregate [d_year,c_login,c_birth_country,sum,c_customer_id,c_last_name,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_preferred_cust_flag,c_first_name,c_email_address] [customer_birth_country,customer_preferred_cust_flag,customer_email_address,customer_first_name,sum,customer_login,customer_id,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_last_name,year_total] InputAdapter - Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #5 + Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #5 WholeStageCodegen - HashAggregate [sum,c_email_address,ss_ext_list_price,c_login,c_last_name,c_customer_id,d_year,ss_ext_sales_price,c_birth_country,ss_ext_wholesale_cost,c_preferred_cust_flag,c_first_name,sum,ss_ext_discount_amt] [sum,sum] - Project [d_year,c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ss_ext_sales_price,ss_ext_discount_amt,ss_ext_wholesale_cost,ss_ext_list_price] + HashAggregate [ss_ext_wholesale_cost,d_year,ss_ext_discount_amt,c_login,sum,c_birth_country,sum,c_customer_id,c_last_name,ss_ext_sales_price,c_preferred_cust_flag,ss_ext_list_price,c_first_name,c_email_address] [sum,sum] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,d_year,c_login,ss_ext_sales_price,c_last_name,c_first_name,c_birth_country,ss_ext_list_price,ss_ext_wholesale_cost] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_customer_id,c_birth_country,c_login,ss_sold_date_sk,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ss_ext_sales_price,ss_ext_discount_amt,ss_ext_wholesale_cost,ss_ext_list_price] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,c_login,ss_ext_sales_price,c_last_name,c_first_name,ss_sold_date_sk,c_birth_country,ss_ext_list_price,ss_ext_wholesale_cost] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] InputAdapter - ReusedExchange [ss_customer_sk,ss_sold_date_sk,ss_ext_sales_price,ss_ext_discount_amt,ss_ext_wholesale_cost,ss_ext_list_price] [ss_customer_sk,ss_sold_date_sk,ss_ext_sales_price,ss_ext_discount_amt,ss_ext_wholesale_cost,ss_ext_list_price] #2 + ReusedExchange [ss_ext_discount_amt,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ext_list_price,ss_ext_wholesale_cost] [ss_ext_discount_amt,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ext_list_price,ss_ext_wholesale_cost] #2 InputAdapter BroadcastExchange #6 WholeStageCodegen Project [d_date_sk,d_year] Filter [d_year,d_date_sk] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - LocalTableScan [year_total,customer_login,customer_id,customer_last_name,customer_email_address,customer_birth_country,customer_first_name,customer_preferred_cust_flag] [year_total,customer_login,customer_id,customer_last_name,customer_email_address,customer_birth_country,customer_first_name,customer_preferred_cust_flag] - LocalTableScan [year_total,customer_login,customer_email_address,customer_first_name,customer_id,customer_preferred_cust_flag,customer_last_name,customer_birth_country] [year_total,customer_login,customer_email_address,customer_first_name,customer_id,customer_preferred_cust_flag,customer_last_name,customer_birth_country] + LocalTableScan [customer_id,year_total,customer_preferred_cust_flag,customer_last_name,customer_email_address,customer_birth_country,customer_login,customer_first_name] [customer_id,year_total,customer_preferred_cust_flag,customer_last_name,customer_email_address,customer_birth_country,customer_login,customer_first_name] + LocalTableScan [customer_first_name,customer_birth_country,customer_preferred_cust_flag,customer_id,customer_email_address,customer_last_name,customer_login,year_total] [customer_first_name,customer_birth_country,customer_preferred_cust_flag,customer_id,customer_email_address,customer_last_name,customer_login,year_total] InputAdapter BroadcastExchange #7 Union LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen Filter [year_total] - HashAggregate [c_email_address,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),sum,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] + HashAggregate [d_year,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_login,sum,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] InputAdapter - Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #8 + Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #8 WholeStageCodegen - HashAggregate [c_email_address,cs_ext_wholesale_cost,sum,sum,c_login,c_last_name,cs_ext_list_price,cs_ext_sales_price,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name,cs_ext_discount_amt] [sum,sum] - Project [cs_ext_list_price,d_year,c_customer_id,c_birth_country,cs_ext_wholesale_cost,c_login,cs_ext_sales_price,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,cs_ext_discount_amt] + HashAggregate [d_year,sum,c_login,sum,cs_ext_discount_amt,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,cs_ext_wholesale_cost,c_first_name,cs_ext_sales_price,c_email_address,cs_ext_list_price] [sum,sum] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,d_year,cs_ext_discount_amt,c_login,c_last_name,c_first_name,c_birth_country,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_ext_list_price,c_customer_id,c_birth_country,cs_ext_wholesale_cost,c_login,cs_ext_sales_price,cs_sold_date_sk,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,cs_ext_discount_amt] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,cs_ext_discount_amt,c_login,c_last_name,cs_sold_date_sk,c_first_name,c_birth_country,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] InputAdapter BroadcastExchange #9 WholeStageCodegen - Project [cs_ext_list_price,cs_bill_customer_sk,cs_ext_wholesale_cost,cs_ext_sales_price,cs_sold_date_sk,cs_ext_discount_amt] + Project [cs_ext_discount_amt,cs_bill_customer_sk,cs_sold_date_sk,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] Filter [cs_bill_customer_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ext_list_price,cs_bill_customer_sk,cs_ext_wholesale_cost,cs_ext_sales_price,cs_sold_date_sk,cs_ext_discount_amt] [cs_ext_list_price,cs_bill_customer_sk,cs_ext_wholesale_cost,cs_ext_sales_price,cs_sold_date_sk,cs_ext_discount_amt] + Scan parquet default.catalog_sales [cs_ext_discount_amt,cs_bill_customer_sk,cs_sold_date_sk,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] [cs_ext_discount_amt,cs_bill_customer_sk,cs_sold_date_sk,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 LocalTableScan [customer_id,year_total] [customer_id,year_total] @@ -97,20 +97,20 @@ TakeOrderedAndProject [customer_birth_country,customer_login,customer_last_name, Union LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen - HashAggregate [c_email_address,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name,sum] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] + HashAggregate [d_year,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] InputAdapter - Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #11 + Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #11 WholeStageCodegen - HashAggregate [c_email_address,cs_ext_wholesale_cost,c_login,c_last_name,cs_ext_list_price,cs_ext_sales_price,sum,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name,sum,cs_ext_discount_amt] [sum,sum] - Project [cs_ext_list_price,d_year,c_customer_id,c_birth_country,cs_ext_wholesale_cost,c_login,cs_ext_sales_price,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,cs_ext_discount_amt] + HashAggregate [d_year,sum,c_login,cs_ext_discount_amt,sum,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,cs_ext_wholesale_cost,c_first_name,cs_ext_sales_price,c_email_address,cs_ext_list_price] [sum,sum] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,d_year,cs_ext_discount_amt,c_login,c_last_name,c_first_name,c_birth_country,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_ext_list_price,c_customer_id,c_birth_country,cs_ext_wholesale_cost,c_login,cs_ext_sales_price,cs_sold_date_sk,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,cs_ext_discount_amt] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,cs_ext_discount_amt,c_login,c_last_name,cs_sold_date_sk,c_first_name,c_birth_country,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] InputAdapter - ReusedExchange [cs_ext_list_price,cs_bill_customer_sk,cs_ext_wholesale_cost,cs_ext_sales_price,cs_sold_date_sk,cs_ext_discount_amt] [cs_ext_list_price,cs_bill_customer_sk,cs_ext_wholesale_cost,cs_ext_sales_price,cs_sold_date_sk,cs_ext_discount_amt] #9 + ReusedExchange [cs_ext_discount_amt,cs_bill_customer_sk,cs_sold_date_sk,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] [cs_ext_discount_amt,cs_bill_customer_sk,cs_sold_date_sk,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] #9 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #6 LocalTableScan [customer_id,year_total] [customer_id,year_total] @@ -121,24 +121,24 @@ TakeOrderedAndProject [customer_birth_country,customer_login,customer_last_name, LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen Filter [year_total] - HashAggregate [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,sum,c_preferred_cust_flag,c_first_name] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] + HashAggregate [d_year,c_login,c_birth_country,c_customer_id,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),sum,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] InputAdapter - Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #13 + Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #13 WholeStageCodegen - HashAggregate [c_email_address,ws_ext_list_price,c_login,c_last_name,ws_ext_wholesale_cost,ws_ext_discount_amt,c_customer_id,d_year,c_birth_country,sum,ws_ext_sales_price,sum,c_preferred_cust_flag,c_first_name] [sum,sum] - Project [d_year,c_customer_id,c_birth_country,c_login,ws_ext_list_price,ws_ext_wholesale_cost,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ws_ext_sales_price,ws_ext_discount_amt] + HashAggregate [d_year,ws_ext_wholesale_cost,ws_ext_discount_amt,c_login,sum,ws_ext_sales_price,c_birth_country,c_customer_id,sum,c_last_name,c_preferred_cust_flag,ws_ext_list_price,c_first_name,c_email_address] [sum,sum] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,d_year,ws_ext_sales_price,ws_ext_discount_amt,c_login,c_last_name,ws_ext_list_price,c_first_name,c_birth_country,ws_ext_wholesale_cost] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [c_customer_id,c_birth_country,c_login,ws_ext_list_price,ws_ext_wholesale_cost,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,ws_ext_sales_price,ws_ext_discount_amt,c_login,c_last_name,ws_sold_date_sk,ws_ext_list_price,c_first_name,c_birth_country,ws_ext_wholesale_cost] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] InputAdapter BroadcastExchange #14 WholeStageCodegen - Project [ws_bill_customer_sk,ws_ext_list_price,ws_ext_wholesale_cost,ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk] + Project [ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk,ws_ext_list_price,ws_bill_customer_sk,ws_ext_wholesale_cost] Filter [ws_bill_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_ext_list_price,ws_ext_wholesale_cost,ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk] [ws_bill_customer_sk,ws_ext_list_price,ws_ext_wholesale_cost,ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk] + Scan parquet default.web_sales [ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk,ws_ext_list_price,ws_bill_customer_sk,ws_ext_wholesale_cost] [ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk,ws_ext_list_price,ws_bill_customer_sk,ws_ext_wholesale_cost] InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 InputAdapter @@ -147,19 +147,19 @@ TakeOrderedAndProject [customer_birth_country,customer_login,customer_last_name, LocalTableScan [customer_id,year_total] [customer_id,year_total] LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen - HashAggregate [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,sum,c_preferred_cust_flag,c_first_name] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] + HashAggregate [d_year,c_login,c_birth_country,c_customer_id,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,sum] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] InputAdapter - Exchange [c_email_address,c_login,c_last_name,c_customer_id,d_year,c_birth_country,c_preferred_cust_flag,c_first_name] #16 + Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #16 WholeStageCodegen - HashAggregate [c_email_address,sum,ws_ext_list_price,c_login,c_last_name,ws_ext_wholesale_cost,ws_ext_discount_amt,c_customer_id,d_year,c_birth_country,sum,ws_ext_sales_price,c_preferred_cust_flag,c_first_name] [sum,sum] - Project [d_year,c_customer_id,c_birth_country,c_login,ws_ext_list_price,ws_ext_wholesale_cost,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ws_ext_sales_price,ws_ext_discount_amt] + HashAggregate [d_year,ws_ext_wholesale_cost,ws_ext_discount_amt,c_login,ws_ext_sales_price,c_birth_country,c_customer_id,sum,c_last_name,c_preferred_cust_flag,ws_ext_list_price,c_first_name,c_email_address,sum] [sum,sum] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,d_year,ws_ext_sales_price,ws_ext_discount_amt,c_login,c_last_name,ws_ext_list_price,c_first_name,c_birth_country,ws_ext_wholesale_cost] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [c_customer_id,c_birth_country,c_login,ws_ext_list_price,ws_ext_wholesale_cost,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,ws_ext_sales_price,ws_ext_discount_amt,c_login,c_last_name,ws_sold_date_sk,ws_ext_list_price,c_first_name,c_birth_country,ws_ext_wholesale_cost] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] [c_customer_id,c_birth_country,c_login,c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_email_address] + Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] InputAdapter - ReusedExchange [ws_bill_customer_sk,ws_ext_list_price,ws_ext_wholesale_cost,ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk] [ws_bill_customer_sk,ws_ext_list_price,ws_ext_wholesale_cost,ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk] #14 + ReusedExchange [ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk,ws_ext_list_price,ws_bill_customer_sk,ws_ext_wholesale_cost] [ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk,ws_ext_list_price,ws_bill_customer_sk,ws_ext_wholesale_cost] #14 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #6 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt index 44fc2c9ca..2e480b9ea 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt @@ -1,21 +1,21 @@ TakeOrderedAndProject [w_state,i_item_id,sales_before,sales_after] WholeStageCodegen - HashAggregate [w_state,sum,i_item_id,sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] [sales_before,sales_after,sum,sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] + HashAggregate [i_item_id,sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),w_state,sum] [sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sales_before,sales_after,sum] InputAdapter Exchange [w_state,i_item_id] #1 WholeStageCodegen - HashAggregate [sum,w_state,d_date,sum,cs_sales_price,cr_refunded_cash,i_item_id,sum,sum] [sum,sum,sum,sum] - Project [cr_refunded_cash,w_state,i_item_id,d_date,cs_sales_price] + HashAggregate [i_item_id,sum,sum,d_date,w_state,sum,cs_sales_price,cr_refunded_cash,sum] [sum,sum,sum,sum] + Project [d_date,cs_sales_price,w_state,cr_refunded_cash,i_item_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cr_refunded_cash,cs_sold_date_sk,w_state,i_item_id,cs_sales_price] + Project [cs_sales_price,w_state,cs_sold_date_sk,cr_refunded_cash,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cr_refunded_cash,cs_item_sk,cs_sold_date_sk,w_state,cs_sales_price] + Project [cs_sales_price,w_state,cs_sold_date_sk,cs_item_sk,cr_refunded_cash] BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] - Project [cs_warehouse_sk,cr_refunded_cash,cs_item_sk,cs_sold_date_sk,cs_sales_price] + Project [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cr_refunded_cash] BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_warehouse_sk,cs_item_sk,cs_sold_date_sk,cs_sales_price,cs_order_number] + Project [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] Filter [cs_warehouse_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_warehouse_sk,cs_item_sk,cs_sold_date_sk,cs_sales_price,cs_order_number] [cs_warehouse_sk,cs_item_sk,cs_sold_date_sk,cs_sales_price,cs_order_number] + Scan parquet default.catalog_sales [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt index 542a3d114..caac67bf3 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt @@ -15,11 +15,11 @@ TakeOrderedAndProject [i_product_name] WholeStageCodegen Project [i_manufact] Filter [alwaysTrue,item_cnt] - HashAggregate [i_manufact,count,count(1)] [i_manufact,count,count(1),item_cnt,alwaysTrue] + HashAggregate [i_manufact,count,count(1)] [i_manufact,alwaysTrue,item_cnt,count,count(1)] InputAdapter Exchange [i_manufact] #3 WholeStageCodegen HashAggregate [i_manufact,count,count] [count,count] Project [i_manufact] - Filter [i_manufact,i_size,i_color,i_category,i_units] - Scan parquet default.item [i_manufact,i_units,i_category,i_size,i_color] [i_manufact,i_units,i_category,i_size,i_color] + Filter [i_units,i_manufact,i_color,i_size,i_category] + Scan parquet default.item [i_manufact,i_units,i_size,i_category,i_color] [i_manufact,i_units,i_size,i_category,i_color] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt index 016028e76..abcbca371 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt @@ -1,10 +1,10 @@ TakeOrderedAndProject [sum(ss_ext_sales_price),d_year,i_category_id,i_category] WholeStageCodegen - HashAggregate [d_year,sum,i_category,i_category_id,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price),sum] + HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),sum,d_year,i_category,i_category_id] [sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price),sum] InputAdapter Exchange [d_year,i_category_id,i_category] #1 WholeStageCodegen - HashAggregate [d_year,sum,ss_ext_sales_price,sum,i_category,i_category_id] [sum,sum] + HashAggregate [sum,d_year,sum,i_category,i_category_id,ss_ext_sales_price] [sum,sum] Project [d_year,ss_ext_sales_price,i_category_id,i_category] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [d_year,ss_item_sk,ss_ext_sales_price] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt index d1b3116b7..a5e60a60c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt @@ -1,10 +1,10 @@ -TakeOrderedAndProject [thu_sales,s_store_id,tue_sales,s_store_name,mon_sales,fri_sales,sun_sales,sat_sales,wed_sales] +TakeOrderedAndProject [sat_sales,fri_sales,sun_sales,s_store_id,tue_sales,mon_sales,s_store_name,thu_sales,wed_sales] WholeStageCodegen - HashAggregate [sum,s_store_id,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),s_store_name,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum,sum,sum] [thu_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),tue_sales,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),mon_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),fri_sales,sum,sum,sun_sales,sat_sales,sum,wed_sales] + HashAggregate [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,sum,s_store_id,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),s_store_name,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END))] [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sat_sales,fri_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sun_sales,sum,sum,sum,sum,tue_sales,mon_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),thu_sales,wed_sales] InputAdapter Exchange [s_store_name,s_store_id] #1 WholeStageCodegen - HashAggregate [sum,sum,sum,s_store_id,s_store_name,sum,sum,sum,d_day_name,sum,sum,sum,ss_sales_price,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + HashAggregate [sum,sum,sum,d_day_name,sum,sum,sum,sum,sum,sum,s_store_id,ss_sales_price,sum,sum,sum,sum,sum,s_store_name] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] Project [d_day_name,ss_sales_price,s_store_id,s_store_name] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [d_day_name,ss_store_sk,ss_sales_price] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt index 4547d0f60..99c27e16a 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt @@ -12,8 +12,8 @@ TakeOrderedAndProject(limit=100, orderBy=[rnk#1 ASC NULLS FIRST], output=[rnk#1, : : : +- *(3) Sort [rank_col#11 ASC NULLS FIRST], false, 0 : : : +- Exchange SinglePartition : : : +- *(2) Project [item_sk#8, rank_col#11] - : : : +- *(2) Filter (isnotnull(avg(ss_net_profit#12)#13) && (cast(avg(ss_net_profit#12)#13 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery9329)), DecimalType(13,7)))) - : : : : +- Subquery subquery9329 + : : : +- *(2) Filter (isnotnull(avg(ss_net_profit#12)#13) && (cast(avg(ss_net_profit#12)#13 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery7089)), DecimalType(13,7)))) + : : : : +- Subquery subquery7089 : : : : +- *(2) HashAggregate(keys=[ss_store_sk#14], functions=[avg(UnscaledValue(ss_net_profit#12))]) : : : : +- Exchange hashpartitioning(ss_store_sk#14, 5) : : : : +- *(1) HashAggregate(keys=[ss_store_sk#14], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) @@ -33,8 +33,8 @@ TakeOrderedAndProject(limit=100, orderBy=[rnk#1 ASC NULLS FIRST], output=[rnk#1, : : +- *(6) Sort [rank_col#17 DESC NULLS LAST], false, 0 : : +- Exchange SinglePartition : : +- *(5) Project [item_sk#6, rank_col#17] - : : +- *(5) Filter (isnotnull(avg(ss_net_profit#12)#18) && (cast(avg(ss_net_profit#12)#18 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery9334)), DecimalType(13,7)))) - : : : +- Subquery subquery9334 + : : +- *(5) Filter (isnotnull(avg(ss_net_profit#12)#18) && (cast(avg(ss_net_profit#12)#18 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery7094)), DecimalType(13,7)))) + : : : +- Subquery subquery7094 : : : +- *(2) HashAggregate(keys=[ss_store_sk#14], functions=[avg(UnscaledValue(ss_net_profit#12))]) : : : +- Exchange hashpartitioning(ss_store_sk#14, 5) : : : +- *(1) HashAggregate(keys=[ss_store_sk#14], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt index 6704d2835..55aab5cc2 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt @@ -23,15 +23,15 @@ TakeOrderedAndProject [rnk,best_performing,worst_performing] InputAdapter Exchange [ss_store_sk] #3 WholeStageCodegen - HashAggregate [sum,ss_net_profit,count,count,ss_store_sk,sum] [sum,count,sum,count] + HashAggregate [count,ss_store_sk,sum,sum,count,ss_net_profit] [sum,count,sum,count] Project [ss_store_sk,ss_net_profit] Filter [ss_store_sk,ss_addr_sk] Scan parquet default.store_sales [ss_addr_sk,ss_store_sk,ss_net_profit] [ss_addr_sk,ss_store_sk,ss_net_profit] - HashAggregate [ss_item_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [avg(UnscaledValue(ss_net_profit)),rank_col,item_sk,avg(ss_net_profit),sum,count] + HashAggregate [ss_item_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [avg(UnscaledValue(ss_net_profit)),rank_col,sum,avg(ss_net_profit),item_sk,count] InputAdapter Exchange [ss_item_sk] #2 WholeStageCodegen - HashAggregate [ss_item_sk,sum,sum,count,ss_net_profit,count] [sum,count,sum,count] + HashAggregate [ss_item_sk,sum,sum,count,count,ss_net_profit] [sum,count,sum,count] Project [ss_item_sk,ss_net_profit] Filter [ss_store_sk] Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_profit] [ss_item_sk,ss_store_sk,ss_net_profit] @@ -55,11 +55,11 @@ TakeOrderedAndProject [rnk,best_performing,worst_performing] InputAdapter Exchange [ss_store_sk] #6 WholeStageCodegen - HashAggregate [count,sum,ss_net_profit,count,ss_store_sk,sum] [sum,count,sum,count] + HashAggregate [count,ss_store_sk,sum,count,sum,ss_net_profit] [sum,count,sum,count] Project [ss_store_sk,ss_net_profit] Filter [ss_store_sk,ss_addr_sk] Scan parquet default.store_sales [ss_addr_sk,ss_store_sk,ss_net_profit] [ss_addr_sk,ss_store_sk,ss_net_profit] - HashAggregate [ss_item_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [item_sk,count,rank_col,sum,avg(UnscaledValue(ss_net_profit)),avg(ss_net_profit)] + HashAggregate [ss_item_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [avg(ss_net_profit),item_sk,count,avg(UnscaledValue(ss_net_profit)),rank_col,sum] InputAdapter ReusedExchange [ss_item_sk,sum,count] [ss_item_sk,sum,count] #2 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt index f4040fa0f..f6d66999d 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt @@ -12,7 +12,7 @@ TakeOrderedAndProject [ca_zip,ca_city,sum(ws_sales_price)] BroadcastHashJoin [ws_item_sk,i_item_sk] Project [ws_item_sk,ws_sales_price,ca_city,ca_zip] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ca_city,ws_sales_price,ca_zip,ws_item_sk,ws_sold_date_sk] + Project [ca_zip,ca_city,ws_sold_date_sk,ws_sales_price,ws_item_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] Project [ws_sold_date_sk,ws_item_sk,ws_sales_price,c_current_addr_sk] BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt index acac15c84..3010603c0 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt @@ -1,25 +1,25 @@ -TakeOrderedAndProject [amt,ss_ticket_number,c_last_name,bought_city,profit,c_first_name,ca_city] +TakeOrderedAndProject [bought_city,ss_ticket_number,profit,c_last_name,c_first_name,amt,ca_city] WholeStageCodegen - Project [amt,ss_ticket_number,c_last_name,bought_city,profit,c_first_name,ca_city] + Project [bought_city,ss_ticket_number,profit,c_last_name,c_first_name,amt,ca_city] BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] - Project [profit,c_last_name,amt,c_first_name,bought_city,c_current_addr_sk,ss_ticket_number] + Project [c_current_addr_sk,profit,amt,c_last_name,c_first_name,ss_ticket_number,bought_city] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - HashAggregate [ss_customer_sk,ca_city,sum,sum(UnscaledValue(ss_coupon_amt)),sum,ss_addr_sk,sum(UnscaledValue(ss_net_profit)),ss_ticket_number] [sum,sum(UnscaledValue(ss_coupon_amt)),profit,sum,amt,sum(UnscaledValue(ss_net_profit)),bought_city] + HashAggregate [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_coupon_amt)),sum,ca_city,ss_customer_sk,sum,ss_ticket_number,ss_addr_sk] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_coupon_amt)),profit,sum,amt,sum,bought_city] InputAdapter Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 WholeStageCodegen - HashAggregate [ss_customer_sk,ca_city,sum,ss_coupon_amt,ss_net_profit,sum,sum,ss_addr_sk,sum,ss_ticket_number] [sum,sum,sum,sum] - Project [ca_city,ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + HashAggregate [sum,ca_city,ss_customer_sk,ss_coupon_amt,sum,sum,sum,ss_ticket_number,ss_addr_sk,ss_net_profit] [sum,sum,sum,sum] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ca_city,ss_net_profit,ss_ticket_number] BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_hdemo_sk,ss_ticket_number] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_store_sk,ss_coupon_amt,ss_net_profit] + Project [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_hdemo_sk,ss_ticket_number] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk,ss_coupon_amt,ss_net_profit] - Filter [ss_hdemo_sk,ss_customer_sk,ss_store_sk,ss_sold_date_sk,ss_addr_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk,ss_coupon_amt,ss_net_profit] [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk,ss_coupon_amt,ss_net_profit] + Project [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + Filter [ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt index 47228048b..e7dcf37e4 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt @@ -1,52 +1,51 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,s_store_name#3 ASC NULLS FIRST], output=[i_category#4,i_brand#5,s_store_name#3,s_company_name#6,d_year#7,d_moy#8,avg_monthly_sales#2,sum_sales#1,psum#9,nsum#10]) -+- *(23) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, avg_monthly_sales#2, sum_sales#1, sum_sales#11 AS psum#9, sum_sales#12 AS nsum#10] - +- *(23) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, (rn#18 - 1)], Inner, BuildRight - :- *(23) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13, sum_sales#11] - : +- *(23) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, (rn#23 + 1)], Inner, BuildRight - : :- *(23) Filter (((((isnotnull(d_year#7) && isnotnull(avg_monthly_sales#2)) && (d_year#7 = 1999)) && (avg_monthly_sales#2 > 0.000000)) && (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000)) && isnotnull(rn#13)) - : : +- Window [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#13], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] - : : +- *(8) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#6 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, 5) - : : +- *(7) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2] - : : +- Window [avg(_w0#24) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7] - : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#6 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST], false, 0 - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, 5) - : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#25))]) - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 5) - : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#25))]) - : : +- *(4) Project [i_brand#5, i_category#4, ss_sales_price#25, d_year#7, d_moy#8, s_store_name#3, s_company_name#6] - : : +- *(4) BroadcastHashJoin [ss_store_sk#26], [s_store_sk#27], Inner, BuildRight - : : :- *(4) Project [i_brand#5, i_category#4, ss_store_sk#26, ss_sales_price#25, d_year#7, d_moy#8] - : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#28], [d_date_sk#29], Inner, BuildRight - : : : :- *(4) Project [i_brand#5, i_category#4, ss_sold_date_sk#28, ss_store_sk#26, ss_sales_price#25] - : : : : +- *(4) BroadcastHashJoin [i_item_sk#30], [ss_item_sk#31], Inner, BuildRight - : : : : :- *(4) Project [i_item_sk#30, i_brand#5, i_category#4] - : : : : : +- *(4) Filter ((isnotnull(i_item_sk#30) && isnotnull(i_category#4)) && isnotnull(i_brand#5)) - : : : : : +- *(4) FileScan parquet default.item[i_item_sk#30,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : : : +- *(1) Project [ss_sold_date_sk#28, ss_item_sk#31, ss_store_sk#26, ss_sales_price#25] - : : : : +- *(1) Filter ((isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#28)) && isnotnull(ss_store_sk#26)) - : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#28,ss_item_sk#31,ss_store_sk#26,ss_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [d_date_sk#29, d_year#7, d_moy#8] - : : : +- *(2) Filter ((((d_year#7 = 1999) || ((d_year#7 = 1998) && (d_moy#8 = 12))) || ((d_year#7 = 2000) && (d_moy#8 = 1))) && isnotnull(d_date_sk#29)) - : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#29,d_year#7,d_moy#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [s_store_sk#27, s_store_name#3, s_company_name#6] - : : +- *(3) Filter ((isnotnull(s_store_sk#27) && isnotnull(s_store_name#3)) && isnotnull(s_company_name#6)) - : : +- *(3) FileScan parquet default.store[s_store_sk#27,s_store_name#3,s_company_name#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)], ReadSchema: struct ++- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, avg_monthly_sales#2, sum_sales#1, sum_sales#11 AS psum#9, sum_sales#12 AS nsum#10] + +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, (rn#18 - 1)], Inner, BuildRight + :- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13, sum_sales#11] + : +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, (rn#23 + 1)], Inner, BuildRight + : :- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13] + : : +- *(22) Filter (((isnotnull(avg_monthly_sales#2) && (avg_monthly_sales#2 > 0.000000)) && (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000)) && isnotnull(rn#13)) + : : +- Window [avg(_w0#24) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7] + : : +- *(7) Filter (isnotnull(d_year#7) && (d_year#7 = 1999)) + : : +- Window [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#13], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] + : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#6 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, 5) + : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#25))]) + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 5) + : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#25))]) + : : +- *(4) Project [i_brand#5, i_category#4, ss_sales_price#25, d_year#7, d_moy#8, s_store_name#3, s_company_name#6] + : : +- *(4) BroadcastHashJoin [ss_store_sk#26], [s_store_sk#27], Inner, BuildRight + : : :- *(4) Project [i_brand#5, i_category#4, ss_store_sk#26, ss_sales_price#25, d_year#7, d_moy#8] + : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#28], [d_date_sk#29], Inner, BuildRight + : : : :- *(4) Project [i_brand#5, i_category#4, ss_sold_date_sk#28, ss_store_sk#26, ss_sales_price#25] + : : : : +- *(4) BroadcastHashJoin [i_item_sk#30], [ss_item_sk#31], Inner, BuildRight + : : : : :- *(4) Project [i_item_sk#30, i_brand#5, i_category#4] + : : : : : +- *(4) Filter ((isnotnull(i_item_sk#30) && isnotnull(i_brand#5)) && isnotnull(i_category#4)) + : : : : : +- *(4) FileScan parquet default.item[i_item_sk#30,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : : +- *(1) Project [ss_sold_date_sk#28, ss_item_sk#31, ss_store_sk#26, ss_sales_price#25] + : : : : +- *(1) Filter ((isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#28)) && isnotnull(ss_store_sk#26)) + : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#28,ss_item_sk#31,ss_store_sk#26,ss_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [d_date_sk#29, d_year#7, d_moy#8] + : : : +- *(2) Filter ((((d_year#7 = 1999) || ((d_year#7 = 1998) && (d_moy#8 = 12))) || ((d_year#7 = 2000) && (d_moy#8 = 1))) && isnotnull(d_date_sk#29)) + : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#29,d_year#7,d_moy#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [s_store_sk#27, s_store_name#3, s_company_name#6] + : : +- *(3) Filter ((isnotnull(s_store_sk#27) && isnotnull(s_store_name#3)) && isnotnull(s_company_name#6)) + : : +- *(3) FileScan parquet default.store[s_store_sk#27,s_store_name#3,s_company_name#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] + 1))) - : +- *(15) Project [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#11, rn#23] - : +- *(15) Filter isnotnull(rn#23) + : +- *(14) Project [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#11, rn#23] + : +- *(14) Filter isnotnull(rn#23) : +- Window [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#23], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] - : +- *(14) Sort [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, s_store_name#21 ASC NULLS FIRST, s_company_name#22 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 + : +- *(13) Sort [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, s_store_name#21 ASC NULLS FIRST, s_company_name#22 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 5) - : +- *(13) HashAggregate(keys=[i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33], functions=[sum(UnscaledValue(ss_sales_price#25))]) + : +- *(12) HashAggregate(keys=[i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33], functions=[sum(UnscaledValue(ss_sales_price#25))]) : +- ReusedExchange [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33, sum#34], Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 5) +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] - 1))) - +- *(22) Project [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, sum_sales#12, rn#18] - +- *(22) Filter isnotnull(rn#18) + +- *(21) Project [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, sum_sales#12, rn#18] + +- *(21) Filter isnotnull(rn#18) +- Window [rank(d_year#35, d_moy#36) windowspecdefinition(i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#18], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17], [d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST] - +- *(21) Sort [i_category#14 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#16 ASC NULLS FIRST, s_company_name#17 ASC NULLS FIRST, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST], false, 0 + +- *(20) Sort [i_category#14 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#16 ASC NULLS FIRST, s_company_name#17 ASC NULLS FIRST, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST], false, 0 +- ReusedExchange [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35, d_moy#36, sum_sales#12], Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 5) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt index 6d082f8d1..656b50091 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt @@ -1,80 +1,77 @@ -TakeOrderedAndProject [i_category,d_year,avg_monthly_sales,s_store_name,nsum,sum_sales,psum,i_brand,s_company_name,d_moy] +TakeOrderedAndProject [d_year,d_moy,s_store_name,i_category,psum,i_brand,sum_sales,s_company_name,avg_monthly_sales,nsum] WholeStageCodegen - Project [i_category,d_year,sum_sales,sum_sales,avg_monthly_sales,sum_sales,s_store_name,i_brand,s_company_name,d_moy] - BroadcastHashJoin [i_category,s_store_name,i_category,s_company_name,rn,rn,s_store_name,i_brand,i_brand,s_company_name] - Project [s_store_name,rn,d_moy,sum_sales,avg_monthly_sales,sum_sales,d_year,s_company_name,i_category,i_brand] - BroadcastHashJoin [s_store_name,i_category,s_company_name,rn,i_category,i_brand,s_store_name,i_brand,s_company_name,rn] - Filter [d_year,avg_monthly_sales,sum_sales,rn] - InputAdapter - Window [s_store_name,d_moy,d_year,s_company_name,i_category,i_brand] - WholeStageCodegen - Sort [s_store_name,d_moy,d_year,s_company_name,i_category,i_brand] - InputAdapter - Exchange [i_category,i_brand,s_store_name,s_company_name] #1 - WholeStageCodegen - Project [s_store_name,d_moy,sum_sales,avg_monthly_sales,d_year,s_company_name,i_category,i_brand] - InputAdapter - Window [s_store_name,_w0,d_year,s_company_name,i_category,i_brand] - WholeStageCodegen - Sort [s_store_name,d_year,s_company_name,i_category,i_brand] - InputAdapter - Exchange [s_store_name,d_year,s_company_name,i_category,i_brand] #2 - WholeStageCodegen - HashAggregate [sum(UnscaledValue(ss_sales_price)),s_store_name,d_moy,d_year,s_company_name,i_category,i_brand,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] - InputAdapter - Exchange [s_store_name,d_moy,d_year,s_company_name,i_category,i_brand] #3 - WholeStageCodegen - HashAggregate [s_store_name,d_moy,d_year,ss_sales_price,s_company_name,i_category,i_brand,sum,sum] [sum,sum] - Project [d_year,ss_sales_price,i_brand,i_category,s_company_name,s_store_name,d_moy] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [d_year,ss_sales_price,i_brand,i_category,ss_store_sk,d_moy] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sales_price,i_brand,i_category,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_item_sk,i_brand,i_category] - Filter [i_item_sk,i_category,i_brand] - Scan parquet default.item [i_item_sk,i_brand,i_category] [i_item_sk,i_brand,i_category] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk,d_year,d_moy] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [s_store_sk,s_store_name,s_company_name] - Filter [s_store_sk,s_store_name,s_company_name] - Scan parquet default.store [s_store_sk,s_store_name,s_company_name] [s_store_sk,s_store_name,s_company_name] + Project [s_store_name,d_year,avg_monthly_sales,d_moy,sum_sales,sum_sales,i_category,i_brand,s_company_name,sum_sales] + BroadcastHashJoin [s_store_name,s_company_name,i_brand,i_category,i_category,rn,i_brand,s_store_name,rn,s_company_name] + Project [d_year,s_company_name,rn,d_moy,sum_sales,i_brand,sum_sales,i_category,s_store_name,avg_monthly_sales] + BroadcastHashJoin [s_store_name,i_category,i_category,s_company_name,rn,i_brand,rn,s_company_name,s_store_name,i_brand] + Project [d_year,s_company_name,rn,d_moy,i_brand,sum_sales,i_category,s_store_name,avg_monthly_sales] + Filter [avg_monthly_sales,sum_sales,rn] + InputAdapter + Window [d_year,s_company_name,_w0,i_brand,i_category,s_store_name] + WholeStageCodegen + Filter [d_year] + InputAdapter + Window [d_year,s_company_name,d_moy,i_brand,i_category,s_store_name] + WholeStageCodegen + Sort [d_year,s_company_name,d_moy,i_brand,i_category,s_store_name] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #1 + WholeStageCodegen + HashAggregate [d_year,s_company_name,d_moy,sum,sum(UnscaledValue(ss_sales_price)),i_brand,i_category,s_store_name] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [d_year,s_company_name,d_moy,i_brand,i_category,s_store_name] #2 + WholeStageCodegen + HashAggregate [d_year,s_company_name,d_moy,sum,i_brand,ss_sales_price,i_category,sum,s_store_name] [sum,sum] + Project [s_store_name,s_company_name,d_moy,d_year,i_category,ss_sales_price,i_brand] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [d_moy,d_year,ss_store_sk,i_category,ss_sales_price,i_brand] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_store_sk,i_category,ss_sales_price,ss_sold_date_sk,i_brand] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_item_sk,i_brand,i_category] + Filter [i_item_sk,i_brand,i_category] + Scan parquet default.item [i_item_sk,i_brand,i_category] [i_item_sk,i_brand,i_category] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk,d_year,d_moy] + Filter [d_year,d_moy,d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [s_store_sk,s_store_name,s_company_name] + Filter [s_store_sk,s_store_name,s_company_name] + Scan parquet default.store [s_store_sk,s_store_name,s_company_name] [s_store_sk,s_store_name,s_company_name] InputAdapter - BroadcastExchange #7 + BroadcastExchange #6 WholeStageCodegen - Project [i_category,sum_sales,rn,i_brand,s_company_name,s_store_name] + Project [i_category,rn,s_company_name,s_store_name,sum_sales,i_brand] Filter [rn] InputAdapter - Window [d_moy,i_category,d_year,i_brand,s_company_name,s_store_name] + Window [i_category,s_company_name,s_store_name,d_moy,d_year,i_brand] WholeStageCodegen - Sort [d_moy,i_category,d_year,i_brand,s_company_name,s_store_name] + Sort [i_category,s_company_name,s_store_name,d_moy,d_year,i_brand] InputAdapter - Exchange [i_category,i_brand,s_store_name,s_company_name] #8 + Exchange [i_category,i_brand,s_store_name,s_company_name] #7 WholeStageCodegen - HashAggregate [d_moy,i_category,sum(UnscaledValue(ss_sales_price)),sum,d_year,i_brand,s_company_name,s_store_name] [sum(UnscaledValue(ss_sales_price)),sum_sales,sum] + HashAggregate [i_category,s_company_name,s_store_name,sum(UnscaledValue(ss_sales_price)),d_moy,d_year,sum,i_brand] [sum(UnscaledValue(ss_sales_price)),sum_sales,sum] InputAdapter - ReusedExchange [d_moy,i_category,sum,d_year,i_brand,s_company_name,s_store_name] [d_moy,i_category,sum,d_year,i_brand,s_company_name,s_store_name] #3 + ReusedExchange [i_category,s_company_name,s_store_name,d_moy,d_year,sum,i_brand] [i_category,s_company_name,s_store_name,d_moy,d_year,sum,i_brand] #2 InputAdapter - BroadcastExchange #9 + BroadcastExchange #8 WholeStageCodegen - Project [i_brand,rn,s_store_name,sum_sales,i_category,s_company_name] + Project [rn,i_category,s_store_name,i_brand,s_company_name,sum_sales] Filter [rn] InputAdapter - Window [i_brand,d_year,s_store_name,d_moy,i_category,s_company_name] + Window [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year] WholeStageCodegen - Sort [i_brand,d_year,s_store_name,d_moy,i_category,s_company_name] + Sort [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year] InputAdapter - ReusedExchange [i_brand,d_year,s_store_name,d_moy,sum_sales,i_category,s_company_name] [i_brand,d_year,s_store_name,d_moy,sum_sales,i_category,s_company_name] #8 + ReusedExchange [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year,sum_sales] [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year,sum_sales] #7 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt index 2e8b5ecc4..78fb16b88 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt @@ -9,12 +9,12 @@ WholeStageCodegen Project [ss_sold_date_sk,ss_quantity] BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] Project [ss_sold_date_sk,ss_addr_sk,ss_quantity,ss_net_profit] - BroadcastHashJoin [cd_education_status,cd_demo_sk,cd_marital_status,ss_sales_price,ss_cdemo_sk] - Project [ss_sales_price,ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sold_date_sk,ss_net_profit] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk,ss_sales_price,cd_education_status,cd_marital_status] + Project [ss_addr_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_sales_price,ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_net_profit] + Project [ss_addr_sk,ss_quantity,ss_store_sk,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] Filter [ss_store_sk,ss_cdemo_sk,ss_addr_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_sales_price,ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_net_profit] [ss_sales_price,ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_net_profit] + Scan parquet default.store_sales [ss_addr_sk,ss_quantity,ss_store_sk,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] [ss_addr_sk,ss_quantity,ss_store_sk,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt index 2b0459139..a714636a3 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt @@ -19,8 +19,8 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,return_rank# : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] : : +- *(3) BroadcastHashJoin [cast(ws_order_number#14 as bigint), cast(ws_item_sk#7 as bigint)], [wr_order_number#15, wr_item_sk#16], Inner, BuildRight : : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_order_number#14, ws_quantity#9, ws_net_paid#11] - : : : +- *(3) Filter ((((((((isnotnull(ws_net_profit#17) && isnotnull(ws_net_paid#11)) && isnotnull(ws_quantity#9)) && (ws_net_profit#17 > 1.00)) && (ws_net_paid#11 > 0.00)) && (ws_quantity#9 > 0)) && isnotnull(ws_order_number#14)) && isnotnull(ws_item_sk#7)) && isnotnull(ws_sold_date_sk#12)) - : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#7,ws_order_number#14,ws_quantity#9,ws_net_paid#11,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_net_paid), IsNotNull(ws_quantity), GreaterThan(ws_net_pro..., ReadSchema: struct 1.00)) && (ws_net_paid#11 > 0.00)) && (ws_quantity#9 > 0)) && isnotnull(ws_order_number#14)) && isnotnull(ws_item_sk#7)) && isnotnull(ws_sold_date_sk#12)) + : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#7,ws_order_number#14,ws_quantity#9,ws_net_paid#11,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_quantity), IsNotNull(ws_net_paid), GreaterThan(ws_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(wr_item_sk#16)) && isnotnull(wr_order_number#15)) @@ -31,47 +31,45 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,return_rank# : +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_year#18,d_moy#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)], ReadSchema: struct :- *(14) Project [catalog AS channel#20, item#21, return_ratio#22, return_rank#23, currency_rank#24] : +- *(14) Filter ((return_rank#23 <= 10) || (currency_rank#24 <= 10)) - : +- Window [rank(return_ratio#22) windowspecdefinition(return_ratio#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#23], [return_ratio#22 ASC NULLS FIRST] - : +- *(13) Sort [return_ratio#22 ASC NULLS FIRST], false, 0 - : +- *(13) Project [item#21, return_ratio#22, currency_rank#24] - : +- Window [rank(currency_ratio#25) windowspecdefinition(currency_ratio#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#24], [currency_ratio#25 ASC NULLS FIRST] - : +- *(12) Sort [currency_ratio#25 ASC NULLS FIRST], false, 0 - : +- Exchange SinglePartition - : +- *(11) HashAggregate(keys=[cs_item_sk#26], functions=[sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), sum(cast(coalesce(cs_quantity#28, 0) as bigint)), sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) - : +- Exchange hashpartitioning(cs_item_sk#26, 5) - : +- *(10) HashAggregate(keys=[cs_item_sk#26], functions=[partial_sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#28, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) - : +- *(10) Project [cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] - : +- *(10) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#13], Inner, BuildRight - : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] - : : +- *(10) BroadcastHashJoin [cs_order_number#32, cs_item_sk#26], [cr_order_number#33, cr_item_sk#34], Inner, BuildRight - : : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_order_number#32, cs_quantity#28, cs_net_paid#30] - : : : +- *(10) Filter ((((((((isnotnull(cs_net_paid#30) && isnotnull(cs_quantity#28)) && isnotnull(cs_net_profit#35)) && (cs_net_profit#35 > 1.00)) && (cs_net_paid#30 > 0.00)) && (cs_quantity#28 > 0)) && isnotnull(cs_item_sk#26)) && isnotnull(cs_order_number#32)) && isnotnull(cs_sold_date_sk#31)) - : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#26,cs_order_number#32,cs_quantity#28,cs_net_paid#30,cs_net_profit#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_net_paid), IsNotNull(cs_quantity), IsNotNull(cs_net_profit), GreaterThan(cs_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(cr_order_number#33)) && isnotnull(cr_item_sk#34)) - : : +- *(8) FileScan parquet default.catalog_returns[cr_item_sk#34,cr_order_number#33,cr_return_quantity#27,cr_return_amount#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number),..., ReadSchema: struct - : +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- Window [rank(currency_ratio#25) windowspecdefinition(currency_ratio#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#24], [currency_ratio#25 ASC NULLS FIRST] + : +- *(13) Sort [currency_ratio#25 ASC NULLS FIRST], false, 0 + : +- Window [rank(return_ratio#22) windowspecdefinition(return_ratio#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#23], [return_ratio#22 ASC NULLS FIRST] + : +- *(12) Sort [return_ratio#22 ASC NULLS FIRST], false, 0 + : +- Exchange SinglePartition + : +- *(11) HashAggregate(keys=[cs_item_sk#26], functions=[sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), sum(cast(coalesce(cs_quantity#28, 0) as bigint)), sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) + : +- Exchange hashpartitioning(cs_item_sk#26, 5) + : +- *(10) HashAggregate(keys=[cs_item_sk#26], functions=[partial_sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#28, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) + : +- *(10) Project [cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] + : +- *(10) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#13], Inner, BuildRight + : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] + : : +- *(10) BroadcastHashJoin [cs_order_number#32, cs_item_sk#26], [cr_order_number#33, cr_item_sk#34], Inner, BuildRight + : : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_order_number#32, cs_quantity#28, cs_net_paid#30] + : : : +- *(10) Filter ((((((((isnotnull(cs_net_profit#35) && isnotnull(cs_net_paid#30)) && isnotnull(cs_quantity#28)) && (cs_net_profit#35 > 1.00)) && (cs_net_paid#30 > 0.00)) && (cs_quantity#28 > 0)) && isnotnull(cs_item_sk#26)) && isnotnull(cs_order_number#32)) && isnotnull(cs_sold_date_sk#31)) + : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#26,cs_order_number#32,cs_quantity#28,cs_net_paid#30,cs_net_profit#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_net_profit), IsNotNull(cs_net_paid), IsNotNull(cs_quantity), GreaterThan(cs_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(cr_item_sk#34)) && isnotnull(cr_order_number#33)) + : : +- *(8) FileScan parquet default.catalog_returns[cr_item_sk#34,cr_order_number#33,cr_return_quantity#27,cr_return_amount#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_item_sk), IsNo..., ReadSchema: struct + : +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(21) Project [store AS channel#36, item#37, return_ratio#38, return_rank#39, currency_rank#40] +- *(21) Filter ((return_rank#39 <= 10) || (currency_rank#40 <= 10)) - +- Window [rank(return_ratio#38) windowspecdefinition(return_ratio#38 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#39], [return_ratio#38 ASC NULLS FIRST] - +- *(20) Sort [return_ratio#38 ASC NULLS FIRST], false, 0 - +- *(20) Project [item#37, return_ratio#38, currency_rank#40] - +- Window [rank(currency_ratio#41) windowspecdefinition(currency_ratio#41 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#40], [currency_ratio#41 ASC NULLS FIRST] - +- *(19) Sort [currency_ratio#41 ASC NULLS FIRST], false, 0 - +- Exchange SinglePartition - +- *(18) HashAggregate(keys=[ss_item_sk#42], functions=[sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), sum(cast(coalesce(ss_quantity#44, 0) as bigint)), sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) - +- Exchange hashpartitioning(ss_item_sk#42, 5) - +- *(17) HashAggregate(keys=[ss_item_sk#42], functions=[partial_sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), partial_sum(cast(coalesce(ss_quantity#44, 0) as bigint)), partial_sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) - +- *(17) Project [ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] - +- *(17) BroadcastHashJoin [ss_sold_date_sk#47], [d_date_sk#13], Inner, BuildRight - :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] - : +- *(17) BroadcastHashJoin [cast(ss_ticket_number#48 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#49, sr_item_sk#50], Inner, BuildRight - : :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_ticket_number#48, ss_quantity#44, ss_net_paid#46] - : : +- *(17) Filter ((((((((isnotnull(ss_net_paid#46) && isnotnull(ss_net_profit#51)) && isnotnull(ss_quantity#44)) && (ss_net_profit#51 > 1.00)) && (ss_net_paid#46 > 0.00)) && (ss_quantity#44 > 0)) && isnotnull(ss_ticket_number#48)) && isnotnull(ss_item_sk#42)) && isnotnull(ss_sold_date_sk#47)) - : : +- *(17) FileScan parquet default.store_sales[ss_sold_date_sk#47,ss_item_sk#42,ss_ticket_number#48,ss_quantity#44,ss_net_paid#46,ss_net_profit#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_net_paid), IsNotNull(ss_net_profit), IsNotNull(ss_quantity), GreaterThan(ss_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(sr_item_sk#50)) && isnotnull(sr_ticket_number#49)) - : +- *(15) FileScan parquet default.store_returns[sr_item_sk#50,sr_ticket_number#49,sr_return_quantity#43,sr_return_amt#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_item_sk), IsNotNull(..., ReadSchema: struct - +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file + +- Window [rank(currency_ratio#41) windowspecdefinition(currency_ratio#41 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#40], [currency_ratio#41 ASC NULLS FIRST] + +- *(20) Sort [currency_ratio#41 ASC NULLS FIRST], false, 0 + +- Window [rank(return_ratio#38) windowspecdefinition(return_ratio#38 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#39], [return_ratio#38 ASC NULLS FIRST] + +- *(19) Sort [return_ratio#38 ASC NULLS FIRST], false, 0 + +- Exchange SinglePartition + +- *(18) HashAggregate(keys=[ss_item_sk#42], functions=[sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), sum(cast(coalesce(ss_quantity#44, 0) as bigint)), sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) + +- Exchange hashpartitioning(ss_item_sk#42, 5) + +- *(17) HashAggregate(keys=[ss_item_sk#42], functions=[partial_sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), partial_sum(cast(coalesce(ss_quantity#44, 0) as bigint)), partial_sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) + +- *(17) Project [ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] + +- *(17) BroadcastHashJoin [ss_sold_date_sk#47], [d_date_sk#13], Inner, BuildRight + :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] + : +- *(17) BroadcastHashJoin [cast(ss_ticket_number#48 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#49, sr_item_sk#50], Inner, BuildRight + : :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_ticket_number#48, ss_quantity#44, ss_net_paid#46] + : : +- *(17) Filter ((((((((isnotnull(ss_quantity#44) && isnotnull(ss_net_paid#46)) && isnotnull(ss_net_profit#51)) && (ss_net_profit#51 > 1.00)) && (ss_net_paid#46 > 0.00)) && (ss_quantity#44 > 0)) && isnotnull(ss_ticket_number#48)) && isnotnull(ss_item_sk#42)) && isnotnull(ss_sold_date_sk#47)) + : : +- *(17) FileScan parquet default.store_sales[ss_sold_date_sk#47,ss_item_sk#42,ss_ticket_number#48,ss_quantity#44,ss_net_paid#46,ss_net_profit#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), IsNotNull(ss_net_paid), IsNotNull(ss_net_profit), GreaterThan(ss_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(sr_ticket_number#49)) && isnotnull(sr_item_sk#50)) + : +- *(15) FileScan parquet default.store_returns[sr_item_sk#50,sr_ticket_number#49,sr_return_quantity#43,sr_return_amt#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNo..., ReadSchema: struct + +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt index 809d04622..e0a243a63 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt @@ -1,10 +1,10 @@ -TakeOrderedAndProject [return_rank,item,currency_rank,channel,return_ratio] +TakeOrderedAndProject [channel,currency_rank,return_rank,item,return_ratio] WholeStageCodegen - HashAggregate [return_rank,item,currency_rank,channel,return_ratio] + HashAggregate [channel,currency_rank,return_rank,item,return_ratio] InputAdapter - Exchange [return_rank,item,currency_rank,channel,return_ratio] #1 + Exchange [channel,currency_rank,return_rank,item,return_ratio] #1 WholeStageCodegen - HashAggregate [return_rank,item,currency_rank,channel,return_ratio] + HashAggregate [channel,currency_rank,return_rank,item,return_ratio] InputAdapter Union WholeStageCodegen @@ -21,18 +21,18 @@ TakeOrderedAndProject [return_rank,item,currency_rank,channel,return_ratio] InputAdapter Exchange #2 WholeStageCodegen - HashAggregate [sum,ws_item_sk,sum,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum,sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),sum] [sum,return_ratio,currency_ratio,sum,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),item,sum,sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),sum] + HashAggregate [sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),ws_item_sk,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum,sum(cast(coalesce(ws_quantity, 0) as bigint))] [currency_ratio,return_ratio,sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),item,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum,sum(cast(coalesce(ws_quantity, 0) as bigint))] InputAdapter Exchange [ws_item_sk] #3 WholeStageCodegen - HashAggregate [sum,ws_item_sk,sum,sum,wr_return_quantity,sum,sum,ws_net_paid,ws_quantity,sum,wr_return_amt,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [ws_net_paid,wr_return_quantity,wr_return_amt,ws_quantity,ws_item_sk] + HashAggregate [sum,wr_return_amt,sum,ws_item_sk,sum,sum,sum,ws_net_paid,ws_quantity,wr_return_quantity,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [wr_return_amt,ws_net_paid,ws_quantity,wr_return_quantity,ws_item_sk] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_net_paid,wr_return_quantity,wr_return_amt,ws_quantity,ws_item_sk,ws_sold_date_sk] + Project [wr_return_amt,ws_net_paid,ws_quantity,ws_sold_date_sk,wr_return_quantity,ws_item_sk] BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - Project [ws_net_paid,ws_order_number,ws_quantity,ws_item_sk,ws_sold_date_sk] - Filter [ws_item_sk,ws_order_number,ws_sold_date_sk,ws_net_paid,ws_quantity,ws_net_profit] - Scan parquet default.web_sales [ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_item_sk,ws_sold_date_sk] [ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_item_sk,ws_sold_date_sk] + Project [ws_net_paid,ws_quantity,ws_order_number,ws_sold_date_sk,ws_item_sk] + Filter [ws_net_profit,ws_order_number,ws_item_sk,ws_net_paid,ws_sold_date_sk,ws_quantity] + Scan parquet default.web_sales [ws_net_paid,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_item_sk] [ws_net_paid,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_item_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen @@ -49,69 +49,67 @@ TakeOrderedAndProject [return_rank,item,currency_rank,channel,return_ratio] Project [item,return_ratio,return_rank,currency_rank] Filter [return_rank,currency_rank] InputAdapter - Window [return_ratio] + Window [currency_ratio] WholeStageCodegen - Sort [return_ratio] - Project [item,return_ratio,currency_rank] - InputAdapter - Window [currency_ratio] - WholeStageCodegen - Sort [currency_ratio] - InputAdapter - Exchange #6 - WholeStageCodegen - HashAggregate [sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),sum,sum(cast(coalesce(cs_quantity, 0) as bigint)),sum,cs_item_sk,sum,sum] [sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(cast(coalesce(cr_return_quantity, 0) as bigint)),return_ratio,sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),sum,sum(cast(coalesce(cs_quantity, 0) as bigint)),currency_ratio,sum,item,sum,sum] - InputAdapter - Exchange [cs_item_sk] #7 - WholeStageCodegen - HashAggregate [sum,sum,cr_return_amount,cs_net_paid,sum,cr_return_quantity,sum,cs_item_sk,cs_quantity,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [cs_quantity,cs_net_paid,cs_item_sk,cr_return_quantity,cr_return_amount] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_quantity,cs_net_paid,cs_item_sk,cs_sold_date_sk,cr_return_quantity,cr_return_amount] - BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_quantity,cs_net_paid,cs_item_sk,cs_sold_date_sk,cs_order_number] - Filter [cs_net_profit,cs_order_number,cs_net_paid,cs_item_sk,cs_quantity,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_quantity,cs_net_paid,cs_item_sk,cs_sold_date_sk,cs_net_profit,cs_order_number] [cs_quantity,cs_net_paid,cs_item_sk,cs_sold_date_sk,cs_net_profit,cs_order_number] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] - Filter [cr_return_amount,cr_order_number,cr_item_sk] - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #5 + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen + Sort [return_ratio] + InputAdapter + Exchange #6 + WholeStageCodegen + HashAggregate [sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum,sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),sum,sum,cs_item_sk,sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(cast(coalesce(cs_quantity, 0) as bigint))] [sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum,item,return_ratio,sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),sum,sum,sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(cast(coalesce(cs_quantity, 0) as bigint)),currency_ratio] + InputAdapter + Exchange [cs_item_sk] #7 + WholeStageCodegen + HashAggregate [sum,cr_return_amount,cr_return_quantity,sum,cs_net_paid,cs_quantity,sum,sum,sum,sum,sum,cs_item_sk,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [cs_net_paid,cs_quantity,cr_return_quantity,cr_return_amount,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_net_paid,cs_quantity,cr_return_quantity,cs_sold_date_sk,cr_return_amount,cs_item_sk] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_net_paid,cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk] + Filter [cs_net_profit,cs_net_paid,cs_quantity,cs_item_sk,cs_sold_date_sk,cs_order_number] + Scan parquet default.catalog_sales [cs_net_paid,cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit] [cs_net_paid,cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + Filter [cr_return_amount,cr_item_sk,cr_order_number] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #5 WholeStageCodegen Project [item,return_ratio,return_rank,currency_rank] Filter [return_rank,currency_rank] InputAdapter - Window [return_ratio] + Window [currency_ratio] WholeStageCodegen - Sort [return_ratio] - Project [item,return_ratio,currency_rank] - InputAdapter - Window [currency_ratio] - WholeStageCodegen - Sort [currency_ratio] - InputAdapter - Exchange #9 - WholeStageCodegen - HashAggregate [sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),ss_item_sk,sum,sum(cast(coalesce(ss_quantity, 0) as bigint)),sum] [sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),item,sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum,sum(cast(coalesce(ss_quantity, 0) as bigint)),sum,currency_ratio,return_ratio] - InputAdapter - Exchange [ss_item_sk] #10 - WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sum,ss_quantity,ss_item_sk,sum,sum,sr_return_quantity,sum,sr_return_amt,ss_net_paid] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sr_return_amt,ss_item_sk,ss_quantity,sr_return_quantity,ss_net_paid] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [sr_return_amt,ss_item_sk,ss_quantity,ss_sold_date_sk,sr_return_quantity,ss_net_paid] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_net_paid] - Filter [ss_quantity,ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_net_profit,ss_net_paid] - Scan parquet default.store_sales [ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_net_paid,ss_net_profit] [ss_ticket_number,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_net_paid,ss_net_profit] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen - Project [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] - Filter [sr_return_amt,sr_item_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #5 + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen + Sort [return_ratio] + InputAdapter + Exchange #9 + WholeStageCodegen + HashAggregate [sum,ss_item_sk,sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum,sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum,sum] [sum,sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),item,sum(cast(coalesce(ss_quantity, 0) as bigint)),sum,sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(cast(coalesce(sr_return_quantity, 0) as bigint)),return_ratio,sum,sum,currency_ratio] + InputAdapter + Exchange [ss_item_sk] #10 + WholeStageCodegen + HashAggregate [ss_net_paid,sum,ss_item_sk,sum,sum,sr_return_amt,sum,sum,ss_quantity,sr_return_quantity,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [ss_quantity,ss_item_sk,ss_net_paid,sr_return_quantity,sr_return_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_quantity,ss_item_sk,ss_net_paid,sr_return_quantity,ss_sold_date_sk,sr_return_amt] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_quantity,ss_item_sk,ss_net_paid,ss_sold_date_sk,ss_ticket_number] + Filter [ss_net_paid,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_net_profit,ss_ticket_number] + Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_net_paid,ss_net_profit,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_net_paid,ss_net_profit,ss_sold_date_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + Filter [sr_return_amt,sr_ticket_number,sr_item_sk] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt index c30a48e5e..a3baee3e1 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt @@ -1,22 +1,22 @@ -TakeOrderedAndProject [profit,id,returns,sales,channel] +TakeOrderedAndProject [profit,channel,sales,returns,id] WholeStageCodegen - HashAggregate [sum(profit),sum,id,sum,sum,sum(sales),spark_grouping_id,sum(returns),channel] [sum(profit),sum,profit,sum,sum,sum(sales),returns,sum(returns),sales] + HashAggregate [sum(returns),channel,spark_grouping_id,sum,sum(profit),sum(sales),sum,sum,id] [sum(returns),profit,sum,sum(profit),sum(sales),sales,sum,sum,returns] InputAdapter Exchange [channel,id,spark_grouping_id] #1 WholeStageCodegen - HashAggregate [sum,sum,id,sales,sum,sum,profit,spark_grouping_id,sum,returns,channel,sum] [sum,sum,sum,sum,sum,sum] - Expand [channel,sales,id,profit,returns] + HashAggregate [sum,channel,spark_grouping_id,sum,sum,sum,profit,returns,sum,sum,id,sales] [sum,sum,sum,sum,sum,sum] + Expand [channel,id,profit,returns,sales] InputAdapter Union WholeStageCodegen - HashAggregate [sum(UnscaledValue(net_loss)),sum(UnscaledValue(sales_price)),s_store_id,sum(UnscaledValue(profit)),sum,sum,sum,sum,sum(UnscaledValue(return_amt))] [sum(UnscaledValue(net_loss)),sum(UnscaledValue(sales_price)),sum(UnscaledValue(profit)),sum,channel,RETURNS,id,sum,profit,sum,sum,sum(UnscaledValue(return_amt)),sales] + HashAggregate [sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(net_loss)),sum,sum(UnscaledValue(profit)),s_store_id,sum(UnscaledValue(sales_price)),sum,sum] [sales,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(net_loss)),RETURNS,sum,sum(UnscaledValue(profit)),channel,profit,sum(UnscaledValue(sales_price)),id,sum,sum] InputAdapter Exchange [s_store_id] #2 WholeStageCodegen - HashAggregate [s_store_id,profit,return_amt,sum,sum,sum,net_loss,sum,sales_price,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [profit,sales_price,return_amt,s_store_id,net_loss] + HashAggregate [sum,sum,return_amt,net_loss,sum,sum,s_store_id,sum,sales_price,sum,sum,sum,profit] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [s_store_id,sales_price,net_loss,return_amt,profit] BroadcastHashJoin [store_sk,s_store_sk] - Project [profit,sales_price,return_amt,store_sk,net_loss] + Project [store_sk,sales_price,net_loss,return_amt,profit] BroadcastHashJoin [date_sk,d_date_sk] InputAdapter Union @@ -41,14 +41,14 @@ TakeOrderedAndProject [profit,id,returns,sales,channel] Filter [s_store_sk] Scan parquet default.store [s_store_sk,s_store_id] [s_store_sk,s_store_id] WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(profit)),cp_catalog_page_id,sum,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(net_loss)),sum(UnscaledValue(sales_price)),sum] [sum,RETURNS,sum(UnscaledValue(profit)),sum,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(net_loss)),profit,sales,channel,sum(UnscaledValue(sales_price)),sum,id] + HashAggregate [cp_catalog_page_id,sum(UnscaledValue(profit)),sum,sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum,sum,sum(UnscaledValue(net_loss)),sum] [sum(UnscaledValue(profit)),sum,sum(UnscaledValue(sales_price)),channel,sum(UnscaledValue(return_amt)),sales,sum,profit,sum,id,sum(UnscaledValue(net_loss)),sum,RETURNS] InputAdapter Exchange [cp_catalog_page_id] #5 WholeStageCodegen - HashAggregate [sum,sum,sum,sales_price,return_amt,cp_catalog_page_id,sum,net_loss,sum,sum,sum,profit,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [profit,return_amt,net_loss,cp_catalog_page_id,sales_price] + HashAggregate [cp_catalog_page_id,sum,sum,sum,sales_price,return_amt,net_loss,sum,profit,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,cp_catalog_page_id,profit,net_loss,return_amt] BroadcastHashJoin [page_sk,cp_catalog_page_sk] - Project [page_sk,profit,return_amt,net_loss,sales_price] + Project [sales_price,profit,page_sk,net_loss,return_amt] BroadcastHashJoin [date_sk,d_date_sk] InputAdapter Union @@ -73,14 +73,14 @@ TakeOrderedAndProject [profit,id,returns,sales,channel] Filter [cp_catalog_page_sk] Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] [cp_catalog_page_sk,cp_catalog_page_id] WholeStageCodegen - HashAggregate [sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum,sum(UnscaledValue(sales_price)),web_site_id,sum(UnscaledValue(net_loss)),sum,sum,sum] [sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),profit,sum,sum(UnscaledValue(sales_price)),RETURNS,sales,id,channel,sum(UnscaledValue(net_loss)),sum,sum,sum] + HashAggregate [web_site_id,sum,sum,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price)),sum(UnscaledValue(profit)),sum,sum(UnscaledValue(net_loss))] [sales,profit,sum,sum,RETURNS,channel,id,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price)),sum(UnscaledValue(profit)),sum,sum(UnscaledValue(net_loss))] InputAdapter Exchange [web_site_id] #8 WholeStageCodegen - HashAggregate [sum,sum,sum,sum,net_loss,profit,return_amt,web_site_id,sum,sum,sales_price,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [web_site_id,return_amt,sales_price,profit,net_loss] + HashAggregate [web_site_id,sum,profit,sum,sum,net_loss,sales_price,sum,sum,sum,sum,sum,return_amt] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [web_site_id,profit,sales_price,return_amt,net_loss] BroadcastHashJoin [wsr_web_site_sk,web_site_sk] - Project [wsr_web_site_sk,return_amt,sales_price,profit,net_loss] + Project [profit,wsr_web_site_sk,sales_price,return_amt,net_loss] BroadcastHashJoin [date_sk,d_date_sk] InputAdapter Union @@ -91,9 +91,9 @@ TakeOrderedAndProject [profit,id,returns,sales,channel] WholeStageCodegen Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Project [wr_order_number,wr_returned_date_sk,wr_net_loss,wr_return_amt,wr_item_sk] + Project [wr_order_number,wr_return_amt,wr_returned_date_sk,wr_net_loss,wr_item_sk] Filter [wr_returned_date_sk] - Scan parquet default.web_returns [wr_order_number,wr_returned_date_sk,wr_net_loss,wr_return_amt,wr_item_sk] [wr_order_number,wr_returned_date_sk,wr_net_loss,wr_return_amt,wr_item_sk] + Scan parquet default.web_returns [wr_order_number,wr_return_amt,wr_returned_date_sk,wr_net_loss,wr_item_sk] [wr_order_number,wr_return_amt,wr_returned_date_sk,wr_net_loss,wr_item_sk] InputAdapter BroadcastExchange #9 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt index 32ec9b5f7..6716b98d6 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt @@ -16,8 +16,8 @@ TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,s_compa : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_item_sk#23,ss_customer_sk#24,ss_store_sk#20,ss_ticket_number#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_stor..., ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[3, bigint, true], input[1, bigint, true], input[2, bigint, true])) : : : +- *(1) Project [sr_returned_date_sk#16, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#25] - : : : +- *(1) Filter (((isnotnull(sr_ticket_number#25) && isnotnull(sr_item_sk#26)) && isnotnull(sr_customer_sk#27)) && isnotnull(sr_returned_date_sk#16)) - : : : +- *(1) FileScan parquet default.store_returns[sr_returned_date_sk#16,sr_item_sk#26,sr_customer_sk#27,sr_ticket_number#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk), IsNotNull(sr_customer_sk), IsNotNull(sr_retu..., ReadSchema: struct + : : : +- *(1) Filter (((isnotnull(sr_ticket_number#25) && isnotnull(sr_customer_sk#27)) && isnotnull(sr_item_sk#26)) && isnotnull(sr_returned_date_sk#16)) + : : : +- *(1) FileScan parquet default.store_returns[sr_returned_date_sk#16,sr_item_sk#26,sr_customer_sk#27,sr_ticket_number#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_retu..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [s_store_sk#21, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] : : +- *(2) Filter isnotnull(s_store_sk#21) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt index 6a6c856c3..fcbf25c1d 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt @@ -1,33 +1,33 @@ -TakeOrderedAndProject [s_street_number,s_store_name,s_zip,31 - 60 days ,s_state,s_suite_number,s_company_id,61 - 90 days ,30 days ,s_street_name,s_county,>120 days ,s_city,s_street_type,91 - 120 days ] +TakeOrderedAndProject [s_street_number,s_street_type,s_company_id,31 - 60 days ,61 - 90 days ,s_county,>120 days ,30 days ,91 - 120 days ,s_state,s_street_name,s_city,s_suite_number,s_store_name,s_zip] WholeStageCodegen - HashAggregate [s_street_number,s_store_name,s_zip,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),s_state,s_suite_number,s_company_id,sum,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum,s_street_name,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),s_county,sum,sum,s_city,s_street_type,sum] [sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),31 - 60 days ,sum,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum,30 days ,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum,>120 days ,sum,91 - 120 days ,sum] + HashAggregate [s_street_number,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),s_street_type,sum,s_company_id,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),s_county,sum,s_state,s_street_name,sum,sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),sum,s_city,s_suite_number,s_store_name,s_zip,sum] [sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum,31 - 60 days ,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum,>120 days ,30 days ,91 - 120 days ,sum,sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),sum,sum] InputAdapter - Exchange [s_street_number,s_store_name,s_zip,s_state,s_suite_number,s_company_id,s_street_name,s_county,s_city,s_street_type] #1 + Exchange [s_street_number,s_street_type,s_company_id,s_county,s_state,s_street_name,s_city,s_suite_number,s_store_name,s_zip] #1 WholeStageCodegen - HashAggregate [s_street_number,s_store_name,s_zip,sum,sum,sum,s_state,s_suite_number,s_company_id,sum,sum,ss_sold_date_sk,s_street_name,s_county,sum,sum,sum,sr_returned_date_sk,s_city,sum,s_street_type,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [s_street_type,s_suite_number,s_county,s_street_name,s_zip,s_street_number,ss_sold_date_sk,s_store_name,s_city,sr_returned_date_sk,s_state,s_company_id] + HashAggregate [sum,sum,s_street_number,ss_sold_date_sk,s_street_type,sum,s_company_id,sum,s_county,sum,s_state,s_street_name,sum,sum,sr_returned_date_sk,sum,sum,s_city,s_suite_number,s_store_name,s_zip,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_zip,s_state,s_suite_number,ss_sold_date_sk,s_city,sr_returned_date_sk] BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [s_street_type,s_suite_number,s_county,s_street_name,s_zip,s_street_number,ss_sold_date_sk,s_store_name,s_city,sr_returned_date_sk,s_state,s_company_id] + Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_zip,s_state,s_suite_number,ss_sold_date_sk,s_city,sr_returned_date_sk] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [s_street_type,s_suite_number,s_county,s_street_name,s_zip,s_street_number,ss_sold_date_sk,s_store_name,s_city,sr_returned_date_sk,s_state,s_company_id] + Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_zip,s_state,s_suite_number,ss_sold_date_sk,s_city,sr_returned_date_sk] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_sold_date_sk,ss_store_sk,sr_returned_date_sk] - BroadcastHashJoin [ss_customer_sk,sr_item_sk,ss_item_sk,sr_ticket_number,sr_customer_sk,ss_ticket_number] - Project [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_sold_date_sk,ss_store_sk] - Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_sold_date_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_sold_date_sk,ss_store_sk] [ss_customer_sk,ss_ticket_number,ss_item_sk,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] + Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] - Filter [sr_ticket_number,sr_item_sk,sr_customer_sk,sr_returned_date_sk] + Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [s_store_sk,s_street_type,s_suite_number,s_county,s_street_name,s_zip,s_street_number,s_store_name,s_city,s_state,s_company_id] + Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_store_sk,s_zip,s_state,s_suite_number,s_city] Filter [s_store_sk] - Scan parquet default.store [s_store_sk,s_street_type,s_suite_number,s_county,s_street_name,s_zip,s_street_number,s_store_name,s_city,s_state,s_company_id] [s_store_sk,s_street_type,s_suite_number,s_county,s_street_name,s_zip,s_street_number,s_store_name,s_city,s_state,s_company_id] + Scan parquet default.store [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_store_sk,s_zip,s_state,s_suite_number,s_city] [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_store_sk,s_zip,s_state,s_suite_number,s_city] InputAdapter BroadcastExchange #4 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt index 69b3733b7..af45acaf5 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt @@ -1,4 +1,4 @@ -TakeOrderedAndProject [store_cumulative,item_sk,web_cumulative,d_date,store_sales,web_sales] +TakeOrderedAndProject [item_sk,store_cumulative,store_sales,web_sales,d_date,web_cumulative] WholeStageCodegen Filter [web_cumulative,store_cumulative] InputAdapter @@ -8,7 +8,7 @@ TakeOrderedAndProject [store_cumulative,item_sk,web_cumulative,d_date,store_sale InputAdapter Exchange [item_sk] #1 WholeStageCodegen - Project [d_date,cume_sales,d_date,item_sk,item_sk,cume_sales] + Project [d_date,d_date,item_sk,item_sk,cume_sales,cume_sales] InputAdapter SortMergeJoin [item_sk,d_date,item_sk,d_date] WholeStageCodegen @@ -28,7 +28,7 @@ TakeOrderedAndProject [store_cumulative,item_sk,web_cumulative,d_date,store_sale InputAdapter Exchange [ws_item_sk,d_date] #4 WholeStageCodegen - HashAggregate [d_date,sum,sum,ws_sales_price,ws_item_sk] [sum,sum] + HashAggregate [sum,ws_item_sk,sum,d_date,ws_sales_price] [sum,sum] Project [ws_item_sk,ws_sales_price,d_date] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] Project [ws_sold_date_sk,ws_item_sk,ws_sales_price] @@ -57,7 +57,7 @@ TakeOrderedAndProject [store_cumulative,item_sk,web_cumulative,d_date,store_sale InputAdapter Exchange [ss_item_sk,d_date] #8 WholeStageCodegen - HashAggregate [sum,sum,ss_item_sk,d_date,ss_sales_price] [sum,sum] + HashAggregate [d_date,sum,ss_sales_price,sum,ss_item_sk] [sum,sum] Project [ss_item_sk,ss_sales_price,d_date] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,ss_item_sk,ss_sales_price] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt index 19793f4d1..00c2d4b14 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt @@ -1,10 +1,10 @@ TakeOrderedAndProject [d_year,ext_price,brand_id,brand] WholeStageCodegen - HashAggregate [i_brand_id,d_year,sum,sum(UnscaledValue(ss_ext_sales_price)),i_brand] [brand_id,ext_price,brand,sum,sum(UnscaledValue(ss_ext_sales_price))] + HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),d_year,i_brand,i_brand_id,sum] [brand_id,sum(UnscaledValue(ss_ext_sales_price)),brand,sum,ext_price] InputAdapter Exchange [d_year,i_brand,i_brand_id] #1 WholeStageCodegen - HashAggregate [i_brand_id,sum,d_year,ss_ext_sales_price,sum,i_brand] [sum,sum] + HashAggregate [sum,d_year,i_brand,ss_ext_sales_price,i_brand_id,sum] [sum,sum] Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [d_year,ss_item_sk,ss_ext_sales_price] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt index 6dc6fb614..6a8804ae2 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt @@ -13,7 +13,7 @@ TakeOrderedAndProject [avg_quarterly_sales,sum_sales,i_manufact_id] InputAdapter Exchange [i_manufact_id,d_qoy] #2 WholeStageCodegen - HashAggregate [sum,sum,d_qoy,ss_sales_price,i_manufact_id] [sum,sum] + HashAggregate [d_qoy,sum,ss_sales_price,sum,i_manufact_id] [sum,sum] Project [i_manufact_id,ss_sales_price,d_qoy] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [i_manufact_id,ss_store_sk,ss_sales_price,d_qoy] @@ -22,7 +22,7 @@ TakeOrderedAndProject [avg_quarterly_sales,sum_sales,i_manufact_id] BroadcastHashJoin [i_item_sk,ss_item_sk] Project [i_item_sk,i_manufact_id] Filter [i_category,i_class,i_brand,i_item_sk] - Scan parquet default.item [i_manufact_id,i_brand,i_category,i_item_sk,i_class] [i_manufact_id,i_brand,i_category,i_item_sk,i_class] + Scan parquet default.item [i_class,i_manufact_id,i_item_sk,i_category,i_brand] [i_class,i_manufact_id,i_item_sk,i_category,i_brand] InputAdapter BroadcastExchange #3 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt index 217fd8f04..e94d5f4c0 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt @@ -56,15 +56,15 @@ TakeOrderedAndProject(limit=100, orderBy=[segment#1 ASC NULLS FIRST,num_customer : +- *(9) FileScan parquet default.store[s_county#10,s_state#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), IsNotNull(s_county)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(10) Project [d_date_sk#7] - +- *(10) Filter (((isnotnull(d_month_seq#32) && (d_month_seq#32 >= Subquery subquery12329)) && (d_month_seq#32 <= Subquery subquery12330)) && isnotnull(d_date_sk#7)) - : :- Subquery subquery12329 + +- *(10) Filter (((isnotnull(d_month_seq#32) && (d_month_seq#32 >= Subquery subquery10089)) && (d_month_seq#32 <= Subquery subquery10090)) && isnotnull(d_date_sk#7)) + : :- Subquery subquery10089 : : +- *(2) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) : : +- Exchange hashpartitioning((d_month_seq + 1)#33, 5) : : +- *(1) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) : : +- *(1) Project [(d_month_seq#32 + 1) AS (d_month_seq + 1)#33] : : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) : : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct - : +- Subquery subquery12330 + : +- Subquery subquery10090 : +- *(2) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) : +- Exchange hashpartitioning((d_month_seq + 3)#34, 5) : +- *(1) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) @@ -72,14 +72,14 @@ TakeOrderedAndProject(limit=100, orderBy=[segment#1 ASC NULLS FIRST,num_customer : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct +- *(10) FileScan parquet default.date_dim[d_date_sk#7,d_month_seq#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct - :- Subquery subquery12329 + :- Subquery subquery10089 : +- *(2) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) : +- Exchange hashpartitioning((d_month_seq + 1)#33, 5) : +- *(1) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) : +- *(1) Project [(d_month_seq#32 + 1) AS (d_month_seq + 1)#33] : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct - +- Subquery subquery12330 + +- Subquery subquery10090 +- *(2) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) +- Exchange hashpartitioning((d_month_seq + 3)#34, 5) +- *(1) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt index b86ab1b6a..3707aa2f8 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt @@ -14,7 +14,7 @@ TakeOrderedAndProject [segment,num_customers,segment_base] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [c_customer_sk,ss_sold_date_sk,ss_ext_sales_price] BroadcastHashJoin [ca_county,ca_state,s_county,s_state] - Project [ca_state,c_customer_sk,ss_sold_date_sk,ca_county,ss_ext_sales_price] + Project [ca_state,c_customer_sk,ss_ext_sales_price,ca_county,ss_sold_date_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] Project [c_customer_sk,c_current_addr_sk,ss_sold_date_sk,ss_ext_sales_price] BroadcastHashJoin [c_customer_sk,ss_customer_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt index 33903ea48..e5ff08959 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt @@ -1,10 +1,10 @@ TakeOrderedAndProject [ext_price,brand_id,brand] WholeStageCodegen - HashAggregate [i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,sum(UnscaledValue(ss_ext_sales_price)),ext_price,brand_id,sum] + HashAggregate [i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,ext_price,sum(UnscaledValue(ss_ext_sales_price)),sum,brand_id] InputAdapter Exchange [i_brand,i_brand_id] #1 WholeStageCodegen - HashAggregate [i_brand_id,ss_ext_sales_price,sum,sum,i_brand] [sum,sum] + HashAggregate [i_brand,sum,sum,ss_ext_sales_price,i_brand_id] [sum,sum] Project [ss_ext_sales_price,i_brand_id,i_brand] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ss_item_sk,ss_ext_sales_price] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt index eb5871bab..275c06c2d 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt @@ -21,8 +21,8 @@ TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast( : : : :- *(4) Project [i_brand#5, i_category#4, cs_sold_date_sk#25, cs_call_center_sk#23, cs_sales_price#22] : : : : +- *(4) BroadcastHashJoin [i_item_sk#27], [cs_item_sk#28], Inner, BuildRight : : : : :- *(4) Project [i_item_sk#27, i_brand#5, i_category#4] - : : : : : +- *(4) Filter ((isnotnull(i_item_sk#27) && isnotnull(i_category#4)) && isnotnull(i_brand#5)) - : : : : : +- *(4) FileScan parquet default.item[i_item_sk#27,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)], ReadSchema: struct + : : : : : +- *(4) Filter ((isnotnull(i_item_sk#27) && isnotnull(i_brand#5)) && isnotnull(i_category#4)) + : : : : : +- *(4) FileScan parquet default.item[i_item_sk#27,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) : : : : +- *(1) Project [cs_sold_date_sk#25, cs_call_center_sk#23, cs_item_sk#28, cs_sales_price#22] : : : : +- *(1) Filter ((isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#25)) && isnotnull(cs_call_center_sk#23)) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt index b482f3f5f..d41aaf1e2 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt @@ -1,35 +1,35 @@ -TakeOrderedAndProject [i_category,nsum,cc_name,d_year,psum,sum_sales,avg_monthly_sales,i_brand,d_moy] +TakeOrderedAndProject [d_year,avg_monthly_sales,nsum,d_moy,sum_sales,i_category,i_brand,cc_name,psum] WholeStageCodegen - Project [i_category,d_year,avg_monthly_sales,sum_sales,sum_sales,sum_sales,i_brand,cc_name,d_moy] - BroadcastHashJoin [i_category,i_brand,i_category,rn,cc_name,rn,i_brand,cc_name] - Project [cc_name,d_moy,sum_sales,sum_sales,d_year,rn,avg_monthly_sales,i_category,i_brand] - BroadcastHashJoin [i_category,i_category,i_brand,rn,cc_name,rn,i_brand,cc_name] - Project [cc_name,d_moy,sum_sales,d_year,rn,avg_monthly_sales,i_category,i_brand] + Project [sum_sales,d_year,sum_sales,d_moy,sum_sales,i_category,i_brand,cc_name,avg_monthly_sales] + BroadcastHashJoin [cc_name,i_brand,rn,rn,i_category,i_category,i_brand,cc_name] + Project [d_year,d_moy,sum_sales,rn,i_brand,cc_name,sum_sales,i_category,avg_monthly_sales] + BroadcastHashJoin [i_category,cc_name,rn,i_category,rn,i_brand,cc_name,i_brand] + Project [d_year,d_moy,sum_sales,rn,i_brand,cc_name,i_category,avg_monthly_sales] Filter [avg_monthly_sales,sum_sales,rn] InputAdapter - Window [cc_name,d_year,i_category,i_brand,_w0] + Window [d_year,i_brand,cc_name,i_category,_w0] WholeStageCodegen Filter [d_year] InputAdapter - Window [cc_name,d_moy,d_year,i_category,i_brand] + Window [d_year,d_moy,i_brand,cc_name,i_category] WholeStageCodegen - Sort [cc_name,d_moy,d_year,i_category,i_brand] + Sort [d_year,d_moy,i_brand,cc_name,i_category] InputAdapter Exchange [i_category,i_brand,cc_name] #1 WholeStageCodegen - HashAggregate [sum(UnscaledValue(cs_sales_price)),cc_name,d_moy,sum,d_year,i_category,i_brand] [sum(UnscaledValue(cs_sales_price)),sum_sales,_w0,sum] + HashAggregate [d_year,d_moy,sum(UnscaledValue(cs_sales_price)),i_brand,sum,cc_name,i_category] [sum(UnscaledValue(cs_sales_price)),sum_sales,_w0,sum] InputAdapter - Exchange [cc_name,d_moy,d_year,i_category,i_brand] #2 + Exchange [d_year,d_moy,i_brand,cc_name,i_category] #2 WholeStageCodegen - HashAggregate [cc_name,d_moy,cs_sales_price,sum,sum,d_year,i_category,i_brand] [sum,sum] - Project [d_year,i_brand,i_category,cc_name,d_moy,cs_sales_price] + HashAggregate [d_year,d_moy,i_brand,sum,sum,cc_name,i_category,cs_sales_price] [sum,sum] + Project [d_moy,d_year,cs_sales_price,i_category,cc_name,i_brand] BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] - Project [cs_call_center_sk,d_year,i_brand,i_category,d_moy,cs_sales_price] + Project [d_moy,d_year,cs_sales_price,i_category,i_brand,cs_call_center_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_call_center_sk,i_brand,i_category,cs_sold_date_sk,cs_sales_price] + Project [cs_sales_price,cs_sold_date_sk,i_category,i_brand,cs_call_center_sk] BroadcastHashJoin [i_item_sk,cs_item_sk] Project [i_item_sk,i_brand,i_category] - Filter [i_item_sk,i_category,i_brand] + Filter [i_item_sk,i_brand,i_category] Scan parquet default.item [i_item_sk,i_brand,i_category] [i_item_sk,i_brand,i_category] InputAdapter BroadcastExchange #3 @@ -52,26 +52,26 @@ TakeOrderedAndProject [i_category,nsum,cc_name,d_year,psum,sum_sales,avg_monthly InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [i_category,sum_sales,rn,cc_name,i_brand] + Project [cc_name,sum_sales,i_brand,rn,i_category] Filter [rn] InputAdapter - Window [i_category,d_year,d_moy,cc_name,i_brand] + Window [cc_name,i_brand,d_moy,d_year,i_category] WholeStageCodegen - Sort [i_category,d_year,d_moy,cc_name,i_brand] + Sort [cc_name,i_brand,d_moy,d_year,i_category] InputAdapter Exchange [i_category,i_brand,cc_name] #7 WholeStageCodegen - HashAggregate [sum(UnscaledValue(cs_sales_price)),i_category,d_year,d_moy,sum,cc_name,i_brand] [sum(UnscaledValue(cs_sales_price)),sum_sales,sum] + HashAggregate [sum(UnscaledValue(cs_sales_price)),sum,cc_name,i_brand,d_moy,d_year,i_category] [sum(UnscaledValue(cs_sales_price)),sum_sales,sum] InputAdapter - ReusedExchange [i_category,sum,d_year,d_moy,cc_name,i_brand] [i_category,sum,d_year,d_moy,cc_name,i_brand] #2 + ReusedExchange [cc_name,i_brand,d_moy,sum,d_year,i_category] [cc_name,i_brand,d_moy,sum,d_year,i_category] #2 InputAdapter BroadcastExchange #8 WholeStageCodegen - Project [i_brand,i_category,cc_name,sum_sales,rn] + Project [cc_name,rn,i_brand,i_category,sum_sales] Filter [rn] InputAdapter - Window [i_brand,i_category,cc_name,d_moy,d_year] + Window [cc_name,d_year,i_brand,i_category,d_moy] WholeStageCodegen - Sort [i_brand,i_category,cc_name,d_moy,d_year] + Sort [cc_name,d_year,i_brand,i_category,d_moy] InputAdapter - ReusedExchange [i_brand,i_category,cc_name,d_moy,sum_sales,d_year] [i_brand,i_category,cc_name,d_moy,sum_sales,d_year] #7 + ReusedExchange [cc_name,d_year,i_brand,i_category,d_moy,sum_sales] [cc_name,d_year,i_brand,i_category,d_moy,sum_sales] #7 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt index dacb7f6b6..db623ff8f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt @@ -27,13 +27,13 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,ss_item_rev# : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) : : +- *(2) Project [d_date#17 AS d_date#17#18] - : : +- *(2) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery14656)) - : : : +- Subquery subquery14656 + : : +- *(2) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12416)) + : : : +- Subquery subquery12416 : : : +- *(1) Project [d_week_seq#19] : : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) : : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct : : +- *(2) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct - : : +- Subquery subquery14656 + : : +- Subquery subquery12416 : : +- *(1) Project [d_week_seq#19] : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct @@ -58,13 +58,13 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,ss_item_rev# : : +- *(7) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) : +- *(6) Project [d_date#17 AS d_date#17#23] - : +- *(6) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery14660)) - : : +- Subquery subquery14660 + : +- *(6) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12420)) + : : +- Subquery subquery12420 : : +- *(1) Project [d_week_seq#19] : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct : +- *(6) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct - : +- Subquery subquery14660 + : +- Subquery subquery12420 : +- *(1) Project [d_week_seq#19] : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct @@ -89,13 +89,13 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,ss_item_rev# : +- *(12) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) +- *(11) Project [d_date#17 AS d_date#17#27] - +- *(11) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery14664)) - : +- Subquery subquery14664 + +- *(11) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12424)) + : +- Subquery subquery12424 : +- *(1) Project [d_week_seq#19] : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct +- *(11) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct - +- Subquery subquery14664 + +- Subquery subquery12424 +- *(1) Project [d_week_seq#19] +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt index c6611f36d..fd4503647 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt @@ -1,7 +1,7 @@ -TakeOrderedAndProject [ws_dev,cs_dev,item_id,ws_item_rev,cs_item_rev,average,ss_dev,ss_item_rev] +TakeOrderedAndProject [cs_item_rev,item_id,ws_dev,ws_item_rev,average,cs_dev,ss_item_rev,ss_dev] WholeStageCodegen Project [item_id,ss_item_rev,cs_item_rev,ws_item_rev] - BroadcastHashJoin [item_id,ws_item_rev,item_id,cs_item_rev,ss_item_rev] + BroadcastHashJoin [cs_item_rev,item_id,ws_item_rev,item_id,ss_item_rev] Project [item_id,ss_item_rev,cs_item_rev] BroadcastHashJoin [item_id,item_id,ss_item_rev,cs_item_rev] Filter [ss_item_rev] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt index e7542c50d..0ffe4eb4e 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt @@ -1,16 +1,16 @@ -TakeOrderedAndProject [(sun_sales1 / sun_sales2),s_store_name1,d_week_seq1,(thu_sales1 / thu_sales2),(fri_sales1 / fri_sales2),(tue_sales1 / tue_sales2),(sat_sales1 / sat_sales2),(wed_sales1 / wed_sales2),(mon_sales1 / mon_sales2),s_store_id1] +TakeOrderedAndProject [(wed_sales1 / wed_sales2),d_week_seq1,(mon_sales1 / mon_sales2),(thu_sales1 / thu_sales2),(sat_sales1 / sat_sales2),s_store_name1,(sun_sales1 / sun_sales2),s_store_id1,(fri_sales1 / fri_sales2),(tue_sales1 / tue_sales2)] WholeStageCodegen - Project [sun_sales2,thu_sales1,s_store_name1,sat_sales1,tue_sales2,mon_sales2,d_week_seq1,thu_sales2,wed_sales2,wed_sales1,sat_sales2,mon_sales1,sun_sales1,fri_sales2,fri_sales1,tue_sales1,s_store_id1] + Project [sat_sales1,sat_sales2,sun_sales2,tue_sales1,d_week_seq1,mon_sales1,fri_sales1,wed_sales1,thu_sales1,mon_sales2,s_store_name1,s_store_id1,wed_sales2,thu_sales2,fri_sales2,tue_sales2,sun_sales1] BroadcastHashJoin [s_store_id1,d_week_seq1,s_store_id2,d_week_seq2] - Project [sat_sales,s_store_id,mon_sales,sun_sales,s_store_name,fri_sales,wed_sales,tue_sales,d_week_seq,thu_sales] + Project [fri_sales,tue_sales,sat_sales,s_store_id,sun_sales,d_week_seq,wed_sales,mon_sales,s_store_name,thu_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - Project [fri_sales,d_week_seq,thu_sales,wed_sales,mon_sales,s_store_name,sat_sales,s_store_id,sun_sales,tue_sales] + Project [s_store_id,s_store_name,thu_sales,sun_sales,fri_sales,tue_sales,d_week_seq,wed_sales,sat_sales,mon_sales] BroadcastHashJoin [ss_store_sk,s_store_sk] - HashAggregate [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),d_week_seq,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,ss_store_sk,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum,sum] [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),fri_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),thu_sales,sum,wed_sales,mon_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sat_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sun_sales,sum,sum,tue_sales] + HashAggregate [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum,sum,ss_store_sk,d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),thu_sales,sun_sales,sum,fri_sales,tue_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,wed_sales,sat_sales,sum,mon_sales,sum] InputAdapter Exchange [d_week_seq,ss_store_sk] #1 WholeStageCodegen - HashAggregate [sum,d_week_seq,sum,sum,d_day_name,sum,sum,sum,sum,sum,sum,ss_store_sk,sum,ss_sales_price,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + HashAggregate [sum,sum,d_day_name,sum,sum,sum,sum,sum,sum,ss_store_sk,d_week_seq,ss_sales_price,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] Project [ss_store_sk,ss_sales_price,d_week_seq,d_day_name] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,ss_store_sk,ss_sales_price] @@ -37,13 +37,13 @@ TakeOrderedAndProject [(sun_sales1 / sun_sales2),s_store_name1,d_week_seq1,(thu_ InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [sat_sales,s_store_id,mon_sales,sun_sales,fri_sales,wed_sales,tue_sales,d_week_seq,thu_sales] + Project [fri_sales,tue_sales,sat_sales,s_store_id,sun_sales,d_week_seq,wed_sales,mon_sales,thu_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - Project [fri_sales,d_week_seq,thu_sales,wed_sales,mon_sales,sat_sales,s_store_id,sun_sales,tue_sales] + Project [s_store_id,thu_sales,sun_sales,fri_sales,tue_sales,d_week_seq,wed_sales,sat_sales,mon_sales] BroadcastHashJoin [ss_store_sk,s_store_sk] - HashAggregate [sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),d_week_seq,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),ss_store_sk,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum,sum] [sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),fri_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,thu_sales,wed_sales,mon_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sat_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum,sun_sales,sum,tue_sales] + HashAggregate [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum,ss_store_sk,d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),thu_sales,sum,sun_sales,fri_sales,tue_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,wed_sales,sat_sales,sum,mon_sales] InputAdapter - ReusedExchange [sum,d_week_seq,sum,sum,sum,ss_store_sk,sum,sum,sum] [sum,d_week_seq,sum,sum,sum,ss_store_sk,sum,sum,sum] #1 + ReusedExchange [sum,sum,sum,ss_store_sk,d_week_seq,sum,sum,sum,sum] [sum,sum,sum,ss_store_sk,d_week_seq,sum,sum,sum,sum] #1 InputAdapter BroadcastExchange #6 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt index c8985e93a..eb06bf9b7 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt @@ -26,8 +26,8 @@ TakeOrderedAndProject(limit=100, orderBy=[cnt#1 ASC NULLS FIRST], output=[state# : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#7,ss_item_sk#5,ss_customer_sk#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(3) Project [d_date_sk#8] - : +- *(3) Filter ((isnotnull(d_month_seq#13) && (d_month_seq#13 = Subquery subquery3222)) && isnotnull(d_date_sk#8)) - : : +- Subquery subquery3222 + : +- *(3) Filter ((isnotnull(d_month_seq#13) && (d_month_seq#13 = Subquery subquery982)) && isnotnull(d_date_sk#8)) + : : +- Subquery subquery982 : : +- *(2) HashAggregate(keys=[d_month_seq#13], functions=[]) : : +- Exchange hashpartitioning(d_month_seq#13, 5) : : +- *(1) HashAggregate(keys=[d_month_seq#13], functions=[]) @@ -35,7 +35,7 @@ TakeOrderedAndProject(limit=100, orderBy=[cnt#1 ASC NULLS FIRST], output=[state# : : +- *(1) Filter (((isnotnull(d_year#14) && isnotnull(d_moy#15)) && (d_year#14 = 2000)) && (d_moy#15 = 1)) : : +- *(1) FileScan parquet default.date_dim[d_month_seq#13,d_year#14,d_moy#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)], ReadSchema: struct : +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_month_seq#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct - : +- Subquery subquery3222 + : +- Subquery subquery982 : +- *(2) HashAggregate(keys=[d_month_seq#13], functions=[]) : +- Exchange hashpartitioning(d_month_seq#13, 5) : +- *(1) HashAggregate(keys=[d_month_seq#13], functions=[]) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt index 72f771ed1..59ea8c0d9 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt @@ -2,7 +2,7 @@ TakeOrderedAndProject [cnt,state] WholeStageCodegen Project [state,cnt] Filter [count(1)] - HashAggregate [ca_state,count,count(1)] [cnt,count(1),count(1),state,count] + HashAggregate [ca_state,count,count(1)] [state,cnt,count(1),count,count(1)] InputAdapter Exchange [ca_state] #1 WholeStageCodegen @@ -68,11 +68,11 @@ TakeOrderedAndProject [cnt,state] InputAdapter BroadcastExchange #7 WholeStageCodegen - HashAggregate [i_category,sum,count,avg(UnscaledValue(i_current_price))] [count,i_category,avg(UnscaledValue(i_current_price)),avg(i_current_price),sum] + HashAggregate [i_category,sum,count,avg(UnscaledValue(i_current_price))] [avg(i_current_price),sum,i_category,count,avg(UnscaledValue(i_current_price))] InputAdapter Exchange [i_category] #8 WholeStageCodegen - HashAggregate [count,count,i_category,i_current_price,sum,sum] [sum,count,sum,count] + HashAggregate [sum,sum,count,i_category,i_current_price,count] [sum,count,sum,count] Project [i_current_price,i_category] Filter [i_category] Scan parquet default.item [i_current_price,i_category] [i_current_price,i_category] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt index 7d6d31c1d..39a31d2ee 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt @@ -19,11 +19,11 @@ TakeOrderedAndProject [promotions,total,(CAST((CAST(CAST(promotions AS DECIMAL(1 BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_ext_sales_price] BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_customer_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_ext_sales_price] + Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_promo_sk] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_customer_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price] - Filter [ss_customer_sk,ss_item_sk,ss_promo_sk,ss_store_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price] [ss_customer_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price] + Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_promo_sk] + Filter [ss_sold_date_sk,ss_customer_sk,ss_promo_sk,ss_store_sk,ss_item_sk] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_promo_sk] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_promo_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen @@ -77,9 +77,9 @@ TakeOrderedAndProject [promotions,total,(CAST((CAST(CAST(promotions AS DECIMAL(1 BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_ext_sales_price] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price] + Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] Filter [ss_store_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price] [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price] + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] InputAdapter ReusedExchange [s_store_sk] [s_store_sk] #2 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt index f487c91aa..a6a12999a 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt @@ -1,21 +1,21 @@ -TakeOrderedAndProject [91 - 120 days ,web_name,>120 days ,30 days ,61 - 90 days ,substring(w_warehouse_name, 1, 20),31 - 60 days ,sm_type] +TakeOrderedAndProject [web_name,substring(w_warehouse_name, 1, 20),91 - 120 days ,31 - 60 days ,>120 days ,30 days ,sm_type,61 - 90 days ] WholeStageCodegen - HashAggregate [sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum,sum,web_name,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum,sum,sum,sm_type,sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint))] [91 - 120 days ,sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum,sum,>120 days ,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),30 days ,61 - 90 days ,substring(w_warehouse_name, 1, 20),31 - 60 days ,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum,sum,sum,sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint))] + HashAggregate [web_name,sum,sum,sum,sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum,sm_type,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint))] [substring(w_warehouse_name, 1, 20),sum,sum,91 - 120 days ,31 - 60 days ,>120 days ,sum,30 days ,sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint))] InputAdapter Exchange [substring(w_warehouse_name, 1, 20),sm_type,web_name] #1 WholeStageCodegen - HashAggregate [sum,sum,w_warehouse_name,sum,sum,web_name,sum,substring(w_warehouse_name, 1, 20),sum,ws_ship_date_sk,ws_sold_date_sk,sum,sum,sum,sum,sm_type] [sum,sum,sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum] - Project [web_name,ws_ship_date_sk,w_warehouse_name,sm_type,ws_sold_date_sk] + HashAggregate [web_name,sum,sum,ws_sold_date_sk,w_warehouse_name,sum,sum,sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sm_type,sum,sum,ws_ship_date_sk] [sum,sum,sum,sum,sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sum,sum] + Project [web_name,ws_sold_date_sk,sm_type,ws_ship_date_sk,w_warehouse_name] BroadcastHashJoin [ws_ship_date_sk,d_date_sk] - Project [web_name,ws_ship_date_sk,w_warehouse_name,sm_type,ws_sold_date_sk] + Project [web_name,ws_sold_date_sk,sm_type,ws_ship_date_sk,w_warehouse_name] BroadcastHashJoin [ws_web_site_sk,web_site_sk] - Project [ws_ship_date_sk,ws_web_site_sk,w_warehouse_name,sm_type,ws_sold_date_sk] + Project [ws_web_site_sk,ws_sold_date_sk,sm_type,ws_ship_date_sk,w_warehouse_name] BroadcastHashJoin [ws_ship_mode_sk,sm_ship_mode_sk] - Project [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,w_warehouse_name,ws_sold_date_sk] + Project [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,w_warehouse_name] BroadcastHashJoin [ws_warehouse_sk,w_warehouse_sk] - Project [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_sold_date_sk] + Project [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,ws_warehouse_sk] Filter [ws_warehouse_sk,ws_ship_mode_sk,ws_web_site_sk,ws_ship_date_sk] - Scan parquet default.web_sales [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_sold_date_sk] [ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,ws_warehouse_sk] [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,ws_warehouse_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt index 1bc6b52a7..941d3fe37 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt @@ -13,7 +13,7 @@ TakeOrderedAndProject [i_manager_id,avg_monthly_sales,sum_sales] InputAdapter Exchange [i_manager_id,d_moy] #2 WholeStageCodegen - HashAggregate [sum,i_manager_id,d_moy,sum,ss_sales_price] [sum,sum] + HashAggregate [d_moy,sum,ss_sales_price,i_manager_id,sum] [sum,sum] Project [i_manager_id,ss_sales_price,d_moy] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [i_manager_id,ss_store_sk,ss_sales_price,d_moy] @@ -22,7 +22,7 @@ TakeOrderedAndProject [i_manager_id,avg_monthly_sales,sum_sales] BroadcastHashJoin [i_item_sk,ss_item_sk] Project [i_item_sk,i_manager_id] Filter [i_category,i_class,i_brand,i_item_sk] - Scan parquet default.item [i_manager_id,i_brand,i_category,i_item_sk,i_class] [i_manager_id,i_brand,i_category,i_item_sk,i_class] + Scan parquet default.item [i_class,i_item_sk,i_manager_id,i_category,i_brand] [i_class,i_item_sk,i_manager_id,i_category,i_brand] InputAdapter BroadcastExchange #3 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt index 9ff14d371..7336416f6 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt @@ -3,50 +3,50 @@ WholeStageCodegen InputAdapter Exchange [product_name,store_name,cnt] #1 WholeStageCodegen - Project [b_streen_name,s2,s3,b_street_number,c_street_name,b_city,s1,syear,cnt,s2,b_zip,c_city,c_street_number,s1,syear,product_name,c_zip,store_zip,s3,cnt,store_name] - BroadcastHashJoin [store_zip,item_sk,cnt,item_sk,store_name,store_zip,cnt,store_name] - HashAggregate [ca_street_number,sum(UnscaledValue(ss_list_price)),ca_street_name,ca_city,s_store_name,s_zip,count(1),ca_street_number,ca_zip,d_year,i_item_sk,ca_city,d_year,sum,ca_street_name,sum,ca_zip,sum,d_year,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_wholesale_cost)),i_product_name,count] [store_name,sum(UnscaledValue(ss_list_price)),s3,b_streen_name,c_city,count(1),product_name,b_street_number,c_street_number,b_city,s1,store_zip,cnt,s2,item_sk,b_zip,c_zip,sum,sum,sum,c_street_name,syear,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_wholesale_cost)),count] + Project [cnt,b_streen_name,product_name,b_city,b_zip,syear,c_street_name,store_zip,c_street_number,s1,s3,c_zip,s3,s1,b_street_number,s2,store_name,cnt,s2,syear,c_city] + BroadcastHashJoin [cnt,store_zip,store_name,store_zip,store_name,item_sk,cnt,item_sk] + HashAggregate [d_year,ca_street_name,count,sum(UnscaledValue(ss_coupon_amt)),sum,ca_street_number,ca_street_name,sum(UnscaledValue(ss_list_price)),ca_zip,count(1),d_year,ca_city,sum,i_item_sk,sum,ca_street_number,d_year,ca_city,s_store_name,s_zip,sum(UnscaledValue(ss_wholesale_cost)),ca_zip,i_product_name] [c_city,b_streen_name,count,b_zip,sum(UnscaledValue(ss_coupon_amt)),sum,b_city,c_street_name,s3,sum(UnscaledValue(ss_list_price)),cnt,b_street_number,c_street_number,item_sk,count(1),store_zip,s2,product_name,sum,sum,store_name,c_zip,sum(UnscaledValue(ss_wholesale_cost)),s1,syear] InputAdapter - Exchange [ca_street_number,ca_street_name,ca_city,s_store_name,s_zip,ca_street_number,ca_zip,d_year,i_item_sk,ca_city,d_year,ca_street_name,ca_zip,d_year,i_product_name] #2 + Exchange [d_year,ca_street_name,ca_street_number,ca_street_name,ca_zip,d_year,ca_city,i_item_sk,ca_street_number,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] #2 WholeStageCodegen - HashAggregate [ca_street_number,count,ca_street_name,sum,ca_city,s_store_name,s_zip,ss_wholesale_cost,ss_coupon_amt,ca_street_number,sum,ca_zip,d_year,sum,i_item_sk,ca_city,d_year,sum,ca_street_name,sum,ca_zip,sum,ss_list_price,d_year,i_product_name,count] [count,sum,sum,sum,sum,sum,sum,count] - Project [ca_city,d_year,i_product_name,ca_street_name,ss_list_price,ss_wholesale_cost,ca_city,s_zip,i_item_sk,ca_street_number,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,ca_street_number,ca_zip] + HashAggregate [d_year,ca_street_name,count,sum,sum,ca_street_number,ss_wholesale_cost,ca_street_name,sum,count,ca_zip,ss_coupon_amt,d_year,sum,ca_city,ss_list_price,sum,i_item_sk,sum,ca_street_number,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] [count,sum,sum,sum,count,sum,sum,sum] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,d_year,ca_zip,ca_street_number,ca_zip,d_year,i_item_sk,ca_street_name,ca_city,i_product_name,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ca_city,d_year,ca_street_name,ss_list_price,ss_wholesale_cost,ca_city,ss_item_sk,s_zip,ca_street_number,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,ca_street_number,ca_zip] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,d_year,ca_street_name,ss_list_price,ss_wholesale_cost,ca_city,ss_item_sk,s_zip,ca_street_number,hd_income_band_sk,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,ca_street_number,ca_zip] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,d_year,ca_street_name,ss_list_price,ss_wholesale_cost,ca_city,ss_item_sk,s_zip,ca_street_number,hd_income_band_sk,hd_income_band_sk,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,ca_street_number,ca_zip] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,hd_income_band_sk,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,ca_city,ss_item_sk,s_zip,ca_street_number,hd_income_band_sk,hd_income_band_sk,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,c_current_addr_sk] + Project [c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,hd_income_band_sk,d_year,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,ss_addr_sk,ss_item_sk,s_zip,hd_income_band_sk,hd_income_band_sk,s_store_name,d_year,ss_coupon_amt,d_year,c_current_addr_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,hd_income_band_sk,d_year,s_zip,hd_income_band_sk,d_year,ss_wholesale_cost] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,ss_addr_sk,ss_item_sk,s_zip,hd_income_band_sk,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,d_year,c_current_addr_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,hd_income_band_sk,d_year,s_zip,d_year,ss_wholesale_cost] BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,ss_hdemo_sk,ss_addr_sk,ss_item_sk,s_zip,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,d_year,c_current_addr_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,ss_hdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,d_year,c_current_addr_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] - Project [d_year,ss_list_price,ss_wholesale_cost,ss_hdemo_sk,c_current_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,cd_marital_status,d_year,ss_coupon_amt,d_year,c_current_addr_sk] + Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,cd_marital_status,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,ss_hdemo_sk,ss_cdemo_sk,c_current_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,d_year,c_current_addr_sk] + Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,c_first_shipto_date_sk,ss_hdemo_sk,ss_cdemo_sk,c_current_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,c_current_addr_sk] + Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,c_first_shipto_date_sk,ss_hdemo_sk,ss_cdemo_sk,c_current_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,c_first_sales_date_sk,ss_coupon_amt,c_current_addr_sk] + Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,c_first_sales_date_sk,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,ss_coupon_amt] + Project [ss_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ss_customer_sk,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_item_sk,ss_promo_sk,ss_store_sk,ss_coupon_amt] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,d_year,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [ss_item_sk,cs_item_sk] - Project [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - Project [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_ticket_number,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] - Filter [ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_store_sk,ss_sold_date_sk,ss_addr_sk,ss_cdemo_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_ticket_number,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_ticket_number,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] + Filter [ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_promo_sk,ss_store_sk,ss_ticket_number,ss_addr_sk,ss_item_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -58,12 +58,12 @@ WholeStageCodegen WholeStageCodegen Project [cs_item_sk] Filter [sum(cs_ext_list_price),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2)))] - HashAggregate [sum,sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),cs_item_sk,sum,sum(UnscaledValue(cs_ext_list_price))] [sum,sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(cs_ext_list_price),sum,sum(UnscaledValue(cs_ext_list_price))] + HashAggregate [sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),cs_item_sk,sum,sum(UnscaledValue(cs_ext_list_price)),sum] [sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(cs_ext_list_price),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum,sum(UnscaledValue(cs_ext_list_price)),sum] InputAdapter Exchange [cs_item_sk] #5 WholeStageCodegen - HashAggregate [sum,cs_ext_list_price,sum,sum,cs_item_sk,cr_store_credit,cr_refunded_cash,sum,cr_reversed_charge] [sum,sum,sum,sum] - Project [cs_ext_list_price,cr_refunded_cash,cr_store_credit,cs_item_sk,cr_reversed_charge] + HashAggregate [sum,sum,cs_item_sk,sum,cr_refunded_cash,cr_reversed_charge,cr_store_credit,sum,cs_ext_list_price] [sum,sum,sum,sum] + Project [cr_store_credit,cs_item_sk,cr_refunded_cash,cs_ext_list_price,cr_reversed_charge] BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] Project [cs_item_sk,cs_order_number,cs_ext_list_price] Filter [cs_item_sk,cs_order_number] @@ -71,9 +71,9 @@ WholeStageCodegen InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [cr_order_number,cr_refunded_cash,cr_store_credit,cr_item_sk,cr_reversed_charge] + Project [cr_order_number,cr_store_credit,cr_refunded_cash,cr_reversed_charge,cr_item_sk] Filter [cr_item_sk,cr_order_number] - Scan parquet default.catalog_returns [cr_order_number,cr_refunded_cash,cr_store_credit,cr_item_sk,cr_reversed_charge] [cr_order_number,cr_refunded_cash,cr_store_credit,cr_item_sk,cr_reversed_charge] + Scan parquet default.catalog_returns [cr_order_number,cr_store_credit,cr_refunded_cash,cr_reversed_charge,cr_item_sk] [cr_order_number,cr_store_credit,cr_refunded_cash,cr_reversed_charge,cr_item_sk] InputAdapter BroadcastExchange #7 WholeStageCodegen @@ -89,9 +89,9 @@ WholeStageCodegen InputAdapter BroadcastExchange #9 WholeStageCodegen - Project [c_first_shipto_date_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_current_addr_sk] - Filter [c_first_shipto_date_sk,c_current_hdemo_sk,c_current_cdemo_sk,c_customer_sk,c_current_addr_sk,c_first_sales_date_sk] - Scan parquet default.customer [c_first_shipto_date_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_current_addr_sk] [c_first_shipto_date_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_current_addr_sk] + Project [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] + Filter [c_current_cdemo_sk,c_customer_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,c_current_addr_sk] + Scan parquet default.customer [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] InputAdapter BroadcastExchange #10 WholeStageCodegen @@ -125,11 +125,11 @@ WholeStageCodegen InputAdapter BroadcastExchange #14 WholeStageCodegen - Project [ca_address_sk,ca_city,ca_street_number,ca_zip,ca_street_name] + Project [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] Filter [ca_address_sk] - Scan parquet default.customer_address [ca_address_sk,ca_city,ca_street_number,ca_zip,ca_street_name] [ca_address_sk,ca_city,ca_street_number,ca_zip,ca_street_name] + Scan parquet default.customer_address [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] InputAdapter - ReusedExchange [ca_city,ca_street_name,ca_address_sk,ca_street_number,ca_zip] [ca_city,ca_street_name,ca_address_sk,ca_street_number,ca_zip] #14 + ReusedExchange [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] #14 InputAdapter BroadcastExchange #15 WholeStageCodegen @@ -147,48 +147,48 @@ WholeStageCodegen InputAdapter BroadcastExchange #17 WholeStageCodegen - HashAggregate [ca_street_number,sum(UnscaledValue(ss_list_price)),ca_street_name,ca_city,s_store_name,s_zip,count(1),sum,ca_street_number,ca_zip,d_year,sum,sum,i_item_sk,ca_city,d_year,ca_street_name,ca_zip,d_year,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_wholesale_cost)),i_product_name,count] [sum(UnscaledValue(ss_list_price)),s2,s3,count(1),sum,syear,cnt,store_zip,sum,item_sk,sum,store_name,s1,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_wholesale_cost)),count] + HashAggregate [d_year,ca_street_name,count,sum(UnscaledValue(ss_coupon_amt)),ca_street_number,sum,ca_street_name,sum(UnscaledValue(ss_list_price)),ca_zip,count(1),d_year,ca_city,sum,i_item_sk,ca_street_number,sum,d_year,ca_city,s_store_name,s_zip,sum(UnscaledValue(ss_wholesale_cost)),ca_zip,i_product_name] [count,sum(UnscaledValue(ss_coupon_amt)),syear,s3,sum,sum(UnscaledValue(ss_list_price)),count(1),store_name,cnt,sum,s2,item_sk,s1,sum,store_zip,sum(UnscaledValue(ss_wholesale_cost))] InputAdapter - Exchange [ca_street_number,ca_street_name,ca_city,s_store_name,s_zip,ca_street_number,ca_zip,d_year,i_item_sk,ca_city,d_year,ca_street_name,ca_zip,d_year,i_product_name] #18 + Exchange [d_year,ca_street_name,ca_street_number,ca_street_name,ca_zip,d_year,ca_city,i_item_sk,ca_street_number,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] #18 WholeStageCodegen - HashAggregate [ca_street_number,count,ca_street_name,sum,ca_city,s_store_name,s_zip,sum,ss_wholesale_cost,ss_coupon_amt,ca_street_number,ca_zip,d_year,sum,sum,i_item_sk,ca_city,d_year,ca_street_name,ca_zip,sum,ss_list_price,sum,d_year,i_product_name,count] [count,sum,sum,sum,sum,sum,sum,count] - Project [ca_city,d_year,i_product_name,ca_street_name,ss_list_price,ss_wholesale_cost,ca_city,s_zip,i_item_sk,ca_street_number,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,ca_street_number,ca_zip] + HashAggregate [d_year,ca_street_name,count,ca_street_number,ss_wholesale_cost,sum,sum,ca_street_name,count,ca_zip,ss_coupon_amt,d_year,ca_city,ss_list_price,sum,sum,i_item_sk,sum,ca_street_number,sum,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] [count,sum,sum,count,sum,sum,sum,sum] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,d_year,ca_zip,ca_street_number,ca_zip,d_year,i_item_sk,ca_street_name,ca_city,i_product_name,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ca_city,d_year,ca_street_name,ss_list_price,ss_wholesale_cost,ca_city,ss_item_sk,s_zip,ca_street_number,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,ca_street_number,ca_zip] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,d_year,ca_street_name,ss_list_price,ss_wholesale_cost,ca_city,ss_item_sk,s_zip,ca_street_number,hd_income_band_sk,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,ca_street_number,ca_zip] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,d_year,ca_street_name,ss_list_price,ss_wholesale_cost,ca_city,ss_item_sk,s_zip,ca_street_number,hd_income_band_sk,hd_income_band_sk,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,ca_street_number,ca_zip] + Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,hd_income_band_sk,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,ca_city,ss_item_sk,s_zip,ca_street_number,hd_income_band_sk,hd_income_band_sk,ca_zip,s_store_name,ca_street_name,d_year,ss_coupon_amt,d_year,c_current_addr_sk] + Project [c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,hd_income_band_sk,d_year,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,ss_addr_sk,ss_item_sk,s_zip,hd_income_band_sk,hd_income_band_sk,s_store_name,d_year,ss_coupon_amt,d_year,c_current_addr_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,hd_income_band_sk,d_year,s_zip,hd_income_band_sk,d_year,ss_wholesale_cost] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,ss_addr_sk,ss_item_sk,s_zip,hd_income_band_sk,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,d_year,c_current_addr_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,hd_income_band_sk,d_year,s_zip,d_year,ss_wholesale_cost] BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,ss_hdemo_sk,ss_addr_sk,ss_item_sk,s_zip,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,d_year,c_current_addr_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,ss_hdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,d_year,c_current_addr_sk] + Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] - Project [d_year,ss_list_price,ss_wholesale_cost,ss_hdemo_sk,c_current_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,cd_marital_status,d_year,ss_coupon_amt,d_year,c_current_addr_sk] + Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,cd_marital_status,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,ss_hdemo_sk,ss_cdemo_sk,c_current_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,d_year,c_current_addr_sk] + Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,c_first_shipto_date_sk,ss_hdemo_sk,ss_cdemo_sk,c_current_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,d_year,ss_coupon_amt,c_current_addr_sk] + Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,c_first_shipto_date_sk,ss_hdemo_sk,ss_cdemo_sk,c_current_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,c_current_hdemo_sk,c_first_sales_date_sk,ss_coupon_amt,c_current_addr_sk] + Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,c_first_sales_date_sk,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_item_sk,s_zip,ss_promo_sk,s_store_name,ss_coupon_amt] + Project [ss_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ss_customer_sk,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [d_year,ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_item_sk,ss_promo_sk,ss_store_sk,ss_coupon_amt] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,d_year,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [ss_item_sk,cs_item_sk] - Project [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - Project [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_ticket_number,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] - Filter [ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_store_sk,ss_sold_date_sk,ss_addr_sk,ss_cdemo_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_ticket_number,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] [ss_list_price,ss_wholesale_cost,ss_customer_sk,ss_hdemo_sk,ss_cdemo_sk,ss_addr_sk,ss_ticket_number,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_coupon_amt] + Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] + Filter [ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_promo_sk,ss_store_sk,ss_ticket_number,ss_addr_sk,ss_item_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] InputAdapter ReusedExchange [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] #3 InputAdapter @@ -202,7 +202,7 @@ WholeStageCodegen InputAdapter ReusedExchange [s_store_sk,s_store_name,s_zip] [s_store_sk,s_store_name,s_zip] #8 InputAdapter - ReusedExchange [c_first_shipto_date_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_current_addr_sk] [c_first_shipto_date_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_current_addr_sk] #9 + ReusedExchange [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] #9 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #10 InputAdapter @@ -218,9 +218,9 @@ WholeStageCodegen InputAdapter ReusedExchange [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] #13 InputAdapter - ReusedExchange [ca_address_sk,ca_city,ca_street_number,ca_zip,ca_street_name] [ca_address_sk,ca_city,ca_street_number,ca_zip,ca_street_name] #14 + ReusedExchange [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] #14 InputAdapter - ReusedExchange [ca_city,ca_street_name,ca_address_sk,ca_street_number,ca_zip] [ca_city,ca_street_name,ca_address_sk,ca_street_number,ca_zip] #14 + ReusedExchange [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] #14 InputAdapter ReusedExchange [ib_income_band_sk] [ib_income_band_sk] #15 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt index 397ed939a..a83319f19 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt @@ -1,8 +1,8 @@ -TakeOrderedAndProject [i_current_price,s_store_name,i_wholesale_cost,i_item_desc,revenue,i_brand] +TakeOrderedAndProject [i_item_desc,i_brand,revenue,i_wholesale_cost,i_current_price,s_store_name] WholeStageCodegen - Project [i_current_price,s_store_name,i_wholesale_cost,i_item_desc,revenue,i_brand] + Project [i_item_desc,i_brand,revenue,i_wholesale_cost,i_current_price,s_store_name] BroadcastHashJoin [ss_store_sk,ss_store_sk,revenue,ave] - Project [i_wholesale_cost,ss_store_sk,i_current_price,i_brand,s_store_name,revenue,i_item_desc] + Project [s_store_name,i_current_price,ss_store_sk,revenue,i_brand,i_wholesale_cost,i_item_desc] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [s_store_name,ss_store_sk,ss_item_sk,revenue] BroadcastHashJoin [s_store_sk,ss_store_sk] @@ -17,7 +17,7 @@ TakeOrderedAndProject [i_current_price,s_store_name,i_wholesale_cost,i_item_desc InputAdapter Exchange [ss_store_sk,ss_item_sk] #2 WholeStageCodegen - HashAggregate [ss_sales_price,ss_store_sk,sum,ss_item_sk,sum] [sum,sum] + HashAggregate [ss_item_sk,sum,sum,ss_store_sk,ss_sales_price] [sum,sum] Project [ss_item_sk,ss_store_sk,ss_sales_price] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] @@ -32,9 +32,9 @@ TakeOrderedAndProject [i_current_price,s_store_name,i_wholesale_cost,i_item_desc InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [i_wholesale_cost,i_current_price,i_brand,i_item_sk,i_item_desc] + Project [i_current_price,i_item_sk,i_brand,i_wholesale_cost,i_item_desc] Filter [i_item_sk] - Scan parquet default.item [i_wholesale_cost,i_current_price,i_brand,i_item_sk,i_item_desc] [i_wholesale_cost,i_current_price,i_brand,i_item_sk,i_item_desc] + Scan parquet default.item [i_current_price,i_item_sk,i_brand,i_wholesale_cost,i_item_desc] [i_current_price,i_item_sk,i_brand,i_wholesale_cost,i_item_desc] InputAdapter BroadcastExchange #5 WholeStageCodegen @@ -42,12 +42,12 @@ TakeOrderedAndProject [i_current_price,s_store_name,i_wholesale_cost,i_item_desc InputAdapter Exchange [ss_store_sk] #6 WholeStageCodegen - HashAggregate [ss_store_sk,count,revenue,count,sum,sum] [sum,count,sum,count] + HashAggregate [sum,sum,count,revenue,ss_store_sk,count] [sum,count,sum,count] HashAggregate [ss_store_sk,ss_item_sk,sum,sum(UnscaledValue(ss_sales_price))] [sum(UnscaledValue(ss_sales_price)),revenue,sum] InputAdapter Exchange [ss_store_sk,ss_item_sk] #7 WholeStageCodegen - HashAggregate [ss_item_sk,sum,sum,ss_store_sk,ss_sales_price] [sum,sum] + HashAggregate [sum,ss_store_sk,sum,ss_sales_price,ss_item_sk] [sum,sum] Project [ss_item_sk,ss_store_sk,ss_sales_price] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt index d17316c39..f053b48d2 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt @@ -1,35 +1,35 @@ -TakeOrderedAndProject [jan_net,aug_net,jul_sales,sep_net,mar_sales,may_net,feb_net,apr_sales,apr_net,oct_net,nov_net,jun_sales,jul_sales_per_sq_foot,w_warehouse_name,w_warehouse_sq_ft,oct_sales_per_sq_foot,ship_carriers,sep_sales,may_sales_per_sq_foot,mar_net,may_sales,mar_sales_per_sq_foot,w_county,jan_sales,feb_sales,w_country,nov_sales,jun_sales_per_sq_foot,dec_net,jul_net,aug_sales,w_state,dec_sales,year,w_city,oct_sales,jun_net,sep_sales_per_sq_foot,apr_sales_per_sq_foot,feb_sales_per_sq_foot,aug_sales_per_sq_foot,nov_sales_per_sq_foot,jan_sales_per_sq_foot,dec_sales_per_sq_foot] +TakeOrderedAndProject [dec_net,w_city,oct_sales_per_sq_foot,apr_sales_per_sq_foot,mar_sales,jul_sales_per_sq_foot,feb_sales_per_sq_foot,apr_net,ship_carriers,feb_net,may_sales_per_sq_foot,aug_sales,jun_net,w_county,sep_sales_per_sq_foot,feb_sales,mar_sales_per_sq_foot,w_country,year,w_state,oct_net,w_warehouse_sq_ft,dec_sales,may_sales,aug_net,sep_net,mar_net,nov_net,nov_sales_per_sq_foot,oct_sales,jun_sales,w_warehouse_name,may_net,jul_sales,dec_sales_per_sq_foot,jul_net,jan_net,sep_sales,jun_sales_per_sq_foot,jan_sales,nov_sales,apr_sales,jan_sales_per_sq_foot,aug_sales_per_sq_foot] WholeStageCodegen - HashAggregate [sum,sum,sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(jan_net),sum,sum,sum(oct_net),sum,sum,sum,sum,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),w_warehouse_name,w_warehouse_sq_ft,sum,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),ship_carriers,sum(jul_sales),sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum,sum(sep_sales),sum(sep_net),sum,sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),w_county,sum(nov_net),sum,sum,sum,sum(mar_sales),sum,sum,w_country,sum,sum(nov_sales),sum(may_net),sum(jun_sales),sum(aug_sales),sum(jul_net),sum(oct_sales),sum,sum(dec_sales),w_state,sum(aug_net),sum,sum(jun_net),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(mar_net),sum,year,w_city,sum,sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(feb_sales),sum,sum(jan_sales),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(feb_net),sum,sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum(dec_net),sum,sum(apr_net),sum,sum(apr_sales),sum(may_sales)] [jan_net,sum,sum,aug_net,jul_sales,sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sep_net,sum(jan_net),sum,mar_sales,sum,sum(oct_net),may_net,feb_net,sum,apr_sales,sum,apr_net,sum,sum,oct_net,nov_net,jun_sales,sum,sum,sum,sum,jul_sales_per_sq_foot,sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),oct_sales_per_sq_foot,sum(jul_sales),sep_sales,sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,may_sales_per_sq_foot,mar_net,sum,sum(sep_sales),sum(sep_net),may_sales,sum,sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),mar_sales_per_sq_foot,sum(nov_net),sum,sum,sum,sum(mar_sales),jan_sales,sum,sum,feb_sales,nov_sales,sum,jun_sales_per_sq_foot,sum(nov_sales),sum(may_net),sum(jun_sales),sum(aug_sales),sum(jul_net),sum(oct_sales),sum,dec_net,jul_net,sum(dec_sales),aug_sales,sum(aug_net),dec_sales,sum,sum(jun_net),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(mar_net),sum,sum,oct_sales,jun_net,sep_sales_per_sq_foot,sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),apr_sales_per_sq_foot,sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(feb_sales),sum,sum(jan_sales),feb_sales_per_sq_foot,sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(feb_net),sum,sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,aug_sales_per_sq_foot,nov_sales_per_sq_foot,sum(dec_net),sum,sum(apr_net),sum,jan_sales_per_sq_foot,sum(apr_sales),dec_sales_per_sq_foot,sum(may_sales)] + HashAggregate [sum(may_net),sum,w_city,sum,sum(sep_sales),sum(dec_net),sum(jul_net),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum,sum,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum,sum,sum(oct_sales),sum,sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(mar_sales),sum(sep_net),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum(oct_net),sum,sum(apr_sales),sum(jan_net),ship_carriers,sum(nov_net),sum,sum(jun_sales),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(dec_sales),w_county,sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(may_sales),w_country,year,sum,sum,sum,sum,w_state,sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,w_warehouse_sq_ft,sum,sum,sum,sum(aug_sales),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(nov_sales),sum(aug_net),sum(jul_sales),sum,sum(jan_sales),sum,sum,sum(feb_sales),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum,w_warehouse_name,sum,sum,sum(jun_net),sum(mar_net),sum,sum,sum,sum,sum,sum(apr_net),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(feb_net)] [sum(may_net),dec_net,sum,sum,sum(sep_sales),sum(dec_net),sum(jul_net),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),oct_sales_per_sq_foot,apr_sales_per_sq_foot,sum,sum,sum,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum,mar_sales,sum,sum(oct_sales),sum,sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(mar_sales),sum(sep_net),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum(oct_net),jul_sales_per_sq_foot,feb_sales_per_sq_foot,sum,sum(apr_sales),sum(jan_net),apr_net,feb_net,sum(nov_net),may_sales_per_sq_foot,aug_sales,sum,jun_net,sum(jun_sales),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(dec_sales),sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sep_sales_per_sq_foot,feb_sales,sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(may_sales),mar_sales_per_sq_foot,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),oct_net,sum,dec_sales,sum,sum,may_sales,sum,sum(aug_sales),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(nov_sales),aug_net,sum(aug_net),sum(jul_sales),sum,sep_net,mar_net,sum(jan_sales),nov_net,nov_sales_per_sq_foot,sum,oct_sales,sum,sum(feb_sales),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,jun_sales,sum,may_net,jul_sales,sum,dec_sales_per_sq_foot,jul_net,jan_net,sum,sum(jun_net),sum(mar_net),sum,sum,sep_sales,sum,jun_sales_per_sq_foot,jan_sales,nov_sales,apr_sales,sum,sum,jan_sales_per_sq_foot,aug_sales_per_sq_foot,sum(apr_net),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(feb_net)] InputAdapter - Exchange [w_warehouse_name,w_warehouse_sq_ft,ship_carriers,w_county,w_country,w_state,year,w_city] #1 + Exchange [w_city,ship_carriers,w_county,w_country,year,w_state,w_warehouse_sq_ft,w_warehouse_name] #1 WholeStageCodegen - HashAggregate [sum,sum,apr_net,sum,sum,sum,dec_sales,sum,sum,sep_net,sum,sum,sum,sum,jan_sales,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,jul_sales,w_warehouse_name,sum,w_warehouse_sq_ft,sum,sum,sum,sum,sum,sum,ship_carriers,sum,sum,sum,sum,sum,feb_sales,w_county,sum,sum,sum,sum,sum,sum,nov_net,nov_sales,sum,may_sales,sum,w_country,sum,sum,sum,sep_sales,sum,mar_net,jun_sales,may_net,sum,aug_sales,sum,sum,jun_net,sum,jul_net,sum,w_state,mar_sales,jan_net,sum,apr_sales,sum,feb_net,sum,year,w_city,sum,sum,sum,sum,dec_net,sum,sum,sum,oct_net,sum,sum,sum,sum,aug_net,sum,sum,sum,sum,sum,oct_sales] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + HashAggregate [sum,sum,w_city,sum,may_sales,apr_net,sum,sum,sum,jul_net,sum,sum,jan_net,sum,sum,sep_sales,sum,sum,sum,sum,jan_sales,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,aug_net,dec_sales,jun_sales,sum,sum,sum,feb_net,sum,sep_net,ship_carriers,sum,mar_sales,sum,sum,sum,dec_net,w_county,sum,nov_sales,jul_sales,may_net,w_country,sum,oct_sales,sum,year,sum,sum,sum,sum,sum,w_state,sum,sum,mar_net,sum,jun_net,w_warehouse_sq_ft,sum,sum,sum,aug_sales,sum,sum,sum,sum,sum,feb_sales,sum,oct_net,sum,apr_sales,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_warehouse_name,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,nov_net] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] InputAdapter Union WholeStageCodegen - HashAggregate [sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,w_state,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_warehouse_name,sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum,sum,w_city,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,d_year,sum,sum,sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,w_warehouse_sq_ft,sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_country,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_county,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] [sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,aug_sales,sum,oct_sales,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,nov_sales,may_sales,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),oct_net,sep_net,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),aug_net,sum,sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum,jan_sales,sum,mar_sales,year,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),feb_sales,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jul_sales,sum,sum,nov_net,sep_sales,jul_net,may_net,apr_sales,jun_net,dec_net,jan_net,sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,mar_net,feb_net,sum,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),ship_carriers,apr_net,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jun_sales,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),dec_sales,sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] + HashAggregate [sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,d_year,sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_warehouse_sq_ft,w_county,sum,sum,sum,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_warehouse_name,sum,sum,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_city,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_state,sum,sum,sum,sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_country] [sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,may_sales,oct_sales,nov_net,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),apr_net,sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,ship_carriers,sum,sum,sum,dec_sales,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,apr_sales,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,nov_sales,sum,sum,jul_net,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jan_sales,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jun_net,sep_net,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,feb_net,aug_sales,jul_sales,sum,sum,sum,sum,year,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sep_sales,jan_net,sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,may_net,mar_net,sum,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),aug_net,mar_sales,oct_net,feb_sales,jun_sales,dec_net] InputAdapter - Exchange [w_state,w_warehouse_name,w_city,d_year,w_warehouse_sq_ft,w_country,w_county] #2 + Exchange [d_year,w_warehouse_sq_ft,w_county,w_warehouse_name,w_city,w_state,w_country] #2 WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,d_moy,sum,sum,w_state,sum,w_warehouse_name,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_city,sum,sum,d_year,sum,sum,sum,sum,w_warehouse_sq_ft,ws_ext_sales_price,ws_quantity,sum,ws_net_paid,sum,sum,sum,sum,w_country,w_county,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [ws_net_paid,d_year,w_city,w_warehouse_name,w_county,w_state,w_warehouse_sq_ft,ws_quantity,ws_ext_sales_price,d_moy,w_country] + HashAggregate [sum,sum,sum,sum,d_year,sum,sum,sum,sum,d_moy,sum,sum,sum,sum,sum,sum,sum,sum,w_warehouse_sq_ft,w_county,sum,sum,sum,sum,sum,sum,sum,w_warehouse_name,sum,sum,sum,sum,ws_ext_sales_price,sum,w_city,sum,ws_net_paid,sum,sum,sum,w_state,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,ws_quantity,sum,w_country,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_moy,ws_net_paid,d_year,ws_ext_sales_price,ws_quantity,w_county,w_country,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] BroadcastHashJoin [ws_ship_mode_sk,sm_ship_mode_sk] - Project [ws_net_paid,d_year,w_city,ws_ship_mode_sk,w_warehouse_name,w_county,w_state,w_warehouse_sq_ft,ws_quantity,ws_ext_sales_price,d_moy,w_country] + Project [d_moy,ws_net_paid,d_year,ws_ext_sales_price,ws_quantity,w_county,w_country,ws_ship_mode_sk,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] BroadcastHashJoin [ws_sold_time_sk,t_time_sk] - Project [ws_net_paid,d_year,w_city,ws_ship_mode_sk,w_warehouse_name,w_county,ws_sold_time_sk,w_state,w_warehouse_sq_ft,ws_quantity,ws_ext_sales_price,d_moy,w_country] + Project [d_moy,ws_net_paid,d_year,ws_ext_sales_price,ws_quantity,w_county,w_country,ws_ship_mode_sk,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city,ws_sold_time_sk] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_net_paid,w_city,ws_ship_mode_sk,w_warehouse_name,w_county,ws_sold_time_sk,w_state,w_warehouse_sq_ft,ws_quantity,ws_ext_sales_price,w_country,ws_sold_date_sk] + Project [ws_net_paid,ws_ext_sales_price,ws_quantity,w_county,w_country,ws_ship_mode_sk,w_state,ws_sold_date_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,ws_sold_time_sk] BroadcastHashJoin [ws_warehouse_sk,w_warehouse_sk] - Project [ws_net_paid,ws_ship_mode_sk,ws_sold_time_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] + Project [ws_net_paid,ws_ext_sales_price,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_sold_time_sk] Filter [ws_warehouse_sk,ws_sold_date_sk,ws_sold_time_sk,ws_ship_mode_sk] - Scan parquet default.web_sales [ws_net_paid,ws_ship_mode_sk,ws_sold_time_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] [ws_net_paid,ws_ship_mode_sk,ws_sold_time_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_sold_date_sk] + Scan parquet default.web_sales [ws_net_paid,ws_ext_sales_price,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_sold_time_sk] [ws_net_paid,ws_ext_sales_price,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_sold_time_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [w_city,w_warehouse_name,w_county,w_state,w_warehouse_sk,w_warehouse_sq_ft,w_country] + Project [w_county,w_warehouse_sk,w_country,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_city,w_warehouse_name,w_county,w_state,w_warehouse_sk,w_warehouse_sq_ft,w_country] [w_city,w_warehouse_name,w_county,w_state,w_warehouse_sk,w_warehouse_sq_ft,w_country] + Scan parquet default.warehouse [w_county,w_warehouse_sk,w_country,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] [w_county,w_warehouse_sk,w_country,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] InputAdapter BroadcastExchange #4 WholeStageCodegen @@ -49,24 +49,24 @@ TakeOrderedAndProject [jan_net,aug_net,jul_sales,sep_net,mar_sales,may_net,feb_n Filter [sm_carrier,sm_ship_mode_sk] Scan parquet default.ship_mode [sm_ship_mode_sk,sm_carrier] [sm_ship_mode_sk,sm_carrier] WholeStageCodegen - HashAggregate [sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_state,w_warehouse_name,sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_city,sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),d_year,sum,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_warehouse_sq_ft,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_country,sum,w_county,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] [apr_net,ship_carriers,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum,aug_sales,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sep_net,sum,sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),may_net,sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,aug_net,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),feb_net,sum,mar_sales,apr_sales,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),nov_sales,oct_net,sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jan_sales,nov_net,year,sum,dec_net,feb_sales,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,jun_sales,sum,sep_sales,jul_net,may_sales,sum,jan_net,sum,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,jun_net,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jul_sales,dec_sales,sum,oct_sales,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),mar_net,sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] + HashAggregate [sum,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),d_year,sum,sum,sum,sum,sum,sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,w_warehouse_sq_ft,sum,w_county,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_warehouse_name,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_city,sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_state,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_country] [sum,jan_sales,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),oct_net,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),feb_sales,sum,sum,sum,feb_net,sum,sum,sep_sales,sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,apr_net,sum,ship_carriers,sum,nov_net,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),may_net,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),dec_sales,sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),nov_sales,sum,sum,jun_net,oct_sales,sep_net,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jul_sales,aug_sales,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jul_net,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),dec_net,aug_net,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jan_net,mar_net,mar_sales,jun_sales,sum,sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),may_sales,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,year,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),apr_sales] InputAdapter - Exchange [w_state,w_warehouse_name,w_city,d_year,w_warehouse_sq_ft,w_country,w_county] #7 + Exchange [d_year,w_warehouse_sq_ft,w_county,w_warehouse_name,w_city,w_state,w_country] #7 WholeStageCodegen - HashAggregate [sum,sum,cs_quantity,sum,sum,sum,sum,sum,sum,sum,sum,sum,d_moy,w_state,w_warehouse_name,sum,sum,sum,sum,sum,sum,cs_net_paid_inc_tax,sum,sum,sum,cs_sales_price,sum,sum,sum,sum,w_city,sum,sum,d_year,sum,sum,w_warehouse_sq_ft,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_country,sum,sum,w_county,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [d_year,cs_net_paid_inc_tax,w_city,cs_quantity,w_warehouse_name,w_county,w_state,w_warehouse_sq_ft,d_moy,cs_sales_price,w_country] + HashAggregate [sum,sum,sum,d_year,sum,sum,sum,sum,sum,sum,sum,sum,d_moy,sum,sum,sum,sum,w_warehouse_sq_ft,sum,sum,w_county,sum,sum,sum,sum,w_warehouse_name,sum,sum,sum,sum,sum,sum,w_city,sum,sum,sum,sum,w_state,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,cs_sales_price,cs_quantity,sum,sum,cs_net_paid_inc_tax,sum,sum,sum,sum,w_country,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [cs_quantity,d_moy,d_year,w_county,w_country,cs_sales_price,w_state,cs_net_paid_inc_tax,w_warehouse_name,w_warehouse_sq_ft,w_city] BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] - Project [d_year,cs_ship_mode_sk,cs_net_paid_inc_tax,w_city,cs_quantity,w_warehouse_name,w_county,w_state,w_warehouse_sq_ft,d_moy,cs_sales_price,w_country] + Project [cs_quantity,d_moy,d_year,w_county,w_country,cs_sales_price,w_state,cs_net_paid_inc_tax,cs_ship_mode_sk,w_warehouse_name,w_warehouse_sq_ft,w_city] BroadcastHashJoin [cs_sold_time_sk,t_time_sk] - Project [d_year,cs_ship_mode_sk,cs_net_paid_inc_tax,w_city,cs_quantity,w_warehouse_name,w_county,cs_sold_time_sk,w_state,w_warehouse_sq_ft,d_moy,cs_sales_price,w_country] + Project [cs_sold_time_sk,cs_quantity,d_moy,d_year,w_county,w_country,cs_sales_price,w_state,cs_net_paid_inc_tax,cs_ship_mode_sk,w_warehouse_name,w_warehouse_sq_ft,w_city] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_ship_mode_sk,cs_net_paid_inc_tax,w_city,cs_quantity,w_warehouse_name,w_county,cs_sold_date_sk,cs_sold_time_sk,w_state,w_warehouse_sq_ft,cs_sales_price,w_country] + Project [cs_sold_time_sk,cs_quantity,w_county,w_country,cs_sales_price,w_state,cs_sold_date_sk,cs_net_paid_inc_tax,cs_ship_mode_sk,w_warehouse_name,w_warehouse_sq_ft,w_city] BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] - Project [cs_ship_mode_sk,cs_net_paid_inc_tax,cs_warehouse_sk,cs_quantity,cs_sold_date_sk,cs_sold_time_sk,cs_sales_price] + Project [cs_sold_time_sk,cs_quantity,cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_net_paid_inc_tax,cs_ship_mode_sk] Filter [cs_warehouse_sk,cs_sold_date_sk,cs_sold_time_sk,cs_ship_mode_sk] - Scan parquet default.catalog_sales [cs_ship_mode_sk,cs_net_paid_inc_tax,cs_warehouse_sk,cs_quantity,cs_sold_date_sk,cs_sold_time_sk,cs_sales_price] [cs_ship_mode_sk,cs_net_paid_inc_tax,cs_warehouse_sk,cs_quantity,cs_sold_date_sk,cs_sold_time_sk,cs_sales_price] + Scan parquet default.catalog_sales [cs_sold_time_sk,cs_quantity,cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_net_paid_inc_tax,cs_ship_mode_sk] [cs_sold_time_sk,cs_quantity,cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_net_paid_inc_tax,cs_ship_mode_sk] InputAdapter - ReusedExchange [w_city,w_warehouse_name,w_county,w_state,w_warehouse_sk,w_warehouse_sq_ft,w_country] [w_city,w_warehouse_name,w_county,w_state,w_warehouse_sk,w_warehouse_sq_ft,w_country] #3 + ReusedExchange [w_county,w_warehouse_sk,w_country,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] [w_county,w_warehouse_sk,w_country,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] #3 InputAdapter ReusedExchange [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] #4 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt index 7858fb88a..9381a8892 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt @@ -1,4 +1,4 @@ -TakeOrderedAndProject [i_category,d_moy,i_brand,d_qoy,d_year,s_store_id,sumsales,i_product_name,i_class,rk] +TakeOrderedAndProject [d_qoy,i_category,i_brand,i_class,d_year,d_moy,rk,i_product_name,s_store_id,sumsales] WholeStageCodegen Filter [rk] InputAdapter @@ -8,27 +8,27 @@ TakeOrderedAndProject [i_category,d_moy,i_brand,d_qoy,d_year,s_store_id,sumsales InputAdapter Exchange [i_category] #1 WholeStageCodegen - HashAggregate [sum,spark_grouping_id,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00)),i_class,s_store_id,i_category,i_brand,d_qoy,i_product_name,d_year,d_moy] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum] + HashAggregate [s_store_id,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00)),i_category,sum,i_brand,i_product_name,spark_grouping_id,d_moy,i_class,d_year,d_qoy] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum] InputAdapter - Exchange [spark_grouping_id,i_class,s_store_id,i_category,i_brand,d_qoy,i_product_name,d_year,d_moy] #2 + Exchange [s_store_id,i_category,i_brand,i_product_name,spark_grouping_id,d_moy,i_class,d_year,d_qoy] #2 WholeStageCodegen - HashAggregate [sum,spark_grouping_id,sum,ss_quantity,i_class,s_store_id,i_category,i_brand,d_qoy,ss_sales_price,i_product_name,d_year,d_moy] [sum,sum] - Expand [i_class,i_product_name,d_moy,ss_quantity,s_store_id,d_year,i_category,i_brand,d_qoy,ss_sales_price] - Project [s_store_id,d_moy,ss_quantity,d_year,d_qoy,ss_sales_price,i_category,i_brand,i_product_name,i_class] + HashAggregate [s_store_id,i_category,sum,i_brand,ss_sales_price,i_product_name,spark_grouping_id,d_moy,i_class,ss_quantity,d_year,d_qoy,sum] [sum,sum] + Expand [d_year,i_class,d_qoy,d_moy,i_brand,i_category,s_store_id,i_product_name,ss_sales_price,ss_quantity] + Project [d_year,d_qoy,d_moy,i_brand,s_store_id,ss_sales_price,i_category,ss_quantity,i_class,i_product_name] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [d_year,ss_sales_price,ss_item_sk,ss_quantity,d_qoy,s_store_id,d_moy] + Project [s_store_id,d_qoy,ss_quantity,ss_item_sk,d_moy,d_year,ss_sales_price] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [d_year,ss_sales_price,ss_item_sk,ss_quantity,d_qoy,ss_store_sk,d_moy] + Project [d_qoy,ss_quantity,ss_item_sk,d_moy,d_year,ss_store_sk,ss_sales_price] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] + Project [ss_quantity,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk] - Scan parquet default.store_sales [ss_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] [ss_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] [ss_quantity,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk,d_year,d_moy,d_qoy] Filter [d_month_seq,d_date_sk] - Scan parquet default.date_dim [d_year,d_date_sk,d_month_seq,d_qoy,d_moy] [d_year,d_date_sk,d_month_seq,d_qoy,d_moy] + Scan parquet default.date_dim [d_qoy,d_moy,d_year,d_month_seq,d_date_sk] [d_qoy,d_moy,d_year,d_month_seq,d_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen @@ -38,6 +38,6 @@ TakeOrderedAndProject [i_category,d_moy,i_brand,d_qoy,d_year,s_store_id,sumsales InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [i_product_name,i_brand,i_category,i_item_sk,i_class] + Project [i_class,i_item_sk,i_product_name,i_category,i_brand] Filter [i_item_sk] - Scan parquet default.item [i_product_name,i_brand,i_category,i_item_sk,i_class] [i_product_name,i_brand,i_category,i_item_sk,i_class] + Scan parquet default.item [i_class,i_item_sk,i_product_name,i_category,i_brand] [i_class,i_item_sk,i_product_name,i_category,i_brand] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt index f9955f4e6..1e9ce42b5 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt @@ -1,25 +1,25 @@ -TakeOrderedAndProject [list_price,ss_ticket_number,extended_price,c_last_name,bought_city,extended_tax,c_first_name,ca_city] +TakeOrderedAndProject [bought_city,ss_ticket_number,extended_price,list_price,c_last_name,c_first_name,extended_tax,ca_city] WholeStageCodegen - Project [list_price,ss_ticket_number,extended_price,c_last_name,bought_city,extended_tax,c_first_name,ca_city] + Project [bought_city,ss_ticket_number,extended_price,list_price,c_last_name,c_first_name,extended_tax,ca_city] BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] - Project [list_price,extended_tax,c_last_name,c_first_name,bought_city,extended_price,c_current_addr_sk,ss_ticket_number] + Project [c_current_addr_sk,extended_price,c_last_name,list_price,c_first_name,ss_ticket_number,extended_tax,bought_city] BroadcastHashJoin [ss_customer_sk,c_customer_sk] - HashAggregate [ss_customer_sk,sum(UnscaledValue(ss_ext_tax)),ca_city,sum(UnscaledValue(ss_ext_list_price)),sum,sum,sum(UnscaledValue(ss_ext_sales_price)),ss_addr_sk,sum,ss_ticket_number] [list_price,sum(UnscaledValue(ss_ext_tax)),sum(UnscaledValue(ss_ext_list_price)),extended_tax,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),bought_city,extended_price,sum] + HashAggregate [sum,sum,ca_city,ss_customer_sk,sum(UnscaledValue(ss_ext_tax)),sum(UnscaledValue(ss_ext_sales_price)),ss_ticket_number,ss_addr_sk,sum(UnscaledValue(ss_ext_list_price)),sum] [sum,extended_price,sum,list_price,sum(UnscaledValue(ss_ext_tax)),sum(UnscaledValue(ss_ext_sales_price)),extended_tax,sum(UnscaledValue(ss_ext_list_price)),sum,bought_city] InputAdapter Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 WholeStageCodegen - HashAggregate [ss_customer_sk,ca_city,ss_ext_list_price,sum,ss_ext_sales_price,sum,sum,sum,sum,ss_ext_tax,ss_addr_sk,sum,ss_ticket_number] [sum,sum,sum,sum,sum,sum] - Project [ca_city,ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_tax,ss_ext_sales_price,ss_ext_list_price] + HashAggregate [sum,ss_ext_tax,sum,ca_city,ss_customer_sk,sum,sum,sum,ss_ext_sales_price,ss_ext_list_price,ss_ticket_number,ss_addr_sk,sum] [sum,sum,sum,sum,sum,sum] + Project [ss_addr_sk,ss_ext_tax,ss_customer_sk,ca_city,ss_ext_sales_price,ss_ext_list_price,ss_ticket_number] BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_tax,ss_ext_sales_price,ss_ext_list_price] + Project [ss_addr_sk,ss_ext_tax,ss_customer_sk,ss_ext_sales_price,ss_ext_list_price,ss_ticket_number] BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_tax,ss_ext_sales_price,ss_ext_list_price] + Project [ss_addr_sk,ss_ext_tax,ss_customer_sk,ss_ext_sales_price,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_tax,ss_store_sk,ss_ext_sales_price,ss_ext_list_price] + Project [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_tax,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_ext_list_price] - Filter [ss_hdemo_sk,ss_customer_sk,ss_store_sk,ss_sold_date_sk,ss_addr_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_tax,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_ext_list_price] [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_tax,ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_ext_list_price] + Project [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] + Filter [ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_addr_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] InputAdapter BroadcastExchange #2 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/simplified.txt index 260f8bf20..d6e6152f7 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/simplified.txt @@ -1,11 +1,11 @@ -TakeOrderedAndProject [cnt2,cd_gender,cd_education_status,cd_credit_rating,cd_purchase_estimate,cd_marital_status,cnt3,cnt1] +TakeOrderedAndProject [cnt3,cnt1,cd_purchase_estimate,cd_education_status,cd_credit_rating,cd_marital_status,cd_gender,cnt2] WholeStageCodegen - HashAggregate [count,cd_gender,cd_education_status,cd_credit_rating,cd_purchase_estimate,cd_marital_status,count(1)] [cnt2,count,cnt3,cnt1,count(1)] + HashAggregate [count(1),cd_purchase_estimate,count,cd_education_status,cd_credit_rating,cd_marital_status,cd_gender] [cnt3,count(1),cnt1,count,cnt2] InputAdapter - Exchange [cd_gender,cd_education_status,cd_credit_rating,cd_purchase_estimate,cd_marital_status] #1 + Exchange [cd_purchase_estimate,cd_education_status,cd_credit_rating,cd_marital_status,cd_gender] #1 WholeStageCodegen - HashAggregate [count,cd_gender,cd_education_status,cd_credit_rating,cd_purchase_estimate,cd_marital_status,count] [count,count] - Project [cd_gender,cd_education_status,cd_marital_status,cd_credit_rating,cd_purchase_estimate] + HashAggregate [count,cd_purchase_estimate,count,cd_education_status,cd_credit_rating,cd_marital_status,cd_gender] [count,count] + Project [cd_marital_status,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] Project [c_current_cdemo_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] @@ -59,6 +59,6 @@ TakeOrderedAndProject [cnt2,cd_gender,cd_education_status,cd_credit_rating,cd_pu InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [cd_gender,cd_education_status,cd_marital_status,cd_credit_rating,cd_demo_sk,cd_purchase_estimate] + Project [cd_demo_sk,cd_marital_status,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_gender,cd_education_status,cd_marital_status,cd_credit_rating,cd_demo_sk,cd_purchase_estimate] [cd_gender,cd_education_status,cd_marital_status,cd_credit_rating,cd_demo_sk,cd_purchase_estimate] + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] [cd_demo_sk,cd_marital_status,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/explain.txt index 1a0f07f81..886619282 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/explain.txt @@ -16,8 +16,8 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[ : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#14,ss_item_sk#12,ss_cdemo_sk#16,ss_promo_sk#10,ss_quantity#6,ss_list_price#7,ss_sales_price#9,ss_coupon_amt#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)], ReadSchema: struct + : : : +- *(1) Filter ((((((isnotnull(cd_gender#18) && isnotnull(cd_marital_status#19)) && isnotnull(cd_education_status#20)) && (cd_gender#18 = M)) && (cd_marital_status#19 = S)) && (cd_education_status#20 = College)) && isnotnull(cd_demo_sk#17)) + : : : +- *(1) FileScan parquet default.customer_demographics[cd_demo_sk#17,cd_gender#18,cd_marital_status#19,cd_education_status#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_g..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [d_date_sk#15] : : +- *(2) Filter ((isnotnull(d_year#21) && (d_year#21 = 2000)) && isnotnull(d_date_sk#15)) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt index 81ffc9845..b72978a57 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt @@ -1,26 +1,26 @@ -TakeOrderedAndProject [agg3,agg1,agg4,agg2,i_item_id] +TakeOrderedAndProject [agg2,i_item_id,agg1,agg4,agg3] WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),count,sum,count,sum,avg(cast(ss_quantity as bigint)),count,i_item_id,sum,count,avg(UnscaledValue(ss_list_price)),sum] [avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),agg3,agg1,count,sum,count,sum,agg4,agg2,avg(cast(ss_quantity as bigint)),count,sum,count,avg(UnscaledValue(ss_list_price)),sum] + HashAggregate [i_item_id,count,sum,count,avg(UnscaledValue(ss_sales_price)),sum,avg(cast(ss_quantity as bigint)),sum,sum,avg(UnscaledValue(ss_coupon_amt)),count,count,avg(UnscaledValue(ss_list_price))] [agg2,agg1,count,sum,count,avg(UnscaledValue(ss_sales_price)),sum,agg4,avg(cast(ss_quantity as bigint)),sum,sum,avg(UnscaledValue(ss_coupon_amt)),count,agg3,count,avg(UnscaledValue(ss_list_price))] InputAdapter Exchange [i_item_id] #1 WholeStageCodegen - HashAggregate [count,count,count,sum,count,ss_quantity,ss_coupon_amt,sum,sum,sum,count,count,sum,count,ss_sales_price,i_item_id,sum,count,ss_list_price,sum,sum] [count,count,count,sum,count,sum,sum,sum,count,count,sum,count,sum,count,sum,sum] - Project [ss_list_price,ss_sales_price,ss_quantity,i_item_id,ss_coupon_amt] + HashAggregate [count,i_item_id,count,count,sum,count,sum,count,sum,sum,sum,ss_coupon_amt,count,ss_sales_price,sum,sum,ss_list_price,sum,count,ss_quantity,count] [count,count,count,sum,count,sum,count,sum,sum,sum,count,sum,sum,sum,count,count] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_sales_price,i_item_id] BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_list_price,ss_sales_price,ss_quantity,ss_promo_sk,i_item_id,ss_coupon_amt] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_sales_price,ss_promo_sk,i_item_id] BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_list_price,ss_sales_price,ss_item_sk,ss_quantity,ss_promo_sk,ss_coupon_amt] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_promo_sk] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_list_price,ss_sales_price,ss_item_sk,ss_quantity,ss_promo_sk,ss_sold_date_sk,ss_coupon_amt] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_promo_sk] BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [ss_list_price,ss_sales_price,ss_cdemo_sk,ss_item_sk,ss_quantity,ss_promo_sk,ss_sold_date_sk,ss_coupon_amt] + Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk] Filter [ss_cdemo_sk,ss_sold_date_sk,ss_item_sk,ss_promo_sk] - Scan parquet default.store_sales [ss_list_price,ss_sales_price,ss_cdemo_sk,ss_item_sk,ss_quantity,ss_promo_sk,ss_sold_date_sk,ss_coupon_amt] [ss_list_price,ss_sales_price,ss_cdemo_sk,ss_item_sk,ss_quantity,ss_promo_sk,ss_sold_date_sk,ss_coupon_amt] + Scan parquet default.store_sales [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk] [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [cd_demo_sk] - Filter [cd_education_status,cd_gender,cd_marital_status,cd_demo_sk] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt index 7808992d6..2ef724de4 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt @@ -1,6 +1,6 @@ -TakeOrderedAndProject [total_sum,lochierarchy,s_state,rank_within_parent,s_county] +TakeOrderedAndProject [total_sum,lochierarchy,s_state,s_county,rank_within_parent] WholeStageCodegen - Project [total_sum,lochierarchy,s_state,rank_within_parent,s_county] + Project [total_sum,lochierarchy,s_state,s_county,rank_within_parent] InputAdapter Window [_w3,_w1,_w2] WholeStageCodegen @@ -8,11 +8,11 @@ TakeOrderedAndProject [total_sum,lochierarchy,s_state,rank_within_parent,s_count InputAdapter Exchange [_w1,_w2] #1 WholeStageCodegen - HashAggregate [sum(UnscaledValue(ss_net_profit)),s_state,spark_grouping_id,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,_w1,lochierarchy,_w3,_w2,sum] + HashAggregate [sum,s_state,s_county,spark_grouping_id,sum(UnscaledValue(ss_net_profit))] [total_sum,sum,lochierarchy,_w3,sum(UnscaledValue(ss_net_profit)),_w2,_w1] InputAdapter Exchange [s_state,s_county,spark_grouping_id] #2 WholeStageCodegen - HashAggregate [s_state,ss_net_profit,sum,spark_grouping_id,s_county,sum] [sum,sum] + HashAggregate [sum,s_state,s_county,spark_grouping_id,sum,ss_net_profit] [sum,sum] Expand [ss_net_profit,s_state,s_county] Project [ss_net_profit,s_state,s_county] BroadcastHashJoin [ss_store_sk,s_store_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt index 5877ab3c2..0153205a5 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt @@ -3,12 +3,12 @@ WholeStageCodegen InputAdapter Exchange [ext_price,brand_id] #1 WholeStageCodegen - HashAggregate [i_brand_id,sum,t_minute,t_hour,sum(UnscaledValue(ext_price)),i_brand] [ext_price,sum,brand,sum(UnscaledValue(ext_price)),brand_id] + HashAggregate [sum(UnscaledValue(ext_price)),sum,t_minute,t_hour,i_brand,i_brand_id] [sum(UnscaledValue(ext_price)),sum,brand,ext_price,brand_id] InputAdapter Exchange [i_brand,i_brand_id,t_hour,t_minute] #2 WholeStageCodegen - HashAggregate [i_brand_id,sum,t_minute,t_hour,sum,ext_price,i_brand] [sum,sum] - Project [ext_price,t_minute,t_hour,i_brand_id,i_brand] + HashAggregate [ext_price,sum,t_minute,t_hour,i_brand,i_brand_id,sum] [sum,sum] + Project [ext_price,t_minute,i_brand_id,i_brand,t_hour] BroadcastHashJoin [time_sk,t_time_sk] Project [i_brand_id,i_brand,ext_price,time_sk] BroadcastHashJoin [i_item_sk,sold_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt index 40feaedbd..0ef6ff570 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt @@ -56,8 +56,8 @@ TakeOrderedAndProject(limit=100, orderBy=[total_cnt#1 DESC NULLS LAST,i_item_des : : : +- *(7) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(8) Project [d_date_sk#14, d_date#15] - : : +- *(8) Filter (isnotnull(d_date_sk#14) && isnotnull(d_date#15)) - : : +- *(8) FileScan parquet default.date_dim[d_date_sk#14,d_date#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_date)], ReadSchema: struct + : : +- *(8) Filter (isnotnull(d_date#15) && isnotnull(d_date_sk#14)) + : : +- *(8) FileScan parquet default.date_dim[d_date_sk#14,d_date#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(9) Project [p_promo_sk#12] : +- *(9) Filter isnotnull(p_promo_sk#12) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt index f62c47de4..ee1925612 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt @@ -1,33 +1,33 @@ -TakeOrderedAndProject [no_promo,w_warehouse_name,promo,i_item_desc,total_cnt,d_week_seq] +TakeOrderedAndProject [i_item_desc,w_warehouse_name,promo,no_promo,d_week_seq,total_cnt] WholeStageCodegen - HashAggregate [w_warehouse_name,i_item_desc,count,d_week_seq,count(1)] [no_promo,promo,total_cnt,count,count(1)] + HashAggregate [i_item_desc,count(1),w_warehouse_name,d_week_seq,count] [count(1),promo,no_promo,count,total_cnt] InputAdapter Exchange [i_item_desc,w_warehouse_name,d_week_seq] #1 WholeStageCodegen - HashAggregate [w_warehouse_name,i_item_desc,count,count,d_week_seq] [count,count] + HashAggregate [i_item_desc,w_warehouse_name,d_week_seq,count,count] [count,count] Project [w_warehouse_name,i_item_desc,d_week_seq] BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] - Project [cs_item_sk,w_warehouse_name,d_week_seq,i_item_desc,cs_order_number] + Project [d_week_seq,cs_order_number,cs_item_sk,w_warehouse_name,i_item_desc] BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_item_sk,w_warehouse_name,d_week_seq,cs_promo_sk,i_item_desc,cs_order_number] + Project [d_week_seq,cs_promo_sk,cs_order_number,cs_item_sk,w_warehouse_name,i_item_desc] BroadcastHashJoin [cs_ship_date_sk,d_date_sk,d_date,d_date] - Project [cs_item_sk,w_warehouse_name,d_week_seq,cs_promo_sk,cs_ship_date_sk,d_date,i_item_desc,cs_order_number] + Project [d_date,d_week_seq,cs_promo_sk,cs_ship_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,i_item_desc] BroadcastHashJoin [d_week_seq,inv_date_sk,d_week_seq,d_date_sk] - Project [cs_item_sk,w_warehouse_name,d_week_seq,cs_promo_sk,cs_ship_date_sk,inv_date_sk,d_date,i_item_desc,cs_order_number] + Project [d_date,d_week_seq,cs_promo_sk,cs_ship_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_item_sk,w_warehouse_name,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,i_item_desc,cs_order_number] + Project [cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc] BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] - Project [cs_item_sk,w_warehouse_name,cs_promo_sk,cs_bill_hdemo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,i_item_desc,cs_order_number] + Project [cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc,cs_bill_hdemo_sk] BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] - Project [cs_bill_cdemo_sk,cs_item_sk,w_warehouse_name,cs_promo_sk,cs_bill_hdemo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,i_item_desc,cs_order_number] + Project [cs_promo_sk,cs_bill_cdemo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc,cs_bill_hdemo_sk] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_bill_cdemo_sk,cs_item_sk,w_warehouse_name,cs_promo_sk,cs_bill_hdemo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,cs_order_number] + Project [cs_promo_sk,cs_bill_cdemo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,cs_bill_hdemo_sk] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_bill_hdemo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,inv_warehouse_sk,cs_order_number] + Project [cs_promo_sk,cs_bill_cdemo_sk,cs_ship_date_sk,inv_warehouse_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,inv_date_sk,cs_bill_hdemo_sk] BroadcastHashJoin [cs_item_sk,inv_item_sk,inv_quantity_on_hand,cs_quantity] - Project [cs_bill_cdemo_sk,cs_quantity,cs_item_sk,cs_promo_sk,cs_bill_hdemo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number] - Filter [cs_quantity,cs_ship_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_sold_date_sk,cs_bill_cdemo_sk] - Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_quantity,cs_item_sk,cs_promo_sk,cs_bill_hdemo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number] [cs_bill_cdemo_sk,cs_quantity,cs_item_sk,cs_promo_sk,cs_bill_hdemo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number] + Project [cs_promo_sk,cs_bill_cdemo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_bill_hdemo_sk] + Filter [cs_bill_cdemo_sk,cs_sold_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_quantity,cs_ship_date_sk] + Scan parquet default.catalog_sales [cs_promo_sk,cs_bill_cdemo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_bill_hdemo_sk] [cs_promo_sk,cs_bill_cdemo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_bill_hdemo_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen @@ -68,13 +68,13 @@ TakeOrderedAndProject [no_promo,w_warehouse_name,promo,i_item_desc,total_cnt,d_w BroadcastExchange #8 WholeStageCodegen Project [d_date_sk,d_week_seq] - Filter [d_week_seq,d_date_sk] + Filter [d_date_sk,d_week_seq] Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] InputAdapter BroadcastExchange #9 WholeStageCodegen Project [d_date_sk,d_date] - Filter [d_date_sk,d_date] + Filter [d_date,d_date_sk] Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] InputAdapter BroadcastExchange #10 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt index e8e43bd18..f94077a9b 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt @@ -3,7 +3,7 @@ WholeStageCodegen InputAdapter Exchange [cnt] #1 WholeStageCodegen - Project [c_last_name,ss_ticket_number,c_preferred_cust_flag,c_first_name,c_salutation,cnt] + Project [ss_ticket_number,c_salutation,cnt,c_last_name,c_preferred_cust_flag,c_first_name] BroadcastHashJoin [ss_customer_sk,c_customer_sk] Filter [cnt] HashAggregate [ss_ticket_number,ss_customer_sk,count,count(1)] [count(1),cnt,count] @@ -17,9 +17,9 @@ WholeStageCodegen BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] + Project [ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] Filter [ss_sold_date_sk,ss_store_sk,ss_hdemo_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] [ss_customer_sk,ss_hdemo_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] [ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -41,6 +41,6 @@ WholeStageCodegen InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_salutation] + Project [c_preferred_cust_flag,c_customer_sk,c_last_name,c_first_name,c_salutation] Filter [c_customer_sk] - Scan parquet default.customer [c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_salutation] [c_last_name,c_preferred_cust_flag,c_first_name,c_customer_sk,c_salutation] + Scan parquet default.customer [c_preferred_cust_flag,c_customer_sk,c_last_name,c_first_name,c_salutation] [c_preferred_cust_flag,c_customer_sk,c_last_name,c_first_name,c_salutation] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt index 831e86b6b..c3144e1d9 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt @@ -1,22 +1,22 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] WholeStageCodegen Project [customer_id,customer_first_name,customer_last_name] - BroadcastHashJoin [customer_id,year_total,year_total,year_total,year_total,customer_id] - Project [year_total,year_total,customer_id,customer_id,customer_last_name,customer_first_name,year_total] + BroadcastHashJoin [year_total,year_total,year_total,year_total,customer_id,customer_id] + Project [customer_first_name,year_total,year_total,customer_last_name,year_total,customer_id,customer_id] BroadcastHashJoin [customer_id,customer_id] BroadcastHashJoin [customer_id,customer_id] InputAdapter Union WholeStageCodegen Filter [year_total] - HashAggregate [sum,c_last_name,sum(UnscaledValue(ss_net_paid)),c_customer_id,d_year,c_first_name] [sum(UnscaledValue(ss_net_paid)),customer_id,year_total,sum] + HashAggregate [d_year,sum(UnscaledValue(ss_net_paid)),c_customer_id,c_last_name,sum,c_first_name] [sum(UnscaledValue(ss_net_paid)),customer_id,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year] #1 WholeStageCodegen - HashAggregate [sum,sum,c_last_name,c_customer_id,d_year,ss_net_paid,c_first_name] [sum,sum] - Project [d_year,c_customer_id,c_last_name,c_first_name,ss_net_paid] + HashAggregate [d_year,sum,ss_net_paid,c_customer_id,c_last_name,sum,c_first_name] [sum,sum] + Project [c_customer_id,d_year,ss_net_paid,c_last_name,c_first_name] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_customer_id,ss_sold_date_sk,c_last_name,c_first_name,ss_net_paid] + Project [c_customer_id,ss_net_paid,c_last_name,c_first_name,ss_sold_date_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] Filter [c_customer_sk,c_customer_id] @@ -38,14 +38,14 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] BroadcastExchange #4 Union WholeStageCodegen - HashAggregate [c_last_name,sum,sum(UnscaledValue(ss_net_paid)),c_customer_id,d_year,c_first_name] [year_total,sum,sum(UnscaledValue(ss_net_paid)),customer_id,customer_last_name,customer_first_name] + HashAggregate [d_year,sum(UnscaledValue(ss_net_paid)),sum,c_customer_id,c_last_name,c_first_name] [sum(UnscaledValue(ss_net_paid)),customer_first_name,sum,customer_last_name,year_total,customer_id] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year] #5 WholeStageCodegen - HashAggregate [c_last_name,sum,c_customer_id,d_year,ss_net_paid,sum,c_first_name] [sum,sum] - Project [d_year,c_customer_id,c_last_name,c_first_name,ss_net_paid] + HashAggregate [d_year,ss_net_paid,sum,c_customer_id,c_last_name,c_first_name,sum] [sum,sum] + Project [c_customer_id,d_year,ss_net_paid,c_last_name,c_first_name] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_customer_id,ss_sold_date_sk,c_last_name,c_first_name,ss_net_paid] + Project [c_customer_id,ss_net_paid,c_last_name,c_first_name,ss_sold_date_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] Filter [c_customer_sk,c_customer_id] @@ -65,14 +65,14 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen Filter [year_total] - HashAggregate [c_last_name,sum,c_customer_id,d_year,sum(UnscaledValue(ws_net_paid)),c_first_name] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + HashAggregate [d_year,sum,sum(UnscaledValue(ws_net_paid)),c_customer_id,c_last_name,c_first_name] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year] #8 WholeStageCodegen - HashAggregate [c_last_name,sum,sum,c_customer_id,d_year,c_first_name,ws_net_paid] [sum,sum] - Project [ws_net_paid,d_year,c_customer_id,c_last_name,c_first_name] + HashAggregate [d_year,sum,sum,ws_net_paid,c_customer_id,c_last_name,c_first_name] [sum,sum] + Project [c_customer_id,ws_net_paid,d_year,c_last_name,c_first_name] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_net_paid,c_customer_id,c_last_name,c_first_name,ws_sold_date_sk] + Project [c_customer_id,ws_net_paid,c_last_name,ws_sold_date_sk,c_first_name] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] Filter [c_customer_sk,c_customer_id] @@ -90,14 +90,14 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] Union LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen - HashAggregate [c_last_name,sum,c_customer_id,d_year,sum(UnscaledValue(ws_net_paid)),c_first_name] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + HashAggregate [d_year,sum(UnscaledValue(ws_net_paid)),c_customer_id,c_last_name,c_first_name,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year] #11 WholeStageCodegen - HashAggregate [c_last_name,sum,c_customer_id,d_year,sum,c_first_name,ws_net_paid] [sum,sum] - Project [ws_net_paid,d_year,c_customer_id,c_last_name,c_first_name] + HashAggregate [d_year,ws_net_paid,c_customer_id,sum,c_last_name,c_first_name,sum] [sum,sum] + Project [c_customer_id,ws_net_paid,d_year,c_last_name,c_first_name] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_net_paid,c_customer_id,c_last_name,c_first_name,ws_sold_date_sk] + Project [c_customer_id,ws_net_paid,c_last_name,ws_sold_date_sk,c_first_name] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] Filter [c_customer_sk,c_customer_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt index b0e2b8b35..5f9929f2d 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt @@ -24,8 +24,8 @@ TakeOrderedAndProject(limit=100, orderBy=[sales_cnt_diff#1 ASC NULLS FIRST], out : : : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#28,cs_order_number#27,cs_quantity#23,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(3) Project [cd_demo_sk#25, cd_marital_status#19, cd_education_status#20] - : : : : +- *(3) Filter ((isnotnull(cd_demo_sk#25) && isnotnull(cd_marital_status#19)) && isnotnull(cd_education_status#20)) - : : : : +- *(3) FileScan parquet default.customer_demographics[cd_demo_sk#25,cd_marital_status#19,cd_education_status#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status)], ReadSchema: struct + : : : : +- *(3) Filter ((isnotnull(cd_demo_sk#25) && isnotnull(cd_education_status#20)) && isnotnull(cd_marital_status#19)) + : : : : +- *(3) FileScan parquet default.customer_demographics[cd_demo_sk#25,cd_marital_status#19,cd_education_status#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_education_status), IsNotNull(cd_marital_status)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint), input[1, string, true], input[2, string, true])) : : : +- *(4) Project [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] - : : : +- *(4) Filter ((isnotnull(cd_marital_status#22) && isnotnull(cd_demo_sk#21)) && isnotnull(cd_education_status#23)) - : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#21,cd_marital_status#22,cd_education_status#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_marital_status), IsNotNull(cd_demo_sk), IsNotNull(cd_education_status)], ReadSchema: struct + : : : +- *(4) Filter ((isnotnull(cd_education_status#23) && isnotnull(cd_marital_status#22)) && isnotnull(cd_demo_sk#21)) + : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#21,cd_marital_status#22,cd_education_status#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_education_status), IsNotNull(cd_marital_status), IsNotNull(cd_demo_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(5) Project [ca_address_sk#15, ca_state#16] : : +- *(5) Filter ((isnotnull(ca_country#33) && (ca_country#33 = United States)) && isnotnull(ca_address_sk#15)) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt index c4cb39c7a..b9fdfc26f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt @@ -1,33 +1,33 @@ -TakeOrderedAndProject [avg(wr_refunded_cash),avg(wr_fee),avg(ws_quantity),substring(r_reason_desc, 1, 20),aggOrder] +TakeOrderedAndProject [avg(wr_fee),aggOrder,avg(ws_quantity),substring(r_reason_desc, 1, 20),avg(wr_refunded_cash)] WholeStageCodegen - HashAggregate [sum,avg(cast(ws_quantity as bigint)),count,r_reason_desc,count,sum,count,avg(UnscaledValue(wr_refunded_cash)),avg(UnscaledValue(wr_fee)),sum] [sum,avg(wr_refunded_cash),avg(cast(ws_quantity as bigint)),avg(wr_fee),count,count,sum,count,avg(UnscaledValue(wr_refunded_cash)),avg(UnscaledValue(wr_fee)),avg(ws_quantity),substring(r_reason_desc, 1, 20),sum,aggOrder] + HashAggregate [r_reason_desc,count,sum,avg(UnscaledValue(wr_fee)),avg(UnscaledValue(wr_refunded_cash)),sum,count,count,avg(cast(ws_quantity as bigint)),sum] [count,avg(wr_fee),aggOrder,avg(ws_quantity),substring(r_reason_desc, 1, 20),sum,avg(UnscaledValue(wr_fee)),avg(wr_refunded_cash),avg(UnscaledValue(wr_refunded_cash)),sum,count,count,avg(cast(ws_quantity as bigint)),sum] InputAdapter Exchange [r_reason_desc] #1 WholeStageCodegen - HashAggregate [sum,sum,count,wr_refunded_cash,count,r_reason_desc,wr_fee,count,sum,sum,count,count,sum,sum,count,ws_quantity] [sum,sum,count,count,count,sum,sum,count,count,sum,sum,count] + HashAggregate [r_reason_desc,count,wr_refunded_cash,sum,count,wr_fee,sum,sum,sum,count,count,count,count,sum,ws_quantity,sum] [count,sum,count,sum,sum,sum,count,count,count,count,sum,sum] Project [ws_quantity,wr_fee,wr_refunded_cash,r_reason_desc] BroadcastHashJoin [wr_reason_sk,r_reason_sk] Project [ws_quantity,wr_reason_sk,wr_fee,wr_refunded_cash] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [wr_fee,wr_reason_sk,ws_quantity,wr_refunded_cash,ws_sold_date_sk] + Project [wr_reason_sk,ws_quantity,wr_refunded_cash,ws_sold_date_sk,wr_fee] BroadcastHashJoin [wr_refunded_addr_sk,ca_address_sk,ca_state,ws_net_profit] - Project [wr_fee,ws_net_profit,wr_refunded_addr_sk,wr_reason_sk,ws_quantity,wr_refunded_cash,ws_sold_date_sk] - BroadcastHashJoin [cd_marital_status,cd_education_status,cd_marital_status,cd_education_status,cd_demo_sk,wr_returning_cdemo_sk] - Project [wr_fee,ws_net_profit,wr_returning_cdemo_sk,wr_refunded_addr_sk,cd_education_status,wr_reason_sk,cd_marital_status,ws_quantity,wr_refunded_cash,ws_sold_date_sk] - BroadcastHashJoin [wr_refunded_cdemo_sk,cd_education_status,cd_demo_sk,cd_marital_status,ws_sales_price] - Project [wr_fee,ws_net_profit,wr_returning_cdemo_sk,wr_refunded_addr_sk,ws_sales_price,wr_reason_sk,ws_quantity,wr_refunded_cash,ws_sold_date_sk,wr_refunded_cdemo_sk] + Project [wr_refunded_addr_sk,wr_reason_sk,ws_quantity,ws_net_profit,wr_refunded_cash,ws_sold_date_sk,wr_fee] + BroadcastHashJoin [cd_education_status,cd_demo_sk,wr_returning_cdemo_sk,cd_education_status,cd_marital_status,cd_marital_status] + Project [wr_refunded_addr_sk,wr_reason_sk,ws_quantity,ws_net_profit,cd_marital_status,wr_refunded_cash,ws_sold_date_sk,wr_fee,wr_returning_cdemo_sk,cd_education_status] + BroadcastHashJoin [cd_education_status,ws_sales_price,cd_demo_sk,cd_marital_status,wr_refunded_cdemo_sk] + Project [wr_refunded_addr_sk,wr_reason_sk,ws_quantity,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_cash,ws_sold_date_sk,wr_fee,wr_returning_cdemo_sk,ws_sales_price] BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] - Project [wr_fee,ws_net_profit,wr_returning_cdemo_sk,wr_refunded_addr_sk,ws_web_page_sk,ws_sales_price,wr_reason_sk,ws_quantity,wr_refunded_cash,ws_sold_date_sk,wr_refunded_cdemo_sk] + Project [wr_refunded_addr_sk,wr_reason_sk,ws_web_page_sk,ws_quantity,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_cash,ws_sold_date_sk,wr_fee,wr_returning_cdemo_sk,ws_sales_price] BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] - Project [ws_net_profit,ws_order_number,ws_web_page_sk,ws_sales_price,ws_quantity,ws_item_sk,ws_sold_date_sk] + Project [ws_web_page_sk,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_sales_price,ws_item_sk] Filter [ws_item_sk,ws_order_number,ws_web_page_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_net_profit,ws_order_number,ws_web_page_sk,ws_sales_price,ws_quantity,ws_item_sk,ws_sold_date_sk] [ws_net_profit,ws_order_number,ws_web_page_sk,ws_sales_price,ws_quantity,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_web_page_sk,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_sales_price,ws_item_sk] [ws_web_page_sk,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_sales_price,ws_item_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [wr_fee,wr_order_number,wr_returning_cdemo_sk,wr_refunded_addr_sk,wr_reason_sk,wr_item_sk,wr_refunded_cash,wr_refunded_cdemo_sk] - Filter [wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_reason_sk,wr_item_sk,wr_order_number,wr_returning_cdemo_sk] - Scan parquet default.web_returns [wr_fee,wr_order_number,wr_returning_cdemo_sk,wr_refunded_addr_sk,wr_reason_sk,wr_item_sk,wr_refunded_cash,wr_refunded_cdemo_sk] [wr_fee,wr_order_number,wr_returning_cdemo_sk,wr_refunded_addr_sk,wr_reason_sk,wr_item_sk,wr_refunded_cash,wr_refunded_cdemo_sk] + Project [wr_order_number,wr_refunded_addr_sk,wr_reason_sk,wr_refunded_cdemo_sk,wr_refunded_cash,wr_fee,wr_returning_cdemo_sk,wr_item_sk] + Filter [wr_item_sk,wr_reason_sk,wr_order_number,wr_returning_cdemo_sk,wr_refunded_addr_sk,wr_refunded_cdemo_sk] + Scan parquet default.web_returns [wr_order_number,wr_refunded_addr_sk,wr_reason_sk,wr_refunded_cdemo_sk,wr_refunded_cash,wr_fee,wr_returning_cdemo_sk,wr_item_sk] [wr_order_number,wr_refunded_addr_sk,wr_reason_sk,wr_refunded_cdemo_sk,wr_refunded_cash,wr_fee,wr_returning_cdemo_sk,wr_item_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -38,13 +38,13 @@ TakeOrderedAndProject [avg(wr_refunded_cash),avg(wr_fee),avg(ws_quantity),substr BroadcastExchange #4 WholeStageCodegen Project [cd_demo_sk,cd_marital_status,cd_education_status] - Filter [cd_demo_sk,cd_marital_status,cd_education_status] + Filter [cd_demo_sk,cd_education_status,cd_marital_status] Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [cd_demo_sk,cd_marital_status,cd_education_status] - Filter [cd_marital_status,cd_education_status,cd_demo_sk] + Filter [cd_education_status,cd_demo_sk,cd_marital_status] Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #6 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt index 7ee3cf5f4..71aae564f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt @@ -1,6 +1,6 @@ -TakeOrderedAndProject [rank_within_parent,i_class,total_sum,lochierarchy,i_category] +TakeOrderedAndProject [i_category,total_sum,rank_within_parent,lochierarchy,i_class] WholeStageCodegen - Project [rank_within_parent,i_class,total_sum,lochierarchy,i_category] + Project [i_category,total_sum,rank_within_parent,lochierarchy,i_class] InputAdapter Window [_w3,_w1,_w2] WholeStageCodegen @@ -8,11 +8,11 @@ TakeOrderedAndProject [rank_within_parent,i_class,total_sum,lochierarchy,i_categ InputAdapter Exchange [_w1,_w2] #1 WholeStageCodegen - HashAggregate [i_class,sum,sum(UnscaledValue(ws_net_paid)),i_category,spark_grouping_id] [sum,sum(UnscaledValue(ws_net_paid)),_w1,_w3,total_sum,lochierarchy,_w2] + HashAggregate [i_category,sum(UnscaledValue(ws_net_paid)),sum,i_class,spark_grouping_id] [_w1,total_sum,sum(UnscaledValue(ws_net_paid)),sum,_w2,lochierarchy,_w3] InputAdapter Exchange [i_category,i_class,spark_grouping_id] #2 WholeStageCodegen - HashAggregate [i_class,sum,i_category,ws_net_paid,spark_grouping_id,sum] [sum,sum] + HashAggregate [i_category,sum,ws_net_paid,i_class,sum,spark_grouping_id] [sum,sum] Expand [ws_net_paid,i_category,i_class] Project [ws_net_paid,i_category,i_class] BroadcastHashJoin [ws_item_sk,i_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt index 6fc2010b4..c5008bdd8 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt @@ -6,10 +6,10 @@ WholeStageCodegen HashAggregate [count,count] [count,count] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - BroadcastHashJoin [d_date,c_last_name,d_date,c_last_name,c_first_name,c_first_name] + BroadcastHashJoin [c_first_name,d_date,d_date,c_last_name,c_last_name,c_first_name] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_first_name,d_date,c_last_name,d_date,c_first_name,c_last_name] + BroadcastHashJoin [d_date,c_last_name,c_first_name,c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] InputAdapter Exchange [c_last_name,c_first_name,d_date] #2 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt index 98d2e6837..31fd0675f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt @@ -1,24 +1,24 @@ -TakeOrderedAndProject [avg_monthly_sales,d_moy,sum_sales,s_store_name,s_company_name,i_category,i_brand,i_class] +TakeOrderedAndProject [s_company_name,d_moy,sum_sales,i_brand,i_category,avg_monthly_sales,s_store_name,i_class] WholeStageCodegen - Project [sum_sales,s_store_name,d_moy,s_company_name,i_category,avg_monthly_sales,i_brand,i_class] + Project [s_company_name,d_moy,sum_sales,i_brand,avg_monthly_sales,i_category,i_class,s_store_name] Filter [avg_monthly_sales,sum_sales] InputAdapter - Window [s_store_name,_w0,s_company_name,i_category,i_brand] + Window [s_company_name,i_brand,i_category,s_store_name,_w0] WholeStageCodegen Sort [i_category,i_brand,s_store_name,s_company_name] InputAdapter Exchange [i_category,i_brand,s_store_name,s_company_name] #1 WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(ss_sales_price)),s_store_name,d_moy,s_company_name,i_category,i_brand,i_class] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + HashAggregate [s_company_name,d_moy,i_brand,sum(UnscaledValue(ss_sales_price)),i_category,sum,i_class,s_store_name] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] InputAdapter - Exchange [s_store_name,d_moy,s_company_name,i_category,i_brand,i_class] #2 + Exchange [s_company_name,d_moy,i_brand,i_category,i_class,s_store_name] #2 WholeStageCodegen - HashAggregate [sum,sum,s_store_name,d_moy,ss_sales_price,s_company_name,i_category,i_brand,i_class] [sum,sum] - Project [ss_sales_price,i_brand,i_category,s_company_name,s_store_name,i_class,d_moy] + HashAggregate [s_company_name,d_moy,i_brand,ss_sales_price,i_category,sum,sum,i_class,s_store_name] [sum,sum] + Project [i_class,s_store_name,s_company_name,d_moy,i_category,ss_sales_price,i_brand] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_sales_price,i_brand,i_category,ss_store_sk,i_class,d_moy] + Project [i_class,d_moy,ss_store_sk,i_category,ss_sales_price,i_brand] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sales_price,i_brand,i_category,ss_sold_date_sk,ss_store_sk,i_class] + Project [i_class,ss_store_sk,i_category,ss_sales_price,ss_sold_date_sk,i_brand] BroadcastHashJoin [i_item_sk,ss_item_sk] Project [i_item_sk,i_brand,i_class,i_category] Filter [i_category,i_class,i_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt index f0259e956..0ed6b1727 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt @@ -1,104 +1,104 @@ == Physical Plan == -*(1) Project [CASE WHEN (Subquery subquery3390 > 62316685) THEN Subquery subquery3391 ELSE Subquery subquery3392 END AS bucket1#1, CASE WHEN (Subquery subquery3394 > 19045798) THEN Subquery subquery3395 ELSE Subquery subquery3396 END AS bucket2#2, CASE WHEN (Subquery subquery3398 > 365541424) THEN Subquery subquery3399 ELSE Subquery subquery3400 END AS bucket3#3, CASE WHEN (Subquery subquery3402 > 216357808) THEN Subquery subquery3403 ELSE Subquery subquery3404 END AS bucket4#4, CASE WHEN (Subquery subquery3406 > 184483884) THEN Subquery subquery3407 ELSE Subquery subquery3408 END AS bucket5#5] -: :- Subquery subquery3390 +*(1) Project [CASE WHEN (Subquery subquery1150 > 62316685) THEN Subquery subquery1151 ELSE Subquery subquery1152 END AS bucket1#1, CASE WHEN (Subquery subquery1154 > 19045798) THEN Subquery subquery1155 ELSE Subquery subquery1156 END AS bucket2#2, CASE WHEN (Subquery subquery1158 > 365541424) THEN Subquery subquery1159 ELSE Subquery subquery1160 END AS bucket3#3, CASE WHEN (Subquery subquery1162 > 216357808) THEN Subquery subquery1163 ELSE Subquery subquery1164 END AS bucket4#4, CASE WHEN (Subquery subquery1166 > 184483884) THEN Subquery subquery1167 ELSE Subquery subquery1168 END AS bucket5#5] +: :- Subquery subquery1150 : : +- *(2) HashAggregate(keys=[], functions=[count(1)]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) : : +- *(1) Project : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct -: :- Subquery subquery3391 +: :- Subquery subquery1151 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- *(1) Project [ss_ext_discount_amt#7] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct -: :- Subquery subquery3392 +: :- Subquery subquery1152 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) : : +- *(1) Project [ss_net_paid#8] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct -: :- Subquery subquery3394 +: :- Subquery subquery1154 : : +- *(2) HashAggregate(keys=[], functions=[count(1)]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) : : +- *(1) Project : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct -: :- Subquery subquery3395 +: :- Subquery subquery1155 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- *(1) Project [ss_ext_discount_amt#7] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct -: :- Subquery subquery3396 +: :- Subquery subquery1156 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) : : +- *(1) Project [ss_net_paid#8] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct -: :- Subquery subquery3398 +: :- Subquery subquery1158 : : +- *(2) HashAggregate(keys=[], functions=[count(1)]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) : : +- *(1) Project : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct -: :- Subquery subquery3399 +: :- Subquery subquery1159 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- *(1) Project [ss_ext_discount_amt#7] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct -: :- Subquery subquery3400 +: :- Subquery subquery1160 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) : : +- *(1) Project [ss_net_paid#8] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct -: :- Subquery subquery3402 +: :- Subquery subquery1162 : : +- *(2) HashAggregate(keys=[], functions=[count(1)]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) : : +- *(1) Project : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct -: :- Subquery subquery3403 +: :- Subquery subquery1163 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- *(1) Project [ss_ext_discount_amt#7] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct -: :- Subquery subquery3404 +: :- Subquery subquery1164 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) : : +- *(1) Project [ss_net_paid#8] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct -: :- Subquery subquery3406 +: :- Subquery subquery1166 : : +- *(2) HashAggregate(keys=[], functions=[count(1)]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) : : +- *(1) Project : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct -: :- Subquery subquery3407 +: :- Subquery subquery1167 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- *(1) Project [ss_ext_discount_amt#7] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) : : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct -: +- Subquery subquery3408 +: +- Subquery subquery1168 : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) : +- Exchange SinglePartition : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt index c55f7a6a8..1851df752 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt @@ -16,7 +16,7 @@ WholeStageCodegen InputAdapter Exchange #2 WholeStageCodegen - HashAggregate [count,sum,count,sum,ss_ext_discount_amt] [sum,count,sum,count] + HashAggregate [sum,ss_ext_discount_amt,sum,count,count] [sum,count,sum,count] Project [ss_ext_discount_amt] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] @@ -26,7 +26,7 @@ WholeStageCodegen InputAdapter Exchange #3 WholeStageCodegen - HashAggregate [count,sum,ss_net_paid,sum,count] [sum,count,sum,count] + HashAggregate [sum,count,ss_net_paid,sum,count] [sum,count,sum,count] Project [ss_net_paid] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] @@ -46,7 +46,7 @@ WholeStageCodegen InputAdapter Exchange #5 WholeStageCodegen - HashAggregate [sum,count,count,ss_ext_discount_amt,sum] [sum,count,sum,count] + HashAggregate [count,sum,count,ss_ext_discount_amt,sum] [sum,count,sum,count] Project [ss_ext_discount_amt] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] @@ -56,7 +56,7 @@ WholeStageCodegen InputAdapter Exchange #6 WholeStageCodegen - HashAggregate [count,sum,sum,ss_net_paid,count] [sum,count,sum,count] + HashAggregate [sum,sum,ss_net_paid,count,count] [sum,count,sum,count] Project [ss_net_paid] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] @@ -76,7 +76,7 @@ WholeStageCodegen InputAdapter Exchange #8 WholeStageCodegen - HashAggregate [sum,count,count,sum,ss_ext_discount_amt] [sum,count,sum,count] + HashAggregate [sum,ss_ext_discount_amt,count,count,sum] [sum,count,sum,count] Project [ss_ext_discount_amt] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] @@ -86,7 +86,7 @@ WholeStageCodegen InputAdapter Exchange #9 WholeStageCodegen - HashAggregate [count,sum,sum,ss_net_paid,count] [sum,count,sum,count] + HashAggregate [sum,count,count,sum,ss_net_paid] [sum,count,sum,count] Project [ss_net_paid] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] @@ -106,7 +106,7 @@ WholeStageCodegen InputAdapter Exchange #11 WholeStageCodegen - HashAggregate [count,sum,count,ss_ext_discount_amt,sum] [sum,count,sum,count] + HashAggregate [ss_ext_discount_amt,sum,count,count,sum] [sum,count,sum,count] Project [ss_ext_discount_amt] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] @@ -116,7 +116,7 @@ WholeStageCodegen InputAdapter Exchange #12 WholeStageCodegen - HashAggregate [count,sum,count,ss_net_paid,sum] [sum,count,sum,count] + HashAggregate [count,sum,ss_net_paid,count,sum] [sum,count,sum,count] Project [ss_net_paid] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] @@ -136,7 +136,7 @@ WholeStageCodegen InputAdapter Exchange #14 WholeStageCodegen - HashAggregate [count,count,sum,sum,ss_ext_discount_amt] [sum,count,sum,count] + HashAggregate [sum,ss_ext_discount_amt,sum,count,count] [sum,count,sum,count] Project [ss_ext_discount_amt] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] @@ -146,7 +146,7 @@ WholeStageCodegen InputAdapter Exchange #15 WholeStageCodegen - HashAggregate [sum,count,ss_net_paid,count,sum] [sum,count,sum,count] + HashAggregate [count,ss_net_paid,sum,count,sum] [sum,count,sum,count] Project [ss_net_paid] Filter [ss_quantity] Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt index 06086853c..4b2c97c9f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt @@ -3,22 +3,22 @@ WholeStageCodegen InputAdapter Exchange [Returns_Loss] #1 WholeStageCodegen - HashAggregate [cc_name,sum(UnscaledValue(cr_net_loss)),cc_manager,cd_education_status,cd_marital_status,sum,cc_call_center_id] [Manager,sum(UnscaledValue(cr_net_loss)),Call_Center_Name,Returns_Loss,sum,Call_Center] + HashAggregate [cc_call_center_id,sum(UnscaledValue(cr_net_loss)),sum,cc_name,cd_education_status,cc_manager,cd_marital_status] [sum(UnscaledValue(cr_net_loss)),sum,Manager,Call_Center_Name,Returns_Loss,Call_Center] InputAdapter - Exchange [cc_name,cc_manager,cd_education_status,cd_marital_status,cc_call_center_id] #2 + Exchange [cc_call_center_id,cc_name,cd_education_status,cc_manager,cd_marital_status] #2 WholeStageCodegen - HashAggregate [cc_name,cc_manager,cd_education_status,cd_marital_status,cr_net_loss,sum,cc_call_center_id,sum] [sum,sum] - Project [cr_net_loss,cc_call_center_id,cc_manager,cd_education_status,cc_name,cd_marital_status] + HashAggregate [cc_call_center_id,sum,cc_name,cd_education_status,cc_manager,cr_net_loss,sum,cd_marital_status] [sum,sum] + Project [cr_net_loss,cd_marital_status,cc_call_center_id,cc_name,cc_manager,cd_education_status] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [cr_net_loss,cc_call_center_id,cc_manager,cd_education_status,cc_name,c_current_hdemo_sk,cd_marital_status] + Project [cr_net_loss,c_current_hdemo_sk,cd_marital_status,cc_call_center_id,cc_name,cc_manager,cd_education_status] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] - Project [cr_net_loss,cc_call_center_id,cc_manager,c_current_cdemo_sk,cc_name,c_current_hdemo_sk] + Project [c_current_cdemo_sk,cr_net_loss,c_current_hdemo_sk,cc_call_center_id,cc_name,cc_manager] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [cr_net_loss,cc_call_center_id,cc_manager,c_current_cdemo_sk,cc_name,c_current_hdemo_sk,c_current_addr_sk] + Project [c_current_cdemo_sk,c_current_addr_sk,cr_net_loss,c_current_hdemo_sk,cc_call_center_id,cc_name,cc_manager] BroadcastHashJoin [cr_returning_customer_sk,c_customer_sk] - Project [cr_net_loss,cc_call_center_id,cc_manager,cr_returning_customer_sk,cc_name] + Project [cr_returning_customer_sk,cr_net_loss,cc_call_center_id,cc_name,cc_manager] BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Project [cr_net_loss,cc_call_center_id,cc_manager,cr_returning_customer_sk,cc_name,cr_returned_date_sk] + Project [cr_returning_customer_sk,cr_net_loss,cc_call_center_id,cr_returned_date_sk,cc_name,cc_manager] BroadcastHashJoin [cc_call_center_sk,cr_call_center_sk] Project [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] Filter [cc_call_center_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt index 09820658d..6137607f6 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt @@ -24,11 +24,11 @@ TakeOrderedAndProject [Excess Discount Amount ] BroadcastExchange #3 WholeStageCodegen Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))] - HashAggregate [ws_item_sk,sum,count,avg(UnscaledValue(ws_ext_discount_amt))] [sum,ws_item_sk,count,avg(UnscaledValue(ws_ext_discount_amt)),(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))] + HashAggregate [ws_item_sk,sum,count,avg(UnscaledValue(ws_ext_discount_amt))] [sum,(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),avg(UnscaledValue(ws_ext_discount_amt)),count,ws_item_sk] InputAdapter Exchange [ws_item_sk] #4 WholeStageCodegen - HashAggregate [sum,ws_ext_discount_amt,count,sum,ws_item_sk,count] [sum,count,sum,count] + HashAggregate [ws_ext_discount_amt,ws_item_sk,sum,sum,count,count] [sum,count,sum,count] Project [ws_item_sk,ws_ext_discount_amt] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] Project [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt index 4fe1e3a08..5c7f07a1b 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt @@ -7,9 +7,9 @@ TakeOrderedAndProject [sumsales,ss_customer_sk] HashAggregate [ss_customer_sk,act_sales,sum,sum] [sum,sum] Project [ss_customer_sk,sr_return_quantity,ss_quantity,ss_sales_price] BroadcastHashJoin [sr_reason_sk,r_reason_sk] - Project [ss_sales_price,ss_customer_sk,ss_quantity,sr_return_quantity,sr_reason_sk] + Project [ss_quantity,sr_return_quantity,ss_customer_sk,sr_reason_sk,ss_sales_price] BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - Scan parquet default.store_sales [ss_sales_price,ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity] [ss_sales_price,ss_customer_sk,ss_ticket_number,ss_item_sk,ss_quantity] + Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_ticket_number] [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_ticket_number] InputAdapter BroadcastExchange #2 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt index 02cf6314c..cc709ac3c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt @@ -1,27 +1,27 @@ TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] WholeStageCodegen - HashAggregate [sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),count(ws_order_number),count,sum(UnscaledValue(ws_net_profit))] [order count ,total shipping cost ,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),total net profit ,count(ws_order_number),count,sum(UnscaledValue(ws_net_profit))] + HashAggregate [sum,sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),count,sum] [sum,sum(UnscaledValue(ws_net_profit)),order count ,total net profit ,count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),count,sum,total shipping cost ] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [ws_order_number,sum,sum,count,sum(UnscaledValue(ws_ext_ship_cost)),count(ws_order_number),sum,sum,sum(UnscaledValue(ws_net_profit))] [sum,sum,count,sum(UnscaledValue(ws_ext_ship_cost)),count(ws_order_number),count,sum,sum,sum(UnscaledValue(ws_net_profit))] - HashAggregate [ws_order_number,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum,sum,sum(UnscaledValue(ws_net_profit))] [sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum,sum,sum(UnscaledValue(ws_net_profit))] + HashAggregate [sum,count,sum,sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),ws_order_number,sum,sum] [sum,count,sum,sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),sum,count,sum] + HashAggregate [sum,sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),ws_order_number,sum,sum] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),sum,sum] InputAdapter Exchange [ws_order_number] #2 WholeStageCodegen - HashAggregate [ws_order_number,ws_ext_ship_cost,sum(UnscaledValue(ws_ext_ship_cost)),ws_net_profit,sum,sum,sum(UnscaledValue(ws_net_profit))] [sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum,sum,sum(UnscaledValue(ws_net_profit))] + HashAggregate [sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),ws_order_number,sum,ws_net_profit,ws_ext_ship_cost] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),sum,sum] Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] BroadcastHashJoin [ws_web_site_sk,web_site_sk] Project [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] - Project [ws_net_profit,ws_ext_ship_cost,ws_web_site_sk,ws_ship_addr_sk,ws_order_number] + Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number] BroadcastHashJoin [ws_ship_date_sk,d_date_sk] BroadcastHashJoin [ws_order_number,wr_order_number] - Project [ws_net_profit,ws_ext_ship_cost,ws_ship_date_sk,ws_web_site_sk,ws_ship_addr_sk,ws_order_number] + Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] - Project [ws_net_profit,ws_ext_ship_cost,ws_ship_date_sk,ws_web_site_sk,ws_ship_addr_sk,ws_order_number,ws_warehouse_sk] + Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk,ws_warehouse_sk] Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] - Scan parquet default.web_sales [ws_net_profit,ws_ext_ship_cost,ws_ship_date_sk,ws_web_site_sk,ws_ship_addr_sk,ws_order_number,ws_warehouse_sk] [ws_net_profit,ws_ext_ship_cost,ws_ship_date_sk,ws_web_site_sk,ws_ship_addr_sk,ws_order_number,ws_warehouse_sk] + Scan parquet default.web_sales [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk,ws_warehouse_sk] [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk,ws_warehouse_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt index 03adb7e35..5b6f33ebd 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt @@ -1,26 +1,26 @@ TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] WholeStageCodegen - HashAggregate [sum,count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),count,sum,sum(UnscaledValue(ws_net_profit))] [sum,count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),total shipping cost ,order count ,count,total net profit ,sum,sum(UnscaledValue(ws_net_profit))] + HashAggregate [count(ws_order_number),sum(UnscaledValue(ws_net_profit)),count,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] [order count ,total shipping cost ,count(ws_order_number),sum(UnscaledValue(ws_net_profit)),count,sum,total net profit ,sum(UnscaledValue(ws_ext_ship_cost)),sum] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [sum,ws_order_number,sum,count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),count,sum,sum,sum(UnscaledValue(ws_net_profit))] [sum,sum,count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),count,count,sum,sum,sum(UnscaledValue(ws_net_profit))] - HashAggregate [sum,ws_order_number,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum,sum,sum(UnscaledValue(ws_net_profit))] [sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum,sum,sum(UnscaledValue(ws_net_profit))] + HashAggregate [sum,sum,count,count(ws_order_number),sum(UnscaledValue(ws_net_profit)),sum,ws_order_number,sum(UnscaledValue(ws_ext_ship_cost)),sum] [sum,sum,count,count(ws_order_number),sum(UnscaledValue(ws_net_profit)),count,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] + HashAggregate [sum,sum,sum(UnscaledValue(ws_net_profit)),sum,ws_order_number,sum(UnscaledValue(ws_ext_ship_cost)),sum] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] InputAdapter Exchange [ws_order_number] #2 WholeStageCodegen - HashAggregate [ws_order_number,sum,sum(UnscaledValue(ws_ext_ship_cost)),ws_ext_ship_cost,sum,ws_net_profit,sum(UnscaledValue(ws_net_profit))] [sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum,sum,sum(UnscaledValue(ws_net_profit))] + HashAggregate [sum,sum,sum(UnscaledValue(ws_net_profit)),ws_order_number,sum(UnscaledValue(ws_ext_ship_cost)),ws_net_profit,ws_ext_ship_cost] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] BroadcastHashJoin [ws_web_site_sk,web_site_sk] Project [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] - Project [ws_net_profit,ws_ext_ship_cost,ws_web_site_sk,ws_ship_addr_sk,ws_order_number] + Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number] BroadcastHashJoin [ws_ship_date_sk,d_date_sk] BroadcastHashJoin [ws_order_number,wr_order_number] BroadcastHashJoin [ws_order_number,ws_order_number] - Project [ws_net_profit,ws_ext_ship_cost,ws_ship_date_sk,ws_web_site_sk,ws_ship_addr_sk,ws_order_number] + Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] - Scan parquet default.web_sales [ws_net_profit,ws_ext_ship_cost,ws_ship_date_sk,ws_web_site_sk,ws_ship_addr_sk,ws_order_number] [ws_net_profit,ws_ext_ship_cost,ws_ship_date_sk,ws_web_site_sk,ws_ship_addr_sk,ws_order_number] + Scan parquet default.web_sales [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt index b2bfbd555..a3cc25d23 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt @@ -1,10 +1,10 @@ CollectLimit WholeStageCodegen - HashAggregate [sum,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] [store_and_catalog,sum,store_only,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),catalog_only,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] + HashAggregate [sum,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] [store_only,sum,catalog_only,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_and_catalog] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [sum,sum,sum,customer_sk,customer_sk,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + HashAggregate [sum,sum,sum,customer_sk,sum,customer_sk,sum,sum] [sum,sum,sum,sum,sum,sum] Project [customer_sk,customer_sk] InputAdapter SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt index b6a936a1c..be26ab7ec 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt @@ -1,10 +1,10 @@ WholeStageCodegen - Project [i_current_price,itemrevenue,revenueratio,i_item_desc,i_category,i_class] - Sort [revenueratio,i_item_desc,i_item_id,i_category,i_class] + Project [revenueratio,i_item_desc,itemrevenue,i_category,i_current_price,i_class] + Sort [revenueratio,i_item_id,i_item_desc,i_category,i_class] InputAdapter - Exchange [revenueratio,i_item_desc,i_item_id,i_category,i_class] #1 + Exchange [revenueratio,i_item_id,i_item_desc,i_category,i_class] #1 WholeStageCodegen - Project [i_current_price,itemrevenue,i_item_desc,_w0,i_item_id,i_category,_we0,i_class] + Project [i_item_id,i_item_desc,_w0,itemrevenue,i_category,_we0,i_current_price,i_class] InputAdapter Window [_w1,i_class] WholeStageCodegen @@ -12,14 +12,14 @@ WholeStageCodegen InputAdapter Exchange [i_class] #2 WholeStageCodegen - HashAggregate [i_current_price,sum(UnscaledValue(ss_ext_sales_price)),i_item_desc,i_item_id,i_category,sum,i_class] [itemrevenue,_w1,sum(UnscaledValue(ss_ext_sales_price)),_w0,sum] + HashAggregate [i_item_id,i_item_desc,sum,sum(UnscaledValue(ss_ext_sales_price)),i_category,i_current_price,i_class] [sum,sum(UnscaledValue(ss_ext_sales_price)),_w0,itemrevenue,_w1] InputAdapter - Exchange [i_current_price,i_item_desc,i_item_id,i_category,i_class] #3 + Exchange [i_item_id,i_item_desc,i_category,i_current_price,i_class] #3 WholeStageCodegen - HashAggregate [i_current_price,sum,i_item_desc,ss_ext_sales_price,i_item_id,i_category,sum,i_class] [sum,sum] - Project [i_current_price,i_category,i_item_id,ss_ext_sales_price,i_class,i_item_desc] + HashAggregate [i_item_id,i_item_desc,sum,i_category,ss_ext_sales_price,sum,i_current_price,i_class] [sum,sum] + Project [i_class,i_current_price,ss_ext_sales_price,i_category,i_item_desc,i_item_id] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [i_current_price,i_category,ss_sold_date_sk,i_item_id,ss_ext_sales_price,i_class,i_item_desc] + Project [i_class,i_current_price,ss_ext_sales_price,i_category,ss_sold_date_sk,i_item_desc,i_item_id] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] Filter [ss_item_sk,ss_sold_date_sk] @@ -27,9 +27,9 @@ WholeStageCodegen InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [i_current_price,i_category,i_item_sk,i_item_id,i_class,i_item_desc] + Project [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] Filter [i_category,i_item_sk] - Scan parquet default.item [i_current_price,i_category,i_item_sk,i_item_id,i_class,i_item_desc] [i_current_price,i_category,i_item_sk,i_item_id,i_class,i_item_desc] + Scan parquet default.item [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] InputAdapter BroadcastExchange #5 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt index 9b05c64d9..934bf3e47 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt @@ -1,21 +1,21 @@ -TakeOrderedAndProject [30 days ,cc_name,91 - 120 days ,61 - 90 days ,substring(w_warehouse_name, 1, 20),31 - 60 days ,sm_type,>120 days ] +TakeOrderedAndProject [substring(w_warehouse_name, 1, 20),61 - 90 days ,cc_name,>120 days ,91 - 120 days ,30 days ,sm_type,31 - 60 days ] WholeStageCodegen - HashAggregate [sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),cc_name,sum,sum,sum,sum,substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sm_type] [sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),30 days ,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum,sum,91 - 120 days ,sum,sum,sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,substring(w_warehouse_name, 1, 20),31 - 60 days ,sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),>120 days ] + HashAggregate [sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum,cc_name,sum,sm_type,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum] [sum,substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,sum,>120 days ,sum,91 - 120 days ,30 days ,31 - 60 days ,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum] InputAdapter Exchange [substring(w_warehouse_name, 1, 20),sm_type,cc_name] #1 WholeStageCodegen - HashAggregate [cc_name,sum,sum,cs_ship_date_sk,w_warehouse_name,sum,sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sum,cs_sold_date_sk,sum,sum,sm_type] [sum,sum,sum,sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum] - Project [w_warehouse_name,cs_ship_date_sk,sm_type,cs_sold_date_sk,cc_name] + HashAggregate [sum,sum,sum,substring(w_warehouse_name, 1, 20),w_warehouse_name,sum,sum,cs_sold_date_sk,sum,cc_name,sum,sum,sm_type,cs_ship_date_sk,sum,sum] [sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum] + Project [cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name,cc_name] BroadcastHashJoin [cs_ship_date_sk,d_date_sk] - Project [w_warehouse_name,cs_ship_date_sk,sm_type,cs_sold_date_sk,cc_name] + Project [cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name,cc_name] BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] - Project [cs_call_center_sk,w_warehouse_name,cs_ship_date_sk,sm_type,cs_sold_date_sk] + Project [cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name,cs_call_center_sk] BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] - Project [cs_call_center_sk,cs_ship_mode_sk,w_warehouse_name,cs_ship_date_sk,cs_sold_date_sk] + Project [cs_ship_date_sk,cs_sold_date_sk,cs_ship_mode_sk,w_warehouse_name,cs_call_center_sk] BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] - Project [cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_ship_date_sk,cs_sold_date_sk] + Project [cs_ship_date_sk,cs_warehouse_sk,cs_sold_date_sk,cs_ship_mode_sk,cs_call_center_sk] Filter [cs_warehouse_sk,cs_ship_mode_sk,cs_call_center_sk,cs_ship_date_sk] - Scan parquet default.catalog_sales [cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_ship_date_sk,cs_sold_date_sk] [cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_ship_date_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_ship_date_sk,cs_warehouse_sk,cs_sold_date_sk,cs_ship_mode_sk,cs_call_center_sk] [cs_ship_date_sk,cs_warehouse_sk,cs_sold_date_sk,cs_ship_mode_sk,cs_call_center_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen From 97e84410270acfcd216b0fccd4464cf2bcfcecba Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Wed, 10 Mar 2021 16:48:03 -0800 Subject: [PATCH 09/31] added sorting for fixing build pipeline --- .../microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala index 659b5debb..08b18aaeb 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala @@ -198,7 +198,7 @@ trait PlanStabilitySuite extends TPCDSBase with Logging { * "sum(sr_return_amt#14)", so we remove all of these using regex */ def cleanUpReferences(references: AttributeSet): String = { - referenceRegex.replaceAllIn(references.map(_.name).mkString(","), "") + referenceRegex.replaceAllIn(references.toSeq.map(_.name).sorted.mkString(","), "") } /** From 411450f8b97f441c09397d850e5981e5ad657b60 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Wed, 10 Mar 2021 17:42:02 -0800 Subject: [PATCH 10/31] udpated plans with sorting --- .../approved-plans-v1_4/q1/simplified.txt | 42 ++-- .../approved-plans-v1_4/q10/simplified.txt | 42 ++-- .../approved-plans-v1_4/q11/simplified.txt | 94 +++---- .../approved-plans-v1_4/q12/simplified.txt | 28 +-- .../approved-plans-v1_4/q13/simplified.txt | 40 +-- .../approved-plans-v1_4/q14a/simplified.txt | 232 +++++++++--------- .../approved-plans-v1_4/q14b/simplified.txt | 196 +++++++-------- .../approved-plans-v1_4/q15/explain.txt | 2 +- .../approved-plans-v1_4/q15/simplified.txt | 26 +- .../approved-plans-v1_4/q16/simplified.txt | 40 +-- .../approved-plans-v1_4/q17/simplified.txt | 64 ++--- .../approved-plans-v1_4/q18/simplified.txt | 54 ++-- .../approved-plans-v1_4/q19/simplified.txt | 48 ++-- .../approved-plans-v1_4/q2/simplified.txt | 34 +-- .../approved-plans-v1_4/q20/simplified.txt | 24 +- .../approved-plans-v1_4/q21/simplified.txt | 36 +-- .../approved-plans-v1_4/q22/simplified.txt | 30 +-- .../approved-plans-v1_4/q23a/simplified.txt | 92 +++---- .../approved-plans-v1_4/q23b/simplified.txt | 108 ++++---- .../approved-plans-v1_4/q24a/simplified.txt | 116 ++++----- .../approved-plans-v1_4/q24b/simplified.txt | 116 ++++----- .../approved-plans-v1_4/q25/simplified.txt | 70 +++--- .../approved-plans-v1_4/q26/simplified.txt | 34 +-- .../approved-plans-v1_4/q27/simplified.txt | 44 ++-- .../approved-plans-v1_4/q28/simplified.txt | 72 +++--- .../approved-plans-v1_4/q29/simplified.txt | 68 ++--- .../approved-plans-v1_4/q3/simplified.txt | 28 +-- .../approved-plans-v1_4/q30/simplified.txt | 60 ++--- .../approved-plans-v1_4/q31/simplified.txt | 142 +++++------ .../approved-plans-v1_4/q32/simplified.txt | 26 +- .../approved-plans-v1_4/q33/explain.txt | 8 +- .../approved-plans-v1_4/q33/simplified.txt | 70 +++--- .../approved-plans-v1_4/q34/simplified.txt | 40 +-- .../approved-plans-v1_4/q35/simplified.txt | 40 +-- .../approved-plans-v1_4/q36/simplified.txt | 38 +-- .../approved-plans-v1_4/q37/simplified.txt | 28 +-- .../approved-plans-v1_4/q38/simplified.txt | 80 +++--- .../approved-plans-v1_4/q39a/simplified.txt | 68 ++--- .../approved-plans-v1_4/q39b/simplified.txt | 68 ++--- .../approved-plans-v1_4/q4/simplified.txt | 144 +++++------ .../approved-plans-v1_4/q40/simplified.txt | 38 +-- .../approved-plans-v1_4/q41/simplified.txt | 12 +- .../approved-plans-v1_4/q42/simplified.txt | 30 +-- .../approved-plans-v1_4/q43/simplified.txt | 26 +- .../approved-plans-v1_4/q44/simplified.txt | 46 ++-- .../approved-plans-v1_4/q45/simplified.txt | 46 ++-- .../approved-plans-v1_4/q46/simplified.txt | 50 ++-- .../approved-plans-v1_4/q47/simplified.txt | 80 +++--- .../approved-plans-v1_4/q48/simplified.txt | 32 +-- .../approved-plans-v1_4/q49/simplified.txt | 94 +++---- .../approved-plans-v1_4/q5/simplified.txt | 92 +++---- .../approved-plans-v1_4/q50/simplified.txt | 44 ++-- .../approved-plans-v1_4/q51/simplified.txt | 68 ++--- .../approved-plans-v1_4/q52/simplified.txt | 28 +-- .../approved-plans-v1_4/q53/simplified.txt | 30 +-- .../approved-plans-v1_4/q54/simplified.txt | 82 +++---- .../approved-plans-v1_4/q55/simplified.txt | 28 +-- .../approved-plans-v1_4/q56/explain.txt | 8 +- .../approved-plans-v1_4/q56/simplified.txt | 80 +++--- .../approved-plans-v1_4/q57/simplified.txt | 76 +++--- .../approved-plans-v1_4/q58/simplified.txt | 72 +++--- .../approved-plans-v1_4/q59/simplified.txt | 50 ++-- .../approved-plans-v1_4/q6/simplified.txt | 48 ++-- .../approved-plans-v1_4/q60/explain.txt | 8 +- .../approved-plans-v1_4/q60/simplified.txt | 78 +++--- .../approved-plans-v1_4/q61/simplified.txt | 74 +++--- .../approved-plans-v1_4/q62/simplified.txt | 40 +-- .../approved-plans-v1_4/q63/simplified.txt | 30 +-- .../approved-plans-v1_4/q64/simplified.txt | 192 +++++++-------- .../approved-plans-v1_4/q65/simplified.txt | 56 ++--- .../approved-plans-v1_4/q66/simplified.txt | 74 +++--- .../approved-plans-v1_4/q67/simplified.txt | 44 ++-- .../approved-plans-v1_4/q68/simplified.txt | 52 ++-- .../approved-plans-v1_4/q69/simplified.txt | 42 ++-- .../approved-plans-v1_4/q7/simplified.txt | 40 +-- .../approved-plans-v1_4/q70/simplified.txt | 54 ++-- .../approved-plans-v1_4/q71/simplified.txt | 50 ++-- .../approved-plans-v1_4/q72/simplified.txt | 72 +++--- .../approved-plans-v1_4/q73/simplified.txt | 36 +-- .../approved-plans-v1_4/q74/simplified.txt | 90 +++---- .../approved-plans-v1_4/q75/simplified.txt | 170 ++++++------- .../approved-plans-v1_4/q76/simplified.txt | 62 ++--- .../approved-plans-v1_4/q77/simplified.txt | 100 ++++---- .../approved-plans-v1_4/q78/simplified.txt | 76 +++--- .../approved-plans-v1_4/q79/simplified.txt | 40 +-- .../approved-plans-v1_4/q8/explain.txt | 6 +- .../approved-plans-v1_4/q8/simplified.txt | 32 +-- .../approved-plans-v1_4/q80/simplified.txt | 122 ++++----- .../approved-plans-v1_4/q81/simplified.txt | 60 ++--- .../approved-plans-v1_4/q82/simplified.txt | 26 +- .../approved-plans-v1_4/q83/simplified.txt | 68 ++--- .../approved-plans-v1_4/q84/simplified.txt | 16 +- .../approved-plans-v1_4/q85/simplified.txt | 66 ++--- .../approved-plans-v1_4/q86/simplified.txt | 30 +-- .../approved-plans-v1_4/q87/simplified.txt | 80 +++--- .../approved-plans-v1_4/q88/simplified.txt | 100 ++++---- .../approved-plans-v1_4/q89/simplified.txt | 42 ++-- .../approved-plans-v1_4/q9/simplified.txt | 70 +++--- .../approved-plans-v1_4/q90/simplified.txt | 32 +-- .../approved-plans-v1_4/q91/simplified.txt | 48 ++-- .../approved-plans-v1_4/q92/simplified.txt | 38 +-- .../approved-plans-v1_4/q93/simplified.txt | 24 +- .../approved-plans-v1_4/q94/simplified.txt | 42 ++-- .../approved-plans-v1_4/q95/simplified.txt | 50 ++-- .../approved-plans-v1_4/q96/simplified.txt | 16 +- .../approved-plans-v1_4/q97/simplified.txt | 20 +- .../approved-plans-v1_4/q98/simplified.txt | 32 +-- .../approved-plans-v1_4/q99/simplified.txt | 30 +-- 108 files changed, 3141 insertions(+), 3141 deletions(-) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt index 6104e2ac9..4f838850c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt @@ -1,47 +1,47 @@ TakeOrderedAndProject [c_customer_id] WholeStageCodegen Project [c_customer_id] - BroadcastHashJoin [ctr_customer_sk,c_customer_sk] + BroadcastHashJoin [c_customer_sk,ctr_customer_sk] Project [ctr_customer_sk] BroadcastHashJoin [ctr_store_sk,s_store_sk] Project [ctr_customer_sk,ctr_store_sk] - BroadcastHashJoin [ctr_store_sk,ctr_store_skL,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + BroadcastHashJoin [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_store_sk,ctr_store_skL,ctr_total_return] Filter [ctr_total_return] - HashAggregate [sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] [ctr_customer_sk,ctr_total_return,ctr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] + HashAggregate [sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] [ctr_customer_sk,ctr_store_sk,ctr_total_return,sum,sum(UnscaledValue(sr_return_amt))] InputAdapter Exchange [sr_customer_sk,sr_store_sk] #1 WholeStageCodegen - HashAggregate [sr_customer_sk,sum,sr_return_amt,sum,sr_store_sk] [sum,sum] - Project [sr_customer_sk,sr_store_sk,sr_return_amt] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] - Filter [sr_returned_date_sk,sr_store_sk,sr_customer_sk] - Scan parquet default.store_returns [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] + HashAggregate [sr_customer_sk,sr_return_amt,sr_store_sk,sum,sum] [sum,sum] + Project [sr_customer_sk,sr_return_amt,sr_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] + Filter [sr_customer_sk,sr_returned_date_sk,sr_store_sk] + Scan parquet default.store_returns [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #3 WholeStageCodegen Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] - HashAggregate [ctr_store_sk,sum,count,avg(ctr_total_return)] [avg(ctr_total_return),ctr_store_skL,sum,count,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + HashAggregate [avg(ctr_total_return),count,ctr_store_sk,sum] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),avg(ctr_total_return),count,ctr_store_skL,sum] InputAdapter Exchange [ctr_store_sk] #4 WholeStageCodegen - HashAggregate [ctr_store_sk,ctr_total_return,sum,count,sum,count] [sum,count,sum,count] - HashAggregate [sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] [sum(UnscaledValue(sr_return_amt)),ctr_store_sk,ctr_total_return,sum] + HashAggregate [count,count,ctr_store_sk,ctr_total_return,sum,sum] [count,count,sum,sum] + HashAggregate [sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] [ctr_store_sk,ctr_total_return,sum,sum(UnscaledValue(sr_return_amt))] InputAdapter Exchange [sr_customer_sk,sr_store_sk] #5 WholeStageCodegen - HashAggregate [sum,sr_customer_sk,sum,sr_return_amt,sr_store_sk] [sum,sum] - Project [sr_customer_sk,sr_store_sk,sr_return_amt] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] + HashAggregate [sr_customer_sk,sr_return_amt,sr_store_sk,sum,sum] [sum,sum] + Project [sr_customer_sk,sr_return_amt,sr_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] Filter [sr_returned_date_sk,sr_store_sk] - Scan parquet default.store_returns [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] + Scan parquet default.store_returns [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #2 InputAdapter @@ -49,10 +49,10 @@ TakeOrderedAndProject [c_customer_id] WholeStageCodegen Project [s_store_sk] Filter [s_state,s_store_sk] - Scan parquet default.store [s_store_sk,s_state] [s_store_sk,s_state] + Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [c_customer_sk,c_customer_id] + Project [c_customer_id,c_customer_sk] Filter [c_customer_sk] - Scan parquet default.customer [c_customer_sk,c_customer_id] [c_customer_sk,c_customer_id] + Scan parquet default.customer [c_customer_id,c_customer_sk] [c_customer_id,c_customer_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/simplified.txt index 876a5eab0..bba77af4d 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/simplified.txt @@ -1,44 +1,44 @@ -TakeOrderedAndProject [cnt5,cnt6,cnt1,cnt4,cd_purchase_estimate,cnt2,cnt3,cd_education_status,cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_marital_status,cd_gender,cd_dep_employed_count] +TakeOrderedAndProject [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,cnt1,cnt2,cnt3,cnt4,cnt5,cnt6] WholeStageCodegen - HashAggregate [count,count(1),cd_purchase_estimate,cd_education_status,cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_marital_status,cd_gender,cd_dep_employed_count] [cnt5,cnt6,cnt1,count,cnt4,count(1),cnt2,cnt3] + HashAggregate [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,count,count(1)] [cnt1,cnt2,cnt3,cnt4,cnt5,cnt6,count,count(1)] InputAdapter - Exchange [cd_purchase_estimate,cd_education_status,cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_marital_status,cd_gender,cd_dep_employed_count] #1 + Exchange [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] #1 WholeStageCodegen - HashAggregate [count,cd_purchase_estimate,count,cd_education_status,cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_marital_status,cd_gender,cd_dep_employed_count] [count,count] - Project [cd_dep_college_count,cd_dep_employed_count,cd_marital_status,cd_dep_count,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] + HashAggregate [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,count,count] [count,count] + Project [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] Project [c_current_cdemo_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_cdemo_sk,c_current_addr_sk] + Project [c_current_addr_sk,c_current_cdemo_sk] Filter [exists,exists] BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] Filter [c_current_addr_sk,c_current_cdemo_sk] - Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_sold_date_sk] Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] [ss_sold_date_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [ws_bill_customer_sk] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_customer_sk,ws_sold_date_sk] Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] [ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter @@ -46,20 +46,20 @@ TakeOrderedAndProject [cnt5,cnt6,cnt1,cnt4,cd_purchase_estimate,cnt2,cnt3,cd_edu WholeStageCodegen Project [cs_ship_customer_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_ship_customer_sk] + Project [cs_ship_customer_sk,cs_sold_date_sk] Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_customer_sk] [cs_sold_date_sk,cs_ship_customer_sk] + Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] [cs_ship_customer_sk,cs_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter BroadcastExchange #6 WholeStageCodegen Project [ca_address_sk] - Filter [ca_county,ca_address_sk] + Filter [ca_address_sk,ca_county] Scan parquet default.customer_address [ca_address_sk,ca_county] [ca_address_sk,ca_county] InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [cd_dep_college_count,cd_dep_employed_count,cd_demo_sk,cd_marital_status,cd_dep_count,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] + Project [cd_credit_rating,cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_dep_college_count,cd_dep_employed_count,cd_demo_sk,cd_marital_status,cd_dep_count,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] [cd_dep_college_count,cd_dep_employed_count,cd_demo_sk,cd_marital_status,cd_dep_count,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] + Scan parquet default.customer_demographics [cd_credit_rating,cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] [cd_credit_rating,cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt index 75734357c..b2c7f30e5 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt @@ -1,63 +1,63 @@ TakeOrderedAndProject [customer_preferred_cust_flag] WholeStageCodegen Project [customer_preferred_cust_flag] - BroadcastHashJoin [year_total,year_total,customer_id,year_total,year_total,customer_id] - Project [customer_preferred_cust_flag,year_total,year_total,customer_id,year_total] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_id,customer_preferred_cust_flag,year_total,year_total,year_total] BroadcastHashJoin [customer_id,customer_id] - Project [customer_id,year_total,customer_preferred_cust_flag,year_total] + Project [customer_id,customer_preferred_cust_flag,year_total,year_total] BroadcastHashJoin [customer_id,customer_id] InputAdapter Union WholeStageCodegen Filter [year_total] - HashAggregate [d_year,sum,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),c_first_name,c_email_address] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2))))] [customer_id,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),year_total] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #1 + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #1 WholeStageCodegen - HashAggregate [d_year,sum,ss_ext_discount_amt,c_login,sum,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,ss_ext_list_price,c_first_name,c_email_address] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,d_year,c_login,c_last_name,c_first_name,c_birth_country,ss_ext_list_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,c_login,c_last_name,c_first_name,ss_sold_date_sk,c_birth_country,ss_ext_list_price] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,sum,sum] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] - Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + Filter [c_customer_id,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [ss_sold_date_sk,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price] + Project [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price] [ss_sold_date_sk,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price] + Scan parquet default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk,d_year] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] LocalTableScan [customer_id,year_total] [customer_id,year_total] InputAdapter BroadcastExchange #4 Union WholeStageCodegen - HashAggregate [d_year,sum,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),c_first_name,c_email_address] [customer_preferred_cust_flag,sum,year_total,customer_id,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2))))] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2))))] [customer_id,customer_preferred_cust_flag,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),year_total] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #5 + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #5 WholeStageCodegen - HashAggregate [d_year,sum,ss_ext_discount_amt,c_login,sum,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,ss_ext_list_price,c_first_name,c_email_address] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,d_year,c_login,c_last_name,c_first_name,c_birth_country,ss_ext_list_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,c_login,c_last_name,c_first_name,ss_sold_date_sk,c_birth_country,ss_ext_list_price] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,sum,sum] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] - Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + Filter [c_customer_id,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] InputAdapter - ReusedExchange [ss_sold_date_sk,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price] [ss_sold_date_sk,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price] #2 + ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] #2 InputAdapter BroadcastExchange #6 WholeStageCodegen Project [d_date_sk,d_year] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] LocalTableScan [customer_id,customer_preferred_cust_flag,year_total] [customer_id,customer_preferred_cust_flag,year_total] InputAdapter @@ -66,24 +66,24 @@ TakeOrderedAndProject [customer_preferred_cust_flag] LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen Filter [year_total] - HashAggregate [d_year,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),sum,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2))))] [customer_id,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),year_total] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #8 + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #8 WholeStageCodegen - HashAggregate [d_year,ws_ext_discount_amt,sum,c_login,sum,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,ws_ext_list_price,c_first_name,c_email_address] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,d_year,ws_ext_discount_amt,c_login,c_last_name,ws_ext_list_price,c_first_name,c_birth_country] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ws_ext_discount_amt,c_login,c_last_name,ws_sold_date_sk,ws_ext_list_price,c_first_name,c_birth_country] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum,ws_ext_discount_amt,ws_ext_list_price] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] - Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + Filter [c_customer_id,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] InputAdapter BroadcastExchange #9 WholeStageCodegen - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price] + Project [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] Filter [ws_bill_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price] [ws_sold_date_sk,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 InputAdapter @@ -91,19 +91,19 @@ TakeOrderedAndProject [customer_preferred_cust_flag] Union LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen - HashAggregate [d_year,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),customer_id,year_total,sum] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2))))] [customer_id,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),year_total] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #11 + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #11 WholeStageCodegen - HashAggregate [d_year,ws_ext_discount_amt,sum,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,ws_ext_list_price,c_first_name,c_email_address,sum] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,d_year,ws_ext_discount_amt,c_login,c_last_name,ws_ext_list_price,c_first_name,c_birth_country] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ws_ext_discount_amt,c_login,c_last_name,ws_sold_date_sk,ws_ext_list_price,c_first_name,c_birth_country] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum,ws_ext_discount_amt,ws_ext_list_price] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] - Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + Filter [c_customer_id,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] InputAdapter - ReusedExchange [ws_sold_date_sk,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price] [ws_sold_date_sk,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price] #9 + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] #9 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #6 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt index 497db4cde..b815e3d91 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt @@ -1,6 +1,6 @@ -TakeOrderedAndProject [i_item_id,i_item_desc,itemrevenue,i_category,revenueratio,i_current_price,i_class] +TakeOrderedAndProject [i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue,revenueratio] WholeStageCodegen - Project [i_item_id,i_item_desc,itemrevenue,_we0,i_category,i_current_price,i_class,_w0] + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] InputAdapter Window [_w1,i_class] WholeStageCodegen @@ -8,27 +8,27 @@ TakeOrderedAndProject [i_item_id,i_item_desc,itemrevenue,i_category,revenueratio InputAdapter Exchange [i_class] #1 WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,i_category,sum,i_current_price,sum(UnscaledValue(ws_ext_sales_price)),i_class] [itemrevenue,_w1,sum,sum(UnscaledValue(ws_ext_sales_price)),_w0] + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(ws_ext_sales_price))] InputAdapter - Exchange [i_item_id,i_item_desc,i_category,i_current_price,i_class] #2 + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #2 WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,ws_ext_sales_price,i_category,sum,i_current_price,i_class,sum] [sum,sum] - Project [i_class,i_current_price,ws_ext_sales_price,i_category,i_item_desc,i_item_id] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [i_class,i_current_price,ws_ext_sales_price,ws_sold_date_sk,i_category,i_item_desc,i_item_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum,ws_ext_sales_price] [sum,sum] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ws_ext_sales_price,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] Filter [i_category,i_item_sk] - Scan parquet default.item [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt index 0b02236c0..cd146d06c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt @@ -1,22 +1,22 @@ WholeStageCodegen - HashAggregate [sum,sum,sum(UnscaledValue(ss_ext_wholesale_cost)),sum,avg(cast(ss_quantity as bigint)),count,sum,avg(UnscaledValue(ss_ext_sales_price)),count,avg(UnscaledValue(ss_ext_wholesale_cost)),count] [sum,avg(ss_ext_sales_price),sum,sum(UnscaledValue(ss_ext_wholesale_cost)),sum,sum(ss_ext_wholesale_cost),avg(ss_ext_wholesale_cost),avg(cast(ss_quantity as bigint)),avg(ss_quantity),count,sum,avg(UnscaledValue(ss_ext_sales_price)),count,avg(UnscaledValue(ss_ext_wholesale_cost)),count] + HashAggregate [avg(UnscaledValue(ss_ext_sales_price)),avg(UnscaledValue(ss_ext_wholesale_cost)),avg(cast(ss_quantity as bigint)),count,count,count,sum,sum,sum,sum,sum(UnscaledValue(ss_ext_wholesale_cost))] [avg(UnscaledValue(ss_ext_sales_price)),avg(UnscaledValue(ss_ext_wholesale_cost)),avg(cast(ss_quantity as bigint)),avg(ss_ext_sales_price),avg(ss_ext_wholesale_cost),avg(ss_quantity),count,count,count,sum,sum,sum,sum,sum(UnscaledValue(ss_ext_wholesale_cost)),sum(ss_ext_wholesale_cost)] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [ss_ext_wholesale_cost,count,sum,sum,count,sum,count,sum,sum,count,sum,sum,sum,ss_ext_sales_price,count,ss_quantity,count] [count,sum,sum,count,sum,count,sum,sum,count,sum,sum,sum,count,count] - Project [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] - BroadcastHashJoin [hd_dep_count,ss_hdemo_sk,ss_sales_price,cd_education_status,hd_demo_sk,cd_marital_status] - Project [ss_quantity,cd_marital_status,ss_ext_sales_price,ss_sales_price,ss_hdemo_sk,cd_education_status,ss_ext_wholesale_cost] - BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [ss_quantity,ss_ext_sales_price,ss_sales_price,ss_cdemo_sk,ss_hdemo_sk,ss_ext_wholesale_cost] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_ext_sales_price,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] - BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] - Project [ss_addr_sk,ss_quantity,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_addr_sk,ss_quantity,ss_store_sk,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] - Filter [ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_addr_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_quantity,ss_store_sk,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] [ss_addr_sk,ss_quantity,ss_store_sk,ss_ext_sales_price,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ext_wholesale_cost] + HashAggregate [count,count,count,count,count,count,ss_ext_sales_price,ss_ext_wholesale_cost,ss_quantity,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_ext_wholesale_cost,ss_quantity] + BroadcastHashJoin [cd_education_status,cd_marital_status,hd_demo_sk,hd_dep_count,ss_hdemo_sk,ss_sales_price] + Project [cd_education_status,cd_marital_status,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Project [ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [ca_address_sk,ca_state,ss_addr_sk,ss_net_profit] + Project [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_addr_sk,ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen @@ -27,20 +27,20 @@ WholeStageCodegen BroadcastExchange #3 WholeStageCodegen Project [ca_address_sk,ca_state] - Filter [ca_country,ca_address_sk] - Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] [ca_address_sk,ca_state,ca_country] + Filter [ca_address_sk,ca_country] + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] [ca_address_sk,ca_country,ca_state] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [cd_demo_sk,cd_marital_status,cd_education_status] + Project [cd_demo_sk,cd_education_status,cd_marital_status] Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] InputAdapter BroadcastExchange #6 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt index 93bae0792..837757bfe 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt @@ -1,267 +1,267 @@ -TakeOrderedAndProject [sum(sales),i_brand_id,sum(number_sales),channel,i_category_id,i_class_id] +TakeOrderedAndProject [channel,i_brand_id,i_category_id,i_class_id,sum(number_sales),sum(sales)] WholeStageCodegen - HashAggregate [sum(sales),spark_grouping_id,i_brand_id,sum,sum,channel,i_category_id,i_class_id,sum(number_salesL)] [sum(sales),sum(sales),sum(number_sales),sum,sum,sum(number_salesL)] + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,spark_grouping_id,sum,sum,sum(number_salesL),sum(sales)] [sum,sum,sum(number_salesL),sum(number_sales),sum(sales),sum(sales)] InputAdapter - Exchange [spark_grouping_id,i_brand_id,channel,i_category_id,i_class_id] #1 + Exchange [channel,i_brand_id,i_category_id,i_class_id,spark_grouping_id] #1 WholeStageCodegen - HashAggregate [sales,spark_grouping_id,sum,i_brand_id,sum,number_sales,sum,sum,channel,i_category_id,i_class_id] [sum,sum,sum,sum] - Expand [channel,sales,i_category_id,number_sales,i_class_id,i_brand_id] + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales,spark_grouping_id,sum,sum,sum,sum] [sum,sum,sum,sum] + Expand [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] InputAdapter Union WholeStageCodegen - Project [channel,sales,i_category_id,number_sales,i_class_id,i_brand_id] + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #1 WholeStageCodegen - HashAggregate [sum,count,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2)))] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] + HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] InputAdapter Exchange #13 WholeStageCodegen - HashAggregate [count,list_price,sum,quantity,sum,count] [sum,count,sum,count] + HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] InputAdapter Union WholeStageCodegen - Project [ss_quantity,ss_list_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity,ss_list_price] + Project [ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_list_price,ss_quantity,ss_sold_date_sk] Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_quantity,ss_list_price] + Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] InputAdapter BroadcastExchange #14 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] WholeStageCodegen - Project [cs_quantity,cs_list_price] + Project [cs_list_price,cs_quantity] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price] + Project [cs_list_price,cs_quantity,cs_sold_date_sk] Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] [cs_sold_date_sk,cs_quantity,cs_list_price] + Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #14 WholeStageCodegen - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price] + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_list_price,ws_quantity,ws_sold_date_sk] Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] + Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #14 - HashAggregate [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,i_category_id,i_brand_id,sum,i_class_id] [sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,number_sales,channel,sum] + HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #2 + Exchange [i_brand_id,i_category_id,i_class_id] #2 WholeStageCodegen - HashAggregate [count,count,sum,ss_list_price,i_category_id,i_brand_id,ss_quantity,sum,i_class_id] [sum,count,sum,count] - Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,i_class_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,ss_sold_date_sk,i_class_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] + HashAggregate [count,count,i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,sum,sum] [count,count,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] BroadcastHashJoin [ss_item_sk,ss_item_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + Project [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [i_item_sk] - BroadcastHashJoin [category_id,class_id,i_category_id,i_brand_id,i_class_id,brand_id] - Project [i_item_sk,i_brand_id,i_class_id,i_category_id] - Filter [i_class_id,i_brand_id,i_category_id] - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Project [i_brand_id,i_category_id,i_class_id,i_item_sk] + Filter [i_brand_id,i_category_id,i_class_id] + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen - HashAggregate [brand_id,class_id,category_id] - HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [class_id,i_class_id,i_category_id,brand_id,i_brand_id,category_id] - HashAggregate [brand_id,class_id,category_id] + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + HashAggregate [brand_id,category_id,class_id] InputAdapter - Exchange [brand_id,class_id,category_id] #5 + Exchange [brand_id,category_id,class_id] #5 WholeStageCodegen - HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,class_id,brand_id,category_id] - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_sold_date_sk,ss_item_sk] + HashAggregate [brand_id,category_id,class_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_item_sk,ss_sold_date_sk] Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] [ss_sold_date_sk,ss_item_sk] + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] [ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [i_item_sk,i_brand_id,i_class_id,i_category_id] - Filter [i_item_sk,i_class_id,i_brand_id,i_category_id] - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + Project [i_brand_id,i_category_id,i_class_id,i_item_sk] + Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] InputAdapter BroadcastExchange #7 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #8 WholeStageCodegen - Project [i_brand_id,i_class_id,i_category_id] + Project [i_brand_id,i_category_id,i_class_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + Project [cs_sold_date_sk,i_brand_id,i_category_id,i_class_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_sold_date_sk,cs_item_sk] + Project [cs_item_sk,cs_sold_date_sk] Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk] [cs_sold_date_sk,cs_item_sk] + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] [cs_item_sk,cs_sold_date_sk] InputAdapter BroadcastExchange #9 WholeStageCodegen - Project [i_item_sk,i_brand_id,i_class_id,i_category_id] + Project [i_brand_id,i_category_id,i_class_id,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #7 InputAdapter BroadcastExchange #10 WholeStageCodegen - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_sold_date_sk,ws_item_sk] + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_item_sk,ws_sold_date_sk] Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk] [ws_sold_date_sk,ws_item_sk] + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] [ws_item_sk,ws_sold_date_sk] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] #9 + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #9 InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #7 InputAdapter BroadcastExchange #11 WholeStageCodegen BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_item_sk,i_brand_id,i_class_id,i_category_id] + Project [i_brand_id,i_category_id,i_class_id,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] InputAdapter ReusedExchange [ss_item_sk] [ss_item_sk] #3 InputAdapter BroadcastExchange #12 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] WholeStageCodegen - Project [sales,channel,i_category_id,i_brand_id,number_sales,i_class_id] + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #2 WholeStageCodegen - HashAggregate [sum,count,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2)))] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] + HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] InputAdapter Exchange #13 WholeStageCodegen - HashAggregate [count,list_price,sum,quantity,sum,count] [sum,count,sum,count] + HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] InputAdapter Union WholeStageCodegen - Project [ss_quantity,ss_list_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity,ss_list_price] + Project [ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_list_price,ss_quantity,ss_sold_date_sk] Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_quantity,ss_list_price] + Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] InputAdapter BroadcastExchange #14 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] WholeStageCodegen - Project [cs_quantity,cs_list_price] + Project [cs_list_price,cs_quantity] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price] + Project [cs_list_price,cs_quantity,cs_sold_date_sk] Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] [cs_sold_date_sk,cs_quantity,cs_list_price] + Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #14 WholeStageCodegen - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price] + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_list_price,ws_quantity,ws_sold_date_sk] Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] + Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #14 - HashAggregate [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sum,i_category_id,i_brand_id,count,count(1),i_class_id] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sales,channel,sum,number_sales,count,count(1)] + HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #15 + Exchange [i_brand_id,i_category_id,i_class_id] #15 WholeStageCodegen - HashAggregate [sum,cs_list_price,sum,i_category_id,count,i_brand_id,cs_quantity,count,i_class_id] [sum,count,sum,count] - Project [cs_quantity,i_category_id,i_brand_id,i_class_id,cs_list_price] + HashAggregate [count,count,cs_list_price,cs_quantity,i_brand_id,i_category_id,i_class_id,sum,sum] [count,count,sum,sum] + Project [cs_list_price,cs_quantity,i_brand_id,i_category_id,i_class_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_quantity,i_category_id,cs_sold_date_sk,i_brand_id,i_class_id,cs_list_price] + Project [cs_list_price,cs_quantity,cs_sold_date_sk,i_brand_id,i_category_id,i_class_id] BroadcastHashJoin [cs_item_sk,i_item_sk] BroadcastHashJoin [cs_item_sk,ss_item_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] + Project [cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] + Scan parquet default.catalog_sales [cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] [cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] InputAdapter ReusedExchange [ss_item_sk] [ss_item_sk] #3 InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #11 InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #12 WholeStageCodegen - Project [number_sales,i_category_id,i_brand_id,i_class_id,sales,channel] + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #3 WholeStageCodegen - HashAggregate [sum,count,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2)))] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] + HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] InputAdapter Exchange #13 WholeStageCodegen - HashAggregate [count,list_price,sum,quantity,sum,count] [sum,count,sum,count] + HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] InputAdapter Union WholeStageCodegen - Project [ss_quantity,ss_list_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity,ss_list_price] + Project [ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_list_price,ss_quantity,ss_sold_date_sk] Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_quantity,ss_list_price] + Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] InputAdapter BroadcastExchange #14 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] WholeStageCodegen - Project [cs_quantity,cs_list_price] + Project [cs_list_price,cs_quantity] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price] + Project [cs_list_price,cs_quantity,cs_sold_date_sk] Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] [cs_sold_date_sk,cs_quantity,cs_list_price] + Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #14 WholeStageCodegen - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price] + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_list_price,ws_quantity,ws_sold_date_sk] Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] + Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #14 - HashAggregate [count(1),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum,count,i_category_id,i_brand_id,i_class_id] [count(1),number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum,count,sales,channel] + HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #16 + Exchange [i_brand_id,i_category_id,i_class_id] #16 WholeStageCodegen - HashAggregate [count,sum,sum,count,i_category_id,i_brand_id,i_class_id,ws_list_price,ws_quantity] [sum,count,sum,count] - Project [ws_quantity,i_category_id,ws_list_price,i_brand_id,i_class_id] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_quantity,i_category_id,ws_list_price,ws_sold_date_sk,i_brand_id,i_class_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - BroadcastHashJoin [ws_item_sk,ss_item_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] + HashAggregate [count,count,i_brand_id,i_category_id,i_class_id,sum,sum,ws_list_price,ws_quantity] [count,count,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ws_list_price,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + BroadcastHashJoin [ss_item_sk,ws_item_sk] + Project [ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] + Scan parquet default.web_sales [ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] [ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] InputAdapter ReusedExchange [ss_item_sk] [ss_item_sk] #3 InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #11 InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #12 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt index d0ac00bfe..ee18cd267 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt @@ -1,226 +1,226 @@ -TakeOrderedAndProject [i_brand_id,i_category_id,number_sales,i_class_id,i_brand_id,sales,sales,number_sales,i_category_id,channel,channel,i_class_id] +TakeOrderedAndProject [channel,channel,i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id,number_sales,number_sales,sales,sales] WholeStageCodegen - BroadcastHashJoin [i_category_id,i_category_id,i_brand_id,i_class_id,i_brand_id,i_class_id] - Project [number_sales,i_category_id,channel,sales,i_brand_id,i_class_id] + BroadcastHashJoin [i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id] + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #2 WholeStageCodegen - HashAggregate [sum,count,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2)))] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] + HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] InputAdapter Exchange #12 WholeStageCodegen - HashAggregate [quantity,sum,count,list_price,count,sum] [sum,count,sum,count] + HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] InputAdapter Union WholeStageCodegen - Project [ss_quantity,ss_list_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity,ss_list_price] + Project [ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_list_price,ss_quantity,ss_sold_date_sk] Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_quantity,ss_list_price] + Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] InputAdapter BroadcastExchange #13 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] WholeStageCodegen - Project [cs_quantity,cs_list_price] + Project [cs_list_price,cs_quantity] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price] + Project [cs_list_price,cs_quantity,cs_sold_date_sk] Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] [cs_sold_date_sk,cs_quantity,cs_list_price] + Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #13 WholeStageCodegen - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price] + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_list_price,ws_quantity,ws_sold_date_sk] Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] + Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #13 - HashAggregate [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),sum,i_category_id,i_brand_id,count,i_class_id] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),number_sales,count(1),sum,channel,sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count] + HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #1 + Exchange [i_brand_id,i_category_id,i_class_id] #1 WholeStageCodegen - HashAggregate [sum,sum,ss_list_price,i_category_id,i_brand_id,ss_quantity,count,i_class_id,count] [sum,count,sum,count] - Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,i_class_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_list_price,i_category_id,i_brand_id,ss_sold_date_sk,i_class_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] + HashAggregate [count,count,i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,sum,sum] [count,count,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] BroadcastHashJoin [ss_item_sk,ss_item_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + Project [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [i_item_sk] - BroadcastHashJoin [brand_id,category_id,i_category_id,i_brand_id,class_id,i_class_id] - Project [i_item_sk,i_brand_id,i_class_id,i_category_id] - Filter [i_class_id,i_brand_id,i_category_id] - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Project [i_brand_id,i_category_id,i_class_id,i_item_sk] + Filter [i_brand_id,i_category_id,i_class_id] + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - HashAggregate [brand_id,class_id,category_id] - HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [i_class_id,i_category_id,class_id,category_id,i_brand_id,brand_id] - HashAggregate [brand_id,class_id,category_id] + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + HashAggregate [brand_id,category_id,class_id] InputAdapter - Exchange [brand_id,class_id,category_id] #4 + Exchange [brand_id,category_id,class_id] #4 WholeStageCodegen - HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,class_id,category_id,brand_id] - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_sold_date_sk,ss_item_sk] + HashAggregate [brand_id,category_id,class_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_item_sk,ss_sold_date_sk] Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] [ss_sold_date_sk,ss_item_sk] + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] [ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [i_item_sk,i_brand_id,i_class_id,i_category_id] - Filter [i_item_sk,i_class_id,i_brand_id,i_category_id] - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + Project [i_brand_id,i_category_id,i_class_id,i_item_sk] + Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [i_brand_id,i_class_id,i_category_id] + Project [i_brand_id,i_category_id,i_class_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + Project [cs_sold_date_sk,i_brand_id,i_category_id,i_class_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_sold_date_sk,cs_item_sk] + Project [cs_item_sk,cs_sold_date_sk] Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk] [cs_sold_date_sk,cs_item_sk] + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] [cs_item_sk,cs_sold_date_sk] InputAdapter BroadcastExchange #8 WholeStageCodegen - Project [i_item_sk,i_brand_id,i_class_id,i_category_id] + Project [i_brand_id,i_category_id,i_class_id,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #6 InputAdapter BroadcastExchange #9 WholeStageCodegen - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_sold_date_sk,ws_item_sk] + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_item_sk,ws_sold_date_sk] Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk] [ws_sold_date_sk,ws_item_sk] + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] [ws_item_sk,ws_sold_date_sk] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] #8 + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #8 InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #6 InputAdapter BroadcastExchange #10 WholeStageCodegen BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_item_sk,i_brand_id,i_class_id,i_category_id] - Filter [i_item_sk,i_class_id,i_category_id,i_brand_id] - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] + Project [i_brand_id,i_category_id,i_class_id,i_item_sk] + Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] InputAdapter ReusedExchange [ss_item_sk] [ss_item_sk] #2 InputAdapter BroadcastExchange #11 WholeStageCodegen Project [d_date_sk] - Filter [d_week_seq,d_date_sk] + Filter [d_date_sk,d_week_seq] Subquery #1 WholeStageCodegen Project [d_week_seq] - Filter [d_year,d_dom,d_moy] - Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] + Filter [d_dom,d_moy,d_year] + Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] [d_dom,d_moy,d_week_seq,d_year] Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] Subquery #1 WholeStageCodegen Project [d_week_seq] - Filter [d_year,d_dom,d_moy] - Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] + Filter [d_dom,d_moy,d_year] + Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] [d_dom,d_moy,d_week_seq,d_year] InputAdapter BroadcastExchange #14 WholeStageCodegen - Project [i_brand_id,i_class_id,sales,number_sales,i_category_id,channel] + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #4 WholeStageCodegen - HashAggregate [sum,count,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2)))] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),average_sales,sum,count] + HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] InputAdapter Exchange #12 WholeStageCodegen - HashAggregate [quantity,sum,count,list_price,count,sum] [sum,count,sum,count] + HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] InputAdapter Union WholeStageCodegen - Project [ss_quantity,ss_list_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity,ss_list_price] + Project [ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_list_price,ss_quantity,ss_sold_date_sk] Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_quantity,ss_list_price] + Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] InputAdapter BroadcastExchange #13 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] WholeStageCodegen - Project [cs_quantity,cs_list_price] + Project [cs_list_price,cs_quantity] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price] + Project [cs_list_price,cs_quantity,cs_sold_date_sk] Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] [cs_sold_date_sk,cs_quantity,cs_list_price] + Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #13 WholeStageCodegen - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price] + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_list_price,ws_quantity,ws_sold_date_sk] Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] [ws_sold_date_sk,ws_quantity,ws_list_price] + Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #13 - HashAggregate [i_brand_id,sum,i_class_id,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,i_category_id] [sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),count(1),count,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),channel] + HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #15 + Exchange [i_brand_id,i_category_id,i_class_id] #15 WholeStageCodegen - HashAggregate [i_brand_id,sum,sum,i_class_id,count,count,ss_list_price,i_category_id,ss_quantity] [sum,count,sum,count] - Project [ss_quantity,i_category_id,ss_list_price,i_brand_id,i_class_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,i_category_id,ss_list_price,i_brand_id,ss_sold_date_sk,i_class_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] + HashAggregate [count,count,i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,sum,sum] [count,count,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] BroadcastHashJoin [ss_item_sk,ss_item_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + Project [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] InputAdapter ReusedExchange [ss_item_sk] [ss_item_sk] #2 InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #10 InputAdapter BroadcastExchange #16 WholeStageCodegen Project [d_date_sk] - Filter [d_week_seq,d_date_sk] + Filter [d_date_sk,d_week_seq] Subquery #3 WholeStageCodegen Project [d_week_seq] - Filter [d_year,d_dom,d_moy] - Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] + Filter [d_dom,d_moy,d_year] + Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] [d_dom,d_moy,d_week_seq,d_year] Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] Subquery #3 WholeStageCodegen Project [d_week_seq] - Filter [d_year,d_dom,d_moy] - Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] [d_week_seq,d_year,d_moy,d_dom] + Filter [d_dom,d_moy,d_year] + Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] [d_dom,d_moy,d_week_seq,d_year] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt index e74a4a6e2..d70d7905c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[ca_zip#1 ASC NULLS FIRST], output=[ca_zip#1,sum(cs_sales_price)#2]) +- *(5) HashAggregate(keys=[ca_zip#1], functions=[sum(UnscaledValue(cs_sales_price#3))]) - +- Exchange hashpartitioning(ca_zip#1, 200) + +- Exchange hashpartitioning(ca_zip#1, 5) +- *(4) HashAggregate(keys=[ca_zip#1], functions=[partial_sum(UnscaledValue(cs_sales_price#3))]) +- *(4) Project [cs_sales_price#3, ca_zip#1] +- *(4) BroadcastHashJoin [cs_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/simplified.txt index 5d72165cb..aa1157759 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/simplified.txt @@ -1,25 +1,25 @@ TakeOrderedAndProject [ca_zip,sum(cs_sales_price)] WholeStageCodegen - HashAggregate [ca_zip,sum,sum(UnscaledValue(cs_sales_price))] [sum(UnscaledValue(cs_sales_price)),sum(cs_sales_price),sum] + HashAggregate [ca_zip,sum,sum(UnscaledValue(cs_sales_price))] [sum,sum(UnscaledValue(cs_sales_price)),sum(cs_sales_price)] InputAdapter Exchange [ca_zip] #1 WholeStageCodegen HashAggregate [ca_zip,cs_sales_price,sum,sum] [sum,sum] - Project [cs_sales_price,ca_zip] + Project [ca_zip,cs_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_sales_price,ca_zip] - BroadcastHashJoin [ca_zip,cs_sales_price,ca_address_sk,c_current_addr_sk,ca_state] - Project [cs_sold_date_sk,cs_sales_price,c_current_addr_sk] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_sales_price] + Project [ca_zip,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_state,ca_zip,cs_sales_price] + Project [c_current_addr_sk,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] Filter [cs_bill_customer_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_sales_price] [cs_sold_date_sk,cs_bill_customer_sk,cs_sales_price] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [c_customer_sk,c_current_addr_sk] - Filter [c_customer_sk,c_current_addr_sk] - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] [c_customer_sk,c_current_addr_sk] + Project [c_current_addr_sk,c_customer_sk] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -30,5 +30,5 @@ TakeOrderedAndProject [ca_zip,sum(cs_sales_price)] BroadcastExchange #4 WholeStageCodegen Project [d_date_sk] - Filter [d_qoy,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + Filter [d_date_sk,d_qoy,d_year] + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt index 57b72893e..fe14e6333 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt @@ -1,32 +1,32 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] +TakeOrderedAndProject [order count ,total net profit ,total shipping cost ] WholeStageCodegen - HashAggregate [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),total net profit ,order count ,count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),total shipping cost ] + HashAggregate [count,count(cs_order_number),sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] [count,count(cs_order_number),order count ,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),total net profit ,total shipping cost ] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),cs_order_number] [sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,count,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] - HashAggregate [sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),cs_order_number] [sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] + HashAggregate [count,count(cs_order_number),cs_order_number,sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] [count,count,count(cs_order_number),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] + HashAggregate [cs_order_number,sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] [sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] InputAdapter Exchange [cs_order_number] #2 WholeStageCodegen - HashAggregate [cs_ext_ship_cost,sum(UnscaledValue(cs_net_profit)),sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),cs_order_number,cs_net_profit] [sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost))] - Project [cs_order_number,cs_ext_ship_cost,cs_net_profit] - BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] - Project [cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] - BroadcastHashJoin [cs_ship_addr_sk,ca_address_sk] - Project [cs_ship_addr_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] + HashAggregate [cs_ext_ship_cost,cs_net_profit,cs_order_number,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] [sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] + Project [cs_ext_ship_cost,cs_net_profit,cs_order_number] + BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] + Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number] + BroadcastHashJoin [ca_address_sk,cs_ship_addr_sk] + Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk] BroadcastHashJoin [cs_ship_date_sk,d_date_sk] - BroadcastHashJoin [cs_order_number,cr_order_number] - Project [cs_ship_addr_sk,cs_ship_date_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] + BroadcastHashJoin [cr_order_number,cs_order_number] + Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk] BroadcastHashJoin [cs_order_number,cs_order_number,cs_warehouse_sk,cs_warehouse_sk] - Project [cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] - Filter [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk] - Scan parquet default.catalog_sales [cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] [cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk,cs_ext_ship_cost,cs_order_number,cs_net_profit,cs_call_center_sk] + Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk] + Filter [cs_call_center_sk,cs_ship_addr_sk,cs_ship_date_sk] + Scan parquet default.catalog_sales [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk] [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [cs_warehouse_sk,cs_order_number] - Scan parquet default.catalog_sales [cs_warehouse_sk,cs_order_number] [cs_warehouse_sk,cs_order_number] + Project [cs_order_number,cs_warehouse_sk] + Scan parquet default.catalog_sales [cs_order_number,cs_warehouse_sk] [cs_order_number,cs_warehouse_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen @@ -36,16 +36,16 @@ TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [ca_address_sk] - Filter [ca_state,ca_address_sk] + Filter [ca_address_sk,ca_state] Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] InputAdapter BroadcastExchange #7 WholeStageCodegen Project [cc_call_center_sk] - Filter [cc_county,cc_call_center_sk] + Filter [cc_call_center_sk,cc_county] Scan parquet default.call_center [cc_call_center_sk,cc_county] [cc_call_center_sk,cc_county] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt index 454790ddb..780e633e2 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt @@ -1,66 +1,66 @@ -TakeOrderedAndProject [as_store_returns_quantityave,catalog_sales_quantitycount,i_item_id,i_item_desc,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitystdev,catalog_sales_quantitycov,s_state,store_returns_quantitycov,catalog_sales_quantityave,store_sales_quantitycov,as_store_returns_quantitystdev,as_store_returns_quantitycount,catalog_sales_quantitystdev] +TakeOrderedAndProject [as_store_returns_quantityave,as_store_returns_quantitycount,as_store_returns_quantitystdev,catalog_sales_quantityave,catalog_sales_quantitycount,catalog_sales_quantitycov,catalog_sales_quantitystdev,i_item_desc,i_item_id,s_state,store_returns_quantitycov,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitycov,store_sales_quantitystdev] WholeStageCodegen - HashAggregate [count,avg(cast(ss_quantity as bigint)),i_item_id,stddev_samp(cast(ss_quantity as double)),i_item_desc,stddev_samp(cast(sr_return_quantity as double)),m2,avg(cast(sr_return_quantity as bigint)),n,count,m2,avg,avg,s_state,count(sr_return_quantity),sum,avg,n,count,m2,count(cs_quantity),sum,stddev_samp(cast(cs_quantity as double)),avg(cast(cs_quantity as bigint)),count,n,sum,count,count(ss_quantity),count] [count,avg(cast(ss_quantity as bigint)),as_store_returns_quantityave,catalog_sales_quantitycount,stddev_samp(cast(ss_quantity as double)),stddev_samp(cast(sr_return_quantity as double)),m2,avg(cast(sr_return_quantity as bigint)),n,count,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitystdev,m2,avg,avg,catalog_sales_quantitycov,count(sr_return_quantity),store_returns_quantitycov,sum,catalog_sales_quantityave,store_sales_quantitycov,avg,n,count,as_store_returns_quantitystdev,m2,count(cs_quantity),sum,stddev_samp(cast(cs_quantity as double)),avg(cast(cs_quantity as bigint)),count,as_store_returns_quantitycount,n,sum,count,count(ss_quantity),count,catalog_sales_quantitystdev] + HashAggregate [avg,avg,avg,avg(cast(cs_quantity as bigint)),avg(cast(sr_return_quantity as bigint)),avg(cast(ss_quantity as bigint)),count,count,count,count,count,count,count(cs_quantity),count(sr_return_quantity),count(ss_quantity),i_item_desc,i_item_id,m2,m2,m2,n,n,n,s_state,stddev_samp(cast(cs_quantity as double)),stddev_samp(cast(sr_return_quantity as double)),stddev_samp(cast(ss_quantity as double)),sum,sum,sum] [as_store_returns_quantityave,as_store_returns_quantitycount,as_store_returns_quantitystdev,avg,avg,avg,avg(cast(cs_quantity as bigint)),avg(cast(sr_return_quantity as bigint)),avg(cast(ss_quantity as bigint)),catalog_sales_quantityave,catalog_sales_quantitycount,catalog_sales_quantitycov,catalog_sales_quantitystdev,count,count,count,count,count,count,count(cs_quantity),count(sr_return_quantity),count(ss_quantity),m2,m2,m2,n,n,n,stddev_samp(cast(cs_quantity as double)),stddev_samp(cast(sr_return_quantity as double)),stddev_samp(cast(ss_quantity as double)),store_returns_quantitycov,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitycov,store_sales_quantitystdev,sum,sum,sum] InputAdapter - Exchange [i_item_id,i_item_desc,s_state] #1 + Exchange [i_item_desc,i_item_id,s_state] #1 WholeStageCodegen - HashAggregate [count,m2,i_item_id,m2,i_item_desc,m2,count,avg,avg,m2,count,n,count,sum,n,sum,n,count,m2,avg,avg,s_state,avg,sum,sr_return_quantity,avg,count,count,count,n,count,m2,sum,count,ss_quantity,cs_quantity,n,sum,count,n,sum,count] [count,m2,m2,m2,count,avg,avg,m2,count,n,count,sum,n,sum,n,count,m2,avg,avg,avg,sum,avg,count,count,count,n,count,m2,sum,count,n,sum,count,n,sum,count] - Project [ss_quantity,cs_quantity,sr_return_quantity,s_state,i_item_desc,i_item_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_quantity,ss_item_sk,cs_quantity,sr_return_quantity,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity] + HashAggregate [avg,avg,avg,avg,avg,avg,count,count,count,count,count,count,count,count,count,count,count,count,cs_quantity,i_item_desc,i_item_id,m2,m2,m2,m2,m2,m2,n,n,n,n,n,n,s_state,sr_return_quantity,ss_quantity,sum,sum,sum,sum,sum,sum] [avg,avg,avg,avg,avg,avg,count,count,count,count,count,count,count,count,count,count,count,count,m2,m2,m2,m2,m2,m2,n,n,n,n,n,n,sum,sum,sum,sum,sum,sum] + Project [cs_quantity,i_item_desc,i_item_id,s_state,sr_return_quantity,ss_quantity] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [cs_quantity,s_state,sr_return_quantity,ss_item_sk,ss_quantity] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [cs_quantity,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,sr_returned_date_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,ss_sold_date_sk,sr_returned_date_sk] - BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] - Project [ss_quantity,ss_item_sk,ss_store_sk,sr_return_quantity,sr_customer_sk,sr_item_sk,ss_sold_date_sk,sr_returned_date_sk] - BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] - Project [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] - Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] - Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,sr_customer_sk,sr_item_sk] + Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] + Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] - Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] - Scan parquet default.store_returns [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] + Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] + Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [d_date_sk] - Filter [d_quarter_name,d_date_sk] + Filter [d_date_sk,d_quarter_name] Scan parquet default.date_dim [d_date_sk,d_quarter_name] [d_date_sk,d_quarter_name] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [d_date_sk] - Filter [d_quarter_name,d_date_sk] + Filter [d_date_sk,d_quarter_name] Scan parquet default.date_dim [d_date_sk,d_quarter_name] [d_date_sk,d_quarter_name] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [d_date_sk] - Filter [d_quarter_name,d_date_sk] + Filter [d_date_sk,d_quarter_name] Scan parquet default.date_dim [d_date_sk,d_quarter_name] [d_date_sk,d_quarter_name] InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [s_store_sk,s_state] + Project [s_state,s_store_sk] Filter [s_store_sk] - Scan parquet default.store [s_store_sk,s_state] [s_store_sk,s_state] + Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] InputAdapter BroadcastExchange #8 WholeStageCodegen - Project [i_item_sk,i_item_id,i_item_desc] + Project [i_item_desc,i_item_id,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] [i_item_sk,i_item_id,i_item_desc] + Scan parquet default.item [i_item_desc,i_item_id,i_item_sk] [i_item_desc,i_item_id,i_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt index d045bd95e..5a4e9d503 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt @@ -1,38 +1,38 @@ -TakeOrderedAndProject [ca_country,agg7,agg6,ca_county,agg2,agg1,agg4,agg3,ca_state,agg5,i_item_id] +TakeOrderedAndProject [agg1,agg2,agg3,agg4,agg5,agg6,agg7,ca_country,ca_county,ca_state,i_item_id] WholeStageCodegen - HashAggregate [ca_country,avg(cast(cs_quantity as decimal(12,2))),count,sum,avg(cast(cs_coupon_amt as decimal(12,2))),sum,sum,ca_county,avg(cast(cs_list_price as decimal(12,2))),sum,sum,count,sum,count,avg(cast(cs_net_profit as decimal(12,2))),spark_grouping_id,sum,count,ca_state,avg(cast(cd_dep_count as decimal(12,2))),count,avg(cast(cs_sales_price as decimal(12,2))),count,count,avg(cast(c_birth_year as decimal(12,2))),i_item_id] [avg(cast(cs_quantity as decimal(12,2))),count,agg7,sum,avg(cast(cs_coupon_amt as decimal(12,2))),sum,sum,agg6,avg(cast(cs_list_price as decimal(12,2))),agg2,agg1,sum,sum,count,sum,count,agg4,agg3,avg(cast(cs_net_profit as decimal(12,2))),sum,count,agg5,avg(cast(cd_dep_count as decimal(12,2))),count,avg(cast(cs_sales_price as decimal(12,2))),count,count,avg(cast(c_birth_year as decimal(12,2)))] + HashAggregate [avg(cast(c_birth_year as decimal(12,2))),avg(cast(cd_dep_count as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),avg(cast(cs_list_price as decimal(12,2))),avg(cast(cs_net_profit as decimal(12,2))),avg(cast(cs_quantity as decimal(12,2))),avg(cast(cs_sales_price as decimal(12,2))),ca_country,ca_county,ca_state,count,count,count,count,count,count,count,i_item_id,spark_grouping_id,sum,sum,sum,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,agg5,agg6,agg7,avg(cast(c_birth_year as decimal(12,2))),avg(cast(cd_dep_count as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),avg(cast(cs_list_price as decimal(12,2))),avg(cast(cs_net_profit as decimal(12,2))),avg(cast(cs_quantity as decimal(12,2))),avg(cast(cs_sales_price as decimal(12,2))),count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum] InputAdapter - Exchange [ca_country,ca_county,spark_grouping_id,ca_state,i_item_id] #1 + Exchange [ca_country,ca_county,ca_state,i_item_id,spark_grouping_id] #1 WholeStageCodegen - HashAggregate [ca_country,cs_list_price,cs_coupon_amt,sum,count,sum,count,count,sum,sum,count,sum,sum,count,sum,count,ca_county,sum,sum,count,sum,count,count,sum,sum,sum,spark_grouping_id,cs_net_profit,sum,count,ca_state,cs_sales_price,cs_quantity,cd_dep_count,count,count,c_birth_year,count,count,i_item_id] [sum,count,sum,count,count,sum,sum,count,sum,sum,count,sum,count,sum,sum,count,sum,count,count,sum,sum,sum,sum,count,count,count,count,count] - Expand [cs_list_price,cs_coupon_amt,ca_country,i_item_id,ca_county,cs_net_profit,cs_sales_price,cs_quantity,cd_dep_count,ca_state,c_birth_year] - Project [cs_list_price,cs_coupon_amt,i_item_id,ca_county,ca_country,cs_net_profit,cs_sales_price,cs_quantity,cd_dep_count,c_birth_year,ca_state] + HashAggregate [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id,spark_grouping_id,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Expand [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] + Project [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [ca_state,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cd_dep_count,ca_county,cs_item_sk,cs_net_profit,ca_country,cs_list_price] + Project [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [ca_state,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cs_sold_date_sk,cd_dep_count,ca_county,cs_item_sk,cs_net_profit,ca_country,cs_list_price] + Project [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cs_sold_date_sk,cd_dep_count,cs_item_sk,cs_net_profit,cs_list_price] + Project [c_birth_year,c_current_addr_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] - Project [c_current_cdemo_sk,c_current_addr_sk,cs_coupon_amt,cs_quantity,c_birth_year,cs_sales_price,cs_sold_date_sk,cd_dep_count,cs_item_sk,cs_net_profit,cs_list_price] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_coupon_amt,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cd_dep_count,cs_item_sk,cs_net_profit,cs_list_price] - BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] - Project [cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_net_profit,cs_list_price] - Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk] - Scan parquet default.catalog_sales [cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_net_profit,cs_list_price] [cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_net_profit,cs_list_price] + Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Project [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [cd_demo_sk,cd_dep_count] - Filter [cd_gender,cd_education_status,cd_demo_sk] - Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] + Filter [cd_demo_sk,cd_education_status,cd_gender] + Scan parquet default.customer_demographics [cd_demo_sk,cd_dep_count,cd_education_status,cd_gender] [cd_demo_sk,cd_dep_count,cd_education_status,cd_gender] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] - Filter [c_birth_month,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] - Scan parquet default.customer [c_current_cdemo_sk,c_current_addr_sk,c_birth_year,c_customer_sk,c_birth_month] [c_current_cdemo_sk,c_current_addr_sk,c_birth_year,c_customer_sk,c_birth_month] + Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + Filter [c_birth_month,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + Scan parquet default.customer [c_birth_month,c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] [c_birth_month,c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen @@ -42,18 +42,18 @@ TakeOrderedAndProject [ca_country,agg7,agg6,ca_county,agg2,agg1,agg4,agg3,ca_sta InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [ca_address_sk,ca_county,ca_state,ca_country] - Filter [ca_state,ca_address_sk] - Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] [ca_address_sk,ca_county,ca_state,ca_country] + Project [ca_address_sk,ca_country,ca_county,ca_state] + Filter [ca_address_sk,ca_state] + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_county,ca_state] [ca_address_sk,ca_country,ca_county,ca_state] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [i_item_sk,i_item_id] + Project [i_item_id,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt index c008da8ae..fb69c76b7 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt @@ -1,41 +1,41 @@ -TakeOrderedAndProject [ext_price,brand_id,i_manufact,i_manufact_id,brand] +TakeOrderedAndProject [brand,brand_id,ext_price,i_manufact,i_manufact_id] WholeStageCodegen - HashAggregate [i_manufact,sum(UnscaledValue(ss_ext_sales_price)),i_brand,i_brand_id,i_manufact_id,sum] [ext_price,brand_id,sum(UnscaledValue(ss_ext_sales_price)),brand,sum] + HashAggregate [i_brand,i_brand_id,i_manufact,i_manufact_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter - Exchange [i_brand,i_brand_id,i_manufact_id,i_manufact] #1 + Exchange [i_brand,i_brand_id,i_manufact,i_manufact_id] #1 WholeStageCodegen - HashAggregate [i_manufact,i_brand,sum,ss_ext_sales_price,i_brand_id,i_manufact_id,sum] [sum,sum] - Project [i_manufact,i_manufact_id,ss_ext_sales_price,i_brand_id,i_brand] - BroadcastHashJoin [ss_store_sk,s_store_sk,ca_zip,s_zip] - Project [i_manufact,i_manufact_id,ca_zip,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand] + HashAggregate [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price] + BroadcastHashJoin [ca_zip,s_store_sk,s_zip,ss_store_sk] + Project [ca_zip,i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price,ss_store_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,i_manufact,i_manufact_id,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [i_manufact,i_manufact_id,ss_store_sk,ss_customer_sk,ss_ext_sales_price,i_brand_id,i_brand] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + Project [c_current_addr_sk,i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price,ss_store_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_customer_sk,ss_ext_sales_price,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_store_sk] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] - Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [i_manufact,i_manufact_id,i_item_sk,i_brand_id,i_brand] - Filter [i_manager_id,i_item_sk] - Scan parquet default.item [i_manufact,i_manufact_id,i_item_sk,i_manager_id,i_brand_id,i_brand] [i_manufact,i_manufact_id,i_item_sk,i_manager_id,i_brand_id,i_brand] + Project [i_brand,i_brand_id,i_item_sk,i_manufact,i_manufact_id] + Filter [i_item_sk,i_manager_id] + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id,i_manufact,i_manufact_id] [i_brand,i_brand_id,i_item_sk,i_manager_id,i_manufact,i_manufact_id] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [c_customer_sk,c_current_addr_sk] - Filter [c_customer_sk,c_current_addr_sk] - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] [c_customer_sk,c_current_addr_sk] + Project [c_current_addr_sk,c_customer_sk] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen @@ -46,5 +46,5 @@ TakeOrderedAndProject [ext_price,brand_id,i_manufact,i_manufact_id,brand] BroadcastExchange #6 WholeStageCodegen Project [s_store_sk,s_zip] - Filter [s_zip,s_store_sk] + Filter [s_store_sk,s_zip] Scan parquet default.store [s_store_sk,s_zip] [s_store_sk,s_zip] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt index a3121688c..7e604b292 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt @@ -3,50 +3,50 @@ WholeStageCodegen InputAdapter Exchange [d_week_seq1] #1 WholeStageCodegen - Project [wed_sales2,mon_sales1,sat_sales2,d_week_seq1,fri_sales2,thu_sales2,sat_sales1,tue_sales1,sun_sales2,sun_sales1,tue_sales2,wed_sales1,mon_sales2,thu_sales1,fri_sales1] + Project [d_week_seq1,fri_sales1,fri_sales2,mon_sales1,mon_sales2,sat_sales1,sat_sales2,sun_sales1,sun_sales2,thu_sales1,thu_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2] BroadcastHashJoin [d_week_seq1,d_week_seq2] - Project [sun_sales,thu_sales,sat_sales,wed_sales,fri_sales,tue_sales,d_week_seq,mon_sales] + Project [d_week_seq,fri_sales,mon_sales,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - HashAggregate [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),mon_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum,thu_sales,sum,fri_sales,tue_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sun_sales,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum,sum,sat_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum,wed_sales] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] InputAdapter Exchange [d_week_seq] #2 WholeStageCodegen - HashAggregate [d_day_name,sum,sum,sum,sum,sum,sum,sum,d_week_seq,sum,sum,sum,sum,sales_price,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,d_week_seq,d_day_name] - BroadcastHashJoin [sold_date_sk,d_date_sk] + HashAggregate [d_day_name,d_week_seq,sales_price,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,d_week_seq,sales_price] + BroadcastHashJoin [d_date_sk,sold_date_sk] InputAdapter Union WholeStageCodegen - Project [ws_sold_date_sk,ws_ext_sales_price] + Project [ws_ext_sales_price,ws_sold_date_sk] Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_ext_sales_price] + Scan parquet default.web_sales [ws_ext_sales_price,ws_sold_date_sk] [ws_ext_sales_price,ws_sold_date_sk] WholeStageCodegen - Project [cs_sold_date_sk,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_sold_date_sk] Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_ext_sales_price] + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_sold_date_sk] [cs_ext_sales_price,cs_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [d_date_sk,d_week_seq,d_day_name] + Project [d_date_sk,d_day_name,d_week_seq] Filter [d_date_sk,d_week_seq] - Scan parquet default.date_dim [d_date_sk,d_week_seq,d_day_name] [d_date_sk,d_week_seq,d_day_name] + Scan parquet default.date_dim [d_date_sk,d_day_name,d_week_seq] [d_date_sk,d_day_name,d_week_seq] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [d_week_seq] - Filter [d_year,d_week_seq] + Filter [d_week_seq,d_year] Scan parquet default.date_dim [d_week_seq,d_year] [d_week_seq,d_year] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [sun_sales,thu_sales,sat_sales,wed_sales,fri_sales,tue_sales,d_week_seq,mon_sales] + Project [d_week_seq,fri_sales,mon_sales,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - HashAggregate [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum] [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),mon_sales,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),thu_sales,fri_sales,tue_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum,sun_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sat_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum,wed_sales] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] InputAdapter - ReusedExchange [sum,sum,sum,sum,d_week_seq,sum,sum,sum] [sum,sum,sum,sum,d_week_seq,sum,sum,sum] #2 + ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 InputAdapter BroadcastExchange #6 WholeStageCodegen Project [d_week_seq] - Filter [d_year,d_week_seq] + Filter [d_week_seq,d_year] Scan parquet default.date_dim [d_week_seq,d_year] [d_week_seq,d_year] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt index ebfef4216..a6a8b7457 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt @@ -1,6 +1,6 @@ -TakeOrderedAndProject [i_item_id,i_item_desc,itemrevenue,revenueratio,i_category,i_current_price,i_class] +TakeOrderedAndProject [i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue,revenueratio] WholeStageCodegen - Project [i_item_id,i_item_desc,itemrevenue,_we0,i_category,_w0,i_current_price,i_class] + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] InputAdapter Window [_w1,i_class] WholeStageCodegen @@ -8,27 +8,27 @@ TakeOrderedAndProject [i_item_id,i_item_desc,itemrevenue,revenueratio,i_category InputAdapter Exchange [i_class] #1 WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,sum(UnscaledValue(cs_ext_sales_price)),i_category,sum,i_current_price,i_class] [itemrevenue,sum(UnscaledValue(cs_ext_sales_price)),_w1,sum,_w0] + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(cs_ext_sales_price))] InputAdapter - Exchange [i_item_id,i_item_desc,i_category,i_current_price,i_class] #2 + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #2 WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,sum,i_category,sum,i_current_price,cs_ext_sales_price,i_class] [sum,sum] - Project [i_class,i_current_price,i_category,i_item_desc,cs_ext_sales_price,i_item_id] + HashAggregate [cs_ext_sales_price,i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum] [sum,sum] + Project [cs_ext_sales_price,i_category,i_class,i_current_price,i_item_desc,i_item_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [i_class,i_current_price,cs_sold_date_sk,i_category,i_item_desc,cs_ext_sales_price,i_item_id] + Project [cs_ext_sales_price,cs_sold_date_sk,i_category,i_class,i_current_price,i_item_desc,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] Filter [i_category,i_item_sk] - Scan parquet default.item [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt index b74976c2f..472388330 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt @@ -1,35 +1,35 @@ -TakeOrderedAndProject [w_warehouse_name,i_item_id,inv_before,inv_after] +TakeOrderedAndProject [i_item_id,inv_after,inv_before,w_warehouse_name] WholeStageCodegen - Filter [inv_before,inv_after] - HashAggregate [i_item_id,sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum,w_warehouse_name,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum] [sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum,inv_before,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum,inv_after] + Filter [inv_after,inv_before] + HashAggregate [i_item_id,sum,sum,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),w_warehouse_name] [inv_after,inv_before,sum,sum,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint))] InputAdapter - Exchange [w_warehouse_name,i_item_id] #1 + Exchange [i_item_id,w_warehouse_name] #1 WholeStageCodegen - HashAggregate [i_item_id,sum,d_date,w_warehouse_name,sum,inv_quantity_on_hand,sum,sum] [sum,sum,sum,sum] - Project [inv_quantity_on_hand,w_warehouse_name,i_item_id,d_date] - BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [inv_date_sk,inv_quantity_on_hand,w_warehouse_name,i_item_id] - BroadcastHashJoin [inv_item_sk,i_item_sk] + HashAggregate [d_date,i_item_id,inv_quantity_on_hand,sum,sum,sum,sum,w_warehouse_name] [sum,sum,sum,sum] + Project [d_date,i_item_id,inv_quantity_on_hand,w_warehouse_name] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_item_id,inv_date_sk,inv_quantity_on_hand,w_warehouse_name] + BroadcastHashJoin [i_item_sk,inv_item_sk] Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] - Filter [inv_warehouse_sk,inv_item_sk,inv_date_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [w_warehouse_sk,w_warehouse_name] + Project [w_warehouse_name,w_warehouse_sk] Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [i_item_sk,i_item_id] + Project [i_item_id,i_item_sk] Filter [i_current_price,i_item_sk] - Scan parquet default.item [i_item_sk,i_item_id,i_current_price] [i_item_sk,i_item_id,i_current_price] + Scan parquet default.item [i_current_price,i_item_id,i_item_sk] [i_current_price,i_item_id,i_item_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [d_date_sk,d_date] + Project [d_date,d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt index 49b575687..d10f9d4ef 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt @@ -1,32 +1,32 @@ -TakeOrderedAndProject [i_category,i_class,i_product_name,i_brand,qoh] +TakeOrderedAndProject [i_brand,i_category,i_class,i_product_name,qoh] WholeStageCodegen - HashAggregate [avg(cast(inv_quantity_on_hand as bigint)),i_category,count,spark_grouping_id,sum,i_class,i_product_name,i_brand] [avg(cast(inv_quantity_on_hand as bigint)),qoh,sum,count] + HashAggregate [avg(cast(inv_quantity_on_hand as bigint)),count,i_brand,i_category,i_class,i_product_name,spark_grouping_id,sum] [avg(cast(inv_quantity_on_hand as bigint)),count,qoh,sum] InputAdapter - Exchange [i_category,spark_grouping_id,i_class,i_product_name,i_brand] #1 + Exchange [i_brand,i_category,i_class,i_product_name,spark_grouping_id] #1 WholeStageCodegen - HashAggregate [sum,i_category,count,count,spark_grouping_id,sum,i_class,i_product_name,inv_quantity_on_hand,i_brand] [sum,count,sum,count] - Expand [i_product_name,i_class,i_brand,i_category,inv_quantity_on_hand] - Project [i_brand,i_category,inv_quantity_on_hand,i_class,i_product_name] + HashAggregate [count,count,i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand,spark_grouping_id,sum,sum] [count,count,sum,sum] + Expand [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] + Project [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [i_class,inv_quantity_on_hand,inv_warehouse_sk,i_product_name,i_category,i_brand] - BroadcastHashJoin [inv_item_sk,i_item_sk] - Project [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] - BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Project [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [d_date_sk] - Filter [d_month_seq,d_date_sk] + Filter [d_date_sk,d_month_seq] Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [i_class,i_item_sk,i_product_name,i_category,i_brand] + Project [i_brand,i_category,i_class,i_item_sk,i_product_name] Filter [i_item_sk] - Scan parquet default.item [i_class,i_item_sk,i_product_name,i_category,i_brand] [i_class,i_item_sk,i_product_name,i_category,i_brand] + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_product_name] [i_brand,i_category,i_class,i_item_sk,i_product_name] InputAdapter BroadcastExchange #4 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt index e3c9d92e2..a75e4e5f9 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt @@ -1,6 +1,6 @@ CollectLimit WholeStageCodegen - HashAggregate [sum,sum(sales)] [sum(sales),sum(sales),sum] + HashAggregate [sum,sum(sales)] [sum,sum(sales),sum(sales)] InputAdapter Exchange #1 WholeStageCodegen @@ -8,44 +8,44 @@ CollectLimit InputAdapter Union WholeStageCodegen - Project [cs_quantity,cs_list_price] + Project [cs_list_price,cs_quantity] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] + Project [cs_list_price,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cs_bill_customer_sk,cs_list_price,cs_quantity,cs_sold_date_sk] BroadcastHashJoin [cs_item_sk,item_sk] - Project [cs_quantity,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk,cs_list_price] + Project [cs_bill_customer_sk,cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_quantity,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk,cs_list_price] [cs_quantity,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk,cs_list_price] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [item_sk] Filter [count(1)] - HashAggregate [count(1),d_date,i_item_sk,substring(i_item_desc, 1, 30),count] [count(1),item_sk,count(1),count] + HashAggregate [count,count(1),d_date,i_item_sk,substring(i_item_desc, 1, 30)] [count,count(1),count(1),item_sk] InputAdapter - Exchange [substring(i_item_desc, 1, 30),i_item_sk,d_date] #3 + Exchange [d_date,i_item_sk,substring(i_item_desc, 1, 30)] #3 WholeStageCodegen - HashAggregate [i_item_desc,count,d_date,i_item_sk,substring(i_item_desc, 1, 30),count] [count,substring(i_item_desc, 1, 30),count] - Project [d_date,i_item_sk,i_item_desc] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk] - Filter [ss_sold_date_sk,ss_item_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] [ss_sold_date_sk,ss_item_sk] + HashAggregate [count,count,d_date,i_item_desc,i_item_sk,substring(i_item_desc, 1, 30)] [count,count,substring(i_item_desc, 1, 30)] + Project [d_date,i_item_desc,i_item_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_date,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_item_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] [ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [d_date_sk,d_date] - Filter [d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date,d_year] [d_date_sk,d_date,d_year] + Project [d_date,d_date_sk] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date,d_date_sk,d_year] [d_date,d_date_sk,d_year] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [i_item_sk,i_item_desc] + Project [i_item_desc,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_item_desc] [i_item_sk,i_item_desc] + Scan parquet default.item [i_item_desc,i_item_sk] [i_item_desc,i_item_sk] InputAdapter BroadcastExchange #6 WholeStageCodegen @@ -53,23 +53,23 @@ CollectLimit Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #1 WholeStageCodegen - HashAggregate [max,max(csales)] [max(csales),tpcds_cmax,max] + HashAggregate [max,max(csales)] [max,max(csales),tpcds_cmax] InputAdapter Exchange #9 WholeStageCodegen HashAggregate [csales,max,max] [max,max] - HashAggregate [c_customer_sk,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),csales,sum] + HashAggregate [c_customer_sk,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] [csales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] InputAdapter Exchange [c_customer_sk] #10 WholeStageCodegen - HashAggregate [sum,c_customer_sk,ss_sales_price,sum,ss_quantity] [sum,sum] - Project [ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price,sum,sum] [sum,sum] + Project [c_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] InputAdapter BroadcastExchange #8 WholeStageCodegen @@ -80,15 +80,15 @@ CollectLimit BroadcastExchange #11 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - HashAggregate [c_customer_sk,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),sum] + HashAggregate [c_customer_sk,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] [sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] InputAdapter Exchange [c_customer_sk] #7 WholeStageCodegen - HashAggregate [sum,sum,c_customer_sk,ss_sales_price,ss_quantity] [sum,sum] - Project [ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price,sum,sum] [sum,sum] + Project [c_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] Project [ss_customer_sk,ss_quantity,ss_sales_price] Filter [ss_customer_sk] Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] [ss_customer_sk,ss_quantity,ss_sales_price] @@ -102,18 +102,18 @@ CollectLimit BroadcastExchange #12 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] WholeStageCodegen - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - BroadcastHashJoin [ws_item_sk,item_sk] - Project [ws_quantity,ws_list_price,ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_list_price,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Project [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [item_sk,ws_item_sk] + Project [ws_bill_customer_sk,ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] [ws_quantity,ws_list_price,ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] [ws_bill_customer_sk,ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] InputAdapter ReusedExchange [item_sk] [item_sk] #2 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt index ead663422..7880be764 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt @@ -1,50 +1,50 @@ -TakeOrderedAndProject [c_last_name,c_first_name,sales] +TakeOrderedAndProject [c_first_name,c_last_name,sales] Union WholeStageCodegen - HashAggregate [c_last_name,c_first_name,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sales,sum] + HashAggregate [c_first_name,c_last_name,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] [sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] InputAdapter - Exchange [c_last_name,c_first_name] #1 + Exchange [c_first_name,c_last_name] #1 WholeStageCodegen - HashAggregate [cs_list_price,sum,sum,c_last_name,cs_quantity,c_first_name] [sum,sum] - Project [cs_quantity,cs_list_price,c_first_name,c_last_name] + HashAggregate [c_first_name,c_last_name,cs_list_price,cs_quantity,sum,sum] [sum,sum] + Project [c_first_name,c_last_name,cs_list_price,cs_quantity] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_quantity,c_last_name,cs_sold_date_sk,c_first_name,cs_list_price] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] + Project [c_first_name,c_last_name,cs_list_price,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cs_bill_customer_sk,cs_list_price,cs_quantity,cs_sold_date_sk] BroadcastHashJoin [cs_item_sk,item_sk] - Project [cs_quantity,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk,cs_list_price] + Project [cs_bill_customer_sk,cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] Filter [cs_bill_customer_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_quantity,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk,cs_list_price] [cs_quantity,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk,cs_list_price] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [item_sk] Filter [count(1)] - HashAggregate [d_date,count,i_item_sk,count(1),substring(i_item_desc, 1, 30)] [count(1),item_sk,count(1),count] + HashAggregate [count,count(1),d_date,i_item_sk,substring(i_item_desc, 1, 30)] [count,count(1),count(1),item_sk] InputAdapter - Exchange [substring(i_item_desc, 1, 30),i_item_sk,d_date] #3 + Exchange [d_date,i_item_sk,substring(i_item_desc, 1, 30)] #3 WholeStageCodegen - HashAggregate [i_item_desc,d_date,count,count,i_item_sk,substring(i_item_desc, 1, 30)] [count,substring(i_item_desc, 1, 30),count] - Project [d_date,i_item_sk,i_item_desc] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk] - Filter [ss_sold_date_sk,ss_item_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] [ss_sold_date_sk,ss_item_sk] + HashAggregate [count,count,d_date,i_item_desc,i_item_sk,substring(i_item_desc, 1, 30)] [count,count,substring(i_item_desc, 1, 30)] + Project [d_date,i_item_desc,i_item_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_date,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_item_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] [ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [d_date_sk,d_date] - Filter [d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date,d_year] [d_date_sk,d_date,d_year] + Project [d_date,d_date_sk] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date,d_date_sk,d_year] [d_date,d_date_sk,d_year] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [i_item_sk,i_item_desc] + Project [i_item_desc,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_item_desc] [i_item_sk,i_item_desc] + Scan parquet default.item [i_item_desc,i_item_sk] [i_item_desc,i_item_sk] InputAdapter BroadcastExchange #6 WholeStageCodegen @@ -52,23 +52,23 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #1 WholeStageCodegen - HashAggregate [max,max(csales)] [max(csales),tpcds_cmax,max] + HashAggregate [max,max(csales)] [max,max(csales),tpcds_cmax] InputAdapter Exchange #9 WholeStageCodegen HashAggregate [csales,max,max] [max,max] - HashAggregate [c_customer_sk,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),csales,sum] + HashAggregate [c_customer_sk,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] [csales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] InputAdapter Exchange [c_customer_sk] #10 WholeStageCodegen - HashAggregate [sum,sum,c_customer_sk,ss_sales_price,ss_quantity] [sum,sum] - Project [ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price,sum,sum] [sum,sum] + Project [c_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] InputAdapter BroadcastExchange #8 WholeStageCodegen @@ -79,15 +79,15 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] BroadcastExchange #11 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - HashAggregate [c_customer_sk,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),sum] + HashAggregate [c_customer_sk,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] [sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] InputAdapter Exchange [c_customer_sk] #7 WholeStageCodegen - HashAggregate [sum,c_customer_sk,ss_sales_price,sum,ss_quantity] [sum,sum] - Project [ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price,sum,sum] [sum,sum] + Project [c_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] Project [ss_customer_sk,ss_quantity,ss_sales_price] Filter [ss_customer_sk] Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] [ss_customer_sk,ss_quantity,ss_sales_price] @@ -110,24 +110,24 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] BroadcastExchange #13 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] WholeStageCodegen - HashAggregate [c_last_name,c_first_name,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sales,sum] + HashAggregate [c_first_name,c_last_name,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] [sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] InputAdapter - Exchange [c_last_name,c_first_name] #14 + Exchange [c_first_name,c_last_name] #14 WholeStageCodegen - HashAggregate [sum,c_last_name,ws_list_price,c_first_name,sum,ws_quantity] [sum,sum] - Project [ws_quantity,ws_list_price,c_first_name,c_last_name] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_quantity,c_last_name,ws_list_price,ws_sold_date_sk,c_first_name] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - BroadcastHashJoin [ws_item_sk,item_sk] - Project [ws_quantity,ws_list_price,ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] + HashAggregate [c_first_name,c_last_name,sum,sum,ws_list_price,ws_quantity] [sum,sum] + Project [c_first_name,c_last_name,ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_first_name,c_last_name,ws_list_price,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Project [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [item_sk,ws_item_sk] + Project [ws_bill_customer_sk,ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] Filter [ws_bill_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_quantity,ws_list_price,ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] [ws_quantity,ws_list_price,ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] [ws_bill_customer_sk,ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] InputAdapter ReusedExchange [item_sk] [item_sk] #2 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt index 08aae40a6..d8bda2e69 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt @@ -1,111 +1,111 @@ WholeStageCodegen - Project [c_last_name,c_first_name,s_store_name,paid] + Project [c_first_name,c_last_name,paid,s_store_name] Filter [sum(netpaid)] Subquery #1 WholeStageCodegen - HashAggregate [sum,count,avg(netpaid)] [avg(netpaid),(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),sum,count] + HashAggregate [avg(netpaid),count,sum] [(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),avg(netpaid),count,sum] InputAdapter Exchange #8 WholeStageCodegen - HashAggregate [sum,count,count,netpaid,sum] [sum,count,sum,count] - HashAggregate [i_units,i_color,i_size,s_state,sum(UnscaledValue(ss_net_paid)),sum,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + HashAggregate [count,count,netpaid,sum,sum] [count,count,sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum,sum(UnscaledValue(ss_net_paid))] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] InputAdapter - Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #9 + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #9 WholeStageCodegen - HashAggregate [i_units,i_color,i_size,ss_net_paid,s_state,sum,i_manager_id,c_last_name,sum,i_current_price,c_first_name,s_store_name,ca_state] [sum,sum] - Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] - BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] - Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] - Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid,sum,sum] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] + Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [sr_item_sk,sr_ticket_number] - Filter [sr_ticket_number,sr_item_sk] + Filter [sr_item_sk,sr_ticket_number] Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [s_store_sk,s_store_name,s_state,s_zip] + Project [s_state,s_store_name,s_store_sk,s_zip] Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] [s_market_id,s_state,s_store_name,s_store_sk,s_zip] InputAdapter BroadcastExchange #10 WholeStageCodegen - Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Project [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] Filter [i_item_sk] - Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [c_customer_sk,c_first_name,c_last_name,c_birth_country] - Filter [c_customer_sk,c_birth_country] - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] [c_customer_sk,c_first_name,c_last_name,c_birth_country] + Project [c_birth_country,c_customer_sk,c_first_name,c_last_name] + Filter [c_birth_country,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] [c_birth_country,c_customer_sk,c_first_name,c_last_name] InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [ca_state,ca_zip,ca_country] + Project [ca_country,ca_state,ca_zip] Filter [ca_zip] - Scan parquet default.customer_address [ca_state,ca_zip,ca_country] [ca_state,ca_zip,ca_country] - HashAggregate [c_first_name,s_store_name,c_last_name,sum(netpaid),sum] [sum(netpaid),paid,sum(netpaid),sum] + Scan parquet default.customer_address [ca_country,ca_state,ca_zip] [ca_country,ca_state,ca_zip] + HashAggregate [c_first_name,c_last_name,s_store_name,sum,sum(netpaid)] [paid,sum,sum(netpaid),sum(netpaid)] InputAdapter - Exchange [c_last_name,c_first_name,s_store_name] #1 + Exchange [c_first_name,c_last_name,s_store_name] #1 WholeStageCodegen - HashAggregate [c_first_name,s_store_name,c_last_name,sum,netpaid,sum] [sum,sum] - HashAggregate [i_units,i_color,i_size,sum,s_state,sum(UnscaledValue(ss_net_paid)),i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + HashAggregate [c_first_name,c_last_name,netpaid,s_store_name,sum,sum] [sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum,sum(UnscaledValue(ss_net_paid))] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] InputAdapter - Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #2 + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #2 WholeStageCodegen - HashAggregate [i_units,sum,i_color,i_size,ss_net_paid,sum,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] [sum,sum] - Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] - BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] - Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] - Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid,sum,sum] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] + Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [sr_item_sk,sr_ticket_number] - Filter [sr_ticket_number,sr_item_sk] + Filter [sr_item_sk,sr_ticket_number] Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [s_store_sk,s_store_name,s_state,s_zip] + Project [s_state,s_store_name,s_store_sk,s_zip] Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] [s_market_id,s_state,s_store_name,s_store_sk,s_zip] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Project [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] Filter [i_color,i_item_sk] - Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [c_customer_sk,c_first_name,c_last_name,c_birth_country] - Filter [c_customer_sk,c_birth_country] - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] [c_customer_sk,c_first_name,c_last_name,c_birth_country] + Project [c_birth_country,c_customer_sk,c_first_name,c_last_name] + Filter [c_birth_country,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] [c_birth_country,c_customer_sk,c_first_name,c_last_name] InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [ca_state,ca_zip,ca_country] + Project [ca_country,ca_state,ca_zip] Filter [ca_zip] - Scan parquet default.customer_address [ca_state,ca_zip,ca_country] [ca_state,ca_zip,ca_country] + Scan parquet default.customer_address [ca_country,ca_state,ca_zip] [ca_country,ca_state,ca_zip] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt index 87db312c3..d8bda2e69 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt @@ -1,111 +1,111 @@ WholeStageCodegen - Project [c_last_name,c_first_name,s_store_name,paid] + Project [c_first_name,c_last_name,paid,s_store_name] Filter [sum(netpaid)] Subquery #1 WholeStageCodegen - HashAggregate [sum,count,avg(netpaid)] [avg(netpaid),(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),sum,count] + HashAggregate [avg(netpaid),count,sum] [(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),avg(netpaid),count,sum] InputAdapter Exchange #8 WholeStageCodegen - HashAggregate [count,sum,count,netpaid,sum] [sum,count,sum,count] - HashAggregate [i_units,sum,i_color,i_size,s_state,i_manager_id,c_last_name,sum(UnscaledValue(ss_net_paid)),i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + HashAggregate [count,count,netpaid,sum,sum] [count,count,sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum,sum(UnscaledValue(ss_net_paid))] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] InputAdapter - Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #9 + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #9 WholeStageCodegen - HashAggregate [i_units,sum,i_color,i_size,ss_net_paid,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,sum,s_store_name,ca_state] [sum,sum] - Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] - BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] - Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] - Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid,sum,sum] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] + Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [sr_item_sk,sr_ticket_number] - Filter [sr_ticket_number,sr_item_sk] + Filter [sr_item_sk,sr_ticket_number] Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [s_store_sk,s_store_name,s_state,s_zip] + Project [s_state,s_store_name,s_store_sk,s_zip] Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] [s_market_id,s_state,s_store_name,s_store_sk,s_zip] InputAdapter BroadcastExchange #10 WholeStageCodegen - Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Project [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] Filter [i_item_sk] - Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [c_customer_sk,c_first_name,c_last_name,c_birth_country] - Filter [c_customer_sk,c_birth_country] - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] [c_customer_sk,c_first_name,c_last_name,c_birth_country] + Project [c_birth_country,c_customer_sk,c_first_name,c_last_name] + Filter [c_birth_country,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] [c_birth_country,c_customer_sk,c_first_name,c_last_name] InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [ca_state,ca_zip,ca_country] + Project [ca_country,ca_state,ca_zip] Filter [ca_zip] - Scan parquet default.customer_address [ca_state,ca_zip,ca_country] [ca_state,ca_zip,ca_country] - HashAggregate [c_first_name,s_store_name,c_last_name,sum(netpaid),sum] [sum(netpaid),paid,sum(netpaid),sum] + Scan parquet default.customer_address [ca_country,ca_state,ca_zip] [ca_country,ca_state,ca_zip] + HashAggregate [c_first_name,c_last_name,s_store_name,sum,sum(netpaid)] [paid,sum,sum(netpaid),sum(netpaid)] InputAdapter - Exchange [c_last_name,c_first_name,s_store_name] #1 + Exchange [c_first_name,c_last_name,s_store_name] #1 WholeStageCodegen - HashAggregate [c_first_name,s_store_name,c_last_name,netpaid,sum,sum] [sum,sum] - HashAggregate [i_units,sum,i_color,i_size,s_state,i_manager_id,c_last_name,sum(UnscaledValue(ss_net_paid)),i_current_price,c_first_name,s_store_name,ca_state] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + HashAggregate [c_first_name,c_last_name,netpaid,s_store_name,sum,sum] [sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum,sum(UnscaledValue(ss_net_paid))] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] InputAdapter - Exchange [i_units,i_color,i_size,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,s_store_name,ca_state] #2 + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #2 WholeStageCodegen - HashAggregate [i_units,sum,i_color,i_size,ss_net_paid,s_state,i_manager_id,c_last_name,i_current_price,c_first_name,sum,s_store_name,ca_state] [sum,sum] - Project [s_store_name,ca_state,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_state,i_manager_id,i_color,c_first_name] - BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] - Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,c_last_name,s_zip,s_state,i_manager_id,i_color,c_first_name,c_birth_country] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [s_store_name,i_units,i_current_price,i_size,ss_net_paid,ss_customer_sk,s_zip,s_state,i_manager_id,i_color] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [s_store_name,ss_item_sk,ss_net_paid,ss_customer_sk,s_zip,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] - Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_net_paid,ss_customer_sk,ss_ticket_number] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid,sum,sum] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] + Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [sr_item_sk,sr_ticket_number] - Filter [sr_ticket_number,sr_item_sk] + Filter [sr_item_sk,sr_ticket_number] Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [s_store_sk,s_store_name,s_state,s_zip] + Project [s_state,s_store_name,s_store_sk,s_zip] Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_store_name,s_store_sk,s_zip,s_state,s_market_id] [s_store_name,s_store_sk,s_zip,s_state,s_market_id] + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] [s_market_id,s_state,s_store_name,s_store_sk,s_zip] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Project [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] Filter [i_color,i_item_sk] - Scan parquet default.item [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] [i_units,i_current_price,i_size,i_item_sk,i_manager_id,i_color] + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [c_customer_sk,c_first_name,c_last_name,c_birth_country] - Filter [c_customer_sk,c_birth_country] - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] [c_customer_sk,c_first_name,c_last_name,c_birth_country] + Project [c_birth_country,c_customer_sk,c_first_name,c_last_name] + Filter [c_birth_country,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] [c_birth_country,c_customer_sk,c_first_name,c_last_name] InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [ca_state,ca_zip,ca_country] + Project [ca_country,ca_state,ca_zip] Filter [ca_zip] - Scan parquet default.customer_address [ca_state,ca_zip,ca_country] [ca_state,ca_zip,ca_country] + Scan parquet default.customer_address [ca_country,ca_state,ca_zip] [ca_country,ca_state,ca_zip] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt index 20dec06b1..df34b61ae 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt @@ -1,66 +1,66 @@ -TakeOrderedAndProject [i_item_id,i_item_desc,store_sales_profit,catalog_sales_profit,s_store_id,store_returns_loss,s_store_name] +TakeOrderedAndProject [catalog_sales_profit,i_item_desc,i_item_id,s_store_id,s_store_name,store_returns_loss,store_sales_profit] WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,sum,sum,s_store_id,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(ss_net_profit)),s_store_name,sum] [sum,store_sales_profit,sum,catalog_sales_profit,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),store_returns_loss,sum(UnscaledValue(ss_net_profit)),sum] + HashAggregate [i_item_desc,i_item_id,s_store_id,s_store_name,sum,sum,sum,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(ss_net_profit))] [catalog_sales_profit,store_returns_loss,store_sales_profit,sum,sum,sum,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(ss_net_profit))] InputAdapter - Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 + Exchange [i_item_desc,i_item_id,s_store_id,s_store_name] #1 WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,sum,sum,s_store_id,cs_net_profit,sum,sum,sum,s_store_name,sum,sr_net_loss,ss_net_profit] [sum,sum,sum,sum,sum,sum] - Project [s_store_id,s_store_name,ss_net_profit,cs_net_profit,i_item_desc,sr_net_loss,i_item_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [s_store_id,s_store_name,ss_item_sk,ss_net_profit,cs_net_profit,sr_net_loss] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_net_profit,cs_net_profit,sr_net_loss] + HashAggregate [cs_net_profit,i_item_desc,i_item_id,s_store_id,s_store_name,sr_net_loss,ss_net_profit,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [cs_net_profit,i_item_desc,i_item_id,s_store_id,s_store_name,sr_net_loss,ss_net_profit] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [cs_net_profit,s_store_id,s_store_name,sr_net_loss,ss_item_sk,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [cs_net_profit,sr_net_loss,ss_item_sk,ss_net_profit,ss_store_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [ss_item_sk,ss_store_sk,cs_sold_date_sk,ss_net_profit,cs_net_profit,sr_net_loss] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [ss_item_sk,ss_store_sk,cs_sold_date_sk,ss_net_profit,cs_net_profit,sr_returned_date_sk,sr_net_loss] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_item_sk,ss_store_sk,cs_sold_date_sk,ss_net_profit,cs_net_profit,ss_sold_date_sk,sr_returned_date_sk,sr_net_loss] - BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] - Project [ss_item_sk,ss_store_sk,sr_customer_sk,sr_item_sk,ss_net_profit,ss_sold_date_sk,sr_returned_date_sk,sr_net_loss] - BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] - Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_ticket_number] - Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_ticket_number] + Project [cs_net_profit,cs_sold_date_sk,sr_net_loss,ss_item_sk,ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [cs_net_profit,cs_sold_date_sk,sr_net_loss,sr_returned_date_sk,ss_item_sk,ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [cs_net_profit,cs_sold_date_sk,sr_net_loss,sr_returned_date_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,sr_customer_sk,sr_item_sk] + Project [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] + Project [ss_customer_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk,sr_net_loss] - Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] - Scan parquet default.store_returns [sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk,sr_net_loss] [sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk,sr_net_loss] + Project [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,sr_ticket_number] + Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,sr_ticket_number] [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,sr_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] + Project [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [s_store_sk,s_store_id,s_store_name] + Project [s_store_id,s_store_name,s_store_sk] Filter [s_store_sk] - Scan parquet default.store [s_store_sk,s_store_id,s_store_name] [s_store_sk,s_store_id,s_store_name] + Scan parquet default.store [s_store_id,s_store_name,s_store_sk] [s_store_id,s_store_name,s_store_sk] InputAdapter BroadcastExchange #8 WholeStageCodegen - Project [i_item_sk,i_item_id,i_item_desc] + Project [i_item_desc,i_item_id,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] [i_item_sk,i_item_id,i_item_desc] + Scan parquet default.item [i_item_desc,i_item_id,i_item_sk] [i_item_desc,i_item_id,i_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt index da9dc6cdf..bfb96b05c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt @@ -1,42 +1,42 @@ -TakeOrderedAndProject [i_item_id,agg3,agg1,agg2,agg4] +TakeOrderedAndProject [agg1,agg2,agg3,agg4,i_item_id] WholeStageCodegen - HashAggregate [i_item_id,count,avg(UnscaledValue(cs_list_price)),sum,sum,count,sum,avg(UnscaledValue(cs_sales_price)),avg(UnscaledValue(cs_coupon_amt)),count,avg(cast(cs_quantity as bigint)),sum,count] [count,avg(UnscaledValue(cs_list_price)),sum,sum,count,sum,avg(UnscaledValue(cs_sales_price)),avg(UnscaledValue(cs_coupon_amt)),agg3,count,agg1,agg2,agg4,avg(cast(cs_quantity as bigint)),sum,count] + HashAggregate [avg(UnscaledValue(cs_coupon_amt)),avg(UnscaledValue(cs_list_price)),avg(UnscaledValue(cs_sales_price)),avg(cast(cs_quantity as bigint)),count,count,count,count,i_item_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(cs_coupon_amt)),avg(UnscaledValue(cs_list_price)),avg(UnscaledValue(cs_sales_price)),avg(cast(cs_quantity as bigint)),count,count,count,count,sum,sum,sum,sum] InputAdapter Exchange [i_item_id] #1 WholeStageCodegen - HashAggregate [cs_list_price,cs_coupon_amt,i_item_id,count,count,sum,sum,count,sum,sum,count,count,count,sum,sum,sum,count,cs_sales_price,cs_quantity,sum,count] [count,count,sum,sum,count,sum,sum,count,count,count,sum,sum,sum,count,sum,count] - Project [cs_coupon_amt,cs_quantity,cs_sales_price,cs_list_price,i_item_id] + HashAggregate [count,count,count,count,count,count,count,count,cs_coupon_amt,cs_list_price,cs_quantity,cs_sales_price,i_item_id,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [cs_coupon_amt,cs_list_price,cs_quantity,cs_sales_price,i_item_id] BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_promo_sk,cs_coupon_amt,cs_quantity,cs_sales_price,cs_list_price,i_item_id] + Project [cs_coupon_amt,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_promo_sk,cs_coupon_amt,cs_quantity,cs_sales_price,cs_item_sk,cs_list_price] + Project [cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_promo_sk,cs_coupon_amt,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] - BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] - Project [cs_promo_sk,cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] - Filter [cs_bill_cdemo_sk,cs_sold_date_sk,cs_item_sk,cs_promo_sk] - Scan parquet default.catalog_sales [cs_promo_sk,cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] [cs_promo_sk,cs_coupon_amt,cs_bill_cdemo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_item_sk,cs_list_price] + Project [cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Project [cs_bill_cdemo_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] + Filter [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] [cs_bill_cdemo_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [cd_demo_sk] - Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] - Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [i_item_sk,i_item_id] + Project [i_item_id,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [p_promo_sk] Filter [p_channel_email,p_channel_event,p_promo_sk] - Scan parquet default.promotion [p_promo_sk,p_channel_email,p_channel_event] [p_promo_sk,p_channel_email,p_channel_event] + Scan parquet default.promotion [p_channel_email,p_channel_event,p_promo_sk] [p_channel_email,p_channel_event,p_promo_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt index 5eead43da..e3a2d7ca1 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt @@ -1,43 +1,43 @@ -TakeOrderedAndProject [agg4,i_item_id,s_state,agg3,g_state,agg1,agg2] +TakeOrderedAndProject [agg1,agg2,agg3,agg4,g_state,i_item_id,s_state] WholeStageCodegen - HashAggregate [i_item_id,sum,count,count,avg(UnscaledValue(ss_list_price)),sum,count,avg(cast(ss_quantity as bigint)),sum,s_state,count,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),sum,spark_grouping_id] [agg4,sum,count,count,avg(UnscaledValue(ss_list_price)),sum,count,avg(cast(ss_quantity as bigint)),sum,agg3,count,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),sum,g_state,agg1,agg2] + HashAggregate [avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,i_item_id,s_state,spark_grouping_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,g_state,sum,sum,sum,sum] InputAdapter Exchange [i_item_id,s_state,spark_grouping_id] #1 WholeStageCodegen - HashAggregate [i_item_id,sum,sum,count,sum,count,count,sum,sum,count,sum,ss_coupon_amt,count,ss_sales_price,sum,ss_list_price,count,s_state,count,count,ss_quantity,sum,spark_grouping_id] [sum,sum,count,sum,count,count,sum,sum,count,sum,count,sum,count,count,count,sum] - Expand [i_item_id,ss_coupon_amt,ss_sales_price,ss_list_price,ss_quantity,s_state] - Project [i_item_id,ss_coupon_amt,s_state,ss_sales_price,ss_list_price,ss_quantity] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,s_state,ss_sales_price] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] - BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk] - Filter [ss_cdemo_sk,ss_sold_date_sk,ss_store_sk,ss_item_sk] - Scan parquet default.store_sales [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk] [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_store_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk] + HashAggregate [count,count,count,count,count,count,count,count,i_item_id,s_state,spark_grouping_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Expand [i_item_id,s_state,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + Project [i_item_id,s_state,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Project [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [cd_demo_sk] - Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] - Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [s_store_sk,s_state] + Project [s_state,s_store_sk] Filter [s_state,s_store_sk] - Scan parquet default.store [s_store_sk,s_state] [s_store_sk,s_state] + Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [i_item_sk,i_item_id] + Project [i_item_id,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt index 2b1d35702..2b46940ec 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt @@ -5,91 +5,91 @@ CollectLimit BroadcastNestedLoopJoin BroadcastNestedLoopJoin WholeStageCodegen - HashAggregate [count(ss_list_price),sum,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count] [count(ss_list_price),sum,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),B1_CNTD,B1_LP,B1_CNT,count,count] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] [B1_CNT,B1_CNTD,B1_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [count(ss_list_price),count,sum,sum,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),ss_list_price,count,count] [count(ss_list_price),count,sum,sum,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count,count] - HashAggregate [count(ss_list_price),sum,sum,count,count,avg(UnscaledValue(ss_list_price)),ss_list_price,count,count] [count(ss_list_price),sum,sum,count,count,avg(UnscaledValue(ss_list_price)),count,count] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count(ss_list_price),count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] InputAdapter Exchange [ss_list_price] #2 WholeStageCodegen - HashAggregate [count(ss_list_price),sum,count,avg(UnscaledValue(ss_list_price)),ss_list_price,count] [count(ss_list_price),sum,sum,count,count,avg(UnscaledValue(ss_list_price)),count,count] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),ss_list_price,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] Project [ss_list_price] - Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] - Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] BroadcastExchange #3 WholeStageCodegen - HashAggregate [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,count(ss_list_price),sum] [count,B2_CNT,count,count(ss_list_price),B2_LP,avg(UnscaledValue(ss_list_price)),count,B2_CNTD,count(ss_list_price),sum] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] [B2_CNT,B2_CNTD,B2_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] InputAdapter Exchange #4 WholeStageCodegen - HashAggregate [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,ss_list_price,sum,count,count,count(ss_list_price),sum] [count,count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,sum,count,count,count(ss_list_price),sum] - HashAggregate [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,ss_list_price,sum,count,sum] [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,sum,count,sum] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count(ss_list_price),count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] InputAdapter Exchange [ss_list_price] #5 WholeStageCodegen - HashAggregate [count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),ss_list_price,sum,count] [count,count,count(ss_list_price),avg(UnscaledValue(ss_list_price)),count,sum,count,sum] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),ss_list_price,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] Project [ss_list_price] - Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] - Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] BroadcastExchange #6 WholeStageCodegen - HashAggregate [count(ss_list_price),count,count,count,count(ss_list_price),sum,avg(UnscaledValue(ss_list_price))] [count(ss_list_price),B3_CNTD,B3_CNT,count,B3_LP,count,count,count(ss_list_price),sum,avg(UnscaledValue(ss_list_price))] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] [B3_CNT,B3_CNTD,B3_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] InputAdapter Exchange #7 WholeStageCodegen - HashAggregate [count(ss_list_price),sum,count,count,ss_list_price,count,count(ss_list_price),count,count,sum,avg(UnscaledValue(ss_list_price))] [count(ss_list_price),sum,count,count,count,count,count(ss_list_price),count,count,sum,avg(UnscaledValue(ss_list_price))] - HashAggregate [sum,count,ss_list_price,count,count(ss_list_price),count,count,sum,avg(UnscaledValue(ss_list_price))] [sum,count,count,count(ss_list_price),count,count,sum,avg(UnscaledValue(ss_list_price))] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count(ss_list_price),count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] InputAdapter Exchange [ss_list_price] #8 WholeStageCodegen - HashAggregate [sum,ss_list_price,count(ss_list_price),count,count,avg(UnscaledValue(ss_list_price))] [sum,count,count,count(ss_list_price),count,count,sum,avg(UnscaledValue(ss_list_price))] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),ss_list_price,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] Project [ss_list_price] - Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] - Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] BroadcastExchange #9 WholeStageCodegen - HashAggregate [count,count,count(ss_list_price),count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum] [count,count,count(ss_list_price),count,avg(UnscaledValue(ss_list_price)),B4_LP,count(ss_list_price),B4_CNTD,sum,B4_CNT] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] [B4_CNT,B4_CNTD,B4_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] InputAdapter Exchange #10 WholeStageCodegen - HashAggregate [count,count,count(ss_list_price),count,count,count,avg(UnscaledValue(ss_list_price)),ss_list_price,count(ss_list_price),sum,sum] [count,count,count,count(ss_list_price),count,count,count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,sum] - HashAggregate [count,count,count(ss_list_price),count,count,avg(UnscaledValue(ss_list_price)),ss_list_price,sum,sum] [count,count,count(ss_list_price),count,count,avg(UnscaledValue(ss_list_price)),sum,sum] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count(ss_list_price),count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] InputAdapter Exchange [ss_list_price] #11 WholeStageCodegen - HashAggregate [count,count(ss_list_price),count,avg(UnscaledValue(ss_list_price)),ss_list_price,sum] [count,count,count(ss_list_price),count,count,avg(UnscaledValue(ss_list_price)),sum,sum] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),ss_list_price,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] Project [ss_list_price] - Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] - Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] BroadcastExchange #12 WholeStageCodegen - HashAggregate [count(ss_list_price),count,sum,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count] [count(ss_list_price),count,B5_LP,sum,avg(UnscaledValue(ss_list_price)),B5_CNT,count(ss_list_price),count,B5_CNTD,count] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] [B5_CNT,B5_CNTD,B5_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] InputAdapter Exchange #13 WholeStageCodegen - HashAggregate [count(ss_list_price),sum,avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),count,count,ss_list_price,count,count,count] [count(ss_list_price),count,sum,avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),count,count,count,count,count] - HashAggregate [sum,avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),count,ss_list_price,count,count,count] [sum,avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),count,count,count,count] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count(ss_list_price),count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] InputAdapter Exchange [ss_list_price] #14 WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),count,ss_list_price,count] [sum,avg(UnscaledValue(ss_list_price)),sum,count(ss_list_price),count,count,count,count] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),ss_list_price,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] Project [ss_list_price] - Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] - Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] BroadcastExchange #15 WholeStageCodegen - HashAggregate [count,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count,count(ss_list_price),sum] [count,B6_CNT,B6_LP,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count,B6_CNTD,count(ss_list_price),sum] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] [B6_CNT,B6_CNTD,B6_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] InputAdapter Exchange #16 WholeStageCodegen - HashAggregate [sum,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count,ss_list_price,count(ss_list_price),count,count,count,sum] [count,sum,avg(UnscaledValue(ss_list_price)),count(ss_list_price),count,count,count(ss_list_price),count,count,count,sum] - HashAggregate [sum,avg(UnscaledValue(ss_list_price)),count,count,ss_list_price,count(ss_list_price),count,count,sum] [sum,avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),count,count,sum] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count(ss_list_price),count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] InputAdapter Exchange [ss_list_price] #17 WholeStageCodegen - HashAggregate [sum,avg(UnscaledValue(ss_list_price)),ss_list_price,count(ss_list_price),count,count] [sum,avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),count,count,sum] + HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),ss_list_price,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] Project [ss_list_price] - Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] - Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt index 64fa8afa9..f8c72710f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt @@ -1,66 +1,66 @@ -TakeOrderedAndProject [store_returns_quantity,i_item_id,i_item_desc,store_sales_quantity,s_store_id,catalog_sales_quantity,s_store_name] +TakeOrderedAndProject [catalog_sales_quantity,i_item_desc,i_item_id,s_store_id,s_store_name,store_returns_quantity,store_sales_quantity] WholeStageCodegen - HashAggregate [sum(cast(cs_quantity as bigint)),i_item_id,i_item_desc,sum,sum(cast(ss_quantity as bigint)),sum,sum(cast(sr_return_quantity as bigint)),s_store_id,sum,s_store_name] [sum(cast(cs_quantity as bigint)),store_returns_quantity,sum,sum(cast(ss_quantity as bigint)),store_sales_quantity,sum,sum(cast(sr_return_quantity as bigint)),sum,catalog_sales_quantity] + HashAggregate [i_item_desc,i_item_id,s_store_id,s_store_name,sum,sum,sum,sum(cast(cs_quantity as bigint)),sum(cast(sr_return_quantity as bigint)),sum(cast(ss_quantity as bigint))] [catalog_sales_quantity,store_returns_quantity,store_sales_quantity,sum,sum,sum,sum(cast(cs_quantity as bigint)),sum(cast(sr_return_quantity as bigint)),sum(cast(ss_quantity as bigint))] InputAdapter - Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 + Exchange [i_item_desc,i_item_id,s_store_id,s_store_name] #1 WholeStageCodegen - HashAggregate [i_item_id,sum,i_item_desc,sum,sum,sum,s_store_id,sr_return_quantity,sum,sum,ss_quantity,cs_quantity,s_store_name] [sum,sum,sum,sum,sum,sum] - Project [s_store_id,s_store_name,ss_quantity,cs_quantity,sr_return_quantity,i_item_desc,i_item_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [s_store_id,s_store_name,ss_quantity,ss_item_sk,cs_quantity,sr_return_quantity] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity] + HashAggregate [cs_quantity,i_item_desc,i_item_id,s_store_id,s_store_name,sr_return_quantity,ss_quantity,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [cs_quantity,i_item_desc,i_item_id,s_store_id,s_store_name,sr_return_quantity,ss_quantity] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [cs_quantity,s_store_id,s_store_name,sr_return_quantity,ss_item_sk,ss_quantity] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [cs_quantity,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,sr_returned_date_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,cs_quantity,ss_store_sk,sr_return_quantity,cs_sold_date_sk,ss_sold_date_sk,sr_returned_date_sk] - BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] - Project [ss_quantity,ss_item_sk,ss_store_sk,sr_return_quantity,sr_customer_sk,sr_item_sk,ss_sold_date_sk,sr_returned_date_sk] - BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] - Project [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] - Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] - Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,sr_customer_sk,sr_item_sk] + Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] + Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] - Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] - Scan parquet default.store_returns [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] [sr_return_quantity,sr_customer_sk,sr_ticket_number,sr_item_sk,sr_returned_date_sk] + Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] + Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [s_store_sk,s_store_id,s_store_name] + Project [s_store_id,s_store_name,s_store_sk] Filter [s_store_sk] - Scan parquet default.store [s_store_sk,s_store_id,s_store_name] [s_store_sk,s_store_id,s_store_name] + Scan parquet default.store [s_store_id,s_store_name,s_store_sk] [s_store_id,s_store_name,s_store_sk] InputAdapter BroadcastExchange #8 WholeStageCodegen - Project [i_item_sk,i_item_id,i_item_desc] + Project [i_item_desc,i_item_id,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] [i_item_sk,i_item_id,i_item_desc] + Scan parquet default.item [i_item_desc,i_item_id,i_item_sk] [i_item_desc,i_item_id,i_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt index a8432c52f..23253bbd2 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt @@ -1,26 +1,26 @@ -TakeOrderedAndProject [d_year,sum_agg,brand_id,brand] +TakeOrderedAndProject [brand,brand_id,d_year,sum_agg] WholeStageCodegen - HashAggregate [d_year,i_brand,sum(UnscaledValue(ss_ext_sales_price)),i_brand_id,sum] [brand_id,sum(UnscaledValue(ss_ext_sales_price)),sum_agg,sum,brand] + HashAggregate [d_year,i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,brand_id,sum,sum(UnscaledValue(ss_ext_sales_price)),sum_agg] InputAdapter Exchange [d_year,i_brand,i_brand_id] #1 WholeStageCodegen - HashAggregate [sum,d_year,i_brand,ss_ext_sales_price,i_brand_id,sum] [sum,sum] - Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [d_year,ss_item_sk,ss_ext_sales_price] + HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [d_year,i_brand,i_brand_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_year,ss_ext_sales_price,ss_item_sk] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] Project [d_date_sk,d_year] - Filter [d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] - Filter [ss_sold_date_sk,ss_item_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [i_item_sk,i_brand_id,i_brand] - Filter [i_manufact_id,i_item_sk] - Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id] [i_item_sk,i_brand_id,i_brand,i_manufact_id] + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manufact_id] + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manufact_id] [i_brand,i_brand_id,i_item_sk,i_manufact_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt index 1be056d32..a17fc2aea 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt @@ -1,29 +1,29 @@ -TakeOrderedAndProject [ctr_total_return,c_birth_day,c_login,c_salutation,c_birth_month,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,c_birth_year,c_last_review_date] +TakeOrderedAndProject [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_customer_id,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation,ctr_total_return] WholeStageCodegen - Project [ctr_total_return,c_birth_day,c_login,c_salutation,c_birth_month,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,c_birth_year,c_last_review_date] + Project [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_customer_id,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation,ctr_total_return] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_preferred_cust_flag,c_current_addr_sk,ctr_total_return,c_email_address,c_customer_id,c_birth_year,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] - BroadcastHashJoin [ctr_customer_sk,c_customer_sk] + Project [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation,ctr_total_return] + BroadcastHashJoin [c_customer_sk,ctr_customer_sk] Project [ctr_customer_sk,ctr_total_return] - BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + BroadcastHashJoin [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_state,ctr_state,ctr_total_return] Filter [ctr_total_return] - HashAggregate [wr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(wr_return_amt))] [ctr_total_return,ctr_state,sum(UnscaledValue(wr_return_amt)),sum,ctr_customer_sk] + HashAggregate [ca_state,sum,sum(UnscaledValue(wr_return_amt)),wr_returning_customer_sk] [ctr_customer_sk,ctr_state,ctr_total_return,sum,sum(UnscaledValue(wr_return_amt))] InputAdapter - Exchange [wr_returning_customer_sk,ca_state] #1 + Exchange [ca_state,wr_returning_customer_sk] #1 WholeStageCodegen - HashAggregate [wr_return_amt,sum,wr_returning_customer_sk,sum,ca_state] [sum,sum] - Project [wr_returning_customer_sk,wr_return_amt,ca_state] - BroadcastHashJoin [wr_returning_addr_sk,ca_address_sk] - Project [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] - BroadcastHashJoin [wr_returned_date_sk,d_date_sk] - Project [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + HashAggregate [ca_state,sum,sum,wr_return_amt,wr_returning_customer_sk] [sum,sum] + Project [ca_state,wr_return_amt,wr_returning_customer_sk] + BroadcastHashJoin [ca_address_sk,wr_returning_addr_sk] + Project [wr_return_amt,wr_returning_addr_sk,wr_returning_customer_sk] + BroadcastHashJoin [d_date_sk,wr_returned_date_sk] + Project [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] Filter [wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] - Scan parquet default.web_returns [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + Scan parquet default.web_returns [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #3 @@ -35,23 +35,23 @@ TakeOrderedAndProject [ctr_total_return,c_birth_day,c_login,c_salutation,c_birth BroadcastExchange #4 WholeStageCodegen Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] - HashAggregate [ctr_state,sum,count,avg(ctr_total_return)] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),sum,ctr_state,avg(ctr_total_return),count] + HashAggregate [avg(ctr_total_return),count,ctr_state,sum] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),avg(ctr_total_return),count,ctr_state,sum] InputAdapter Exchange [ctr_state] #5 WholeStageCodegen - HashAggregate [ctr_state,count,sum,ctr_total_return,count,sum] [sum,count,sum,count] - HashAggregate [wr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(wr_return_amt))] [sum(UnscaledValue(wr_return_amt)),ctr_state,ctr_total_return,sum] + HashAggregate [count,count,ctr_state,ctr_total_return,sum,sum] [count,count,sum,sum] + HashAggregate [ca_state,sum,sum(UnscaledValue(wr_return_amt)),wr_returning_customer_sk] [ctr_state,ctr_total_return,sum,sum(UnscaledValue(wr_return_amt))] InputAdapter - Exchange [wr_returning_customer_sk,ca_state] #6 + Exchange [ca_state,wr_returning_customer_sk] #6 WholeStageCodegen - HashAggregate [sum,wr_return_amt,sum,wr_returning_customer_sk,ca_state] [sum,sum] - Project [wr_returning_customer_sk,wr_return_amt,ca_state] - BroadcastHashJoin [wr_returning_addr_sk,ca_address_sk] - Project [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] - BroadcastHashJoin [wr_returned_date_sk,d_date_sk] - Project [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + HashAggregate [ca_state,sum,sum,wr_return_amt,wr_returning_customer_sk] [sum,sum] + Project [ca_state,wr_return_amt,wr_returning_customer_sk] + BroadcastHashJoin [ca_address_sk,wr_returning_addr_sk] + Project [wr_return_amt,wr_returning_addr_sk,wr_returning_customer_sk] + BroadcastHashJoin [d_date_sk,wr_returned_date_sk] + Project [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] Filter [wr_returned_date_sk,wr_returning_addr_sk] - Scan parquet default.web_returns [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + Scan parquet default.web_returns [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #2 InputAdapter @@ -59,12 +59,12 @@ TakeOrderedAndProject [ctr_total_return,c_birth_day,c_login,c_salutation,c_birth InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [c_preferred_cust_flag,c_current_addr_sk,c_email_address,c_customer_id,c_birth_year,c_customer_sk,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] - Filter [c_customer_sk,c_current_addr_sk] - Scan parquet default.customer [c_preferred_cust_flag,c_current_addr_sk,c_email_address,c_customer_id,c_birth_year,c_customer_sk,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] [c_preferred_cust_flag,c_current_addr_sk,c_email_address,c_customer_id,c_birth_year,c_customer_sk,c_login,c_birth_day,c_last_review_date,c_last_name,c_birth_month,c_first_name,c_birth_country,c_salutation] + Project [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation] [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation] InputAdapter BroadcastExchange #8 WholeStageCodegen Project [ca_address_sk] - Filter [ca_state,ca_address_sk] + Filter [ca_address_sk,ca_state] Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt index 78c6383a4..bf41c1709 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt @@ -3,32 +3,32 @@ WholeStageCodegen InputAdapter Exchange [ca_county] #1 WholeStageCodegen - Project [web_sales,store_sales,store_sales,web_sales,d_year,store_sales,web_sales,ca_county] - BroadcastHashJoin [web_sales,ca_county,store_sales,ca_county,store_sales,web_sales] - Project [d_year,store_sales,store_sales,ca_county,web_sales,ca_county,store_sales,web_sales] - BroadcastHashJoin [ca_county,ca_county,store_sales,web_sales,store_sales,web_sales] + Project [ca_county,d_year,store_sales,store_sales,store_sales,web_sales,web_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county,store_sales,store_sales,web_sales,web_sales] + Project [ca_county,ca_county,d_year,store_sales,store_sales,store_sales,web_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county,store_sales,store_sales,web_sales,web_sales] BroadcastHashJoin [ca_county,ca_county] - Project [d_year,store_sales,store_sales,ca_county,store_sales] + Project [ca_county,d_year,store_sales,store_sales,store_sales] BroadcastHashJoin [ca_county,ca_county] BroadcastHashJoin [ca_county,ca_county] - HashAggregate [d_year,d_qoy,ca_county,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ss_ext_sales_price))] [store_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter Exchange [ca_county,d_qoy,d_year] #2 WholeStageCodegen - HashAggregate [d_year,d_qoy,sum,ca_county,sum,ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price,d_year,d_qoy,ca_county] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] - Filter [ss_sold_date_sk,ss_addr_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price,sum,sum] [sum,sum] + Project [ca_county,d_qoy,d_year,ss_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [d_qoy,d_year,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + Filter [ss_addr_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [d_date_sk,d_year,d_qoy] - Filter [d_qoy,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + Project [d_date_sk,d_qoy,d_year] + Filter [d_date_sk,d_qoy,d_year] + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] InputAdapter BroadcastExchange #4 WholeStageCodegen @@ -38,103 +38,103 @@ WholeStageCodegen InputAdapter BroadcastExchange #5 WholeStageCodegen - HashAggregate [d_qoy,d_year,sum(UnscaledValue(ss_ext_sales_price)),sum,ca_county] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ss_ext_sales_price))] [store_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter Exchange [ca_county,d_qoy,d_year] #6 WholeStageCodegen - HashAggregate [sum,d_qoy,d_year,sum,ss_ext_sales_price,ca_county] [sum,sum] - Project [ss_ext_sales_price,d_year,d_qoy,ca_county] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] - Filter [ss_sold_date_sk,ss_addr_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price,sum,sum] [sum,sum] + Project [ca_county,d_qoy,d_year,ss_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [d_qoy,d_year,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + Filter [ss_addr_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [d_date_sk,d_year,d_qoy] - Filter [d_qoy,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + Project [d_date_sk,d_qoy,d_year] + Filter [d_date_sk,d_qoy,d_year] + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] InputAdapter ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 InputAdapter BroadcastExchange #8 WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(ss_ext_sales_price)),d_year,d_qoy,ca_county] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ss_ext_sales_price))] [store_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter Exchange [ca_county,d_qoy,d_year] #9 WholeStageCodegen - HashAggregate [sum,d_year,d_qoy,sum,ss_ext_sales_price,ca_county] [sum,sum] - Project [ss_ext_sales_price,d_year,d_qoy,ca_county] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] - Filter [ss_sold_date_sk,ss_addr_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price,sum,sum] [sum,sum] + Project [ca_county,d_qoy,d_year,ss_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [d_qoy,d_year,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + Filter [ss_addr_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] InputAdapter BroadcastExchange #10 WholeStageCodegen - Project [d_date_sk,d_year,d_qoy] - Filter [d_qoy,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + Project [d_date_sk,d_qoy,d_year] + Filter [d_date_sk,d_qoy,d_year] + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] InputAdapter ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 InputAdapter BroadcastExchange #11 WholeStageCodegen - HashAggregate [sum,ca_county,d_qoy,d_year,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),web_sales] InputAdapter Exchange [ca_county,d_qoy,d_year] #12 WholeStageCodegen - HashAggregate [sum,ws_ext_sales_price,ca_county,d_qoy,d_year,sum] [sum,sum] - Project [ws_ext_sales_price,d_year,d_qoy,ca_county] - BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] - Filter [ws_sold_date_sk,ws_bill_addr_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] + HashAggregate [ca_county,d_qoy,d_year,sum,sum,ws_ext_sales_price] [sum,sum] + Project [ca_county,d_qoy,d_year,ws_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [d_qoy,d_year,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] #3 + ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #3 InputAdapter ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 InputAdapter BroadcastExchange #13 WholeStageCodegen - HashAggregate [d_qoy,ca_county,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),web_sales] InputAdapter Exchange [ca_county,d_qoy,d_year] #14 WholeStageCodegen - HashAggregate [d_qoy,ca_county,d_year,ws_ext_sales_price,sum,sum] [sum,sum] - Project [ws_ext_sales_price,d_year,d_qoy,ca_county] - BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] - Filter [ws_sold_date_sk,ws_bill_addr_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] + HashAggregate [ca_county,d_qoy,d_year,sum,sum,ws_ext_sales_price] [sum,sum] + Project [ca_county,d_qoy,d_year,ws_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [d_qoy,d_year,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] #7 + ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #7 InputAdapter ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 InputAdapter BroadcastExchange #15 WholeStageCodegen - HashAggregate [sum,d_qoy,ca_county,d_year,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),web_sales] InputAdapter Exchange [ca_county,d_qoy,d_year] #16 WholeStageCodegen - HashAggregate [sum,sum,d_qoy,ca_county,ws_ext_sales_price,d_year] [sum,sum] - Project [ws_ext_sales_price,d_year,d_qoy,ca_county] - BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] - Filter [ws_sold_date_sk,ws_bill_addr_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] + HashAggregate [ca_county,d_qoy,d_year,sum,sum,ws_ext_sales_price] [sum,sum] + Project [ca_county,d_qoy,d_year,ws_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [d_qoy,d_year,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] #10 + ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #10 InputAdapter ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt index 1da0b46e1..aeba4db83 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt @@ -3,37 +3,37 @@ CollectLimit Project BroadcastHashJoin [cs_sold_date_sk,d_date_sk] Project [cs_sold_date_sk] - BroadcastHashJoin [i_item_sk,cs_item_sk,cs_ext_discount_amt,(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))] - Project [cs_sold_date_sk,cs_ext_discount_amt,i_item_sk] + BroadcastHashJoin [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),cs_ext_discount_amt,cs_item_sk,i_item_sk] + Project [cs_ext_discount_amt,cs_sold_date_sk,i_item_sk] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] - Filter [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] + Project [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] + Filter [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] InputAdapter BroadcastExchange #1 WholeStageCodegen Project [i_item_sk] - Filter [i_manufact_id,i_item_sk] + Filter [i_item_sk,i_manufact_id] Scan parquet default.item [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] InputAdapter BroadcastExchange #2 WholeStageCodegen Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))] - HashAggregate [cs_item_sk,sum,count,avg(UnscaledValue(cs_ext_discount_amt))] [avg(UnscaledValue(cs_ext_discount_amt)),(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),sum,count,cs_item_sk] + HashAggregate [avg(UnscaledValue(cs_ext_discount_amt)),count,cs_item_sk,sum] [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),avg(UnscaledValue(cs_ext_discount_amt)),count,cs_item_sk,sum] InputAdapter Exchange [cs_item_sk] #3 WholeStageCodegen - HashAggregate [cs_ext_discount_amt,count,sum,cs_item_sk,sum,count] [sum,count,sum,count] - Project [cs_item_sk,cs_ext_discount_amt] + HashAggregate [count,count,cs_ext_discount_amt,cs_item_sk,sum,sum] [count,count,sum,sum] + Project [cs_ext_discount_amt,cs_item_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] - Filter [cs_sold_date_sk,cs_item_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] + Project [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt index 234413bf8..2989a8b23 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt @@ -1,11 +1,11 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output=[i_manufact_id#2,total_sales#1]) +- *(20) HashAggregate(keys=[i_manufact_id#2], functions=[sum(total_sales#3)]) - +- Exchange hashpartitioning(i_manufact_id#2, 200) + +- Exchange hashpartitioning(i_manufact_id#2, 5) +- *(19) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(total_sales#3)]) +- Union :- *(6) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- Exchange hashpartitioning(i_manufact_id#2, 200) + : +- Exchange hashpartitioning(i_manufact_id#2, 5) : +- *(5) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) : +- *(5) Project [ss_ext_sales_price#4, i_manufact_id#2] : +- *(5) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight @@ -34,7 +34,7 @@ TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output : +- *(3) Filter (isnotnull(i_category#15) && (i_category#15 = Electronics)) : +- *(3) FileScan parquet default.item[i_category#15,i_manufact_id#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Electronics)], ReadSchema: struct :- *(12) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) - : +- Exchange hashpartitioning(i_manufact_id#2, 200) + : +- Exchange hashpartitioning(i_manufact_id#2, 5) : +- *(11) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) : +- *(11) Project [cs_ext_sales_price#16, i_manufact_id#2] : +- *(11) BroadcastHashJoin [cs_item_sk#17], [i_item_sk#6], Inner, BuildRight @@ -49,7 +49,7 @@ TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [i_item_sk#6, i_manufact_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(18) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(ws_ext_sales_price#20))]) - +- Exchange hashpartitioning(i_manufact_id#2, 200) + +- Exchange hashpartitioning(i_manufact_id#2, 5) +- *(17) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#20))]) +- *(17) Project [ws_ext_sales_price#20, i_manufact_id#2] +- *(17) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#6], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/simplified.txt index f3e63e8a8..28a14a1ea 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/simplified.txt @@ -1,38 +1,38 @@ -TakeOrderedAndProject [total_sales,i_manufact_id] +TakeOrderedAndProject [i_manufact_id,total_sales] WholeStageCodegen - HashAggregate [i_manufact_id,sum,sum(total_sales)] [sum(total_sales),total_sales,sum] + HashAggregate [i_manufact_id,sum,sum(total_sales)] [sum,sum(total_sales),total_sales] InputAdapter Exchange [i_manufact_id] #1 WholeStageCodegen - HashAggregate [i_manufact_id,total_sales,sum,sum] [sum,sum] + HashAggregate [i_manufact_id,sum,sum,total_sales] [sum,sum] InputAdapter Union WholeStageCodegen - HashAggregate [i_manufact_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + HashAggregate [i_manufact_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_ext_sales_price)),total_sales] InputAdapter Exchange [i_manufact_id] #2 WholeStageCodegen HashAggregate [i_manufact_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [ss_ext_sales_price,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] - Filter [ss_sold_date_sk,ss_addr_sk,ss_item_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] + Project [i_manufact_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [ca_address_sk] - Filter [ca_gmt_offset,ca_address_sk] + Filter [ca_address_sk,ca_gmt_offset] Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] InputAdapter BroadcastExchange #5 @@ -48,20 +48,20 @@ TakeOrderedAndProject [total_sales,i_manufact_id] Filter [i_category] Scan parquet default.item [i_category,i_manufact_id] [i_category,i_manufact_id] WholeStageCodegen - HashAggregate [i_manufact_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + HashAggregate [i_manufact_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum,sum(UnscaledValue(cs_ext_sales_price)),total_sales] InputAdapter Exchange [i_manufact_id] #7 WholeStageCodegen - HashAggregate [i_manufact_id,cs_ext_sales_price,sum,sum] [sum,sum] + HashAggregate [cs_ext_sales_price,i_manufact_id,sum,sum] [sum,sum] Project [cs_ext_sales_price,i_manufact_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk,cs_ext_sales_price] - BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] - Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [ca_address_sk,cs_bill_addr_sk] + Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] - Filter [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + Filter [cs_bill_addr_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter @@ -69,20 +69,20 @@ TakeOrderedAndProject [total_sales,i_manufact_id] InputAdapter ReusedExchange [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] #5 WholeStageCodegen - HashAggregate [i_manufact_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + HashAggregate [i_manufact_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),total_sales] InputAdapter Exchange [i_manufact_id] #8 WholeStageCodegen - HashAggregate [i_manufact_id,ws_ext_sales_price,sum,sum] [sum,sum] - Project [ws_ext_sales_price,i_manufact_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk,ws_ext_sales_price] - BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] - Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] - Filter [ws_sold_date_sk,ws_bill_addr_sk,ws_item_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + HashAggregate [i_manufact_id,sum,sum,ws_ext_sales_price] [sum,sum] + Project [i_manufact_id,ws_ext_sales_price] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt index 70572bb58..89b0a6f50 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt @@ -1,46 +1,46 @@ WholeStageCodegen - Sort [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag] + Sort [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] InputAdapter - Exchange [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag] #1 + Exchange [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] #1 WholeStageCodegen - Project [ss_ticket_number,c_salutation,cnt,c_last_name,c_preferred_cust_flag,c_first_name] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation,cnt,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] Filter [cnt] - HashAggregate [ss_ticket_number,ss_customer_sk,count,count(1)] [count(1),cnt,count] + HashAggregate [count,count(1),ss_customer_sk,ss_ticket_number] [cnt,count,count(1)] InputAdapter - Exchange [ss_ticket_number,ss_customer_sk] #2 + Exchange [ss_customer_sk,ss_ticket_number] #2 WholeStageCodegen - HashAggregate [ss_ticket_number,ss_customer_sk,count,count] [count,count] + HashAggregate [count,count,ss_customer_sk,ss_ticket_number] [count,count] Project [ss_customer_sk,ss_ticket_number] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] - Filter [ss_sold_date_sk,ss_store_sk,ss_hdemo_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] [ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] - Filter [d_dom,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_dom] [d_date_sk,d_year,d_dom] + Filter [d_date_sk,d_dom,d_year] + Scan parquet default.date_dim [d_date_sk,d_dom,d_year] [d_date_sk,d_dom,d_year] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [s_store_sk] Filter [s_county,s_store_sk] - Scan parquet default.store [s_store_sk,s_county] [s_store_sk,s_county] + Scan parquet default.store [s_county,s_store_sk] [s_county,s_store_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] - Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + Filter [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [c_preferred_cust_flag,c_customer_sk,c_last_name,c_first_name,c_salutation] + Project [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] Filter [c_customer_sk] - Scan parquet default.customer [c_preferred_cust_flag,c_customer_sk,c_last_name,c_first_name,c_salutation] [c_preferred_cust_flag,c_customer_sk,c_last_name,c_first_name,c_salutation] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/simplified.txt index 5fd7a355f..e07194d69 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/simplified.txt @@ -1,44 +1,44 @@ -TakeOrderedAndProject [cnt3,max(cd_dep_employed_count),max(cd_dep_count),min(cd_dep_count),min(cd_dep_college_count),min(cd_dep_employed_count),max(cd_dep_college_count),ca_state,avg(cd_dep_count),avg(cd_dep_college_count),avg(cd_dep_employed_count),aggOrder,cd_dep_college_count,cnt2,cd_marital_status,cd_gender,cd_dep_employed_count,cnt1] +TakeOrderedAndProject [aggOrder,avg(cd_dep_college_count),avg(cd_dep_count),avg(cd_dep_employed_count),ca_state,cd_dep_college_count,cd_dep_employed_count,cd_gender,cd_marital_status,cnt1,cnt2,cnt3,max(cd_dep_college_count),max(cd_dep_count),max(cd_dep_employed_count),min(cd_dep_college_count),min(cd_dep_count),min(cd_dep_employed_count)] WholeStageCodegen - HashAggregate [avg(cast(cd_dep_employed_count as bigint)),min,min(cd_dep_college_count),sum,max(cd_dep_college_count),sum,sum,ca_state,count,max,count,min,avg(cast(cd_dep_college_count as bigint)),max(cd_dep_count),max,avg(cast(cd_dep_count as bigint)),min(cd_dep_employed_count),cd_dep_college_count,count(1),cd_dep_count,min(cd_dep_count),cd_marital_status,cd_gender,count,max(cd_dep_employed_count),count,max,cd_dep_employed_count,min] [cnt3,avg(cast(cd_dep_employed_count as bigint)),min,max(cd_dep_employed_count),min(cd_dep_college_count),sum,max(cd_dep_college_count),max(cd_dep_count),sum,min(cd_dep_count),sum,min(cd_dep_college_count),min(cd_dep_employed_count),max(cd_dep_college_count),count,avg(cd_dep_count),max,count,min,avg(cast(cd_dep_college_count as bigint)),max(cd_dep_count),avg(cd_dep_college_count),max,avg(cd_dep_employed_count),avg(cast(cd_dep_count as bigint)),min(cd_dep_employed_count),aggOrder,count(1),cnt2,min(cd_dep_count),count,max(cd_dep_employed_count),count,max,min,cnt1] + HashAggregate [avg(cast(cd_dep_college_count as bigint)),avg(cast(cd_dep_count as bigint)),avg(cast(cd_dep_employed_count as bigint)),ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status,count,count,count,count,count(1),max,max,max,max(cd_dep_college_count),max(cd_dep_count),max(cd_dep_employed_count),min,min,min,min(cd_dep_college_count),min(cd_dep_count),min(cd_dep_employed_count),sum,sum,sum] [aggOrder,avg(cast(cd_dep_college_count as bigint)),avg(cast(cd_dep_count as bigint)),avg(cast(cd_dep_employed_count as bigint)),avg(cd_dep_college_count),avg(cd_dep_count),avg(cd_dep_employed_count),cnt1,cnt2,cnt3,count,count,count,count,count(1),max,max,max,max(cd_dep_college_count),max(cd_dep_college_count),max(cd_dep_count),max(cd_dep_count),max(cd_dep_employed_count),max(cd_dep_employed_count),min,min,min,min(cd_dep_college_count),min(cd_dep_college_count),min(cd_dep_count),min(cd_dep_count),min(cd_dep_employed_count),min(cd_dep_employed_count),sum,sum,sum] InputAdapter - Exchange [ca_state,cd_dep_college_count,cd_dep_count,cd_marital_status,cd_gender,cd_dep_employed_count] #1 + Exchange [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] #1 WholeStageCodegen - HashAggregate [sum,min,sum,sum,min,sum,sum,sum,max,ca_state,max,count,count,max,count,count,min,count,min,max,max,cd_dep_college_count,cd_dep_count,cd_marital_status,cd_gender,count,count,min,count,max,cd_dep_employed_count,min] [sum,min,sum,sum,min,sum,sum,sum,max,max,count,count,max,count,count,min,count,min,max,max,count,count,min,count,max,min] - Project [cd_dep_college_count,ca_state,cd_dep_employed_count,cd_marital_status,cd_dep_count,cd_gender] + HashAggregate [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status,count,count,count,count,count,count,count,count,max,max,max,max,max,max,min,min,min,min,min,min,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,count,count,max,max,max,max,max,max,min,min,min,min,min,min,sum,sum,sum,sum,sum,sum] + Project [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] Project [c_current_cdemo_sk,ca_state] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_cdemo_sk,c_current_addr_sk] + Project [c_current_addr_sk,c_current_cdemo_sk] Filter [exists,exists] BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] Filter [c_current_addr_sk,c_current_cdemo_sk] - Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_sold_date_sk] Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] [ss_sold_date_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_qoy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + Filter [d_date_sk,d_qoy,d_year] + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [ws_bill_customer_sk] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_customer_sk,ws_sold_date_sk] Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] [ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter @@ -46,9 +46,9 @@ TakeOrderedAndProject [cnt3,max(cd_dep_employed_count),max(cd_dep_count),min(cd_ WholeStageCodegen Project [cs_ship_customer_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_ship_customer_sk] + Project [cs_ship_customer_sk,cs_sold_date_sk] Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_customer_sk] [cs_sold_date_sk,cs_ship_customer_sk] + Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] [cs_ship_customer_sk,cs_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter @@ -60,6 +60,6 @@ TakeOrderedAndProject [cnt3,max(cd_dep_employed_count),max(cd_dep_count),min(cd_ InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [cd_dep_college_count,cd_dep_employed_count,cd_demo_sk,cd_marital_status,cd_dep_count,cd_gender] + Project [cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_dep_college_count,cd_dep_employed_count,cd_demo_sk,cd_marital_status,cd_dep_count,cd_gender] [cd_dep_college_count,cd_dep_employed_count,cd_demo_sk,cd_marital_status,cd_dep_count,cd_gender] + Scan parquet default.customer_demographics [cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] [cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt index ac9f32c0a..8d87de2ff 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt @@ -1,43 +1,43 @@ -TakeOrderedAndProject [i_class,i_category,lochierarchy,gross_margin,rank_within_parent] +TakeOrderedAndProject [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] WholeStageCodegen - Project [i_class,i_category,lochierarchy,gross_margin,rank_within_parent] + Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] InputAdapter - Window [_w3,_w1,_w2] + Window [_w1,_w2,_w3] WholeStageCodegen Sort [_w1,_w2,_w3] InputAdapter Exchange [_w1,_w2] #1 WholeStageCodegen - HashAggregate [sum,i_class,i_category,sum,sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),spark_grouping_id] [_w2,sum,_w1,sum,lochierarchy,sum(UnscaledValue(ss_net_profit)),_w3,sum(UnscaledValue(ss_ext_sales_price)),gross_margin] + HashAggregate [i_category,i_class,spark_grouping_id,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] [_w1,_w2,_w3,gross_margin,lochierarchy,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] InputAdapter Exchange [i_category,i_class,spark_grouping_id] #2 WholeStageCodegen - HashAggregate [sum,sum,sum,i_class,i_category,sum,ss_ext_sales_price,spark_grouping_id,ss_net_profit] [sum,sum,sum,sum] - Expand [ss_ext_sales_price,ss_net_profit,i_category,i_class] - Project [ss_ext_sales_price,ss_net_profit,i_category,i_class] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [i_class,ss_store_sk,ss_ext_sales_price,i_category,ss_net_profit] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] - Filter [ss_sold_date_sk,ss_item_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk] + HashAggregate [i_category,i_class,spark_grouping_id,ss_ext_sales_price,ss_net_profit,sum,sum,sum,sum] [sum,sum,sum,sum] + Expand [i_category,i_class,ss_ext_sales_price,ss_net_profit] + Project [i_category,i_class,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [i_category,i_class,ss_ext_sales_price,ss_net_profit,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [i_item_sk,i_class,i_category] + Project [i_category,i_class,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_class,i_category] [i_item_sk,i_class,i_category] + Scan parquet default.item [i_category,i_class,i_item_sk] [i_category,i_class,i_item_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [s_store_sk] Filter [s_state,s_store_sk] - Scan parquet default.store [s_store_sk,s_state] [s_store_sk,s_state] + Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt index afe830f9d..23745be9e 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt @@ -1,31 +1,31 @@ -TakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] +TakeOrderedAndProject [i_current_price,i_item_desc,i_item_id] WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,i_current_price] + HashAggregate [i_current_price,i_item_desc,i_item_id] InputAdapter - Exchange [i_item_id,i_item_desc,i_current_price] #1 + Exchange [i_current_price,i_item_desc,i_item_id] #1 WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,i_current_price] - Project [i_item_id,i_item_desc,i_current_price] - BroadcastHashJoin [i_item_sk,cs_item_sk] - Project [i_item_sk,i_item_id,i_item_desc,i_current_price] - BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [i_current_price,i_item_sk,inv_date_sk,i_item_desc,i_item_id] + HashAggregate [i_current_price,i_item_desc,i_item_id] + Project [i_current_price,i_item_desc,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk,inv_date_sk] BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [i_item_sk,i_item_id,i_item_desc,i_current_price] - Filter [i_current_price,i_manufact_id,i_item_sk] - Scan parquet default.item [i_current_price,i_manufact_id,i_item_sk,i_item_desc,i_item_id] [i_current_price,i_manufact_id,i_item_sk,i_item_desc,i_item_id] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk] + Filter [i_current_price,i_item_sk,i_manufact_id] + Scan parquet default.item [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [inv_date_sk,inv_item_sk] - Filter [inv_quantity_on_hand,inv_item_sk,inv_date_sk] + Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand] Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt index caec9c677..c7e30522f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt @@ -1,34 +1,34 @@ CollectLimit WholeStageCodegen - HashAggregate [count,count(1)] [count(1),count(1),count] + HashAggregate [count,count(1)] [count,count(1),count(1)] InputAdapter Exchange #1 WholeStageCodegen HashAggregate [count,count] [count,count] - HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,d_date,c_first_name] - HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_first_name,d_date,d_date,c_last_name,c_last_name,c_first_name] - HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] + HashAggregate [c_first_name,c_last_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #2 + Exchange [c_first_name,c_last_name,d_date] #2 WholeStageCodegen - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_customer_sk] - Filter [ss_sold_date_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] [ss_sold_date_sk,ss_customer_sk] + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [d_date,ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] [d_date_sk,d_date,d_month_seq] + Project [d_date,d_date_sk] + Filter [d_date_sk,d_month_seq] + Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] [d_date,d_date_sk,d_month_seq] InputAdapter BroadcastExchange #4 WholeStageCodegen @@ -38,38 +38,38 @@ CollectLimit InputAdapter BroadcastExchange #5 WholeStageCodegen - HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #6 + Exchange [c_first_name,c_last_name,d_date] #6 WholeStageCodegen - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] Project [cs_bill_customer_sk,d_date] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_bill_customer_sk] - Filter [cs_sold_date_sk,cs_bill_customer_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] [cs_sold_date_sk,cs_bill_customer_sk] + Project [cs_bill_customer_sk,cs_sold_date_sk] + Filter [cs_bill_customer_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] [cs_bill_customer_sk,cs_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk,d_date] [d_date_sk,d_date] #3 + ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #3 InputAdapter ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 InputAdapter BroadcastExchange #7 WholeStageCodegen - HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #8 + Exchange [c_first_name,c_last_name,d_date] #8 WholeStageCodegen - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_bill_customer_sk,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_bill_customer_sk] - Filter [ws_sold_date_sk,ws_bill_customer_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] [ws_sold_date_sk,ws_bill_customer_sk] + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Project [d_date,ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_customer_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk,d_date] [d_date_sk,d_date] #3 + ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #3 InputAdapter ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt index 0fadf4ea4..d80f4a4ee 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt @@ -1,25 +1,25 @@ WholeStageCodegen - Sort [d_moy,mean,i_item_sk,w_warehouse_sk,d_moy,mean,cov,cov] + Sort [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] InputAdapter - Exchange [d_moy,mean,i_item_sk,w_warehouse_sk,d_moy,mean,cov,cov] #1 + Exchange [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] #1 WholeStageCodegen - BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] - Project [d_moy,mean,w_warehouse_sk,i_item_sk,stdev] + BroadcastHashJoin [i_item_sk,i_item_sk,w_warehouse_sk,w_warehouse_sk] + Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] Filter [mean,stdev] - HashAggregate [m2,w_warehouse_sk,d_moy,sum,count,n,w_warehouse_name,avg,stddev_samp(cast(inv_quantity_on_hand as double)),i_item_sk,avg(cast(inv_quantity_on_hand as bigint))] [stdev,mean,m2,sum,count,n,avg,stddev_samp(cast(inv_quantity_on_hand as double)),avg(cast(inv_quantity_on_hand as bigint))] + HashAggregate [avg,avg(cast(inv_quantity_on_hand as bigint)),count,d_moy,i_item_sk,m2,n,stddev_samp(cast(inv_quantity_on_hand as double)),sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] InputAdapter - Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 + Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #2 WholeStageCodegen - HashAggregate [m2,w_warehouse_sk,d_moy,m2,sum,count,n,w_warehouse_name,sum,avg,i_item_sk,inv_quantity_on_hand,avg,count,n] [m2,m2,sum,count,n,sum,avg,avg,count,n] - Project [d_moy,inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name] - BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name,inv_date_sk] + HashAggregate [avg,avg,count,count,d_moy,i_item_sk,inv_quantity_on_hand,m2,m2,n,n,sum,sum,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] + Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] - BroadcastHashJoin [inv_item_sk,i_item_sk] - Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] - Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -29,41 +29,41 @@ WholeStageCodegen InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [w_warehouse_sk,w_warehouse_name] + Project [w_warehouse_name,w_warehouse_sk] Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [d_date_sk,d_moy] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [i_item_sk,d_moy,mean,stdev,w_warehouse_sk] + Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] Filter [mean,stdev] - HashAggregate [m2,i_item_sk,sum,count,w_warehouse_name,n,d_moy,avg,stddev_samp(cast(inv_quantity_on_hand as double)),w_warehouse_sk,avg(cast(inv_quantity_on_hand as bigint))] [m2,sum,count,n,mean,avg,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,avg(cast(inv_quantity_on_hand as bigint))] + HashAggregate [avg,avg(cast(inv_quantity_on_hand as bigint)),count,d_moy,i_item_sk,m2,n,stddev_samp(cast(inv_quantity_on_hand as double)),sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] InputAdapter - Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 + Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #7 WholeStageCodegen - HashAggregate [m2,m2,i_item_sk,sum,count,w_warehouse_name,n,d_moy,sum,avg,w_warehouse_sk,inv_quantity_on_hand,avg,count,n] [m2,m2,sum,count,n,sum,avg,avg,count,n] - Project [w_warehouse_sk,inv_quantity_on_hand,w_warehouse_name,i_item_sk,d_moy] - BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [w_warehouse_sk,inv_quantity_on_hand,w_warehouse_name,i_item_sk,inv_date_sk] + HashAggregate [avg,avg,count,count,d_moy,i_item_sk,inv_quantity_on_hand,m2,m2,n,n,sum,sum,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] + Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] - BroadcastHashJoin [inv_item_sk,i_item_sk] - Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] - Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] InputAdapter ReusedExchange [i_item_sk] [i_item_sk] #3 InputAdapter - ReusedExchange [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] #4 + ReusedExchange [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] #4 InputAdapter BroadcastExchange #8 WholeStageCodegen Project [d_date_sk,d_moy] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt index 846b7cca1..d80f4a4ee 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt @@ -1,25 +1,25 @@ WholeStageCodegen - Sort [i_item_sk,mean,w_warehouse_sk,d_moy,mean,cov,d_moy,cov] + Sort [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] InputAdapter - Exchange [i_item_sk,mean,w_warehouse_sk,d_moy,mean,cov,d_moy,cov] #1 + Exchange [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] #1 WholeStageCodegen - BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] - Project [d_moy,w_warehouse_sk,i_item_sk,stdev,mean] + BroadcastHashJoin [i_item_sk,i_item_sk,w_warehouse_sk,w_warehouse_sk] + Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] Filter [mean,stdev] - HashAggregate [avg,w_warehouse_sk,d_moy,w_warehouse_name,m2,avg(cast(inv_quantity_on_hand as bigint)),i_item_sk,n,count,sum,stddev_samp(cast(inv_quantity_on_hand as double))] [avg,m2,stdev,avg(cast(inv_quantity_on_hand as bigint)),n,count,mean,sum,stddev_samp(cast(inv_quantity_on_hand as double))] + HashAggregate [avg,avg(cast(inv_quantity_on_hand as bigint)),count,d_moy,i_item_sk,m2,n,stddev_samp(cast(inv_quantity_on_hand as double)),sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] InputAdapter - Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 + Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #2 WholeStageCodegen - HashAggregate [avg,n,w_warehouse_sk,d_moy,count,w_warehouse_name,m2,sum,i_item_sk,inv_quantity_on_hand,m2,n,avg,count,sum] [avg,n,count,m2,sum,m2,n,avg,count,sum] - Project [d_moy,inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name] - BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [inv_quantity_on_hand,w_warehouse_sk,i_item_sk,w_warehouse_name,inv_date_sk] + HashAggregate [avg,avg,count,count,d_moy,i_item_sk,inv_quantity_on_hand,m2,m2,n,n,sum,sum,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] + Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] - BroadcastHashJoin [inv_item_sk,i_item_sk] - Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] - Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -29,41 +29,41 @@ WholeStageCodegen InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [w_warehouse_sk,w_warehouse_name] + Project [w_warehouse_name,w_warehouse_sk] Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [d_date_sk,d_moy] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [mean,stdev,d_moy,w_warehouse_sk,i_item_sk] + Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] Filter [mean,stdev] - HashAggregate [avg,m2,avg(cast(inv_quantity_on_hand as bigint)),d_moy,w_warehouse_sk,n,i_item_sk,count,w_warehouse_name,sum,stddev_samp(cast(inv_quantity_on_hand as double))] [avg,stdev,m2,avg(cast(inv_quantity_on_hand as bigint)),n,count,sum,mean,stddev_samp(cast(inv_quantity_on_hand as double))] + HashAggregate [avg,avg(cast(inv_quantity_on_hand as bigint)),count,d_moy,i_item_sk,m2,n,stddev_samp(cast(inv_quantity_on_hand as double)),sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] InputAdapter - Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 + Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #7 WholeStageCodegen - HashAggregate [avg,n,count,m2,sum,d_moy,w_warehouse_sk,inv_quantity_on_hand,m2,n,i_item_sk,avg,count,w_warehouse_name,sum] [avg,n,count,m2,sum,m2,n,avg,count,sum] - Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,d_moy,w_warehouse_name] - BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,inv_date_sk,w_warehouse_name] + HashAggregate [avg,avg,count,count,d_moy,i_item_sk,inv_quantity_on_hand,m2,m2,n,n,sum,sum,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] + Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] - BroadcastHashJoin [inv_item_sk,i_item_sk] - Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] - Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] InputAdapter ReusedExchange [i_item_sk] [i_item_sk] #3 InputAdapter - ReusedExchange [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] #4 + ReusedExchange [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] #4 InputAdapter BroadcastExchange #8 WholeStageCodegen Project [d_date_sk,d_moy] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt index b055f5dbc..af3c3b475 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt @@ -1,41 +1,41 @@ -TakeOrderedAndProject [customer_email_address,customer_login,customer_first_name,customer_last_name,customer_birth_country,customer_id,customer_preferred_cust_flag] +TakeOrderedAndProject [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag] WholeStageCodegen - Project [customer_email_address,customer_login,customer_first_name,customer_last_name,customer_birth_country,customer_id,customer_preferred_cust_flag] - BroadcastHashJoin [year_total,year_total,customer_id,year_total,year_total,customer_id] - Project [year_total,customer_birth_country,customer_preferred_cust_flag,year_total,customer_email_address,customer_first_name,year_total,customer_login,customer_id,customer_id,customer_last_name] + Project [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total,year_total,year_total] BroadcastHashJoin [customer_id,customer_id] - Project [year_total,customer_birth_country,customer_preferred_cust_flag,customer_email_address,customer_first_name,year_total,customer_login,customer_id,customer_id,customer_last_name] - BroadcastHashJoin [year_total,year_total,year_total,year_total,customer_id,customer_id] - Project [customer_birth_country,customer_preferred_cust_flag,customer_email_address,customer_first_name,year_total,customer_login,customer_id,customer_id,customer_last_name,year_total,year_total] + Project [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total,year_total] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total,year_total,year_total] BroadcastHashJoin [customer_id,customer_id] BroadcastHashJoin [customer_id,customer_id] InputAdapter Union WholeStageCodegen Filter [year_total] - HashAggregate [d_year,c_login,c_birth_country,c_customer_id,c_last_name,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_preferred_cust_flag,sum,c_first_name,c_email_address] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] [customer_id,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),year_total] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #1 + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #1 WholeStageCodegen - HashAggregate [ss_ext_wholesale_cost,d_year,ss_ext_discount_amt,c_login,sum,c_birth_country,c_customer_id,c_last_name,ss_ext_sales_price,c_preferred_cust_flag,sum,ss_ext_list_price,c_first_name,c_email_address] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,d_year,c_login,ss_ext_sales_price,c_last_name,c_first_name,c_birth_country,ss_ext_list_price,ss_ext_wholesale_cost] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,c_login,ss_ext_sales_price,c_last_name,c_first_name,ss_sold_date_sk,c_birth_country,ss_ext_list_price,ss_ext_wholesale_cost] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,sum,sum] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] - Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + Filter [c_customer_id,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [ss_ext_discount_amt,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ext_list_price,ss_ext_wholesale_cost] + Project [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_ext_discount_amt,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ext_list_price,ss_ext_wholesale_cost] [ss_ext_discount_amt,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ext_list_price,ss_ext_wholesale_cost] + Scan parquet default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk,d_year] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] LocalTableScan [customer_id,year_total] [customer_id,year_total] LocalTableScan [customer_id,year_total] [customer_id,year_total] @@ -43,52 +43,52 @@ TakeOrderedAndProject [customer_email_address,customer_login,customer_first_name BroadcastExchange #4 Union WholeStageCodegen - HashAggregate [d_year,c_login,c_birth_country,sum,c_customer_id,c_last_name,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_preferred_cust_flag,c_first_name,c_email_address] [customer_birth_country,customer_preferred_cust_flag,customer_email_address,customer_first_name,sum,customer_login,customer_id,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_last_name,year_total] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),year_total] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #5 + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #5 WholeStageCodegen - HashAggregate [ss_ext_wholesale_cost,d_year,ss_ext_discount_amt,c_login,sum,c_birth_country,sum,c_customer_id,c_last_name,ss_ext_sales_price,c_preferred_cust_flag,ss_ext_list_price,c_first_name,c_email_address] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,d_year,c_login,ss_ext_sales_price,c_last_name,c_first_name,c_birth_country,ss_ext_list_price,ss_ext_wholesale_cost] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ss_ext_discount_amt,c_login,ss_ext_sales_price,c_last_name,c_first_name,ss_sold_date_sk,c_birth_country,ss_ext_list_price,ss_ext_wholesale_cost] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,sum,sum] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] - Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + Filter [c_customer_id,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] InputAdapter - ReusedExchange [ss_ext_discount_amt,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ext_list_price,ss_ext_wholesale_cost] [ss_ext_discount_amt,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ext_list_price,ss_ext_wholesale_cost] #2 + ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] #2 InputAdapter BroadcastExchange #6 WholeStageCodegen Project [d_date_sk,d_year] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - LocalTableScan [customer_id,year_total,customer_preferred_cust_flag,customer_last_name,customer_email_address,customer_birth_country,customer_login,customer_first_name] [customer_id,year_total,customer_preferred_cust_flag,customer_last_name,customer_email_address,customer_birth_country,customer_login,customer_first_name] - LocalTableScan [customer_first_name,customer_birth_country,customer_preferred_cust_flag,customer_id,customer_email_address,customer_last_name,customer_login,year_total] [customer_first_name,customer_birth_country,customer_preferred_cust_flag,customer_id,customer_email_address,customer_last_name,customer_login,year_total] + LocalTableScan [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total] [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total] + LocalTableScan [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total] [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total] InputAdapter BroadcastExchange #7 Union LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen Filter [year_total] - HashAggregate [d_year,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_login,sum,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] [customer_id,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),year_total] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #8 + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #8 WholeStageCodegen - HashAggregate [d_year,sum,c_login,sum,cs_ext_discount_amt,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,cs_ext_wholesale_cost,c_first_name,cs_ext_sales_price,c_email_address,cs_ext_list_price] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,d_year,cs_ext_discount_amt,c_login,c_last_name,c_first_name,c_birth_country,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,d_year,sum,sum] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,d_year] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,cs_ext_discount_amt,c_login,c_last_name,cs_sold_date_sk,c_first_name,c_birth_country,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] - Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + Filter [c_customer_id,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] InputAdapter BroadcastExchange #9 WholeStageCodegen - Project [cs_ext_discount_amt,cs_bill_customer_sk,cs_sold_date_sk,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] + Project [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] Filter [cs_bill_customer_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ext_discount_amt,cs_bill_customer_sk,cs_sold_date_sk,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] [cs_ext_discount_amt,cs_bill_customer_sk,cs_sold_date_sk,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 LocalTableScan [customer_id,year_total] [customer_id,year_total] @@ -97,20 +97,20 @@ TakeOrderedAndProject [customer_email_address,customer_login,customer_first_name Union LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen - HashAggregate [d_year,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] [customer_id,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),year_total] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #11 + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #11 WholeStageCodegen - HashAggregate [d_year,sum,c_login,cs_ext_discount_amt,sum,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,cs_ext_wholesale_cost,c_first_name,cs_ext_sales_price,c_email_address,cs_ext_list_price] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,d_year,cs_ext_discount_amt,c_login,c_last_name,c_first_name,c_birth_country,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,d_year,sum,sum] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,d_year] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,cs_ext_discount_amt,c_login,c_last_name,cs_sold_date_sk,c_first_name,c_birth_country,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] - Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + Filter [c_customer_id,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] InputAdapter - ReusedExchange [cs_ext_discount_amt,cs_bill_customer_sk,cs_sold_date_sk,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] [cs_ext_discount_amt,cs_bill_customer_sk,cs_sold_date_sk,cs_ext_wholesale_cost,cs_ext_list_price,cs_ext_sales_price] #9 + ReusedExchange [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] #9 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #6 LocalTableScan [customer_id,year_total] [customer_id,year_total] @@ -121,24 +121,24 @@ TakeOrderedAndProject [customer_email_address,customer_login,customer_first_name LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen Filter [year_total] - HashAggregate [d_year,c_login,c_birth_country,c_customer_id,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),sum,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] [customer_id,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),year_total] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #13 + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #13 WholeStageCodegen - HashAggregate [d_year,ws_ext_wholesale_cost,ws_ext_discount_amt,c_login,sum,ws_ext_sales_price,c_birth_country,c_customer_id,sum,c_last_name,c_preferred_cust_flag,ws_ext_list_price,c_first_name,c_email_address] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,d_year,ws_ext_sales_price,ws_ext_discount_amt,c_login,c_last_name,ws_ext_list_price,c_first_name,c_birth_country,ws_ext_wholesale_cost] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ws_ext_sales_price,ws_ext_discount_amt,c_login,c_last_name,ws_sold_date_sk,ws_ext_list_price,c_first_name,c_birth_country,ws_ext_wholesale_cost] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] - Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + Filter [c_customer_id,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] InputAdapter BroadcastExchange #14 WholeStageCodegen - Project [ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk,ws_ext_list_price,ws_bill_customer_sk,ws_ext_wholesale_cost] + Project [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] Filter [ws_bill_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk,ws_ext_list_price,ws_bill_customer_sk,ws_ext_wholesale_cost] [ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk,ws_ext_list_price,ws_bill_customer_sk,ws_ext_wholesale_cost] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 InputAdapter @@ -147,19 +147,19 @@ TakeOrderedAndProject [customer_email_address,customer_login,customer_first_name LocalTableScan [customer_id,year_total] [customer_id,year_total] LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen - HashAggregate [d_year,c_login,c_birth_country,c_customer_id,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),c_last_name,c_preferred_cust_flag,c_first_name,c_email_address,sum] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),customer_id,year_total,sum] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] [customer_id,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),year_total] InputAdapter - Exchange [d_year,c_login,c_birth_country,c_customer_id,c_last_name,c_preferred_cust_flag,c_first_name,c_email_address] #16 + Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #16 WholeStageCodegen - HashAggregate [d_year,ws_ext_wholesale_cost,ws_ext_discount_amt,c_login,ws_ext_sales_price,c_birth_country,c_customer_id,sum,c_last_name,c_preferred_cust_flag,ws_ext_list_price,c_first_name,c_email_address,sum] [sum,sum] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,d_year,ws_ext_sales_price,ws_ext_discount_amt,c_login,c_last_name,ws_ext_list_price,c_first_name,c_birth_country,ws_ext_wholesale_cost] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,ws_ext_sales_price,ws_ext_discount_amt,c_login,c_last_name,ws_sold_date_sk,ws_ext_list_price,c_first_name,c_birth_country,ws_ext_wholesale_cost] + HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost] [sum,sum] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] - Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] [c_preferred_cust_flag,c_email_address,c_customer_id,c_customer_sk,c_login,c_last_name,c_first_name,c_birth_country] + Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + Filter [c_customer_id,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] InputAdapter - ReusedExchange [ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk,ws_ext_list_price,ws_bill_customer_sk,ws_ext_wholesale_cost] [ws_ext_sales_price,ws_ext_discount_amt,ws_sold_date_sk,ws_ext_list_price,ws_bill_customer_sk,ws_ext_wholesale_cost] #14 + ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] #14 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #6 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt index 2e480b9ea..fcc311d0d 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt @@ -1,42 +1,42 @@ -TakeOrderedAndProject [w_state,i_item_id,sales_before,sales_after] +TakeOrderedAndProject [i_item_id,sales_after,sales_before,w_state] WholeStageCodegen - HashAggregate [i_item_id,sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),w_state,sum] [sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sales_before,sales_after,sum] + HashAggregate [i_item_id,sum,sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),w_state] [sales_after,sales_before,sum,sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] InputAdapter - Exchange [w_state,i_item_id] #1 + Exchange [i_item_id,w_state] #1 WholeStageCodegen - HashAggregate [i_item_id,sum,sum,d_date,w_state,sum,cs_sales_price,cr_refunded_cash,sum] [sum,sum,sum,sum] - Project [d_date,cs_sales_price,w_state,cr_refunded_cash,i_item_id] + HashAggregate [cr_refunded_cash,cs_sales_price,d_date,i_item_id,sum,sum,sum,sum,w_state] [sum,sum,sum,sum] + Project [cr_refunded_cash,cs_sales_price,d_date,i_item_id,w_state] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sales_price,w_state,cs_sold_date_sk,cr_refunded_cash,i_item_id] + Project [cr_refunded_cash,cs_sales_price,cs_sold_date_sk,i_item_id,w_state] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_sales_price,w_state,cs_sold_date_sk,cs_item_sk,cr_refunded_cash] + Project [cr_refunded_cash,cs_item_sk,cs_sales_price,cs_sold_date_sk,w_state] BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] - Project [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk,cr_refunded_cash] - BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] - Filter [cs_warehouse_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] [cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] + Project [cr_refunded_cash,cs_item_sk,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] + Filter [cs_item_sk,cs_sold_date_sk,cs_warehouse_sk] + Scan parquet default.catalog_sales [cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] [cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [cr_item_sk,cr_order_number,cr_refunded_cash] - Filter [cr_order_number,cr_item_sk] + Filter [cr_item_sk,cr_order_number] Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash] [cr_item_sk,cr_order_number,cr_refunded_cash] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [w_warehouse_sk,w_state] + Project [w_state,w_warehouse_sk] Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_sk,w_state] [w_warehouse_sk,w_state] + Scan parquet default.warehouse [w_state,w_warehouse_sk] [w_state,w_warehouse_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [i_item_sk,i_item_id] + Project [i_item_id,i_item_sk] Filter [i_current_price,i_item_sk] - Scan parquet default.item [i_item_sk,i_item_id,i_current_price] [i_item_sk,i_item_id,i_current_price] + Scan parquet default.item [i_current_price,i_item_id,i_item_sk] [i_current_price,i_item_id,i_item_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [d_date_sk,d_date] + Project [d_date,d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt index caac67bf3..7196f719a 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt @@ -8,18 +8,18 @@ TakeOrderedAndProject [i_product_name] Project [i_product_name] BroadcastHashJoin [i_manufact,i_manufact] Project [i_manufact,i_product_name] - Filter [i_manufact_id,i_manufact] - Scan parquet default.item [i_manufact_id,i_manufact,i_product_name] [i_manufact_id,i_manufact,i_product_name] + Filter [i_manufact,i_manufact_id] + Scan parquet default.item [i_manufact,i_manufact_id,i_product_name] [i_manufact,i_manufact_id,i_product_name] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [i_manufact] Filter [alwaysTrue,item_cnt] - HashAggregate [i_manufact,count,count(1)] [i_manufact,alwaysTrue,item_cnt,count,count(1)] + HashAggregate [count,count(1),i_manufact] [alwaysTrue,count,count(1),i_manufact,item_cnt] InputAdapter Exchange [i_manufact] #3 WholeStageCodegen - HashAggregate [i_manufact,count,count] [count,count] + HashAggregate [count,count,i_manufact] [count,count] Project [i_manufact] - Filter [i_units,i_manufact,i_color,i_size,i_category] - Scan parquet default.item [i_manufact,i_units,i_size,i_category,i_color] [i_manufact,i_units,i_size,i_category,i_color] + Filter [i_category,i_color,i_manufact,i_size,i_units] + Scan parquet default.item [i_category,i_color,i_manufact,i_size,i_units] [i_category,i_color,i_manufact,i_size,i_units] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt index abcbca371..67de156ac 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt @@ -1,26 +1,26 @@ -TakeOrderedAndProject [sum(ss_ext_sales_price),d_year,i_category_id,i_category] +TakeOrderedAndProject [d_year,i_category,i_category_id,sum(ss_ext_sales_price)] WholeStageCodegen - HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),sum,d_year,i_category,i_category_id] [sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price),sum] + HashAggregate [d_year,i_category,i_category_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price)] InputAdapter - Exchange [d_year,i_category_id,i_category] #1 + Exchange [d_year,i_category,i_category_id] #1 WholeStageCodegen - HashAggregate [sum,d_year,sum,i_category,i_category_id,ss_ext_sales_price] [sum,sum] - Project [d_year,ss_ext_sales_price,i_category_id,i_category] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [d_year,ss_item_sk,ss_ext_sales_price] + HashAggregate [d_year,i_category,i_category_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [d_year,i_category,i_category_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_year,ss_ext_sales_price,ss_item_sk] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] Project [d_date_sk,d_year] - Filter [d_moy,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] - Filter [ss_sold_date_sk,ss_item_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [i_item_sk,i_category_id,i_category] - Filter [i_manager_id,i_item_sk] - Scan parquet default.item [i_item_sk,i_category_id,i_category,i_manager_id] [i_item_sk,i_category_id,i_category,i_manager_id] + Project [i_category,i_category_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + Scan parquet default.item [i_category,i_category_id,i_item_sk,i_manager_id] [i_category,i_category_id,i_item_sk,i_manager_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt index a5e60a60c..136a277e1 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt @@ -1,26 +1,26 @@ -TakeOrderedAndProject [sat_sales,fri_sales,sun_sales,s_store_id,tue_sales,mon_sales,s_store_name,thu_sales,wed_sales] +TakeOrderedAndProject [fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,sum,s_store_id,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),s_store_name,sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END))] [sum,sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sat_sales,fri_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sun_sales,sum,sum,sum,sum,tue_sales,mon_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),thu_sales,wed_sales] + HashAggregate [s_store_id,s_store_name,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] InputAdapter - Exchange [s_store_name,s_store_id] #1 + Exchange [s_store_id,s_store_name] #1 WholeStageCodegen - HashAggregate [sum,sum,sum,d_day_name,sum,sum,sum,sum,sum,sum,s_store_id,ss_sales_price,sum,sum,sum,sum,sum,s_store_name] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [d_day_name,ss_sales_price,s_store_id,s_store_name] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [d_day_name,ss_store_sk,ss_sales_price] + HashAggregate [d_day_name,s_store_id,s_store_name,ss_sales_price,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,s_store_id,s_store_name,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_day_name,ss_sales_price,ss_store_sk] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] Project [d_date_sk,d_day_name] - Filter [d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_day_name] [d_date_sk,d_year,d_day_name] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_day_name,d_year] [d_date_sk,d_day_name,d_year] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [ss_sold_date_sk,ss_store_sk,ss_sales_price] + Project [ss_sales_price,ss_sold_date_sk,ss_store_sk] Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_store_sk,ss_sales_price] + Scan parquet default.store_sales [ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [s_store_sk,s_store_id,s_store_name] + Project [s_store_id,s_store_name,s_store_sk] Filter [s_gmt_offset,s_store_sk] - Scan parquet default.store [s_store_sk,s_store_id,s_store_name,s_gmt_offset] [s_store_sk,s_store_id,s_store_name,s_gmt_offset] + Scan parquet default.store [s_gmt_offset,s_store_id,s_store_name,s_store_sk] [s_gmt_offset,s_store_id,s_store_name,s_store_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt index 55aab5cc2..139fd1f7b 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt @@ -1,13 +1,13 @@ -TakeOrderedAndProject [rnk,best_performing,worst_performing] +TakeOrderedAndProject [best_performing,rnk,worst_performing] WholeStageCodegen - Project [rnk,i_product_name,i_product_name] - BroadcastHashJoin [item_sk,i_item_sk] - Project [rnk,item_sk,i_product_name] - BroadcastHashJoin [item_sk,i_item_sk] - Project [item_sk,rnk,item_sk] + Project [i_product_name,i_product_name,rnk] + BroadcastHashJoin [i_item_sk,item_sk] + Project [i_product_name,item_sk,rnk] + BroadcastHashJoin [i_item_sk,item_sk] + Project [item_sk,item_sk,rnk] BroadcastHashJoin [rnk,rnk] Project [item_sk,rnk] - Filter [rnk,item_sk] + Filter [item_sk,rnk] InputAdapter Window [rank_col] WholeStageCodegen @@ -19,27 +19,27 @@ TakeOrderedAndProject [rnk,best_performing,worst_performing] Filter [avg(ss_net_profit)] Subquery #1 WholeStageCodegen - HashAggregate [ss_store_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [avg(UnscaledValue(ss_net_profit)),rank_col,sum,count] + HashAggregate [avg(UnscaledValue(ss_net_profit)),count,ss_store_sk,sum] [avg(UnscaledValue(ss_net_profit)),count,rank_col,sum] InputAdapter Exchange [ss_store_sk] #3 WholeStageCodegen - HashAggregate [count,ss_store_sk,sum,sum,count,ss_net_profit] [sum,count,sum,count] - Project [ss_store_sk,ss_net_profit] - Filter [ss_store_sk,ss_addr_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_store_sk,ss_net_profit] [ss_addr_sk,ss_store_sk,ss_net_profit] - HashAggregate [ss_item_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [avg(UnscaledValue(ss_net_profit)),rank_col,sum,avg(ss_net_profit),item_sk,count] + HashAggregate [count,count,ss_net_profit,ss_store_sk,sum,sum] [count,count,sum,sum] + Project [ss_net_profit,ss_store_sk] + Filter [ss_addr_sk,ss_store_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_net_profit,ss_store_sk] [ss_addr_sk,ss_net_profit,ss_store_sk] + HashAggregate [avg(UnscaledValue(ss_net_profit)),count,ss_item_sk,sum] [avg(UnscaledValue(ss_net_profit)),avg(ss_net_profit),count,item_sk,rank_col,sum] InputAdapter Exchange [ss_item_sk] #2 WholeStageCodegen - HashAggregate [ss_item_sk,sum,sum,count,count,ss_net_profit] [sum,count,sum,count] + HashAggregate [count,count,ss_item_sk,ss_net_profit,sum,sum] [count,count,sum,sum] Project [ss_item_sk,ss_net_profit] Filter [ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_profit] [ss_item_sk,ss_store_sk,ss_net_profit] + Scan parquet default.store_sales [ss_item_sk,ss_net_profit,ss_store_sk] [ss_item_sk,ss_net_profit,ss_store_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [item_sk,rnk] - Filter [rnk,item_sk] + Filter [item_sk,rnk] InputAdapter Window [rank_col] WholeStageCodegen @@ -51,17 +51,17 @@ TakeOrderedAndProject [rnk,best_performing,worst_performing] Filter [avg(ss_net_profit)] Subquery #2 WholeStageCodegen - HashAggregate [ss_store_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [avg(UnscaledValue(ss_net_profit)),rank_col,sum,count] + HashAggregate [avg(UnscaledValue(ss_net_profit)),count,ss_store_sk,sum] [avg(UnscaledValue(ss_net_profit)),count,rank_col,sum] InputAdapter Exchange [ss_store_sk] #6 WholeStageCodegen - HashAggregate [count,ss_store_sk,sum,count,sum,ss_net_profit] [sum,count,sum,count] - Project [ss_store_sk,ss_net_profit] - Filter [ss_store_sk,ss_addr_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_store_sk,ss_net_profit] [ss_addr_sk,ss_store_sk,ss_net_profit] - HashAggregate [ss_item_sk,sum,count,avg(UnscaledValue(ss_net_profit))] [avg(ss_net_profit),item_sk,count,avg(UnscaledValue(ss_net_profit)),rank_col,sum] + HashAggregate [count,count,ss_net_profit,ss_store_sk,sum,sum] [count,count,sum,sum] + Project [ss_net_profit,ss_store_sk] + Filter [ss_addr_sk,ss_store_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_net_profit,ss_store_sk] [ss_addr_sk,ss_net_profit,ss_store_sk] + HashAggregate [avg(UnscaledValue(ss_net_profit)),count,ss_item_sk,sum] [avg(UnscaledValue(ss_net_profit)),avg(ss_net_profit),count,item_sk,rank_col,sum] InputAdapter - ReusedExchange [ss_item_sk,sum,count] [ss_item_sk,sum,count] #2 + ReusedExchange [count,ss_item_sk,sum] [count,ss_item_sk,sum] #2 InputAdapter BroadcastExchange #7 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt index f6d66999d..72b6b03ea 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt @@ -1,30 +1,30 @@ -TakeOrderedAndProject [ca_zip,ca_city,sum(ws_sales_price)] +TakeOrderedAndProject [ca_city,ca_zip,sum(ws_sales_price)] WholeStageCodegen - HashAggregate [ca_zip,ca_city,sum,sum(UnscaledValue(ws_sales_price))] [sum(UnscaledValue(ws_sales_price)),sum(ws_sales_price),sum] + HashAggregate [ca_city,ca_zip,sum,sum(UnscaledValue(ws_sales_price))] [sum,sum(UnscaledValue(ws_sales_price)),sum(ws_sales_price)] InputAdapter - Exchange [ca_zip,ca_city] #1 + Exchange [ca_city,ca_zip] #1 WholeStageCodegen - HashAggregate [ca_city,ca_zip,sum,ws_sales_price,sum] [sum,sum] - Project [ws_sales_price,ca_city,ca_zip] + HashAggregate [ca_city,ca_zip,sum,sum,ws_sales_price] [sum,sum] + Project [ca_city,ca_zip,ws_sales_price] Filter [ca_zip,exists] BroadcastHashJoin [i_item_id,i_item_id] - Project [ws_sales_price,ca_city,ca_zip,i_item_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk,ws_sales_price,ca_city,ca_zip] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ca_zip,ca_city,ws_sold_date_sk,ws_sales_price,ws_item_sk] + Project [ca_city,ca_zip,i_item_id,ws_sales_price] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ca_city,ca_zip,ws_item_sk,ws_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ca_city,ca_zip,ws_item_sk,ws_sales_price,ws_sold_date_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_sales_price,c_current_addr_sk] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_sales_price] - Filter [ws_bill_customer_sk,ws_sold_date_sk,ws_item_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_sales_price] [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_sales_price] + Project [c_current_addr_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Project [ws_bill_customer_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] [ws_bill_customer_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [c_customer_sk,c_current_addr_sk] - Filter [c_customer_sk,c_current_addr_sk] - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] [c_customer_sk,c_current_addr_sk] + Project [c_current_addr_sk,c_customer_sk] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -35,17 +35,17 @@ TakeOrderedAndProject [ca_zip,ca_city,sum(ws_sales_price)] BroadcastExchange #4 WholeStageCodegen Project [d_date_sk] - Filter [d_qoy,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + Filter [d_date_sk,d_qoy,d_year] + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [i_item_sk,i_item_id] + Project [i_item_id,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [i_item_id] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt index 3010603c0..213915231 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt @@ -1,42 +1,42 @@ -TakeOrderedAndProject [bought_city,ss_ticket_number,profit,c_last_name,c_first_name,amt,ca_city] +TakeOrderedAndProject [amt,bought_city,c_first_name,c_last_name,ca_city,profit,ss_ticket_number] WholeStageCodegen - Project [bought_city,ss_ticket_number,profit,c_last_name,c_first_name,amt,ca_city] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] - Project [c_current_addr_sk,profit,amt,c_last_name,c_first_name,ss_ticket_number,bought_city] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - HashAggregate [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_coupon_amt)),sum,ca_city,ss_customer_sk,sum,ss_ticket_number,ss_addr_sk] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_coupon_amt)),profit,sum,amt,sum,bought_city] + Project [amt,bought_city,c_first_name,c_last_name,ca_city,profit,ss_ticket_number] + BroadcastHashJoin [bought_city,c_current_addr_sk,ca_address_sk,ca_city] + Project [amt,bought_city,c_current_addr_sk,c_first_name,c_last_name,profit,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] [amt,bought_city,profit,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] InputAdapter - Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 + Exchange [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #1 WholeStageCodegen - HashAggregate [sum,ca_city,ss_customer_sk,ss_coupon_amt,sum,sum,sum,ss_ticket_number,ss_addr_sk,ss_net_profit] [sum,sum,sum,sum] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ca_city,ss_net_profit,ss_ticket_number] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] + HashAggregate [ca_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number,sum,sum,sum,sum] [sum,sum,sum,sum] + Project [ca_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_hdemo_sk,ss_ticket_number] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_hdemo_sk,ss_ticket_number] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] - Filter [ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_addr_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Filter [ss_addr_sk,ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [d_date_sk] - Filter [d_dow,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_dow] [d_date_sk,d_year,d_dow] + Filter [d_date_sk,d_dow,d_year] + Scan parquet default.date_dim [d_date_sk,d_dow,d_year] [d_date_sk,d_dow,d_year] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [s_store_sk] Filter [s_city,s_store_sk] - Scan parquet default.store [s_store_sk,s_city] [s_store_sk,s_city] + Scan parquet default.store [s_city,s_store_sk] [s_city,s_store_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [hd_demo_sk] - Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] InputAdapter BroadcastExchange #5 @@ -47,8 +47,8 @@ TakeOrderedAndProject [bought_city,ss_ticket_number,profit,c_last_name,c_first_n InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] - Filter [c_customer_sk,c_current_addr_sk] - Scan parquet default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + Project [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] InputAdapter ReusedExchange [ca_address_sk,ca_city] [ca_address_sk,ca_city] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt index 656b50091..f0ed21cce 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt @@ -1,77 +1,77 @@ -TakeOrderedAndProject [d_year,d_moy,s_store_name,i_category,psum,i_brand,sum_sales,s_company_name,avg_monthly_sales,nsum] +TakeOrderedAndProject [avg_monthly_sales,d_moy,d_year,i_brand,i_category,nsum,psum,s_company_name,s_store_name,sum_sales] WholeStageCodegen - Project [s_store_name,d_year,avg_monthly_sales,d_moy,sum_sales,sum_sales,i_category,i_brand,s_company_name,sum_sales] - BroadcastHashJoin [s_store_name,s_company_name,i_brand,i_category,i_category,rn,i_brand,s_store_name,rn,s_company_name] - Project [d_year,s_company_name,rn,d_moy,sum_sales,i_brand,sum_sales,i_category,s_store_name,avg_monthly_sales] - BroadcastHashJoin [s_store_name,i_category,i_category,s_company_name,rn,i_brand,rn,s_company_name,s_store_name,i_brand] - Project [d_year,s_company_name,rn,d_moy,i_brand,sum_sales,i_category,s_store_name,avg_monthly_sales] - Filter [avg_monthly_sales,sum_sales,rn] + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,rn,s_company_name,s_store_name,sum_sales,sum_sales] + BroadcastHashJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] + Filter [avg_monthly_sales,rn,sum_sales] InputAdapter - Window [d_year,s_company_name,_w0,i_brand,i_category,s_store_name] + Window [_w0,d_year,i_brand,i_category,s_company_name,s_store_name] WholeStageCodegen Filter [d_year] InputAdapter - Window [d_year,s_company_name,d_moy,i_brand,i_category,s_store_name] + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] WholeStageCodegen - Sort [d_year,s_company_name,d_moy,i_brand,i_category,s_store_name] + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] InputAdapter - Exchange [i_category,i_brand,s_store_name,s_company_name] #1 + Exchange [i_brand,i_category,s_company_name,s_store_name] #1 WholeStageCodegen - HashAggregate [d_year,s_company_name,d_moy,sum,sum(UnscaledValue(ss_sales_price)),i_brand,i_category,s_store_name] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] InputAdapter - Exchange [d_year,s_company_name,d_moy,i_brand,i_category,s_store_name] #2 + Exchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] #2 WholeStageCodegen - HashAggregate [d_year,s_company_name,d_moy,sum,i_brand,ss_sales_price,i_category,sum,s_store_name] [sum,sum] - Project [s_store_name,s_company_name,d_moy,d_year,i_category,ss_sales_price,i_brand] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [d_moy,d_year,ss_store_sk,i_category,ss_sales_price,i_brand] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_store_sk,i_category,ss_sales_price,ss_sold_date_sk,i_brand] + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price,sum,sum] [sum,sum] + Project [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,d_year,i_brand,i_category,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand,i_category,ss_sales_price,ss_sold_date_sk,ss_store_sk] BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_item_sk,i_brand,i_category] - Filter [i_item_sk,i_brand,i_category] - Scan parquet default.item [i_item_sk,i_brand,i_category] [i_item_sk,i_brand,i_category] + Project [i_brand,i_category,i_item_sk] + Filter [i_brand,i_category,i_item_sk] + Scan parquet default.item [i_brand,i_category,i_item_sk] [i_brand,i_category,i_item_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [d_date_sk,d_year,d_moy] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Project [d_date_sk,d_moy,d_year] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [s_store_sk,s_store_name,s_company_name] - Filter [s_store_sk,s_store_name,s_company_name] - Scan parquet default.store [s_store_sk,s_store_name,s_company_name] [s_store_sk,s_store_name,s_company_name] + Project [s_company_name,s_store_name,s_store_sk] + Filter [s_company_name,s_store_name,s_store_sk] + Scan parquet default.store [s_company_name,s_store_name,s_store_sk] [s_company_name,s_store_name,s_store_sk] InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [i_category,rn,s_company_name,s_store_name,sum_sales,i_brand] + Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] Filter [rn] InputAdapter - Window [i_category,s_company_name,s_store_name,d_moy,d_year,i_brand] + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] WholeStageCodegen - Sort [i_category,s_company_name,s_store_name,d_moy,d_year,i_brand] + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] InputAdapter - Exchange [i_category,i_brand,s_store_name,s_company_name] #7 + Exchange [i_brand,i_category,s_company_name,s_store_name] #7 WholeStageCodegen - HashAggregate [i_category,s_company_name,s_store_name,sum(UnscaledValue(ss_sales_price)),d_moy,d_year,sum,i_brand] [sum(UnscaledValue(ss_sales_price)),sum_sales,sum] + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum,sum(UnscaledValue(ss_sales_price))] [sum,sum(UnscaledValue(ss_sales_price)),sum_sales] InputAdapter - ReusedExchange [i_category,s_company_name,s_store_name,d_moy,d_year,sum,i_brand] [i_category,s_company_name,s_store_name,d_moy,d_year,sum,i_brand] #2 + ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] #2 InputAdapter BroadcastExchange #8 WholeStageCodegen - Project [rn,i_category,s_store_name,i_brand,s_company_name,sum_sales] + Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] Filter [rn] InputAdapter - Window [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year] + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] WholeStageCodegen - Sort [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year] + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] InputAdapter - ReusedExchange [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year,sum_sales] [i_category,s_store_name,i_brand,s_company_name,d_moy,d_year,sum_sales] #7 + ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] #7 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt index 78fb16b88..b4f78ab5c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt @@ -1,20 +1,20 @@ WholeStageCodegen - HashAggregate [sum,sum(cast(ss_quantity as bigint))] [sum(cast(ss_quantity as bigint)),sum(ss_quantity),sum] + HashAggregate [sum,sum(cast(ss_quantity as bigint))] [sum,sum(cast(ss_quantity as bigint)),sum(ss_quantity)] InputAdapter Exchange #1 WholeStageCodegen HashAggregate [ss_quantity,sum,sum] [sum,sum] Project [ss_quantity] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity] - BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] - Project [ss_sold_date_sk,ss_addr_sk,ss_quantity,ss_net_profit] - BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk,ss_sales_price,cd_education_status,cd_marital_status] - Project [ss_addr_sk,ss_quantity,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_addr_sk,ss_quantity,ss_store_sk,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] - Filter [ss_store_sk,ss_cdemo_sk,ss_addr_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_quantity,ss_store_sk,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] [ss_addr_sk,ss_quantity,ss_store_sk,ss_sales_price,ss_net_profit,ss_cdemo_sk,ss_sold_date_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [ca_address_sk,ca_state,ss_addr_sk,ss_net_profit] + Project [ss_addr_sk,ss_net_profit,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cd_education_status,cd_marital_status,ss_cdemo_sk,ss_sales_price] + Project [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_addr_sk,ss_cdemo_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen @@ -24,18 +24,18 @@ WholeStageCodegen InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [cd_demo_sk,cd_marital_status,cd_education_status] + Project [cd_demo_sk,cd_education_status,cd_marital_status] Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [ca_address_sk,ca_state] - Filter [ca_country,ca_address_sk] - Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] [ca_address_sk,ca_state,ca_country] + Filter [ca_address_sk,ca_country] + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] [ca_address_sk,ca_country,ca_state] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt index e0a243a63..7c9fedc27 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt @@ -1,15 +1,15 @@ -TakeOrderedAndProject [channel,currency_rank,return_rank,item,return_ratio] +TakeOrderedAndProject [channel,currency_rank,item,return_rank,return_ratio] WholeStageCodegen - HashAggregate [channel,currency_rank,return_rank,item,return_ratio] + HashAggregate [channel,currency_rank,item,return_rank,return_ratio] InputAdapter - Exchange [channel,currency_rank,return_rank,item,return_ratio] #1 + Exchange [channel,currency_rank,item,return_rank,return_ratio] #1 WholeStageCodegen - HashAggregate [channel,currency_rank,return_rank,item,return_ratio] + HashAggregate [channel,currency_rank,item,return_rank,return_ratio] InputAdapter Union WholeStageCodegen - Project [item,return_ratio,return_rank,currency_rank] - Filter [return_rank,currency_rank] + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] InputAdapter Window [currency_ratio] WholeStageCodegen @@ -21,33 +21,33 @@ TakeOrderedAndProject [channel,currency_rank,return_rank,item,return_ratio] InputAdapter Exchange #2 WholeStageCodegen - HashAggregate [sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),ws_item_sk,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum,sum(cast(coalesce(ws_quantity, 0) as bigint))] [currency_ratio,return_ratio,sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),item,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum,sum(cast(coalesce(ws_quantity, 0) as bigint))] + HashAggregate [sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),ws_item_sk] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00))] InputAdapter Exchange [ws_item_sk] #3 WholeStageCodegen - HashAggregate [sum,wr_return_amt,sum,ws_item_sk,sum,sum,sum,ws_net_paid,ws_quantity,wr_return_quantity,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [wr_return_amt,ws_net_paid,ws_quantity,wr_return_quantity,ws_item_sk] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [wr_return_amt,ws_net_paid,ws_quantity,ws_sold_date_sk,wr_return_quantity,ws_item_sk] - BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - Project [ws_net_paid,ws_quantity,ws_order_number,ws_sold_date_sk,ws_item_sk] - Filter [ws_net_profit,ws_order_number,ws_item_sk,ws_net_paid,ws_sold_date_sk,ws_quantity] - Scan parquet default.web_sales [ws_net_paid,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_item_sk] [ws_net_paid,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_item_sk] + HashAggregate [sum,sum,sum,sum,sum,sum,sum,sum,wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [ws_item_sk,ws_net_paid,ws_order_number,ws_quantity,ws_sold_date_sk] + Filter [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] + Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] - Filter [wr_return_amt,wr_item_sk,wr_order_number] - Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + Project [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] + Filter [wr_item_sk,wr_order_number,wr_return_amt] + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] WholeStageCodegen - Project [item,return_ratio,return_rank,currency_rank] - Filter [return_rank,currency_rank] + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] InputAdapter Window [currency_ratio] WholeStageCodegen @@ -59,29 +59,29 @@ TakeOrderedAndProject [channel,currency_rank,return_rank,item,return_ratio] InputAdapter Exchange #6 WholeStageCodegen - HashAggregate [sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum,sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),sum,sum,cs_item_sk,sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(cast(coalesce(cs_quantity, 0) as bigint))] [sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum,item,return_ratio,sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),sum,sum,sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(cast(coalesce(cs_quantity, 0) as bigint)),currency_ratio] + HashAggregate [cs_item_sk,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] InputAdapter Exchange [cs_item_sk] #7 WholeStageCodegen - HashAggregate [sum,cr_return_amount,cr_return_quantity,sum,cs_net_paid,cs_quantity,sum,sum,sum,sum,sum,cs_item_sk,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [cs_net_paid,cs_quantity,cr_return_quantity,cr_return_amount,cs_item_sk] + HashAggregate [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_net_paid,cs_quantity,cr_return_quantity,cs_sold_date_sk,cr_return_amount,cs_item_sk] - BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_net_paid,cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk] - Filter [cs_net_profit,cs_net_paid,cs_quantity,cs_item_sk,cs_sold_date_sk,cs_order_number] - Scan parquet default.catalog_sales [cs_net_paid,cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit] [cs_net_paid,cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit] + Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_item_sk,cs_net_paid,cs_order_number,cs_quantity,cs_sold_date_sk] + Filter [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] InputAdapter BroadcastExchange #8 WholeStageCodegen - Project [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] - Filter [cr_return_amount,cr_item_sk,cr_order_number] - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + Project [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] + Filter [cr_item_sk,cr_order_number,cr_return_amount] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #5 WholeStageCodegen - Project [item,return_ratio,return_rank,currency_rank] - Filter [return_rank,currency_rank] + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] InputAdapter Window [currency_ratio] WholeStageCodegen @@ -93,23 +93,23 @@ TakeOrderedAndProject [channel,currency_rank,return_rank,item,return_ratio] InputAdapter Exchange #9 WholeStageCodegen - HashAggregate [sum,ss_item_sk,sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum,sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum,sum] [sum,sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),item,sum(cast(coalesce(ss_quantity, 0) as bigint)),sum,sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(cast(coalesce(sr_return_quantity, 0) as bigint)),return_ratio,sum,sum,currency_ratio] + HashAggregate [ss_item_sk,sum,sum,sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00))] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00))] InputAdapter Exchange [ss_item_sk] #10 WholeStageCodegen - HashAggregate [ss_net_paid,sum,ss_item_sk,sum,sum,sr_return_amt,sum,sum,ss_quantity,sr_return_quantity,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [ss_quantity,ss_item_sk,ss_net_paid,sr_return_quantity,sr_return_amt] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,ss_net_paid,sr_return_quantity,ss_sold_date_sk,sr_return_amt] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_quantity,ss_item_sk,ss_net_paid,ss_sold_date_sk,ss_ticket_number] - Filter [ss_net_paid,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_net_profit,ss_ticket_number] - Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_net_paid,ss_net_profit,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_net_paid,ss_net_profit,ss_sold_date_sk,ss_ticket_number] + HashAggregate [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_item_sk,ss_net_paid,ss_quantity,ss_sold_date_sk,ss_ticket_number] + Filter [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] InputAdapter BroadcastExchange #11 WholeStageCodegen - Project [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] - Filter [sr_return_amt,sr_ticket_number,sr_item_sk] - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + Project [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] + Filter [sr_item_sk,sr_return_amt,sr_ticket_number] + Scan parquet default.store_returns [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt index a3baee3e1..257c8efa6 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt @@ -1,110 +1,110 @@ -TakeOrderedAndProject [profit,channel,sales,returns,id] +TakeOrderedAndProject [channel,id,profit,returns,sales] WholeStageCodegen - HashAggregate [sum(returns),channel,spark_grouping_id,sum,sum(profit),sum(sales),sum,sum,id] [sum(returns),profit,sum,sum(profit),sum(sales),sales,sum,sum,returns] + HashAggregate [channel,id,spark_grouping_id,sum,sum,sum,sum(profit),sum(returns),sum(sales)] [profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] InputAdapter Exchange [channel,id,spark_grouping_id] #1 WholeStageCodegen - HashAggregate [sum,channel,spark_grouping_id,sum,sum,sum,profit,returns,sum,sum,id,sales] [sum,sum,sum,sum,sum,sum] + HashAggregate [channel,id,profit,returns,sales,spark_grouping_id,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] Expand [channel,id,profit,returns,sales] InputAdapter Union WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(net_loss)),sum,sum(UnscaledValue(profit)),s_store_id,sum(UnscaledValue(sales_price)),sum,sum] [sales,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(net_loss)),RETURNS,sum,sum(UnscaledValue(profit)),channel,profit,sum(UnscaledValue(sales_price)),id,sum,sum] + HashAggregate [s_store_id,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] [RETURNS,channel,id,profit,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] InputAdapter Exchange [s_store_id] #2 WholeStageCodegen - HashAggregate [sum,sum,return_amt,net_loss,sum,sum,s_store_id,sum,sales_price,sum,sum,sum,profit] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [s_store_id,sales_price,net_loss,return_amt,profit] - BroadcastHashJoin [store_sk,s_store_sk] - Project [store_sk,sales_price,net_loss,return_amt,profit] - BroadcastHashJoin [date_sk,d_date_sk] + HashAggregate [net_loss,profit,return_amt,s_store_id,sales_price,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [net_loss,profit,return_amt,s_store_id,sales_price] + BroadcastHashJoin [s_store_sk,store_sk] + Project [net_loss,profit,return_amt,sales_price,store_sk] + BroadcastHashJoin [d_date_sk,date_sk] InputAdapter Union WholeStageCodegen - Project [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] + Project [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + Scan parquet default.store_sales [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] WholeStageCodegen - Project [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] + Project [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] Filter [sr_returned_date_sk,sr_store_sk] - Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] + Scan parquet default.store_returns [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [s_store_sk,s_store_id] + Project [s_store_id,s_store_sk] Filter [s_store_sk] - Scan parquet default.store [s_store_sk,s_store_id] [s_store_sk,s_store_id] + Scan parquet default.store [s_store_id,s_store_sk] [s_store_id,s_store_sk] WholeStageCodegen - HashAggregate [cp_catalog_page_id,sum(UnscaledValue(profit)),sum,sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum,sum,sum(UnscaledValue(net_loss)),sum] [sum(UnscaledValue(profit)),sum,sum(UnscaledValue(sales_price)),channel,sum(UnscaledValue(return_amt)),sales,sum,profit,sum,id,sum(UnscaledValue(net_loss)),sum,RETURNS] + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] [RETURNS,channel,id,profit,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] InputAdapter Exchange [cp_catalog_page_id] #5 WholeStageCodegen - HashAggregate [cp_catalog_page_id,sum,sum,sum,sales_price,return_amt,net_loss,sum,profit,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,cp_catalog_page_id,profit,net_loss,return_amt] - BroadcastHashJoin [page_sk,cp_catalog_page_sk] - Project [sales_price,profit,page_sk,net_loss,return_amt] - BroadcastHashJoin [date_sk,d_date_sk] + HashAggregate [cp_catalog_page_id,net_loss,profit,return_amt,sales_price,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [cp_catalog_page_id,net_loss,profit,return_amt,sales_price] + BroadcastHashJoin [cp_catalog_page_sk,page_sk] + Project [net_loss,page_sk,profit,return_amt,sales_price] + BroadcastHashJoin [d_date_sk,date_sk] InputAdapter Union WholeStageCodegen - Project [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] - Filter [cs_sold_date_sk,cs_catalog_page_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit] [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit] + Project [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + Filter [cs_catalog_page_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] WholeStageCodegen - Project [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] - Filter [cr_returned_date_sk,cr_catalog_page_sk] - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] + Project [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] + Filter [cr_catalog_page_sk,cr_returned_date_sk] + Scan parquet default.catalog_returns [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [cp_catalog_page_sk,cp_catalog_page_id] + Project [cp_catalog_page_id,cp_catalog_page_sk] Filter [cp_catalog_page_sk] - Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] [cp_catalog_page_sk,cp_catalog_page_id] + Scan parquet default.catalog_page [cp_catalog_page_id,cp_catalog_page_sk] [cp_catalog_page_id,cp_catalog_page_sk] WholeStageCodegen - HashAggregate [web_site_id,sum,sum,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price)),sum(UnscaledValue(profit)),sum,sum(UnscaledValue(net_loss))] [sales,profit,sum,sum,RETURNS,channel,id,sum,sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price)),sum(UnscaledValue(profit)),sum,sum(UnscaledValue(net_loss))] + HashAggregate [sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price)),web_site_id] [RETURNS,channel,id,profit,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] InputAdapter Exchange [web_site_id] #8 WholeStageCodegen - HashAggregate [web_site_id,sum,profit,sum,sum,net_loss,sales_price,sum,sum,sum,sum,sum,return_amt] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [web_site_id,profit,sales_price,return_amt,net_loss] - BroadcastHashJoin [wsr_web_site_sk,web_site_sk] - Project [profit,wsr_web_site_sk,sales_price,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + HashAggregate [net_loss,profit,return_amt,sales_price,sum,sum,sum,sum,sum,sum,sum,sum,web_site_id] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [net_loss,profit,return_amt,sales_price,web_site_id] + BroadcastHashJoin [web_site_sk,wsr_web_site_sk] + Project [net_loss,profit,return_amt,sales_price,wsr_web_site_sk] + BroadcastHashJoin [d_date_sk,date_sk] InputAdapter Union WholeStageCodegen - Project [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] + Project [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] Filter [ws_sold_date_sk,ws_web_site_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit] [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit] + Scan parquet default.web_sales [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] WholeStageCodegen - Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] + Project [wr_net_loss,wr_return_amt,wr_returned_date_sk,ws_web_site_sk] BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Project [wr_order_number,wr_return_amt,wr_returned_date_sk,wr_net_loss,wr_item_sk] + Project [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt,wr_returned_date_sk] Filter [wr_returned_date_sk] - Scan parquet default.web_returns [wr_order_number,wr_return_amt,wr_returned_date_sk,wr_net_loss,wr_item_sk] [wr_order_number,wr_return_amt,wr_returned_date_sk,wr_net_loss,wr_item_sk] + Scan parquet default.web_returns [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt,wr_returned_date_sk] [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt,wr_returned_date_sk] InputAdapter BroadcastExchange #9 WholeStageCodegen - Project [ws_item_sk,ws_web_site_sk,ws_order_number] + Project [ws_item_sk,ws_order_number,ws_web_site_sk] Filter [ws_item_sk,ws_order_number,ws_web_site_sk] - Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] [ws_item_sk,ws_web_site_sk,ws_order_number] + Scan parquet default.web_sales [ws_item_sk,ws_order_number,ws_web_site_sk] [ws_item_sk,ws_order_number,ws_web_site_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter BroadcastExchange #10 WholeStageCodegen - Project [web_site_sk,web_site_id] + Project [web_site_id,web_site_sk] Filter [web_site_sk] - Scan parquet default.web_site [web_site_sk,web_site_id] [web_site_sk,web_site_id] + Scan parquet default.web_site [web_site_id,web_site_sk] [web_site_id,web_site_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt index fcbf25c1d..c3f728d0a 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt @@ -1,33 +1,33 @@ -TakeOrderedAndProject [s_street_number,s_street_type,s_company_id,31 - 60 days ,61 - 90 days ,s_county,>120 days ,30 days ,91 - 120 days ,s_state,s_street_name,s_city,s_suite_number,s_store_name,s_zip] +TakeOrderedAndProject [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] WholeStageCodegen - HashAggregate [s_street_number,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),s_street_type,sum,s_company_id,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),s_county,sum,s_state,s_street_name,sum,sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),sum,s_city,s_suite_number,s_store_name,s_zip,sum] [sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum,31 - 60 days ,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum,>120 days ,30 days ,91 - 120 days ,sum,sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),sum,sum] + HashAggregate [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] InputAdapter - Exchange [s_street_number,s_street_type,s_company_id,s_county,s_state,s_street_name,s_city,s_suite_number,s_store_name,s_zip] #1 + Exchange [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] #1 WholeStageCodegen - HashAggregate [sum,sum,s_street_number,ss_sold_date_sk,s_street_type,sum,s_company_id,sum,s_county,sum,s_state,s_street_name,sum,sum,sr_returned_date_sk,sum,sum,s_city,s_suite_number,s_store_name,s_zip,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_zip,s_state,s_suite_number,ss_sold_date_sk,s_city,sr_returned_date_sk] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_zip,s_state,s_suite_number,ss_sold_date_sk,s_city,sr_returned_date_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_zip,s_state,s_suite_number,ss_sold_date_sk,s_city,sr_returned_date_sk] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_sold_date_sk,ss_store_sk,sr_returned_date_sk] - BroadcastHashJoin [sr_customer_sk,sr_ticket_number,ss_customer_sk,sr_item_sk,ss_ticket_number,ss_item_sk] - Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] - Filter [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_item_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_ticket_number] + HashAggregate [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [sr_returned_date_sk,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] + Project [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] - Filter [sr_ticket_number,sr_customer_sk,sr_item_sk,sr_returned_date_sk] - Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] + Project [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_store_sk,s_zip,s_state,s_suite_number,s_city] + Project [s_city,s_company_id,s_county,s_state,s_store_name,s_store_sk,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] Filter [s_store_sk] - Scan parquet default.store [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_store_sk,s_zip,s_state,s_suite_number,s_city] [s_street_name,s_store_name,s_street_number,s_company_id,s_county,s_street_type,s_store_sk,s_zip,s_state,s_suite_number,s_city] + Scan parquet default.store [s_city,s_company_id,s_county,s_state,s_store_name,s_store_sk,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] [s_city,s_company_id,s_county,s_state,s_store_name,s_store_sk,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] InputAdapter BroadcastExchange #4 WholeStageCodegen @@ -38,5 +38,5 @@ TakeOrderedAndProject [s_street_number,s_street_type,s_company_id,31 - 60 days , BroadcastExchange #5 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt index af45acaf5..2b126d38a 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt @@ -1,67 +1,67 @@ -TakeOrderedAndProject [item_sk,store_cumulative,store_sales,web_sales,d_date,web_cumulative] +TakeOrderedAndProject [d_date,item_sk,store_cumulative,store_sales,web_cumulative,web_sales] WholeStageCodegen - Filter [web_cumulative,store_cumulative] + Filter [store_cumulative,web_cumulative] InputAdapter - Window [web_sales,item_sk,d_date,store_sales] + Window [d_date,item_sk,store_sales,web_sales] WholeStageCodegen - Sort [item_sk,d_date] + Sort [d_date,item_sk] InputAdapter Exchange [item_sk] #1 WholeStageCodegen - Project [d_date,d_date,item_sk,item_sk,cume_sales,cume_sales] + Project [cume_sales,cume_sales,d_date,d_date,item_sk,item_sk] InputAdapter - SortMergeJoin [item_sk,d_date,item_sk,d_date] + SortMergeJoin [d_date,d_date,item_sk,item_sk] WholeStageCodegen - Sort [item_sk,d_date] + Sort [d_date,item_sk] InputAdapter - Exchange [item_sk,d_date] #2 + Exchange [d_date,item_sk] #2 WholeStageCodegen - Project [item_sk,d_date,cume_sales] + Project [cume_sales,d_date,item_sk] InputAdapter - Window [_w0,ws_item_sk,d_date] + Window [_w0,d_date,ws_item_sk] WholeStageCodegen - Sort [ws_item_sk,d_date] + Sort [d_date,ws_item_sk] InputAdapter Exchange [ws_item_sk] #3 WholeStageCodegen - HashAggregate [ws_item_sk,d_date,sum,sum(UnscaledValue(ws_sales_price))] [sum(UnscaledValue(ws_sales_price)),item_sk,_w0,sum] + HashAggregate [d_date,sum,sum(UnscaledValue(ws_sales_price)),ws_item_sk] [_w0,item_sk,sum,sum(UnscaledValue(ws_sales_price))] InputAdapter - Exchange [ws_item_sk,d_date] #4 + Exchange [d_date,ws_item_sk] #4 WholeStageCodegen - HashAggregate [sum,ws_item_sk,sum,d_date,ws_sales_price] [sum,sum] - Project [ws_item_sk,ws_sales_price,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_sales_price] + HashAggregate [d_date,sum,sum,ws_item_sk,ws_sales_price] [sum,sum] + Project [d_date,ws_item_sk,ws_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_item_sk,ws_sales_price,ws_sold_date_sk] Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_sales_price] [ws_sold_date_sk,ws_item_sk,ws_sales_price] + Scan parquet default.web_sales [ws_item_sk,ws_sales_price,ws_sold_date_sk] [ws_item_sk,ws_sales_price,ws_sold_date_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] [d_date_sk,d_date,d_month_seq] + Project [d_date,d_date_sk] + Filter [d_date_sk,d_month_seq] + Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] [d_date,d_date_sk,d_month_seq] WholeStageCodegen - Sort [item_sk,d_date] + Sort [d_date,item_sk] InputAdapter - Exchange [item_sk,d_date] #6 + Exchange [d_date,item_sk] #6 WholeStageCodegen - Project [item_sk,d_date,cume_sales] + Project [cume_sales,d_date,item_sk] InputAdapter - Window [_w0,ss_item_sk,d_date] + Window [_w0,d_date,ss_item_sk] WholeStageCodegen - Sort [ss_item_sk,d_date] + Sort [d_date,ss_item_sk] InputAdapter Exchange [ss_item_sk] #7 WholeStageCodegen - HashAggregate [ss_item_sk,d_date,sum,sum(UnscaledValue(ss_sales_price))] [sum(UnscaledValue(ss_sales_price)),item_sk,_w0,sum] + HashAggregate [d_date,ss_item_sk,sum,sum(UnscaledValue(ss_sales_price))] [_w0,item_sk,sum,sum(UnscaledValue(ss_sales_price))] InputAdapter - Exchange [ss_item_sk,d_date] #8 + Exchange [d_date,ss_item_sk] #8 WholeStageCodegen - HashAggregate [d_date,sum,ss_sales_price,sum,ss_item_sk] [sum,sum] - Project [ss_item_sk,ss_sales_price,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_sales_price] + HashAggregate [d_date,ss_item_sk,ss_sales_price,sum,sum] [sum,sum] + Project [d_date,ss_item_sk,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_item_sk,ss_sales_price,ss_sold_date_sk] Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_sales_price] + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk,d_date] [d_date_sk,d_date] #5 + ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt index 00c2d4b14..583e5acae 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt @@ -1,26 +1,26 @@ -TakeOrderedAndProject [d_year,ext_price,brand_id,brand] +TakeOrderedAndProject [brand,brand_id,d_year,ext_price] WholeStageCodegen - HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),d_year,i_brand,i_brand_id,sum] [brand_id,sum(UnscaledValue(ss_ext_sales_price)),brand,sum,ext_price] + HashAggregate [d_year,i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter Exchange [d_year,i_brand,i_brand_id] #1 WholeStageCodegen - HashAggregate [sum,d_year,i_brand,ss_ext_sales_price,i_brand_id,sum] [sum,sum] - Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [d_year,ss_item_sk,ss_ext_sales_price] + HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [d_year,i_brand,i_brand_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_year,ss_ext_sales_price,ss_item_sk] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] Project [d_date_sk,d_year] - Filter [d_moy,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] - Filter [ss_sold_date_sk,ss_item_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [i_item_sk,i_brand_id,i_brand] - Filter [i_manager_id,i_item_sk] - Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] [i_item_sk,i_brand_id,i_brand,i_manager_id] + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] [i_brand,i_brand_id,i_item_sk,i_manager_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt index 6a8804ae2..ace96279f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt @@ -1,6 +1,6 @@ -TakeOrderedAndProject [avg_quarterly_sales,sum_sales,i_manufact_id] +TakeOrderedAndProject [avg_quarterly_sales,i_manufact_id,sum_sales] WholeStageCodegen - Project [i_manufact_id,sum_sales,avg_quarterly_sales] + Project [avg_quarterly_sales,i_manufact_id,sum_sales] Filter [avg_quarterly_sales,sum_sales] InputAdapter Window [_w0,i_manufact_id] @@ -9,31 +9,31 @@ TakeOrderedAndProject [avg_quarterly_sales,sum_sales,i_manufact_id] InputAdapter Exchange [i_manufact_id] #1 WholeStageCodegen - HashAggregate [i_manufact_id,d_qoy,sum,sum(UnscaledValue(ss_sales_price))] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + HashAggregate [d_qoy,i_manufact_id,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] InputAdapter - Exchange [i_manufact_id,d_qoy] #2 + Exchange [d_qoy,i_manufact_id] #2 WholeStageCodegen - HashAggregate [d_qoy,sum,ss_sales_price,sum,i_manufact_id] [sum,sum] - Project [i_manufact_id,ss_sales_price,d_qoy] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [i_manufact_id,ss_store_sk,ss_sales_price,d_qoy] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [i_manufact_id,ss_sold_date_sk,ss_store_sk,ss_sales_price] + HashAggregate [d_qoy,i_manufact_id,ss_sales_price,sum,sum] [sum,sum] + Project [d_qoy,i_manufact_id,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_qoy,i_manufact_id,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_manufact_id,ss_sales_price,ss_sold_date_sk,ss_store_sk] BroadcastHashJoin [i_item_sk,ss_item_sk] Project [i_item_sk,i_manufact_id] - Filter [i_category,i_class,i_brand,i_item_sk] - Scan parquet default.item [i_class,i_manufact_id,i_item_sk,i_category,i_brand] [i_class,i_manufact_id,i_item_sk,i_category,i_brand] + Filter [i_brand,i_category,i_class,i_item_sk] + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_manufact_id] [i_brand,i_category,i_class,i_item_sk,i_manufact_id] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [d_date_sk,d_qoy] - Filter [d_month_seq,d_date_sk] + Filter [d_date_sk,d_month_seq] Scan parquet default.date_dim [d_date_sk,d_month_seq,d_qoy] [d_date_sk,d_month_seq,d_qoy] InputAdapter BroadcastExchange #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt index 3707aa2f8..4abf2165f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt @@ -1,85 +1,85 @@ -TakeOrderedAndProject [segment,num_customers,segment_base] +TakeOrderedAndProject [num_customers,segment,segment_base] WholeStageCodegen - HashAggregate [segment,count,count(1)] [count(1),num_customers,segment_base,count] + HashAggregate [count,count(1),segment] [count,count(1),num_customers,segment_base] InputAdapter Exchange [segment] #1 WholeStageCodegen - HashAggregate [segment,count,count] [count,count] - HashAggregate [c_customer_sk,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),segment,sum] + HashAggregate [count,count,segment] [count,count] + HashAggregate [c_customer_sk,sum,sum(UnscaledValue(ss_ext_sales_price))] [segment,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter Exchange [c_customer_sk] #2 WholeStageCodegen HashAggregate [c_customer_sk,ss_ext_sales_price,sum,sum] [sum,sum] Project [c_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_customer_sk,ss_sold_date_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_customer_sk,ss_ext_sales_price,ss_sold_date_sk] BroadcastHashJoin [ca_county,ca_state,s_county,s_state] - Project [ca_state,c_customer_sk,ss_ext_sales_price,ca_county,ss_sold_date_sk] + Project [c_customer_sk,ca_county,ca_state,ss_ext_sales_price,ss_sold_date_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_customer_sk,c_current_addr_sk,ss_sold_date_sk,ss_ext_sales_price] + Project [c_current_addr_sk,c_customer_sk,ss_ext_sales_price,ss_sold_date_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - HashAggregate [c_customer_sk,c_current_addr_sk] + HashAggregate [c_current_addr_sk,c_customer_sk] InputAdapter - Exchange [c_customer_sk,c_current_addr_sk] #3 + Exchange [c_current_addr_sk,c_customer_sk] #3 WholeStageCodegen - HashAggregate [c_customer_sk,c_current_addr_sk] - Project [c_customer_sk,c_current_addr_sk] - BroadcastHashJoin [customer_sk,c_customer_sk] + HashAggregate [c_current_addr_sk,c_customer_sk] + Project [c_current_addr_sk,c_customer_sk] + BroadcastHashJoin [c_customer_sk,customer_sk] Project [customer_sk] - BroadcastHashJoin [sold_date_sk,d_date_sk] - Project [sold_date_sk,customer_sk] - BroadcastHashJoin [item_sk,i_item_sk] + BroadcastHashJoin [d_date_sk,sold_date_sk] + Project [customer_sk,sold_date_sk] + BroadcastHashJoin [i_item_sk,item_sk] InputAdapter Union WholeStageCodegen - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - Filter [cs_item_sk,cs_sold_date_sk,cs_bill_customer_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] WholeStageCodegen - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] - Filter [ws_item_sk,ws_sold_date_sk,ws_bill_customer_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] + Project [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [i_item_sk] Filter [i_category,i_class,i_item_sk] - Scan parquet default.item [i_item_sk,i_class,i_category] [i_item_sk,i_class,i_category] + Scan parquet default.item [i_category,i_class,i_item_sk] [i_category,i_class,i_item_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [c_customer_sk,c_current_addr_sk] - Filter [c_customer_sk,c_current_addr_sk] - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] [c_customer_sk,c_current_addr_sk] + Project [c_current_addr_sk,c_customer_sk] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [ss_sold_date_sk,ss_customer_sk,ss_ext_sales_price] + Project [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_customer_sk,ss_ext_sales_price] + Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] InputAdapter BroadcastExchange #8 WholeStageCodegen Project [ca_address_sk,ca_county,ca_state] - Filter [ca_address_sk,ca_state,ca_county] + Filter [ca_address_sk,ca_county,ca_state] Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state] [ca_address_sk,ca_county,ca_state] InputAdapter BroadcastExchange #9 WholeStageCodegen Project [s_county,s_state] - Filter [s_state,s_county] + Filter [s_county,s_state] Scan parquet default.store [s_county,s_state] [s_county,s_state] InputAdapter BroadcastExchange #10 WholeStageCodegen Project [d_date_sk] - Filter [d_month_seq,d_date_sk] + Filter [d_date_sk,d_month_seq] Subquery #1 WholeStageCodegen HashAggregate [(d_month_seq + 1)] @@ -88,8 +88,8 @@ TakeOrderedAndProject [segment,num_customers,segment_base] WholeStageCodegen HashAggregate [(d_month_seq + 1)] Project [d_month_seq] - Filter [d_year,d_moy] - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] [d_month_seq,d_year,d_moy] + Filter [d_moy,d_year] + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] Subquery #2 WholeStageCodegen HashAggregate [(d_month_seq + 3)] @@ -98,8 +98,8 @@ TakeOrderedAndProject [segment,num_customers,segment_base] WholeStageCodegen HashAggregate [(d_month_seq + 3)] Project [d_month_seq] - Filter [d_year,d_moy] - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] [d_month_seq,d_year,d_moy] + Filter [d_moy,d_year] + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] Subquery #1 WholeStageCodegen @@ -109,8 +109,8 @@ TakeOrderedAndProject [segment,num_customers,segment_base] WholeStageCodegen HashAggregate [(d_month_seq + 1)] Project [d_month_seq] - Filter [d_year,d_moy] - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] [d_month_seq,d_year,d_moy] + Filter [d_moy,d_year] + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] Subquery #2 WholeStageCodegen HashAggregate [(d_month_seq + 3)] @@ -119,5 +119,5 @@ TakeOrderedAndProject [segment,num_customers,segment_base] WholeStageCodegen HashAggregate [(d_month_seq + 3)] Project [d_month_seq] - Filter [d_year,d_moy] - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] [d_month_seq,d_year,d_moy] + Filter [d_moy,d_year] + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt index e5ff08959..478e4a54b 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt @@ -1,26 +1,26 @@ -TakeOrderedAndProject [ext_price,brand_id,brand] +TakeOrderedAndProject [brand,brand_id,ext_price] WholeStageCodegen - HashAggregate [i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,ext_price,sum(UnscaledValue(ss_ext_sales_price)),sum,brand_id] + HashAggregate [i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter Exchange [i_brand,i_brand_id] #1 WholeStageCodegen - HashAggregate [i_brand,sum,sum,ss_ext_sales_price,i_brand_id] [sum,sum] - Project [ss_ext_sales_price,i_brand_id,i_brand] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_ext_sales_price] + HashAggregate [i_brand,i_brand_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [i_brand,i_brand_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] - Filter [ss_sold_date_sk,ss_item_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [i_item_sk,i_brand_id,i_brand] - Filter [i_manager_id,i_item_sk] - Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] [i_item_sk,i_brand_id,i_brand,i_manager_id] + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] [i_brand,i_brand_id,i_item_sk,i_manager_id] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt index 5678609bc..8d1ef0a64 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt @@ -1,11 +1,11 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output=[i_item_id#2,total_sales#1]) +- *(20) HashAggregate(keys=[i_item_id#2], functions=[sum(total_sales#3)]) - +- Exchange hashpartitioning(i_item_id#2, 200) + +- Exchange hashpartitioning(i_item_id#2, 5) +- *(19) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(total_sales#3)]) +- Union :- *(6) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- Exchange hashpartitioning(i_item_id#2, 200) + : +- Exchange hashpartitioning(i_item_id#2, 5) : +- *(5) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) : +- *(5) Project [ss_ext_sales_price#4, i_item_id#2] : +- *(5) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight @@ -34,7 +34,7 @@ TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output : +- *(3) Filter i_color#15 IN (slate,blanched,burnished) : +- *(3) FileScan parquet default.item[i_item_id#2,i_color#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_color, [slate,blanched,burnished])], ReadSchema: struct :- *(12) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) - : +- Exchange hashpartitioning(i_item_id#2, 200) + : +- Exchange hashpartitioning(i_item_id#2, 5) : +- *(11) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) : +- *(11) Project [cs_ext_sales_price#16, i_item_id#2] : +- *(11) BroadcastHashJoin [cs_item_sk#17], [i_item_sk#6], Inner, BuildRight @@ -49,7 +49,7 @@ TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [i_item_sk#6, i_item_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(18) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(ws_ext_sales_price#20))]) - +- Exchange hashpartitioning(i_item_id#2, 200) + +- Exchange hashpartitioning(i_item_id#2, 5) +- *(17) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#20))]) +- *(17) Project [ws_ext_sales_price#20, i_item_id#2] +- *(17) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#6], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/simplified.txt index 693fb71e6..0433f198d 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/simplified.txt @@ -1,91 +1,91 @@ -TakeOrderedAndProject [total_sales,i_item_id] +TakeOrderedAndProject [i_item_id,total_sales] WholeStageCodegen - HashAggregate [i_item_id,sum,sum(total_sales)] [sum(total_sales),total_sales,sum] + HashAggregate [i_item_id,sum,sum(total_sales)] [sum,sum(total_sales),total_sales] InputAdapter Exchange [i_item_id] #1 WholeStageCodegen - HashAggregate [i_item_id,total_sales,sum,sum] [sum,sum] + HashAggregate [i_item_id,sum,sum,total_sales] [sum,sum] InputAdapter Union WholeStageCodegen - HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_ext_sales_price)),total_sales] InputAdapter Exchange [i_item_id] #2 WholeStageCodegen HashAggregate [i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [ss_ext_sales_price,i_item_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] - Filter [ss_sold_date_sk,ss_addr_sk,ss_item_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] + Project [i_item_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [ca_address_sk] - Filter [ca_gmt_offset,ca_address_sk] + Filter [ca_address_sk,ca_gmt_offset] Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] InputAdapter BroadcastExchange #5 WholeStageCodegen BroadcastHashJoin [i_item_id,i_item_id] - Project [i_item_sk,i_item_id] + Project [i_item_id,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [i_item_id] Filter [i_color] - Scan parquet default.item [i_item_id,i_color] [i_item_id,i_color] + Scan parquet default.item [i_color,i_item_id] [i_color,i_item_id] WholeStageCodegen - HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum,sum(UnscaledValue(cs_ext_sales_price)),total_sales] InputAdapter Exchange [i_item_id] #7 WholeStageCodegen - HashAggregate [i_item_id,cs_ext_sales_price,sum,sum] [sum,sum] + HashAggregate [cs_ext_sales_price,i_item_id,sum,sum] [sum,sum] Project [cs_ext_sales_price,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk,cs_ext_sales_price] - BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] - Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [ca_address_sk,cs_bill_addr_sk] + Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] - Filter [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + Filter [cs_bill_addr_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter ReusedExchange [ca_address_sk] [ca_address_sk] #4 InputAdapter - ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #5 + ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #5 WholeStageCodegen - HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),total_sales] InputAdapter Exchange [i_item_id] #8 WholeStageCodegen - HashAggregate [i_item_id,ws_ext_sales_price,sum,sum] [sum,sum] - Project [ws_ext_sales_price,i_item_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk,ws_ext_sales_price] - BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] - Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] - Filter [ws_sold_date_sk,ws_bill_addr_sk,ws_item_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + HashAggregate [i_item_id,sum,sum,ws_ext_sales_price] [sum,sum] + Project [i_item_id,ws_ext_sales_price] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter ReusedExchange [ca_address_sk] [ca_address_sk] #4 InputAdapter - ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #5 + ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt index d41aaf1e2..3d71eb1d9 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt @@ -1,48 +1,48 @@ -TakeOrderedAndProject [d_year,avg_monthly_sales,nsum,d_moy,sum_sales,i_category,i_brand,cc_name,psum] +TakeOrderedAndProject [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,nsum,psum,sum_sales] WholeStageCodegen - Project [sum_sales,d_year,sum_sales,d_moy,sum_sales,i_category,i_brand,cc_name,avg_monthly_sales] - BroadcastHashJoin [cc_name,i_brand,rn,rn,i_category,i_category,i_brand,cc_name] - Project [d_year,d_moy,sum_sales,rn,i_brand,cc_name,sum_sales,i_category,avg_monthly_sales] - BroadcastHashJoin [i_category,cc_name,rn,i_category,rn,i_brand,cc_name,i_brand] - Project [d_year,d_moy,sum_sales,rn,i_brand,cc_name,i_category,avg_monthly_sales] - Filter [avg_monthly_sales,sum_sales,rn] + Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [cc_name,cc_name,i_brand,i_brand,i_category,i_category,rn,rn] + Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,rn,sum_sales,sum_sales] + BroadcastHashJoin [cc_name,cc_name,i_brand,i_brand,i_category,i_category,rn,rn] + Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,rn,sum_sales] + Filter [avg_monthly_sales,rn,sum_sales] InputAdapter - Window [d_year,i_brand,cc_name,i_category,_w0] + Window [_w0,cc_name,d_year,i_brand,i_category] WholeStageCodegen Filter [d_year] InputAdapter - Window [d_year,d_moy,i_brand,cc_name,i_category] + Window [cc_name,d_moy,d_year,i_brand,i_category] WholeStageCodegen - Sort [d_year,d_moy,i_brand,cc_name,i_category] + Sort [cc_name,d_moy,d_year,i_brand,i_category] InputAdapter - Exchange [i_category,i_brand,cc_name] #1 + Exchange [cc_name,i_brand,i_category] #1 WholeStageCodegen - HashAggregate [d_year,d_moy,sum(UnscaledValue(cs_sales_price)),i_brand,sum,cc_name,i_category] [sum(UnscaledValue(cs_sales_price)),sum_sales,_w0,sum] + HashAggregate [cc_name,d_moy,d_year,i_brand,i_category,sum,sum(UnscaledValue(cs_sales_price))] [_w0,sum,sum(UnscaledValue(cs_sales_price)),sum_sales] InputAdapter - Exchange [d_year,d_moy,i_brand,cc_name,i_category] #2 + Exchange [cc_name,d_moy,d_year,i_brand,i_category] #2 WholeStageCodegen - HashAggregate [d_year,d_moy,i_brand,sum,sum,cc_name,i_category,cs_sales_price] [sum,sum] - Project [d_moy,d_year,cs_sales_price,i_category,cc_name,i_brand] - BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] - Project [d_moy,d_year,cs_sales_price,i_category,i_brand,cs_call_center_sk] + HashAggregate [cc_name,cs_sales_price,d_moy,d_year,i_brand,i_category,sum,sum] [sum,sum] + Project [cc_name,cs_sales_price,d_moy,d_year,i_brand,i_category] + BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] + Project [cs_call_center_sk,cs_sales_price,d_moy,d_year,i_brand,i_category] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sales_price,cs_sold_date_sk,i_category,i_brand,cs_call_center_sk] - BroadcastHashJoin [i_item_sk,cs_item_sk] - Project [i_item_sk,i_brand,i_category] - Filter [i_item_sk,i_brand,i_category] - Scan parquet default.item [i_item_sk,i_brand,i_category] [i_item_sk,i_brand,i_category] + Project [cs_call_center_sk,cs_sales_price,cs_sold_date_sk,i_brand,i_category] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [i_brand,i_category,i_item_sk] + Filter [i_brand,i_category,i_item_sk] + Scan parquet default.item [i_brand,i_category,i_item_sk] [i_brand,i_category,i_item_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [cs_sold_date_sk,cs_call_center_sk,cs_item_sk,cs_sales_price] - Filter [cs_item_sk,cs_sold_date_sk,cs_call_center_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_call_center_sk,cs_item_sk,cs_sales_price] [cs_sold_date_sk,cs_call_center_sk,cs_item_sk,cs_sales_price] + Project [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + Filter [cs_call_center_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [d_date_sk,d_year,d_moy] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Project [d_date_sk,d_moy,d_year] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #5 WholeStageCodegen @@ -52,26 +52,26 @@ TakeOrderedAndProject [d_year,avg_monthly_sales,nsum,d_moy,sum_sales,i_category, InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [cc_name,sum_sales,i_brand,rn,i_category] + Project [cc_name,i_brand,i_category,rn,sum_sales] Filter [rn] InputAdapter - Window [cc_name,i_brand,d_moy,d_year,i_category] + Window [cc_name,d_moy,d_year,i_brand,i_category] WholeStageCodegen - Sort [cc_name,i_brand,d_moy,d_year,i_category] + Sort [cc_name,d_moy,d_year,i_brand,i_category] InputAdapter - Exchange [i_category,i_brand,cc_name] #7 + Exchange [cc_name,i_brand,i_category] #7 WholeStageCodegen - HashAggregate [sum(UnscaledValue(cs_sales_price)),sum,cc_name,i_brand,d_moy,d_year,i_category] [sum(UnscaledValue(cs_sales_price)),sum_sales,sum] + HashAggregate [cc_name,d_moy,d_year,i_brand,i_category,sum,sum(UnscaledValue(cs_sales_price))] [sum,sum(UnscaledValue(cs_sales_price)),sum_sales] InputAdapter - ReusedExchange [cc_name,i_brand,d_moy,sum,d_year,i_category] [cc_name,i_brand,d_moy,sum,d_year,i_category] #2 + ReusedExchange [cc_name,d_moy,d_year,i_brand,i_category,sum] [cc_name,d_moy,d_year,i_brand,i_category,sum] #2 InputAdapter BroadcastExchange #8 WholeStageCodegen - Project [cc_name,rn,i_brand,i_category,sum_sales] + Project [cc_name,i_brand,i_category,rn,sum_sales] Filter [rn] InputAdapter - Window [cc_name,d_year,i_brand,i_category,d_moy] + Window [cc_name,d_moy,d_year,i_brand,i_category] WholeStageCodegen - Sort [cc_name,d_year,i_brand,i_category,d_moy] + Sort [cc_name,d_moy,d_year,i_brand,i_category] InputAdapter - ReusedExchange [cc_name,d_year,i_brand,i_category,d_moy,sum_sales] [cc_name,d_year,i_brand,i_category,d_moy,sum_sales] #7 + ReusedExchange [cc_name,d_moy,d_year,i_brand,i_category,sum_sales] [cc_name,d_moy,d_year,i_brand,i_category,sum_sales] #7 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt index fd4503647..ea45e8665 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt @@ -1,36 +1,36 @@ -TakeOrderedAndProject [cs_item_rev,item_id,ws_dev,ws_item_rev,average,cs_dev,ss_item_rev,ss_dev] +TakeOrderedAndProject [average,cs_dev,cs_item_rev,item_id,ss_dev,ss_item_rev,ws_dev,ws_item_rev] WholeStageCodegen - Project [item_id,ss_item_rev,cs_item_rev,ws_item_rev] - BroadcastHashJoin [cs_item_rev,item_id,ws_item_rev,item_id,ss_item_rev] - Project [item_id,ss_item_rev,cs_item_rev] - BroadcastHashJoin [item_id,item_id,ss_item_rev,cs_item_rev] + Project [cs_item_rev,item_id,ss_item_rev,ws_item_rev] + BroadcastHashJoin [cs_item_rev,item_id,item_id,ss_item_rev,ws_item_rev] + Project [cs_item_rev,item_id,ss_item_rev] + BroadcastHashJoin [cs_item_rev,item_id,item_id,ss_item_rev] Filter [ss_item_rev] - HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),item_id,ss_item_rev,sum] + HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [item_id,ss_item_rev,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter Exchange [i_item_id] #1 WholeStageCodegen HashAggregate [i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [ss_ext_sales_price,i_item_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_ext_sales_price,i_item_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Project [i_item_id,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_item_id,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [i_item_sk,i_item_id] - Filter [i_item_sk,i_item_id] - Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + Project [i_item_id,i_item_sk] + Filter [i_item_id,i_item_sk] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] BroadcastHashJoin [d_date,d_date] - Project [d_date_sk,d_date] + Project [d_date,d_date_sk] Filter [d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen @@ -51,28 +51,28 @@ TakeOrderedAndProject [cs_item_rev,item_id,ws_dev,ws_item_rev,average,cs_dev,ss_ BroadcastExchange #5 WholeStageCodegen Filter [cs_item_rev] - HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum(UnscaledValue(cs_ext_sales_price)),item_id,cs_item_rev,sum] + HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [cs_item_rev,item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] InputAdapter Exchange [i_item_id] #6 WholeStageCodegen - HashAggregate [i_item_id,cs_ext_sales_price,sum,sum] [sum,sum] + HashAggregate [cs_ext_sales_price,i_item_id,sum,sum] [sum,sum] Project [cs_ext_sales_price,i_item_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_ext_sales_price,i_item_id] + Project [cs_ext_sales_price,cs_sold_date_sk,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] InputAdapter - ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #2 + ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #2 InputAdapter BroadcastExchange #7 WholeStageCodegen Project [d_date_sk] BroadcastHashJoin [d_date,d_date] - Project [d_date_sk,d_date] + Project [d_date,d_date_sk] Filter [d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #8 WholeStageCodegen @@ -93,28 +93,28 @@ TakeOrderedAndProject [cs_item_rev,item_id,ws_dev,ws_item_rev,average,cs_dev,ss_ BroadcastExchange #9 WholeStageCodegen Filter [ws_item_rev] - HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),item_id,ws_item_rev,sum] + HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [item_id,sum,sum(UnscaledValue(ws_ext_sales_price)),ws_item_rev] InputAdapter Exchange [i_item_id] #10 WholeStageCodegen - HashAggregate [i_item_id,ws_ext_sales_price,sum,sum] [sum,sum] - Project [ws_ext_sales_price,i_item_id] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_ext_sales_price,i_item_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] + HashAggregate [i_item_id,sum,sum,ws_ext_sales_price] [sum,sum] + Project [i_item_id,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_item_id,ws_ext_sales_price,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] InputAdapter - ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #2 + ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #2 InputAdapter BroadcastExchange #11 WholeStageCodegen Project [d_date_sk] BroadcastHashJoin [d_date,d_date] - Project [d_date_sk,d_date] + Project [d_date,d_date_sk] Filter [d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #12 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt index 0ffe4eb4e..038219a53 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt @@ -1,33 +1,33 @@ -TakeOrderedAndProject [(wed_sales1 / wed_sales2),d_week_seq1,(mon_sales1 / mon_sales2),(thu_sales1 / thu_sales2),(sat_sales1 / sat_sales2),s_store_name1,(sun_sales1 / sun_sales2),s_store_id1,(fri_sales1 / fri_sales2),(tue_sales1 / tue_sales2)] +TakeOrderedAndProject [(fri_sales1 / fri_sales2),(mon_sales1 / mon_sales2),(sat_sales1 / sat_sales2),(sun_sales1 / sun_sales2),(thu_sales1 / thu_sales2),(tue_sales1 / tue_sales2),(wed_sales1 / wed_sales2),d_week_seq1,s_store_id1,s_store_name1] WholeStageCodegen - Project [sat_sales1,sat_sales2,sun_sales2,tue_sales1,d_week_seq1,mon_sales1,fri_sales1,wed_sales1,thu_sales1,mon_sales2,s_store_name1,s_store_id1,wed_sales2,thu_sales2,fri_sales2,tue_sales2,sun_sales1] - BroadcastHashJoin [s_store_id1,d_week_seq1,s_store_id2,d_week_seq2] - Project [fri_sales,tue_sales,sat_sales,s_store_id,sun_sales,d_week_seq,wed_sales,mon_sales,s_store_name,thu_sales] + Project [d_week_seq1,fri_sales1,fri_sales2,mon_sales1,mon_sales2,s_store_id1,s_store_name1,sat_sales1,sat_sales2,sun_sales1,sun_sales2,thu_sales1,thu_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2] + BroadcastHashJoin [d_week_seq1,d_week_seq2,s_store_id1,s_store_id2] + Project [d_week_seq,fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - Project [s_store_id,s_store_name,thu_sales,sun_sales,fri_sales,tue_sales,d_week_seq,wed_sales,sat_sales,mon_sales] - BroadcastHashJoin [ss_store_sk,s_store_sk] - HashAggregate [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum,sum,ss_store_sk,d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),thu_sales,sun_sales,sum,fri_sales,tue_sales,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,wed_sales,sat_sales,sum,mon_sales,sum] + Project [d_week_seq,fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [s_store_sk,ss_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] InputAdapter Exchange [d_week_seq,ss_store_sk] #1 WholeStageCodegen - HashAggregate [sum,sum,d_day_name,sum,sum,sum,sum,sum,sum,ss_store_sk,d_week_seq,ss_sales_price,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [ss_store_sk,ss_sales_price,d_week_seq,d_day_name] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_store_sk,ss_sales_price] + HashAggregate [d_day_name,d_week_seq,ss_sales_price,ss_store_sk,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,d_week_seq,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_sales_price,ss_sold_date_sk,ss_store_sk] Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_store_sk,ss_sales_price] + Scan parquet default.store_sales [ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [d_date_sk,d_week_seq,d_day_name] + Project [d_date_sk,d_day_name,d_week_seq] Filter [d_date_sk,d_week_seq] - Scan parquet default.date_dim [d_date_sk,d_week_seq,d_day_name] [d_date_sk,d_week_seq,d_day_name] + Scan parquet default.date_dim [d_date_sk,d_day_name,d_week_seq] [d_date_sk,d_day_name,d_week_seq] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [s_store_sk,s_store_id,s_store_name] - Filter [s_store_sk,s_store_id] - Scan parquet default.store [s_store_sk,s_store_id,s_store_name] [s_store_sk,s_store_id,s_store_name] + Project [s_store_id,s_store_name,s_store_sk] + Filter [s_store_id,s_store_sk] + Scan parquet default.store [s_store_id,s_store_name,s_store_sk] [s_store_id,s_store_name,s_store_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen @@ -37,19 +37,19 @@ TakeOrderedAndProject [(wed_sales1 / wed_sales2),d_week_seq1,(mon_sales1 / mon_s InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [fri_sales,tue_sales,sat_sales,s_store_id,sun_sales,d_week_seq,wed_sales,mon_sales,thu_sales] + Project [d_week_seq,fri_sales,mon_sales,s_store_id,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - Project [s_store_id,thu_sales,sun_sales,fri_sales,tue_sales,d_week_seq,wed_sales,sat_sales,mon_sales] - BroadcastHashJoin [ss_store_sk,s_store_sk] - HashAggregate [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum,ss_store_sk,d_week_seq,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum,sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),thu_sales,sum,sun_sales,fri_sales,tue_sales,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum,sum,wed_sales,sat_sales,sum,mon_sales] + Project [d_week_seq,fri_sales,mon_sales,s_store_id,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [s_store_sk,ss_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] InputAdapter - ReusedExchange [sum,sum,sum,ss_store_sk,d_week_seq,sum,sum,sum,sum] [sum,sum,sum,ss_store_sk,d_week_seq,sum,sum,sum,sum] #1 + ReusedExchange [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] #1 InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [s_store_sk,s_store_id] - Filter [s_store_sk,s_store_id] - Scan parquet default.store [s_store_sk,s_store_id] [s_store_sk,s_store_id] + Project [s_store_id,s_store_sk] + Filter [s_store_id,s_store_sk] + Scan parquet default.store [s_store_id,s_store_sk] [s_store_id,s_store_sk] InputAdapter BroadcastExchange #7 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt index 59ea8c0d9..2bf134280 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt @@ -1,40 +1,40 @@ TakeOrderedAndProject [cnt,state] WholeStageCodegen - Project [state,cnt] + Project [cnt,state] Filter [count(1)] - HashAggregate [ca_state,count,count(1)] [state,cnt,count(1),count,count(1)] + HashAggregate [ca_state,count,count(1)] [cnt,count,count(1),count(1),state] InputAdapter Exchange [ca_state] #1 WholeStageCodegen HashAggregate [ca_state,count,count] [count,count] Project [ca_state] - BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] Project [ca_state,ss_item_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ca_state,ss_sold_date_sk,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ca_state,ss_item_sk,ss_sold_date_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [ca_state,c_customer_sk] - BroadcastHashJoin [ca_address_sk,c_current_addr_sk] + Project [c_customer_sk,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] Project [ca_address_sk,ca_state] Filter [ca_address_sk] Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [c_customer_sk,c_current_addr_sk] + Project [c_current_addr_sk,c_customer_sk] Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk] - Filter [ss_customer_sk,ss_sold_date_sk,ss_item_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + Project [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk] [ss_customer_sk,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [d_date_sk] - Filter [d_month_seq,d_date_sk] + Filter [d_date_sk,d_month_seq] Subquery #1 WholeStageCodegen HashAggregate [d_month_seq] @@ -43,8 +43,8 @@ TakeOrderedAndProject [cnt,state] WholeStageCodegen HashAggregate [d_month_seq] Project [d_month_seq] - Filter [d_year,d_moy] - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] [d_month_seq,d_year,d_moy] + Filter [d_moy,d_year] + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] Subquery #1 WholeStageCodegen @@ -54,25 +54,25 @@ TakeOrderedAndProject [cnt,state] WholeStageCodegen HashAggregate [d_month_seq] Project [d_month_seq] - Filter [d_year,d_moy] - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] [d_month_seq,d_year,d_moy] + Filter [d_moy,d_year] + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [i_item_sk] - Filter [i_current_price,avg(i_current_price)] + Filter [avg(i_current_price),i_current_price] BroadcastHashJoin [i_category,i_category] - Project [i_item_sk,i_current_price,i_category] + Project [i_category,i_current_price,i_item_sk] Filter [i_current_price,i_item_sk] - Scan parquet default.item [i_item_sk,i_current_price,i_category] [i_item_sk,i_current_price,i_category] + Scan parquet default.item [i_category,i_current_price,i_item_sk] [i_category,i_current_price,i_item_sk] InputAdapter BroadcastExchange #7 WholeStageCodegen - HashAggregate [i_category,sum,count,avg(UnscaledValue(i_current_price))] [avg(i_current_price),sum,i_category,count,avg(UnscaledValue(i_current_price))] + HashAggregate [avg(UnscaledValue(i_current_price)),count,i_category,sum] [avg(UnscaledValue(i_current_price)),avg(i_current_price),count,i_category,sum] InputAdapter Exchange [i_category] #8 WholeStageCodegen - HashAggregate [sum,sum,count,i_category,i_current_price,count] [sum,count,sum,count] - Project [i_current_price,i_category] + HashAggregate [count,count,i_category,i_current_price,sum,sum] [count,count,sum,sum] + Project [i_category,i_current_price] Filter [i_category] - Scan parquet default.item [i_current_price,i_category] [i_current_price,i_category] + Scan parquet default.item [i_category,i_current_price] [i_category,i_current_price] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt index c5587f4e4..e4e212165 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt @@ -1,11 +1,11 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,total_sales#2 ASC NULLS FIRST], output=[i_item_id#1,total_sales#2]) +- *(20) HashAggregate(keys=[i_item_id#1], functions=[sum(total_sales#3)]) - +- Exchange hashpartitioning(i_item_id#1, 200) + +- Exchange hashpartitioning(i_item_id#1, 5) +- *(19) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(total_sales#3)]) +- Union :- *(6) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- Exchange hashpartitioning(i_item_id#1, 200) + : +- Exchange hashpartitioning(i_item_id#1, 5) : +- *(5) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) : +- *(5) Project [ss_ext_sales_price#4, i_item_id#1] : +- *(5) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight @@ -34,7 +34,7 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,total_sale : +- *(3) Filter (isnotnull(i_category#15) && (i_category#15 = Music)) : +- *(3) FileScan parquet default.item[i_item_id#1,i_category#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Music)], ReadSchema: struct :- *(12) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) - : +- Exchange hashpartitioning(i_item_id#1, 200) + : +- Exchange hashpartitioning(i_item_id#1, 5) : +- *(11) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) : +- *(11) Project [cs_ext_sales_price#16, i_item_id#1] : +- *(11) BroadcastHashJoin [cs_item_sk#17], [i_item_sk#6], Inner, BuildRight @@ -49,7 +49,7 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,total_sale : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [i_item_sk#6, i_item_id#1], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(18) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(ws_ext_sales_price#20))]) - +- Exchange hashpartitioning(i_item_id#1, 200) + +- Exchange hashpartitioning(i_item_id#1, 5) +- *(17) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#20))]) +- *(17) Project [ws_ext_sales_price#20, i_item_id#1] +- *(17) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#6], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/simplified.txt index e37083df9..01ea963ce 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/simplified.txt @@ -1,91 +1,91 @@ TakeOrderedAndProject [i_item_id,total_sales] WholeStageCodegen - HashAggregate [i_item_id,sum,sum(total_sales)] [sum(total_sales),total_sales,sum] + HashAggregate [i_item_id,sum,sum(total_sales)] [sum,sum(total_sales),total_sales] InputAdapter Exchange [i_item_id] #1 WholeStageCodegen - HashAggregate [i_item_id,total_sales,sum,sum] [sum,sum] + HashAggregate [i_item_id,sum,sum,total_sales] [sum,sum] InputAdapter Union WholeStageCodegen - HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_ext_sales_price)),total_sales] InputAdapter Exchange [i_item_id] #2 WholeStageCodegen HashAggregate [i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [ss_ext_sales_price,i_item_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] - Filter [ss_sold_date_sk,ss_addr_sk,ss_item_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] + Project [i_item_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [ca_address_sk] - Filter [ca_gmt_offset,ca_address_sk] + Filter [ca_address_sk,ca_gmt_offset] Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] InputAdapter BroadcastExchange #5 WholeStageCodegen BroadcastHashJoin [i_item_id,i_item_id] - Project [i_item_sk,i_item_id] + Project [i_item_id,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [i_item_id] Filter [i_category] - Scan parquet default.item [i_item_id,i_category] [i_item_id,i_category] + Scan parquet default.item [i_category,i_item_id] [i_category,i_item_id] WholeStageCodegen - HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum,sum(UnscaledValue(cs_ext_sales_price)),total_sales] InputAdapter Exchange [i_item_id] #7 WholeStageCodegen - HashAggregate [i_item_id,cs_ext_sales_price,sum,sum] [sum,sum] + HashAggregate [cs_ext_sales_price,i_item_id,sum,sum] [sum,sum] Project [cs_ext_sales_price,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk,cs_ext_sales_price] - BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] - Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [ca_address_sk,cs_bill_addr_sk] + Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] - Filter [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + Filter [cs_bill_addr_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter ReusedExchange [ca_address_sk] [ca_address_sk] #4 InputAdapter - ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #5 + ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #5 WholeStageCodegen - HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),total_sales] InputAdapter Exchange [i_item_id] #8 WholeStageCodegen - HashAggregate [i_item_id,ws_ext_sales_price,sum,sum] [sum,sum] - Project [ws_ext_sales_price,i_item_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk,ws_ext_sales_price] - BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] - Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] - Filter [ws_sold_date_sk,ws_bill_addr_sk,ws_item_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + HashAggregate [i_item_id,sum,sum,ws_ext_sales_price] [sum,sum] + Project [i_item_id,ws_ext_sales_price] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter ReusedExchange [ca_address_sk] [ca_address_sk] #4 InputAdapter - ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #5 + ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt index 39a31d2ee..7a6550c72 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt @@ -1,91 +1,91 @@ -TakeOrderedAndProject [promotions,total,(CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))] +TakeOrderedAndProject [(CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20))),promotions,total] WholeStageCodegen Project [promotions,total] InputAdapter BroadcastNestedLoopJoin WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum] + HashAggregate [sum,sum(UnscaledValue(ss_ext_sales_price))] [promotions,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter Exchange #1 WholeStageCodegen HashAggregate [ss_ext_sales_price,sum,sum] [sum,sum] Project [ss_ext_sales_price] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_promo_sk] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_promo_sk] - Filter [ss_sold_date_sk,ss_customer_sk,ss_promo_sk,ss_store_sk,ss_item_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_promo_sk] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_promo_sk] + Project [c_current_addr_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_promo_sk,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + Filter [ss_customer_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [s_store_sk] Filter [s_gmt_offset,s_store_sk] - Scan parquet default.store [s_store_sk,s_gmt_offset] [s_store_sk,s_gmt_offset] + Scan parquet default.store [s_gmt_offset,s_store_sk] [s_gmt_offset,s_store_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [p_promo_sk] Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] - Scan parquet default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] + Scan parquet default.promotion [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [c_customer_sk,c_current_addr_sk] - Filter [c_customer_sk,c_current_addr_sk] - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] [c_customer_sk,c_current_addr_sk] + Project [c_current_addr_sk,c_customer_sk] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [ca_address_sk] - Filter [ca_gmt_offset,ca_address_sk] + Filter [ca_address_sk,ca_gmt_offset] Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] InputAdapter BroadcastExchange #7 WholeStageCodegen Project [i_item_sk] Filter [i_category,i_item_sk] - Scan parquet default.item [i_item_sk,i_category] [i_item_sk,i_category] + Scan parquet default.item [i_category,i_item_sk] [i_category,i_item_sk] BroadcastExchange #8 WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(ss_ext_sales_price))] [sum(UnscaledValue(ss_ext_sales_price)),total,sum] + HashAggregate [sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_ext_sales_price)),total] InputAdapter Exchange #9 WholeStageCodegen HashAggregate [ss_ext_sales_price,sum,sum] [sum,sum] Project [ss_ext_sales_price] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] - Filter [ss_store_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_item_sk,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + Project [c_current_addr_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] InputAdapter ReusedExchange [s_store_sk] [s_store_sk] #2 InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #4 InputAdapter - ReusedExchange [c_customer_sk,c_current_addr_sk] [c_customer_sk,c_current_addr_sk] #5 + ReusedExchange [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] #5 InputAdapter ReusedExchange [ca_address_sk] [ca_address_sk] #6 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt index a6a12999a..b8879730e 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt @@ -1,27 +1,27 @@ -TakeOrderedAndProject [web_name,substring(w_warehouse_name, 1, 20),91 - 120 days ,31 - 60 days ,>120 days ,30 days ,sm_type,61 - 90 days ] +TakeOrderedAndProject [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sm_type,substring(w_warehouse_name, 1, 20),web_name] WholeStageCodegen - HashAggregate [web_name,sum,sum,sum,sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum,sm_type,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint))] [substring(w_warehouse_name, 1, 20),sum,sum,91 - 120 days ,31 - 60 days ,>120 days ,sum,30 days ,sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint))] + HashAggregate [sm_type,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),web_name] [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint))] InputAdapter - Exchange [substring(w_warehouse_name, 1, 20),sm_type,web_name] #1 + Exchange [sm_type,substring(w_warehouse_name, 1, 20),web_name] #1 WholeStageCodegen - HashAggregate [web_name,sum,sum,ws_sold_date_sk,w_warehouse_name,sum,sum,sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sm_type,sum,sum,ws_ship_date_sk] [sum,sum,sum,sum,sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sum,sum] - Project [web_name,ws_sold_date_sk,sm_type,ws_ship_date_sk,w_warehouse_name] - BroadcastHashJoin [ws_ship_date_sk,d_date_sk] - Project [web_name,ws_sold_date_sk,sm_type,ws_ship_date_sk,w_warehouse_name] - BroadcastHashJoin [ws_web_site_sk,web_site_sk] - Project [ws_web_site_sk,ws_sold_date_sk,sm_type,ws_ship_date_sk,w_warehouse_name] - BroadcastHashJoin [ws_ship_mode_sk,sm_ship_mode_sk] - Project [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,w_warehouse_name] - BroadcastHashJoin [ws_warehouse_sk,w_warehouse_sk] - Project [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,ws_warehouse_sk] - Filter [ws_warehouse_sk,ws_ship_mode_sk,ws_web_site_sk,ws_ship_date_sk] - Scan parquet default.web_sales [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,ws_warehouse_sk] [ws_web_site_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_ship_date_sk,ws_warehouse_sk] + HashAggregate [sm_type,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_warehouse_name,web_name,ws_ship_date_sk,ws_sold_date_sk] [substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [sm_type,w_warehouse_name,web_name,ws_ship_date_sk,ws_sold_date_sk] + BroadcastHashJoin [d_date_sk,ws_ship_date_sk] + Project [sm_type,w_warehouse_name,web_name,ws_ship_date_sk,ws_sold_date_sk] + BroadcastHashJoin [web_site_sk,ws_web_site_sk] + Project [sm_type,w_warehouse_name,ws_ship_date_sk,ws_sold_date_sk,ws_web_site_sk] + BroadcastHashJoin [sm_ship_mode_sk,ws_ship_mode_sk] + Project [w_warehouse_name,ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_web_site_sk] + BroadcastHashJoin [w_warehouse_sk,ws_warehouse_sk] + Project [ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_web_site_sk] + Filter [ws_ship_date_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_web_site_sk] + Scan parquet default.web_sales [ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_web_site_sk] [ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_web_site_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [w_warehouse_sk,w_warehouse_name] + Project [w_warehouse_name,w_warehouse_sk] Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -31,12 +31,12 @@ TakeOrderedAndProject [web_name,substring(w_warehouse_name, 1, 20),91 - 120 days InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [web_site_sk,web_name] + Project [web_name,web_site_sk] Filter [web_site_sk] - Scan parquet default.web_site [web_site_sk,web_name] [web_site_sk,web_name] + Scan parquet default.web_site [web_name,web_site_sk] [web_name,web_site_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [d_date_sk] - Filter [d_month_seq,d_date_sk] + Filter [d_date_sk,d_month_seq] Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt index 941d3fe37..fc602334c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt @@ -1,6 +1,6 @@ -TakeOrderedAndProject [i_manager_id,avg_monthly_sales,sum_sales] +TakeOrderedAndProject [avg_monthly_sales,i_manager_id,sum_sales] WholeStageCodegen - Project [i_manager_id,sum_sales,avg_monthly_sales] + Project [avg_monthly_sales,i_manager_id,sum_sales] Filter [avg_monthly_sales,sum_sales] InputAdapter Window [_w0,i_manager_id] @@ -9,31 +9,31 @@ TakeOrderedAndProject [i_manager_id,avg_monthly_sales,sum_sales] InputAdapter Exchange [i_manager_id] #1 WholeStageCodegen - HashAggregate [i_manager_id,d_moy,sum,sum(UnscaledValue(ss_sales_price))] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + HashAggregate [d_moy,i_manager_id,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] InputAdapter - Exchange [i_manager_id,d_moy] #2 + Exchange [d_moy,i_manager_id] #2 WholeStageCodegen - HashAggregate [d_moy,sum,ss_sales_price,i_manager_id,sum] [sum,sum] - Project [i_manager_id,ss_sales_price,d_moy] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [i_manager_id,ss_store_sk,ss_sales_price,d_moy] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [i_manager_id,ss_sold_date_sk,ss_store_sk,ss_sales_price] + HashAggregate [d_moy,i_manager_id,ss_sales_price,sum,sum] [sum,sum] + Project [d_moy,i_manager_id,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,i_manager_id,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_manager_id,ss_sales_price,ss_sold_date_sk,ss_store_sk] BroadcastHashJoin [i_item_sk,ss_item_sk] Project [i_item_sk,i_manager_id] - Filter [i_category,i_class,i_brand,i_item_sk] - Scan parquet default.item [i_class,i_item_sk,i_manager_id,i_category,i_brand] [i_class,i_item_sk,i_manager_id,i_category,i_brand] + Filter [i_brand,i_category,i_class,i_item_sk] + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_manager_id] [i_brand,i_category,i_class,i_item_sk,i_manager_id] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [d_date_sk,d_moy] - Filter [d_month_seq,d_date_sk] + Filter [d_date_sk,d_month_seq] Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy] [d_date_sk,d_month_seq,d_moy] InputAdapter BroadcastExchange #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt index 7336416f6..2955e0f78 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt @@ -1,52 +1,52 @@ WholeStageCodegen - Sort [product_name,store_name,cnt] + Sort [cnt,product_name,store_name] InputAdapter - Exchange [product_name,store_name,cnt] #1 + Exchange [cnt,product_name,store_name] #1 WholeStageCodegen - Project [cnt,b_streen_name,product_name,b_city,b_zip,syear,c_street_name,store_zip,c_street_number,s1,s3,c_zip,s3,s1,b_street_number,s2,store_name,cnt,s2,syear,c_city] - BroadcastHashJoin [cnt,store_zip,store_name,store_zip,store_name,item_sk,cnt,item_sk] - HashAggregate [d_year,ca_street_name,count,sum(UnscaledValue(ss_coupon_amt)),sum,ca_street_number,ca_street_name,sum(UnscaledValue(ss_list_price)),ca_zip,count(1),d_year,ca_city,sum,i_item_sk,sum,ca_street_number,d_year,ca_city,s_store_name,s_zip,sum(UnscaledValue(ss_wholesale_cost)),ca_zip,i_product_name] [c_city,b_streen_name,count,b_zip,sum(UnscaledValue(ss_coupon_amt)),sum,b_city,c_street_name,s3,sum(UnscaledValue(ss_list_price)),cnt,b_street_number,c_street_number,item_sk,count(1),store_zip,s2,product_name,sum,sum,store_name,c_zip,sum(UnscaledValue(ss_wholesale_cost)),s1,syear] + Project [b_city,b_streen_name,b_street_number,b_zip,c_city,c_street_name,c_street_number,c_zip,cnt,cnt,product_name,s1,s1,s2,s2,s3,s3,store_name,store_zip,syear,syear] + BroadcastHashJoin [cnt,cnt,item_sk,item_sk,store_name,store_name,store_zip,store_zip] + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,count(1),d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost))] [b_city,b_streen_name,b_street_number,b_zip,c_city,c_street_name,c_street_number,c_zip,cnt,count,count(1),item_sk,product_name,s1,s2,s3,store_name,store_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost)),syear] InputAdapter - Exchange [d_year,ca_street_name,ca_street_number,ca_street_name,ca_zip,d_year,ca_city,i_item_sk,ca_street_number,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] #2 + Exchange [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip] #2 WholeStageCodegen - HashAggregate [d_year,ca_street_name,count,sum,sum,ca_street_number,ss_wholesale_cost,ca_street_name,sum,count,ca_zip,ss_coupon_amt,d_year,sum,ca_city,ss_list_price,sum,i_item_sk,sum,ca_street_number,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] [count,sum,sum,sum,count,sum,sum,sum] - Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,d_year,ca_zip,ca_street_number,ca_zip,d_year,i_item_sk,ca_street_name,ca_city,i_product_name,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,count,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost,sum,sum,sum,sum,sum,sum] [count,count,sum,sum,sum,sum,sum,sum] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,hd_income_band_sk,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,hd_income_band_sk,d_year,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,hd_income_band_sk,d_year,s_zip,hd_income_band_sk,d_year,ss_wholesale_cost] + Project [c_current_addr_sk,ca_city,ca_street_name,ca_street_number,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [c_current_addr_sk,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,hd_income_band_sk,d_year,s_zip,d_year,ss_wholesale_cost] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_hdemo_sk,ss_wholesale_cost] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] - Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,cd_marital_status,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] - BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cd_marital_status,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] - Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_shipto_date_sk,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] - Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,c_first_sales_date_sk,ss_hdemo_sk,ss_wholesale_cost] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ss_customer_sk,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,d_year,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] - BroadcastHashJoin [ss_item_sk,cs_item_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] - BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] - Filter [ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_promo_sk,ss_store_sk,ss_ticket_number,ss_addr_sk,ss_item_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_year,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] + Filter [ss_addr_sk,ss_cdemo_sk,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -57,41 +57,41 @@ WholeStageCodegen BroadcastExchange #4 WholeStageCodegen Project [cs_item_sk] - Filter [sum(cs_ext_list_price),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2)))] - HashAggregate [sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),cs_item_sk,sum,sum(UnscaledValue(cs_ext_list_price)),sum] [sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(cs_ext_list_price),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum,sum(UnscaledValue(cs_ext_list_price)),sum] + Filter [sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(cs_ext_list_price)] + HashAggregate [cs_item_sk,sum,sum,sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(UnscaledValue(cs_ext_list_price))] [sum,sum,sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(UnscaledValue(cs_ext_list_price)),sum(cs_ext_list_price)] InputAdapter Exchange [cs_item_sk] #5 WholeStageCodegen - HashAggregate [sum,sum,cs_item_sk,sum,cr_refunded_cash,cr_reversed_charge,cr_store_credit,sum,cs_ext_list_price] [sum,sum,sum,sum] - Project [cr_store_credit,cs_item_sk,cr_refunded_cash,cs_ext_list_price,cr_reversed_charge] - BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] - Project [cs_item_sk,cs_order_number,cs_ext_list_price] + HashAggregate [cr_refunded_cash,cr_reversed_charge,cr_store_credit,cs_ext_list_price,cs_item_sk,sum,sum,sum,sum] [sum,sum,sum,sum] + Project [cr_refunded_cash,cr_reversed_charge,cr_store_credit,cs_ext_list_price,cs_item_sk] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_ext_list_price,cs_item_sk,cs_order_number] Filter [cs_item_sk,cs_order_number] - Scan parquet default.catalog_sales [cs_item_sk,cs_order_number,cs_ext_list_price] [cs_item_sk,cs_order_number,cs_ext_list_price] + Scan parquet default.catalog_sales [cs_ext_list_price,cs_item_sk,cs_order_number] [cs_ext_list_price,cs_item_sk,cs_order_number] InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [cr_order_number,cr_store_credit,cr_refunded_cash,cr_reversed_charge,cr_item_sk] + Project [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] Filter [cr_item_sk,cr_order_number] - Scan parquet default.catalog_returns [cr_order_number,cr_store_credit,cr_refunded_cash,cr_reversed_charge,cr_item_sk] [cr_order_number,cr_store_credit,cr_refunded_cash,cr_reversed_charge,cr_item_sk] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] InputAdapter BroadcastExchange #7 WholeStageCodegen Project [d_date_sk,d_year] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #8 WholeStageCodegen - Project [s_store_sk,s_store_name,s_zip] - Filter [s_store_sk,s_zip,s_store_name] - Scan parquet default.store [s_store_sk,s_store_name,s_zip] [s_store_sk,s_store_name,s_zip] + Project [s_store_name,s_store_sk,s_zip] + Filter [s_store_name,s_store_sk,s_zip] + Scan parquet default.store [s_store_name,s_store_sk,s_zip] [s_store_name,s_store_sk,s_zip] InputAdapter BroadcastExchange #9 WholeStageCodegen - Project [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] - Filter [c_current_cdemo_sk,c_customer_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,c_current_addr_sk] - Scan parquet default.customer [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] InputAdapter BroadcastExchange #10 WholeStageCodegen @@ -125,11 +125,11 @@ WholeStageCodegen InputAdapter BroadcastExchange #14 WholeStageCodegen - Project [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] + Project [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] Filter [ca_address_sk] - Scan parquet default.customer_address [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] + Scan parquet default.customer_address [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] InputAdapter - ReusedExchange [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] #14 + ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #14 InputAdapter BroadcastExchange #15 WholeStageCodegen @@ -142,53 +142,53 @@ WholeStageCodegen BroadcastExchange #16 WholeStageCodegen Project [i_item_sk,i_product_name] - Filter [i_current_price,i_color,i_item_sk] - Scan parquet default.item [i_item_sk,i_current_price,i_color,i_product_name] [i_item_sk,i_current_price,i_color,i_product_name] + Filter [i_color,i_current_price,i_item_sk] + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_product_name] [i_color,i_current_price,i_item_sk,i_product_name] InputAdapter BroadcastExchange #17 WholeStageCodegen - HashAggregate [d_year,ca_street_name,count,sum(UnscaledValue(ss_coupon_amt)),ca_street_number,sum,ca_street_name,sum(UnscaledValue(ss_list_price)),ca_zip,count(1),d_year,ca_city,sum,i_item_sk,ca_street_number,sum,d_year,ca_city,s_store_name,s_zip,sum(UnscaledValue(ss_wholesale_cost)),ca_zip,i_product_name] [count,sum(UnscaledValue(ss_coupon_amt)),syear,s3,sum,sum(UnscaledValue(ss_list_price)),count(1),store_name,cnt,sum,s2,item_sk,s1,sum,store_zip,sum(UnscaledValue(ss_wholesale_cost))] + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,count(1),d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost))] [cnt,count,count(1),item_sk,s1,s2,s3,store_name,store_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost)),syear] InputAdapter - Exchange [d_year,ca_street_name,ca_street_number,ca_street_name,ca_zip,d_year,ca_city,i_item_sk,ca_street_number,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] #18 + Exchange [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip] #18 WholeStageCodegen - HashAggregate [d_year,ca_street_name,count,ca_street_number,ss_wholesale_cost,sum,sum,ca_street_name,count,ca_zip,ss_coupon_amt,d_year,ca_city,ss_list_price,sum,sum,i_item_sk,sum,ca_street_number,sum,d_year,ca_city,s_store_name,s_zip,ca_zip,i_product_name] [count,sum,sum,count,sum,sum,sum,sum] - Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,d_year,ca_zip,ca_street_number,ca_zip,d_year,i_item_sk,ca_street_name,ca_city,i_product_name,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,count,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost,sum,sum,sum,sum,sum,sum] [count,count,sum,sum,sum,sum,sum,sum] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,ca_street_number,ca_zip,hd_income_band_sk,d_year,ca_street_name,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ca_zip,hd_income_band_sk,d_year,ca_city,s_zip,hd_income_band_sk,ca_street_number,d_year,ss_wholesale_cost,ca_street_name] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,hd_income_band_sk,d_year,s_zip,hd_income_band_sk,d_year,ss_wholesale_cost] + Project [c_current_addr_sk,ca_city,ca_street_name,ca_street_number,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [c_current_addr_sk,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,hd_income_band_sk,d_year,s_zip,d_year,ss_wholesale_cost] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_hdemo_sk,ss_wholesale_cost] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] - Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,cd_marital_status,d_year,s_zip,d_year,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] - BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [c_current_cdemo_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,d_year,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cd_marital_status,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] - Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_shipto_date_sk,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] - Project [c_current_cdemo_sk,c_first_shipto_date_sk,ss_addr_sk,c_current_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,c_current_hdemo_sk,d_year,s_zip,ss_cdemo_sk,ss_promo_sk,c_first_sales_date_sk,ss_hdemo_sk,ss_wholesale_cost] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_addr_sk,ss_coupon_amt,s_store_name,ss_list_price,ss_item_sk,d_year,ss_customer_sk,s_zip,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,d_year,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] - BroadcastHashJoin [ss_item_sk,cs_item_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost] - BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - Project [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] - Filter [ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_promo_sk,ss_store_sk,ss_ticket_number,ss_addr_sk,ss_item_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_list_price,ss_item_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk,ss_hdemo_sk,ss_wholesale_cost,ss_ticket_number] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_year,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] + Filter [ss_addr_sk,ss_cdemo_sk,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] InputAdapter ReusedExchange [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] #3 InputAdapter @@ -197,12 +197,12 @@ WholeStageCodegen BroadcastExchange #19 WholeStageCodegen Project [d_date_sk,d_year] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter - ReusedExchange [s_store_sk,s_store_name,s_zip] [s_store_sk,s_store_name,s_zip] #8 + ReusedExchange [s_store_name,s_store_sk,s_zip] [s_store_name,s_store_sk,s_zip] #8 InputAdapter - ReusedExchange [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] [c_current_cdemo_sk,c_first_shipto_date_sk,c_current_addr_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk] #9 + ReusedExchange [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] #9 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #10 InputAdapter @@ -218,9 +218,9 @@ WholeStageCodegen InputAdapter ReusedExchange [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] #13 InputAdapter - ReusedExchange [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] [ca_zip,ca_city,ca_address_sk,ca_street_number,ca_street_name] #14 + ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #14 InputAdapter - ReusedExchange [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] [ca_city,ca_address_sk,ca_street_number,ca_zip,ca_street_name] #14 + ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #14 InputAdapter ReusedExchange [ib_income_band_sk] [ib_income_band_sk] #15 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt index a83319f19..2250a433c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt @@ -1,57 +1,57 @@ -TakeOrderedAndProject [i_item_desc,i_brand,revenue,i_wholesale_cost,i_current_price,s_store_name] +TakeOrderedAndProject [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name] WholeStageCodegen - Project [i_item_desc,i_brand,revenue,i_wholesale_cost,i_current_price,s_store_name] - BroadcastHashJoin [ss_store_sk,ss_store_sk,revenue,ave] - Project [s_store_name,i_current_price,ss_store_sk,revenue,i_brand,i_wholesale_cost,i_item_desc] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [s_store_name,ss_store_sk,ss_item_sk,revenue] + Project [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name] + BroadcastHashJoin [ave,revenue,ss_store_sk,ss_store_sk] + Project [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [revenue,s_store_name,ss_item_sk,ss_store_sk] BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [s_store_sk,s_store_name] + Project [s_store_name,s_store_sk] Filter [s_store_sk] - Scan parquet default.store [s_store_sk,s_store_name] [s_store_sk,s_store_name] + Scan parquet default.store [s_store_name,s_store_sk] [s_store_name,s_store_sk] InputAdapter BroadcastExchange #1 WholeStageCodegen Filter [revenue] - HashAggregate [ss_store_sk,ss_item_sk,sum,sum(UnscaledValue(ss_sales_price))] [sum(UnscaledValue(ss_sales_price)),revenue,sum] + HashAggregate [ss_item_sk,ss_store_sk,sum,sum(UnscaledValue(ss_sales_price))] [revenue,sum,sum(UnscaledValue(ss_sales_price))] InputAdapter - Exchange [ss_store_sk,ss_item_sk] #2 + Exchange [ss_item_sk,ss_store_sk] #2 WholeStageCodegen - HashAggregate [ss_item_sk,sum,sum,ss_store_sk,ss_sales_price] [sum,sum] - Project [ss_item_sk,ss_store_sk,ss_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] - Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + HashAggregate [ss_item_sk,ss_sales_price,ss_store_sk,sum,sum] [sum,sum] + Project [ss_item_sk,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] - Filter [d_month_seq,d_date_sk] + Filter [d_date_sk,d_month_seq] Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [i_current_price,i_item_sk,i_brand,i_wholesale_cost,i_item_desc] + Project [i_brand,i_current_price,i_item_desc,i_item_sk,i_wholesale_cost] Filter [i_item_sk] - Scan parquet default.item [i_current_price,i_item_sk,i_brand,i_wholesale_cost,i_item_desc] [i_current_price,i_item_sk,i_brand,i_wholesale_cost,i_item_desc] + Scan parquet default.item [i_brand,i_current_price,i_item_desc,i_item_sk,i_wholesale_cost] [i_brand,i_current_price,i_item_desc,i_item_sk,i_wholesale_cost] InputAdapter BroadcastExchange #5 WholeStageCodegen - HashAggregate [ss_store_sk,sum,count,avg(revenue)] [avg(revenue),ave,sum,count] + HashAggregate [avg(revenue),count,ss_store_sk,sum] [ave,avg(revenue),count,sum] InputAdapter Exchange [ss_store_sk] #6 WholeStageCodegen - HashAggregate [sum,sum,count,revenue,ss_store_sk,count] [sum,count,sum,count] - HashAggregate [ss_store_sk,ss_item_sk,sum,sum(UnscaledValue(ss_sales_price))] [sum(UnscaledValue(ss_sales_price)),revenue,sum] + HashAggregate [count,count,revenue,ss_store_sk,sum,sum] [count,count,sum,sum] + HashAggregate [ss_item_sk,ss_store_sk,sum,sum(UnscaledValue(ss_sales_price))] [revenue,sum,sum(UnscaledValue(ss_sales_price))] InputAdapter - Exchange [ss_store_sk,ss_item_sk] #7 + Exchange [ss_item_sk,ss_store_sk] #7 WholeStageCodegen - HashAggregate [sum,ss_store_sk,sum,ss_sales_price,ss_item_sk] [sum,sum] - Project [ss_item_sk,ss_store_sk,ss_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + HashAggregate [ss_item_sk,ss_sales_price,ss_store_sk,sum,sum] [sum,sum] + Project [ss_item_sk,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt index f053b48d2..d964b2833 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt @@ -1,74 +1,74 @@ -TakeOrderedAndProject [dec_net,w_city,oct_sales_per_sq_foot,apr_sales_per_sq_foot,mar_sales,jul_sales_per_sq_foot,feb_sales_per_sq_foot,apr_net,ship_carriers,feb_net,may_sales_per_sq_foot,aug_sales,jun_net,w_county,sep_sales_per_sq_foot,feb_sales,mar_sales_per_sq_foot,w_country,year,w_state,oct_net,w_warehouse_sq_ft,dec_sales,may_sales,aug_net,sep_net,mar_net,nov_net,nov_sales_per_sq_foot,oct_sales,jun_sales,w_warehouse_name,may_net,jul_sales,dec_sales_per_sq_foot,jul_net,jan_net,sep_sales,jun_sales_per_sq_foot,jan_sales,nov_sales,apr_sales,jan_sales_per_sq_foot,aug_sales_per_sq_foot] +TakeOrderedAndProject [apr_net,apr_sales,apr_sales_per_sq_foot,aug_net,aug_sales,aug_sales_per_sq_foot,dec_net,dec_sales,dec_sales_per_sq_foot,feb_net,feb_sales,feb_sales_per_sq_foot,jan_net,jan_sales,jan_sales_per_sq_foot,jul_net,jul_sales,jul_sales_per_sq_foot,jun_net,jun_sales,jun_sales_per_sq_foot,mar_net,mar_sales,mar_sales_per_sq_foot,may_net,may_sales,may_sales_per_sq_foot,nov_net,nov_sales,nov_sales_per_sq_foot,oct_net,oct_sales,oct_sales_per_sq_foot,sep_net,sep_sales,sep_sales_per_sq_foot,ship_carriers,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,year] WholeStageCodegen - HashAggregate [sum(may_net),sum,w_city,sum,sum(sep_sales),sum(dec_net),sum(jul_net),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum,sum,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum,sum,sum(oct_sales),sum,sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(mar_sales),sum(sep_net),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum(oct_net),sum,sum(apr_sales),sum(jan_net),ship_carriers,sum(nov_net),sum,sum(jun_sales),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(dec_sales),w_county,sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(may_sales),w_country,year,sum,sum,sum,sum,w_state,sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,w_warehouse_sq_ft,sum,sum,sum,sum(aug_sales),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(nov_sales),sum(aug_net),sum(jul_sales),sum,sum(jan_sales),sum,sum,sum(feb_sales),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum,w_warehouse_name,sum,sum,sum(jun_net),sum(mar_net),sum,sum,sum,sum,sum,sum(apr_net),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(feb_net)] [sum(may_net),dec_net,sum,sum,sum(sep_sales),sum(dec_net),sum(jul_net),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),oct_sales_per_sq_foot,apr_sales_per_sq_foot,sum,sum,sum,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum,mar_sales,sum,sum(oct_sales),sum,sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(mar_sales),sum(sep_net),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,sum(oct_net),jul_sales_per_sq_foot,feb_sales_per_sq_foot,sum,sum(apr_sales),sum(jan_net),apr_net,feb_net,sum(nov_net),may_sales_per_sq_foot,aug_sales,sum,jun_net,sum(jun_sales),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(dec_sales),sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sep_sales_per_sq_foot,feb_sales,sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(may_sales),mar_sales_per_sq_foot,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),oct_net,sum,dec_sales,sum,sum,may_sales,sum,sum(aug_sales),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(nov_sales),aug_net,sum(aug_net),sum(jul_sales),sum,sep_net,mar_net,sum(jan_sales),nov_net,nov_sales_per_sq_foot,sum,oct_sales,sum,sum(feb_sales),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum,jun_sales,sum,may_net,jul_sales,sum,dec_sales_per_sq_foot,jul_net,jan_net,sum,sum(jun_net),sum(mar_net),sum,sum,sep_sales,sum,jun_sales_per_sq_foot,jan_sales,nov_sales,apr_sales,sum,sum,jan_sales_per_sq_foot,aug_sales_per_sq_foot,sum(apr_net),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(feb_net)] + HashAggregate [ship_carriers,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(apr_net),sum(apr_sales),sum(aug_net),sum(aug_sales),sum(dec_net),sum(dec_sales),sum(feb_net),sum(feb_sales),sum(jan_net),sum(jan_sales),sum(jul_net),sum(jul_sales),sum(jun_net),sum(jun_sales),sum(mar_net),sum(mar_sales),sum(may_net),sum(may_sales),sum(nov_net),sum(nov_sales),sum(oct_net),sum(oct_sales),sum(sep_net),sum(sep_sales),w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,year] [apr_net,apr_sales,apr_sales_per_sq_foot,aug_net,aug_sales,aug_sales_per_sq_foot,dec_net,dec_sales,dec_sales_per_sq_foot,feb_net,feb_sales,feb_sales_per_sq_foot,jan_net,jan_sales,jan_sales_per_sq_foot,jul_net,jul_sales,jul_sales_per_sq_foot,jun_net,jun_sales,jun_sales_per_sq_foot,mar_net,mar_sales,mar_sales_per_sq_foot,may_net,may_sales,may_sales_per_sq_foot,nov_net,nov_sales,nov_sales_per_sq_foot,oct_net,oct_sales,oct_sales_per_sq_foot,sep_net,sep_sales,sep_sales_per_sq_foot,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(apr_net),sum(apr_sales),sum(aug_net),sum(aug_sales),sum(dec_net),sum(dec_sales),sum(feb_net),sum(feb_sales),sum(jan_net),sum(jan_sales),sum(jul_net),sum(jul_sales),sum(jun_net),sum(jun_sales),sum(mar_net),sum(mar_sales),sum(may_net),sum(may_sales),sum(nov_net),sum(nov_sales),sum(oct_net),sum(oct_sales),sum(sep_net),sum(sep_sales)] InputAdapter - Exchange [w_city,ship_carriers,w_county,w_country,year,w_state,w_warehouse_sq_ft,w_warehouse_name] #1 + Exchange [ship_carriers,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,year] #1 WholeStageCodegen - HashAggregate [sum,sum,w_city,sum,may_sales,apr_net,sum,sum,sum,jul_net,sum,sum,jan_net,sum,sum,sep_sales,sum,sum,sum,sum,jan_sales,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,aug_net,dec_sales,jun_sales,sum,sum,sum,feb_net,sum,sep_net,ship_carriers,sum,mar_sales,sum,sum,sum,dec_net,w_county,sum,nov_sales,jul_sales,may_net,w_country,sum,oct_sales,sum,year,sum,sum,sum,sum,sum,w_state,sum,sum,mar_net,sum,jun_net,w_warehouse_sq_ft,sum,sum,sum,aug_sales,sum,sum,sum,sum,sum,feb_sales,sum,oct_net,sum,apr_sales,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_warehouse_name,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,nov_net] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + HashAggregate [apr_net,apr_sales,aug_net,aug_sales,dec_net,dec_sales,feb_net,feb_sales,jan_net,jan_sales,jul_net,jul_sales,jun_net,jun_sales,mar_net,mar_sales,may_net,may_sales,nov_net,nov_sales,oct_net,oct_sales,sep_net,sep_sales,ship_carriers,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,year] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] InputAdapter Union WholeStageCodegen - HashAggregate [sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,d_year,sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_warehouse_sq_ft,w_county,sum,sum,sum,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_warehouse_name,sum,sum,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_city,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_state,sum,sum,sum,sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_country] [sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,may_sales,oct_sales,nov_net,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),apr_net,sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,ship_carriers,sum,sum,sum,dec_sales,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,apr_sales,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,nov_sales,sum,sum,jul_net,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jan_sales,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jun_net,sep_net,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,feb_net,aug_sales,jul_sales,sum,sum,sum,sum,year,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sep_sales,jan_net,sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,may_net,mar_net,sum,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),aug_net,mar_sales,oct_net,feb_sales,jun_sales,dec_net] + HashAggregate [d_year,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] [apr_net,apr_sales,aug_net,aug_sales,dec_net,dec_sales,feb_net,feb_sales,jan_net,jan_sales,jul_net,jul_sales,jun_net,jun_sales,mar_net,mar_sales,may_net,may_sales,nov_net,nov_sales,oct_net,oct_sales,sep_net,sep_sales,ship_carriers,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),year] InputAdapter - Exchange [d_year,w_warehouse_sq_ft,w_county,w_warehouse_name,w_city,w_state,w_country] #2 + Exchange [d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] #2 WholeStageCodegen - HashAggregate [sum,sum,sum,sum,d_year,sum,sum,sum,sum,d_moy,sum,sum,sum,sum,sum,sum,sum,sum,w_warehouse_sq_ft,w_county,sum,sum,sum,sum,sum,sum,sum,w_warehouse_name,sum,sum,sum,sum,ws_ext_sales_price,sum,w_city,sum,ws_net_paid,sum,sum,sum,w_state,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,ws_quantity,sum,w_country,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [d_moy,ws_net_paid,d_year,ws_ext_sales_price,ws_quantity,w_county,w_country,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] - BroadcastHashJoin [ws_ship_mode_sk,sm_ship_mode_sk] - Project [d_moy,ws_net_paid,d_year,ws_ext_sales_price,ws_quantity,w_county,w_country,ws_ship_mode_sk,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] - BroadcastHashJoin [ws_sold_time_sk,t_time_sk] - Project [d_moy,ws_net_paid,d_year,ws_ext_sales_price,ws_quantity,w_county,w_country,ws_ship_mode_sk,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city,ws_sold_time_sk] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_net_paid,ws_ext_sales_price,ws_quantity,w_county,w_country,ws_ship_mode_sk,w_state,ws_sold_date_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,ws_sold_time_sk] - BroadcastHashJoin [ws_warehouse_sk,w_warehouse_sk] - Project [ws_net_paid,ws_ext_sales_price,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_sold_time_sk] - Filter [ws_warehouse_sk,ws_sold_date_sk,ws_sold_time_sk,ws_ship_mode_sk] - Scan parquet default.web_sales [ws_net_paid,ws_ext_sales_price,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_sold_time_sk] [ws_net_paid,ws_ext_sales_price,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_sold_time_sk] + HashAggregate [d_moy,d_year,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,ws_ext_sales_price,ws_net_paid,ws_quantity] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,ws_ext_sales_price,ws_net_paid,ws_quantity] + BroadcastHashJoin [sm_ship_mode_sk,ws_ship_mode_sk] + Project [d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,ws_ext_sales_price,ws_net_paid,ws_quantity,ws_ship_mode_sk] + BroadcastHashJoin [t_time_sk,ws_sold_time_sk] + Project [d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,ws_ext_sales_price,ws_net_paid,ws_quantity,ws_ship_mode_sk,ws_sold_time_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,ws_ext_sales_price,ws_net_paid,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_sold_time_sk] + BroadcastHashJoin [w_warehouse_sk,ws_warehouse_sk] + Project [ws_ext_sales_price,ws_net_paid,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_sold_time_sk,ws_warehouse_sk] + Filter [ws_ship_mode_sk,ws_sold_date_sk,ws_sold_time_sk,ws_warehouse_sk] + Scan parquet default.web_sales [ws_ext_sales_price,ws_net_paid,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_sold_time_sk,ws_warehouse_sk] [ws_ext_sales_price,ws_net_paid,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_sold_time_sk,ws_warehouse_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [w_county,w_warehouse_sk,w_country,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] + Project [w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sk,w_warehouse_sq_ft] Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_county,w_warehouse_sk,w_country,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] [w_county,w_warehouse_sk,w_country,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] + Scan parquet default.warehouse [w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sk,w_warehouse_sq_ft] [w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sk,w_warehouse_sq_ft] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [d_date_sk,d_year,d_moy] - Filter [d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Project [d_date_sk,d_moy,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [t_time_sk] Filter [t_time,t_time_sk] - Scan parquet default.time_dim [t_time_sk,t_time] [t_time_sk,t_time] + Scan parquet default.time_dim [t_time,t_time_sk] [t_time,t_time_sk] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [sm_ship_mode_sk] Filter [sm_carrier,sm_ship_mode_sk] - Scan parquet default.ship_mode [sm_ship_mode_sk,sm_carrier] [sm_ship_mode_sk,sm_carrier] + Scan parquet default.ship_mode [sm_carrier,sm_ship_mode_sk] [sm_carrier,sm_ship_mode_sk] WholeStageCodegen - HashAggregate [sum,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),d_year,sum,sum,sum,sum,sum,sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,w_warehouse_sq_ft,sum,w_county,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_warehouse_name,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_city,sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,w_state,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_country] [sum,jan_sales,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),oct_net,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),feb_sales,sum,sum,sum,feb_net,sum,sum,sep_sales,sum,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,apr_net,sum,ship_carriers,sum,nov_net,sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),may_net,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),dec_sales,sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),nov_sales,sum,sum,jun_net,oct_sales,sep_net,sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jul_sales,aug_sales,sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jul_net,sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),dec_net,aug_net,sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),jan_net,mar_net,mar_sales,jun_sales,sum,sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),may_sales,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum,sum,year,sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),apr_sales] + HashAggregate [d_year,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] [apr_net,apr_sales,aug_net,aug_sales,dec_net,dec_sales,feb_net,feb_sales,jan_net,jan_sales,jul_net,jul_sales,jun_net,jun_sales,mar_net,mar_sales,may_net,may_sales,nov_net,nov_sales,oct_net,oct_sales,sep_net,sep_sales,ship_carriers,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),year] InputAdapter - Exchange [d_year,w_warehouse_sq_ft,w_county,w_warehouse_name,w_city,w_state,w_country] #7 + Exchange [d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] #7 WholeStageCodegen - HashAggregate [sum,sum,sum,d_year,sum,sum,sum,sum,sum,sum,sum,sum,d_moy,sum,sum,sum,sum,w_warehouse_sq_ft,sum,sum,w_county,sum,sum,sum,sum,w_warehouse_name,sum,sum,sum,sum,sum,sum,w_city,sum,sum,sum,sum,w_state,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,cs_sales_price,cs_quantity,sum,sum,cs_net_paid_inc_tax,sum,sum,sum,sum,w_country,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [cs_quantity,d_moy,d_year,w_county,w_country,cs_sales_price,w_state,cs_net_paid_inc_tax,w_warehouse_name,w_warehouse_sq_ft,w_city] + HashAggregate [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,d_moy,d_year,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] - Project [cs_quantity,d_moy,d_year,w_county,w_country,cs_sales_price,w_state,cs_net_paid_inc_tax,cs_ship_mode_sk,w_warehouse_name,w_warehouse_sq_ft,w_city] + Project [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_ship_mode_sk,d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] BroadcastHashJoin [cs_sold_time_sk,t_time_sk] - Project [cs_sold_time_sk,cs_quantity,d_moy,d_year,w_county,w_country,cs_sales_price,w_state,cs_net_paid_inc_tax,cs_ship_mode_sk,w_warehouse_name,w_warehouse_sq_ft,w_city] + Project [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_ship_mode_sk,cs_sold_time_sk,d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_time_sk,cs_quantity,w_county,w_country,cs_sales_price,w_state,cs_sold_date_sk,cs_net_paid_inc_tax,cs_ship_mode_sk,w_warehouse_name,w_warehouse_sq_ft,w_city] + Project [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_ship_mode_sk,cs_sold_date_sk,cs_sold_time_sk,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] - Project [cs_sold_time_sk,cs_quantity,cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_net_paid_inc_tax,cs_ship_mode_sk] - Filter [cs_warehouse_sk,cs_sold_date_sk,cs_sold_time_sk,cs_ship_mode_sk] - Scan parquet default.catalog_sales [cs_sold_time_sk,cs_quantity,cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_net_paid_inc_tax,cs_ship_mode_sk] [cs_sold_time_sk,cs_quantity,cs_warehouse_sk,cs_sales_price,cs_sold_date_sk,cs_net_paid_inc_tax,cs_ship_mode_sk] + Project [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_ship_mode_sk,cs_sold_date_sk,cs_sold_time_sk,cs_warehouse_sk] + Filter [cs_ship_mode_sk,cs_sold_date_sk,cs_sold_time_sk,cs_warehouse_sk] + Scan parquet default.catalog_sales [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_ship_mode_sk,cs_sold_date_sk,cs_sold_time_sk,cs_warehouse_sk] [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_ship_mode_sk,cs_sold_date_sk,cs_sold_time_sk,cs_warehouse_sk] InputAdapter - ReusedExchange [w_county,w_warehouse_sk,w_country,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] [w_county,w_warehouse_sk,w_country,w_state,w_warehouse_name,w_warehouse_sq_ft,w_city] #3 + ReusedExchange [w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sk,w_warehouse_sq_ft] [w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sk,w_warehouse_sq_ft] #3 InputAdapter - ReusedExchange [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] #4 + ReusedExchange [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] #4 InputAdapter ReusedExchange [t_time_sk] [t_time_sk] #5 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt index 9381a8892..49dac167f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt @@ -1,43 +1,43 @@ -TakeOrderedAndProject [d_qoy,i_category,i_brand,i_class,d_year,d_moy,rk,i_product_name,s_store_id,sumsales] +TakeOrderedAndProject [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,rk,s_store_id,sumsales] WholeStageCodegen Filter [rk] InputAdapter - Window [sumsales,i_category] + Window [i_category,sumsales] WholeStageCodegen Sort [i_category,sumsales] InputAdapter Exchange [i_category] #1 WholeStageCodegen - HashAggregate [s_store_id,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00)),i_category,sum,i_brand,i_product_name,spark_grouping_id,d_moy,i_class,d_year,d_qoy] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales,sum] + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,spark_grouping_id,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00))] [sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales] InputAdapter - Exchange [s_store_id,i_category,i_brand,i_product_name,spark_grouping_id,d_moy,i_class,d_year,d_qoy] #2 + Exchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,spark_grouping_id] #2 WholeStageCodegen - HashAggregate [s_store_id,i_category,sum,i_brand,ss_sales_price,i_product_name,spark_grouping_id,d_moy,i_class,ss_quantity,d_year,d_qoy,sum] [sum,sum] - Expand [d_year,i_class,d_qoy,d_moy,i_brand,i_category,s_store_id,i_product_name,ss_sales_price,ss_quantity] - Project [d_year,d_qoy,d_moy,i_brand,s_store_id,ss_sales_price,i_category,ss_quantity,i_class,i_product_name] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [s_store_id,d_qoy,ss_quantity,ss_item_sk,d_moy,d_year,ss_sales_price] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [d_qoy,ss_quantity,ss_item_sk,d_moy,d_year,ss_store_sk,ss_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] - Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk] - Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] [ss_quantity,ss_item_sk,ss_store_sk,ss_sales_price,ss_sold_date_sk] + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,spark_grouping_id,ss_quantity,ss_sales_price,sum,sum] [sum,sum] + Expand [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,ss_quantity,ss_sales_price] + Project [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_moy,d_qoy,d_year,s_store_id,ss_item_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,d_qoy,d_year,ss_item_sk,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [d_date_sk,d_year,d_moy,d_qoy] - Filter [d_month_seq,d_date_sk] - Scan parquet default.date_dim [d_qoy,d_moy,d_year,d_month_seq,d_date_sk] [d_qoy,d_moy,d_year,d_month_seq,d_date_sk] + Project [d_date_sk,d_moy,d_qoy,d_year] + Filter [d_date_sk,d_month_seq] + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy,d_qoy,d_year] [d_date_sk,d_month_seq,d_moy,d_qoy,d_year] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [s_store_sk,s_store_id] + Project [s_store_id,s_store_sk] Filter [s_store_sk] - Scan parquet default.store [s_store_sk,s_store_id] [s_store_sk,s_store_id] + Scan parquet default.store [s_store_id,s_store_sk] [s_store_id,s_store_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [i_class,i_item_sk,i_product_name,i_category,i_brand] + Project [i_brand,i_category,i_class,i_item_sk,i_product_name] Filter [i_item_sk] - Scan parquet default.item [i_class,i_item_sk,i_product_name,i_category,i_brand] [i_class,i_item_sk,i_product_name,i_category,i_brand] + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_product_name] [i_brand,i_category,i_class,i_item_sk,i_product_name] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt index 1e9ce42b5..579970f83 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt @@ -1,42 +1,42 @@ -TakeOrderedAndProject [bought_city,ss_ticket_number,extended_price,list_price,c_last_name,c_first_name,extended_tax,ca_city] +TakeOrderedAndProject [bought_city,c_first_name,c_last_name,ca_city,extended_price,extended_tax,list_price,ss_ticket_number] WholeStageCodegen - Project [bought_city,ss_ticket_number,extended_price,list_price,c_last_name,c_first_name,extended_tax,ca_city] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] - Project [c_current_addr_sk,extended_price,c_last_name,list_price,c_first_name,ss_ticket_number,extended_tax,bought_city] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - HashAggregate [sum,sum,ca_city,ss_customer_sk,sum(UnscaledValue(ss_ext_tax)),sum(UnscaledValue(ss_ext_sales_price)),ss_ticket_number,ss_addr_sk,sum(UnscaledValue(ss_ext_list_price)),sum] [sum,extended_price,sum,list_price,sum(UnscaledValue(ss_ext_tax)),sum(UnscaledValue(ss_ext_sales_price)),extended_tax,sum(UnscaledValue(ss_ext_list_price)),sum,bought_city] + Project [bought_city,c_first_name,c_last_name,ca_city,extended_price,extended_tax,list_price,ss_ticket_number] + BroadcastHashJoin [bought_city,c_current_addr_sk,ca_address_sk,ca_city] + Project [bought_city,c_current_addr_sk,c_first_name,c_last_name,extended_price,extended_tax,list_price,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum,sum,sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_tax))] [bought_city,extended_price,extended_tax,list_price,sum,sum,sum,sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_tax))] InputAdapter - Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 + Exchange [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #1 WholeStageCodegen - HashAggregate [sum,ss_ext_tax,sum,ca_city,ss_customer_sk,sum,sum,sum,ss_ext_sales_price,ss_ext_list_price,ss_ticket_number,ss_addr_sk,sum] [sum,sum,sum,sum,sum,sum] - Project [ss_addr_sk,ss_ext_tax,ss_customer_sk,ca_city,ss_ext_sales_price,ss_ext_list_price,ss_ticket_number] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_addr_sk,ss_ext_tax,ss_customer_sk,ss_ext_sales_price,ss_ext_list_price,ss_ticket_number] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_addr_sk,ss_ext_tax,ss_customer_sk,ss_ext_sales_price,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] - Filter [ss_hdemo_sk,ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_addr_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] [ss_addr_sk,ss_ext_tax,ss_store_sk,ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk,ss_hdemo_sk,ss_ext_list_price,ss_ticket_number] + HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [ca_city,ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Filter [ss_addr_sk,ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [d_date_sk] - Filter [d_dom,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_dom] [d_date_sk,d_year,d_dom] + Filter [d_date_sk,d_dom,d_year] + Scan parquet default.date_dim [d_date_sk,d_dom,d_year] [d_date_sk,d_dom,d_year] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [s_store_sk] Filter [s_city,s_store_sk] - Scan parquet default.store [s_store_sk,s_city] [s_store_sk,s_city] + Scan parquet default.store [s_city,s_store_sk] [s_city,s_store_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [hd_demo_sk] - Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] InputAdapter BroadcastExchange #5 @@ -47,8 +47,8 @@ TakeOrderedAndProject [bought_city,ss_ticket_number,extended_price,list_price,c_ InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] - Filter [c_customer_sk,c_current_addr_sk] - Scan parquet default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + Project [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] InputAdapter ReusedExchange [ca_address_sk,ca_city] [ca_address_sk,ca_city] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/simplified.txt index d6e6152f7..be4dce4b0 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/simplified.txt @@ -1,43 +1,43 @@ -TakeOrderedAndProject [cnt3,cnt1,cd_purchase_estimate,cd_education_status,cd_credit_rating,cd_marital_status,cd_gender,cnt2] +TakeOrderedAndProject [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,cnt1,cnt2,cnt3] WholeStageCodegen - HashAggregate [count(1),cd_purchase_estimate,count,cd_education_status,cd_credit_rating,cd_marital_status,cd_gender] [cnt3,count(1),cnt1,count,cnt2] + HashAggregate [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,count,count(1)] [cnt1,cnt2,cnt3,count,count(1)] InputAdapter - Exchange [cd_purchase_estimate,cd_education_status,cd_credit_rating,cd_marital_status,cd_gender] #1 + Exchange [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] #1 WholeStageCodegen - HashAggregate [count,cd_purchase_estimate,count,cd_education_status,cd_credit_rating,cd_marital_status,cd_gender] [count,count] - Project [cd_marital_status,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] + HashAggregate [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,count,count] [count,count] + Project [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] Project [c_current_cdemo_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_cdemo_sk,c_current_addr_sk] + Project [c_current_addr_sk,c_current_cdemo_sk] BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] Filter [c_current_addr_sk,c_current_cdemo_sk] - Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_sold_date_sk] Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] [ss_sold_date_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [ws_bill_customer_sk] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_customer_sk,ws_sold_date_sk] Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] [ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter @@ -45,20 +45,20 @@ TakeOrderedAndProject [cnt3,cnt1,cd_purchase_estimate,cd_education_status,cd_cre WholeStageCodegen Project [cs_ship_customer_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_ship_customer_sk] + Project [cs_ship_customer_sk,cs_sold_date_sk] Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_customer_sk] [cs_sold_date_sk,cs_ship_customer_sk] + Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] [cs_ship_customer_sk,cs_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter BroadcastExchange #6 WholeStageCodegen Project [ca_address_sk] - Filter [ca_state,ca_address_sk] + Filter [ca_address_sk,ca_state] Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [cd_demo_sk,cd_marital_status,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] + Project [cd_credit_rating,cd_demo_sk,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] [cd_demo_sk,cd_marital_status,cd_purchase_estimate,cd_gender,cd_credit_rating,cd_education_status] + Scan parquet default.customer_demographics [cd_credit_rating,cd_demo_sk,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] [cd_credit_rating,cd_demo_sk,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt index b72978a57..2cf9df4fd 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt @@ -1,42 +1,42 @@ -TakeOrderedAndProject [agg2,i_item_id,agg1,agg4,agg3] +TakeOrderedAndProject [agg1,agg2,agg3,agg4,i_item_id] WholeStageCodegen - HashAggregate [i_item_id,count,sum,count,avg(UnscaledValue(ss_sales_price)),sum,avg(cast(ss_quantity as bigint)),sum,sum,avg(UnscaledValue(ss_coupon_amt)),count,count,avg(UnscaledValue(ss_list_price))] [agg2,agg1,count,sum,count,avg(UnscaledValue(ss_sales_price)),sum,agg4,avg(cast(ss_quantity as bigint)),sum,sum,avg(UnscaledValue(ss_coupon_amt)),count,agg3,count,avg(UnscaledValue(ss_list_price))] + HashAggregate [avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,i_item_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,sum,sum,sum,sum] InputAdapter Exchange [i_item_id] #1 WholeStageCodegen - HashAggregate [count,i_item_id,count,count,sum,count,sum,count,sum,sum,sum,ss_coupon_amt,count,ss_sales_price,sum,sum,ss_list_price,sum,count,ss_quantity,count] [count,count,count,sum,count,sum,count,sum,sum,sum,count,sum,sum,sum,count,count] - Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_sales_price,i_item_id] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_sales_price,ss_promo_sk,i_item_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_promo_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_promo_sk] - BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk] - Filter [ss_cdemo_sk,ss_sold_date_sk,ss_item_sk,ss_promo_sk] - Scan parquet default.store_sales [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk] [ss_coupon_amt,ss_quantity,ss_list_price,ss_item_sk,ss_sales_price,ss_cdemo_sk,ss_sold_date_sk,ss_promo_sk] + HashAggregate [count,count,count,count,count,count,count,count,i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [i_item_id,ss_coupon_amt,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Project [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [cd_demo_sk] - Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] - Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [i_item_sk,i_item_id] + Project [i_item_id,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [p_promo_sk] Filter [p_channel_email,p_channel_event,p_promo_sk] - Scan parquet default.promotion [p_promo_sk,p_channel_email,p_channel_event] [p_promo_sk,p_channel_email,p_channel_event] + Scan parquet default.promotion [p_channel_email,p_channel_event,p_promo_sk] [p_channel_email,p_channel_event,p_promo_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt index 2ef724de4..d2c001785 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt @@ -1,39 +1,39 @@ -TakeOrderedAndProject [total_sum,lochierarchy,s_state,s_county,rank_within_parent] +TakeOrderedAndProject [lochierarchy,rank_within_parent,s_county,s_state,total_sum] WholeStageCodegen - Project [total_sum,lochierarchy,s_state,s_county,rank_within_parent] + Project [lochierarchy,rank_within_parent,s_county,s_state,total_sum] InputAdapter - Window [_w3,_w1,_w2] + Window [_w1,_w2,_w3] WholeStageCodegen Sort [_w1,_w2,_w3] InputAdapter Exchange [_w1,_w2] #1 WholeStageCodegen - HashAggregate [sum,s_state,s_county,spark_grouping_id,sum(UnscaledValue(ss_net_profit))] [total_sum,sum,lochierarchy,_w3,sum(UnscaledValue(ss_net_profit)),_w2,_w1] + HashAggregate [s_county,s_state,spark_grouping_id,sum,sum(UnscaledValue(ss_net_profit))] [_w1,_w2,_w3,lochierarchy,sum,sum(UnscaledValue(ss_net_profit)),total_sum] InputAdapter - Exchange [s_state,s_county,spark_grouping_id] #2 + Exchange [s_county,s_state,spark_grouping_id] #2 WholeStageCodegen - HashAggregate [sum,s_state,s_county,spark_grouping_id,sum,ss_net_profit] [sum,sum] - Expand [ss_net_profit,s_state,s_county] - Project [ss_net_profit,s_state,s_county] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_store_sk,ss_net_profit] + HashAggregate [s_county,s_state,spark_grouping_id,ss_net_profit,sum,sum] [sum,sum] + Expand [s_county,s_state,ss_net_profit] + Project [s_county,s_state,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_net_profit,ss_sold_date_sk,ss_store_sk] Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] [ss_sold_date_sk,ss_store_sk,ss_net_profit] + Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_net_profit,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] - Filter [d_month_seq,d_date_sk] + Filter [d_date_sk,d_month_seq] Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] InputAdapter BroadcastExchange #4 WholeStageCodegen BroadcastHashJoin [s_state,s_state] - Project [s_store_sk,s_county,s_state] + Project [s_county,s_state,s_store_sk] Filter [s_store_sk] - Scan parquet default.store [s_store_sk,s_county,s_state] [s_store_sk,s_county,s_state] + Scan parquet default.store [s_county,s_state,s_store_sk] [s_county,s_state,s_store_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen @@ -42,24 +42,24 @@ TakeOrderedAndProject [total_sum,lochierarchy,s_state,s_county,rank_within_paren InputAdapter Window [_w2,s_state] WholeStageCodegen - Sort [s_state,_w2] - HashAggregate [s_state,sum,sum(UnscaledValue(ss_net_profit))] [sum(UnscaledValue(ss_net_profit)),s_state,_w2,sum] + Sort [_w2,s_state] + HashAggregate [s_state,sum,sum(UnscaledValue(ss_net_profit))] [_w2,s_state,sum,sum(UnscaledValue(ss_net_profit))] InputAdapter Exchange [s_state] #6 WholeStageCodegen HashAggregate [s_state,ss_net_profit,sum,sum] [sum,sum] - Project [ss_net_profit,s_state] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_net_profit,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_sold_date_sk,ss_store_sk,ss_net_profit] - Filter [ss_store_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] [ss_sold_date_sk,ss_store_sk,ss_net_profit] + Project [s_state,ss_net_profit] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [s_state,ss_net_profit,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_net_profit,ss_sold_date_sk,ss_store_sk] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_net_profit,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [s_store_sk,s_state] + Project [s_state,s_store_sk] Filter [s_store_sk] - Scan parquet default.store [s_store_sk,s_state] [s_store_sk,s_state] + Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt index 0153205a5..c905e2e5d 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt @@ -1,56 +1,56 @@ WholeStageCodegen - Sort [ext_price,brand_id] + Sort [brand_id,ext_price] InputAdapter - Exchange [ext_price,brand_id] #1 + Exchange [brand_id,ext_price] #1 WholeStageCodegen - HashAggregate [sum(UnscaledValue(ext_price)),sum,t_minute,t_hour,i_brand,i_brand_id] [sum(UnscaledValue(ext_price)),sum,brand,ext_price,brand_id] + HashAggregate [i_brand,i_brand_id,sum,sum(UnscaledValue(ext_price)),t_hour,t_minute] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ext_price))] InputAdapter Exchange [i_brand,i_brand_id,t_hour,t_minute] #2 WholeStageCodegen - HashAggregate [ext_price,sum,t_minute,t_hour,i_brand,i_brand_id,sum] [sum,sum] - Project [ext_price,t_minute,i_brand_id,i_brand,t_hour] - BroadcastHashJoin [time_sk,t_time_sk] - Project [i_brand_id,i_brand,ext_price,time_sk] + HashAggregate [ext_price,i_brand,i_brand_id,sum,sum,t_hour,t_minute] [sum,sum] + Project [ext_price,i_brand,i_brand_id,t_hour,t_minute] + BroadcastHashJoin [t_time_sk,time_sk] + Project [ext_price,i_brand,i_brand_id,time_sk] BroadcastHashJoin [i_item_sk,sold_item_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [i_item_sk,i_brand_id,i_brand] - Filter [i_manager_id,i_item_sk] - Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] [i_item_sk,i_brand_id,i_brand,i_manager_id] + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] [i_brand,i_brand_id,i_item_sk,i_manager_id] InputAdapter Union WholeStageCodegen Project [ws_ext_sales_price,ws_item_sk,ws_sold_time_sk] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_sold_time_sk,ws_item_sk,ws_ext_sales_price] - Filter [ws_sold_date_sk,ws_item_sk,ws_sold_time_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_sold_time_sk,ws_item_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_sold_time_sk,ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] + Filter [ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] WholeStageCodegen Project [cs_ext_sales_price,cs_item_sk,cs_sold_time_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_sold_time_sk,cs_item_sk,cs_ext_sales_price] - Filter [cs_sold_date_sk,cs_item_sk,cs_sold_time_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_sold_time_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_sold_time_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] + Filter [cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #4 WholeStageCodegen Project [ss_ext_sales_price,ss_item_sk,ss_sold_time_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_sold_time_sk,ss_item_sk,ss_ext_sales_price] - Filter [ss_sold_date_sk,ss_item_sk,ss_sold_time_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_sold_time_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_sold_time_sk,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #4 InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [t_time_sk,t_hour,t_minute] + Project [t_hour,t_minute,t_time_sk] Filter [t_meal_time,t_time_sk] - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute,t_meal_time] [t_time_sk,t_hour,t_minute,t_meal_time] + Scan parquet default.time_dim [t_hour,t_meal_time,t_minute,t_time_sk] [t_hour,t_meal_time,t_minute,t_time_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt index ee1925612..5a0b8c54a 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt @@ -1,69 +1,69 @@ -TakeOrderedAndProject [i_item_desc,w_warehouse_name,promo,no_promo,d_week_seq,total_cnt] +TakeOrderedAndProject [d_week_seq,i_item_desc,no_promo,promo,total_cnt,w_warehouse_name] WholeStageCodegen - HashAggregate [i_item_desc,count(1),w_warehouse_name,d_week_seq,count] [count(1),promo,no_promo,count,total_cnt] + HashAggregate [count,count(1),d_week_seq,i_item_desc,w_warehouse_name] [count,count(1),no_promo,promo,total_cnt] InputAdapter - Exchange [i_item_desc,w_warehouse_name,d_week_seq] #1 + Exchange [d_week_seq,i_item_desc,w_warehouse_name] #1 WholeStageCodegen - HashAggregate [i_item_desc,w_warehouse_name,d_week_seq,count,count] [count,count] - Project [w_warehouse_name,i_item_desc,d_week_seq] - BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] - Project [d_week_seq,cs_order_number,cs_item_sk,w_warehouse_name,i_item_desc] + HashAggregate [count,count,d_week_seq,i_item_desc,w_warehouse_name] [count,count] + Project [d_week_seq,i_item_desc,w_warehouse_name] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_item_sk,cs_order_number,d_week_seq,i_item_desc,w_warehouse_name] BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [d_week_seq,cs_promo_sk,cs_order_number,cs_item_sk,w_warehouse_name,i_item_desc] - BroadcastHashJoin [cs_ship_date_sk,d_date_sk,d_date,d_date] - Project [d_date,d_week_seq,cs_promo_sk,cs_ship_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,i_item_desc] - BroadcastHashJoin [d_week_seq,inv_date_sk,d_week_seq,d_date_sk] - Project [d_date,d_week_seq,cs_promo_sk,cs_ship_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc] + Project [cs_item_sk,cs_order_number,cs_promo_sk,d_week_seq,i_item_desc,w_warehouse_name] + BroadcastHashJoin [cs_ship_date_sk,d_date,d_date,d_date_sk] + Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,d_date,d_week_seq,i_item_desc,w_warehouse_name] + BroadcastHashJoin [d_date_sk,d_week_seq,d_week_seq,inv_date_sk] + Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,d_date,d_week_seq,i_item_desc,inv_date_sk,w_warehouse_name] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc] + Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,i_item_desc,inv_date_sk,w_warehouse_name] BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] - Project [cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc,cs_bill_hdemo_sk] - BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] - Project [cs_promo_sk,cs_bill_cdemo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,i_item_desc,cs_bill_hdemo_sk] + Project [cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,i_item_desc,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,i_item_desc,inv_date_sk,w_warehouse_name] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_promo_sk,cs_bill_cdemo_sk,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,w_warehouse_name,inv_date_sk,cs_bill_hdemo_sk] + Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,w_warehouse_name] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [cs_promo_sk,cs_bill_cdemo_sk,cs_ship_date_sk,inv_warehouse_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,inv_date_sk,cs_bill_hdemo_sk] - BroadcastHashJoin [cs_item_sk,inv_item_sk,inv_quantity_on_hand,cs_quantity] - Project [cs_promo_sk,cs_bill_cdemo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_bill_hdemo_sk] - Filter [cs_bill_cdemo_sk,cs_sold_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_quantity,cs_ship_date_sk] - Scan parquet default.catalog_sales [cs_promo_sk,cs_bill_cdemo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_bill_hdemo_sk] [cs_promo_sk,cs_bill_cdemo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_bill_hdemo_sk] + Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,inv_warehouse_sk] + BroadcastHashJoin [cs_item_sk,cs_quantity,inv_item_sk,inv_quantity_on_hand] + Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] + Filter [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] - Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [w_warehouse_sk,w_warehouse_name] + Project [w_warehouse_name,w_warehouse_sk] Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [i_item_sk,i_item_desc] + Project [i_item_desc,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_item_desc] [i_item_sk,i_item_desc] + Scan parquet default.item [i_item_desc,i_item_sk] [i_item_desc,i_item_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [cd_demo_sk] - Filter [cd_marital_status,cd_demo_sk] + Filter [cd_demo_sk,cd_marital_status] Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [hd_demo_sk] Filter [hd_buy_potential,hd_demo_sk] - Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] [hd_demo_sk,hd_buy_potential] + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk] [hd_buy_potential,hd_demo_sk] InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [d_date_sk,d_date,d_week_seq] - Filter [d_year,d_date_sk,d_week_seq] - Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] [d_date_sk,d_date,d_week_seq,d_year] + Project [d_date,d_date_sk,d_week_seq] + Filter [d_date_sk,d_week_seq,d_year] + Scan parquet default.date_dim [d_date,d_date_sk,d_week_seq,d_year] [d_date,d_date_sk,d_week_seq,d_year] InputAdapter BroadcastExchange #8 WholeStageCodegen @@ -73,9 +73,9 @@ TakeOrderedAndProject [i_item_desc,w_warehouse_name,promo,no_promo,d_week_seq,to InputAdapter BroadcastExchange #9 WholeStageCodegen - Project [d_date_sk,d_date] + Project [d_date,d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #10 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt index f94077a9b..15bb10f43 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt @@ -3,44 +3,44 @@ WholeStageCodegen InputAdapter Exchange [cnt] #1 WholeStageCodegen - Project [ss_ticket_number,c_salutation,cnt,c_last_name,c_preferred_cust_flag,c_first_name] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation,cnt,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] Filter [cnt] - HashAggregate [ss_ticket_number,ss_customer_sk,count,count(1)] [count(1),cnt,count] + HashAggregate [count,count(1),ss_customer_sk,ss_ticket_number] [cnt,count,count(1)] InputAdapter - Exchange [ss_ticket_number,ss_customer_sk] #2 + Exchange [ss_customer_sk,ss_ticket_number] #2 WholeStageCodegen - HashAggregate [ss_ticket_number,ss_customer_sk,count,count] [count,count] + HashAggregate [count,count,ss_customer_sk,ss_ticket_number] [count,count] Project [ss_customer_sk,ss_ticket_number] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] - Filter [ss_sold_date_sk,ss_store_sk,ss_hdemo_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] [ss_store_sk,ss_customer_sk,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] - Filter [d_dom,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_dom] [d_date_sk,d_year,d_dom] + Filter [d_date_sk,d_dom,d_year] + Scan parquet default.date_dim [d_date_sk,d_dom,d_year] [d_date_sk,d_dom,d_year] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [s_store_sk] Filter [s_county,s_store_sk] - Scan parquet default.store [s_store_sk,s_county] [s_store_sk,s_county] + Scan parquet default.store [s_county,s_store_sk] [s_county,s_store_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] - Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] + Filter [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [c_preferred_cust_flag,c_customer_sk,c_last_name,c_first_name,c_salutation] + Project [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] Filter [c_customer_sk] - Scan parquet default.customer [c_preferred_cust_flag,c_customer_sk,c_last_name,c_first_name,c_salutation] [c_preferred_cust_flag,c_customer_sk,c_last_name,c_first_name,c_salutation] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt index c3144e1d9..5167a92d5 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt @@ -1,88 +1,88 @@ -TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] +TakeOrderedAndProject [customer_first_name,customer_id,customer_last_name] WholeStageCodegen - Project [customer_id,customer_first_name,customer_last_name] - BroadcastHashJoin [year_total,year_total,year_total,year_total,customer_id,customer_id] - Project [customer_first_name,year_total,year_total,customer_last_name,year_total,customer_id,customer_id] + Project [customer_first_name,customer_id,customer_last_name] + BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] + Project [customer_first_name,customer_id,customer_id,customer_last_name,year_total,year_total,year_total] BroadcastHashJoin [customer_id,customer_id] BroadcastHashJoin [customer_id,customer_id] InputAdapter Union WholeStageCodegen Filter [year_total] - HashAggregate [d_year,sum(UnscaledValue(ss_net_paid)),c_customer_id,c_last_name,sum,c_first_name] [sum(UnscaledValue(ss_net_paid)),customer_id,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ss_net_paid))] [customer_id,sum,sum(UnscaledValue(ss_net_paid)),year_total] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year] #1 WholeStageCodegen - HashAggregate [d_year,sum,ss_net_paid,c_customer_id,c_last_name,sum,c_first_name] [sum,sum] - Project [c_customer_id,d_year,ss_net_paid,c_last_name,c_first_name] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_customer_id,ss_net_paid,c_last_name,c_first_name,ss_sold_date_sk] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid,sum,sum] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] - Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] [c_customer_sk,c_customer_id,c_first_name,c_last_name] + Project [c_customer_id,c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_id,c_customer_sk] + Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] [c_customer_id,c_customer_sk,c_first_name,c_last_name] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [ss_sold_date_sk,ss_customer_sk,ss_net_paid] + Project [ss_customer_sk,ss_net_paid,ss_sold_date_sk] Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_net_paid] [ss_sold_date_sk,ss_customer_sk,ss_net_paid] + Scan parquet default.store_sales [ss_customer_sk,ss_net_paid,ss_sold_date_sk] [ss_customer_sk,ss_net_paid,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk,d_year] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] LocalTableScan [customer_id,year_total] [customer_id,year_total] InputAdapter BroadcastExchange #4 Union WholeStageCodegen - HashAggregate [d_year,sum(UnscaledValue(ss_net_paid)),sum,c_customer_id,c_last_name,c_first_name] [sum(UnscaledValue(ss_net_paid)),customer_first_name,sum,customer_last_name,year_total,customer_id] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ss_net_paid))] [customer_first_name,customer_id,customer_last_name,sum,sum(UnscaledValue(ss_net_paid)),year_total] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year] #5 WholeStageCodegen - HashAggregate [d_year,ss_net_paid,sum,c_customer_id,c_last_name,c_first_name,sum] [sum,sum] - Project [c_customer_id,d_year,ss_net_paid,c_last_name,c_first_name] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_customer_id,ss_net_paid,c_last_name,c_first_name,ss_sold_date_sk] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid,sum,sum] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] - Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] [c_customer_sk,c_customer_id,c_first_name,c_last_name] + Project [c_customer_id,c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_id,c_customer_sk] + Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] [c_customer_id,c_customer_sk,c_first_name,c_last_name] InputAdapter - ReusedExchange [ss_sold_date_sk,ss_customer_sk,ss_net_paid] [ss_sold_date_sk,ss_customer_sk,ss_net_paid] #2 + ReusedExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 InputAdapter BroadcastExchange #6 WholeStageCodegen Project [d_date_sk,d_year] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - LocalTableScan [customer_id,customer_first_name,customer_last_name,year_total] [customer_id,customer_first_name,customer_last_name,year_total] + LocalTableScan [customer_first_name,customer_id,customer_last_name,year_total] [customer_first_name,customer_id,customer_last_name,year_total] InputAdapter BroadcastExchange #7 Union LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen Filter [year_total] - HashAggregate [d_year,sum,sum(UnscaledValue(ws_net_paid)),c_customer_id,c_last_name,c_first_name] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ws_net_paid))] [customer_id,sum,sum(UnscaledValue(ws_net_paid)),year_total] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year] #8 WholeStageCodegen - HashAggregate [d_year,sum,sum,ws_net_paid,c_customer_id,c_last_name,c_first_name] [sum,sum] - Project [c_customer_id,ws_net_paid,d_year,c_last_name,c_first_name] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [c_customer_id,ws_net_paid,c_last_name,ws_sold_date_sk,c_first_name] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] - Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] [c_customer_sk,c_customer_id,c_first_name,c_last_name] + Project [c_customer_id,c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_id,c_customer_sk] + Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] [c_customer_id,c_customer_sk,c_first_name,c_last_name] InputAdapter BroadcastExchange #9 WholeStageCodegen - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] + Project [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] Filter [ws_bill_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 InputAdapter @@ -90,19 +90,19 @@ TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] Union LocalTableScan [customer_id,year_total] [customer_id,year_total] WholeStageCodegen - HashAggregate [d_year,sum(UnscaledValue(ws_net_paid)),c_customer_id,c_last_name,c_first_name,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ws_net_paid))] [customer_id,sum,sum(UnscaledValue(ws_net_paid)),year_total] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year] #11 WholeStageCodegen - HashAggregate [d_year,ws_net_paid,c_customer_id,sum,c_last_name,c_first_name,sum] [sum,sum] - Project [c_customer_id,ws_net_paid,d_year,c_last_name,c_first_name] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [c_customer_id,ws_net_paid,c_last_name,ws_sold_date_sk,c_first_name] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_customer_sk,c_customer_id,c_first_name,c_last_name] - Filter [c_customer_sk,c_customer_id] - Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] [c_customer_sk,c_customer_id,c_first_name,c_last_name] + Project [c_customer_id,c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_id,c_customer_sk] + Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] [c_customer_id,c_customer_sk,c_first_name,c_last_name] InputAdapter - ReusedExchange [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] #9 + ReusedExchange [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] #9 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #6 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt index 656d3c077..8ea410541 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt @@ -1,167 +1,167 @@ -TakeOrderedAndProject [i_brand_id,year,sales_amt_diff,prev_yr_cnt,sales_cnt_diff,curr_yr_cnt,i_category_id,i_class_id,i_manufact_id,prev_year] +TakeOrderedAndProject [curr_yr_cnt,i_brand_id,i_category_id,i_class_id,i_manufact_id,prev_year,prev_yr_cnt,sales_amt_diff,sales_cnt_diff,year] WholeStageCodegen - Project [i_brand_id,sales_amt,d_year,sales_cnt,sales_cnt,d_year,sales_amt,i_category_id,i_class_id,i_manufact_id] - BroadcastHashJoin [i_brand_id,i_class_id,i_brand_id,sales_cnt,i_manufact_id,sales_cnt,i_category_id,i_category_id,i_class_id,i_manufact_id] - HashAggregate [sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint)),i_manufact_id,i_brand_id,i_class_id,i_category_id,sum,sum,d_year] [sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint)),sales_amt,sum,sum,sales_cnt] + Project [d_year,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_amt,sales_cnt,sales_cnt] + BroadcastHashJoin [i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id,i_manufact_id,i_manufact_id,sales_cnt,sales_cnt] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sum,sum,sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint))] [sales_amt,sales_cnt,sum,sum,sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint))] InputAdapter - Exchange [i_manufact_id,i_brand_id,i_class_id,i_category_id,d_year] #1 + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] #1 WholeStageCodegen - HashAggregate [sales_amt,i_manufact_id,i_brand_id,i_class_id,i_category_id,sum,sum,sales_cnt,sum,sum,d_year] [sum,sum,sum,sum] - HashAggregate [d_year,i_category_id,i_brand_id,i_manufact_id,sales_cnt,sales_amt,i_class_id] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt,sum,sum,sum,sum] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] InputAdapter - Exchange [d_year,i_category_id,i_brand_id,i_manufact_id,sales_cnt,sales_amt,i_class_id] #2 + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #2 WholeStageCodegen - HashAggregate [d_year,i_category_id,i_brand_id,i_manufact_id,sales_cnt,sales_amt,i_class_id] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] InputAdapter Union WholeStageCodegen - HashAggregate [d_year,i_category_id,i_brand_id,i_manufact_id,sales_cnt,sales_amt,i_class_id] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] InputAdapter - Exchange [d_year,i_category_id,i_brand_id,i_manufact_id,sales_cnt,sales_amt,i_class_id] #3 + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #3 WholeStageCodegen - HashAggregate [d_year,i_category_id,i_brand_id,i_manufact_id,sales_cnt,sales_amt,i_class_id] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] InputAdapter Union WholeStageCodegen - Project [d_year,cr_return_quantity,cr_return_amount,i_category_id,i_brand_id,i_manufact_id,cs_quantity,i_class_id,cs_ext_sales_price] - BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_quantity,i_manufact_id,d_year,i_category_id,cs_order_number,i_brand_id,cs_item_sk,i_class_id,cs_ext_sales_price] + Project [cr_return_amount,cr_return_quantity,cs_ext_sales_price,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_quantity,i_manufact_id,i_category_id,cs_sold_date_sk,cs_order_number,i_brand_id,cs_item_sk,i_class_id,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk,i_brand_id,i_category_id,i_class_id,i_manufact_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_ext_sales_price] [cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_ext_sales_price] + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [i_manufact_id,i_category_id,i_item_sk,i_brand_id,i_class_id] - Filter [i_category,i_category_id,i_item_sk,i_brand_id,i_manufact_id,i_class_id] - Scan parquet default.item [i_manufact_id,i_category_id,i_item_sk,i_category,i_brand_id,i_class_id] [i_manufact_id,i_category_id,i_item_sk,i_category,i_brand_id,i_class_id] + Project [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] + Filter [i_brand_id,i_category,i_category_id,i_class_id,i_item_sk,i_manufact_id] + Scan parquet default.item [i_brand_id,i_category,i_category_id,i_class_id,i_item_sk,i_manufact_id] [i_brand_id,i_category,i_category_id,i_class_id,i_item_sk,i_manufact_id] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [d_date_sk,d_year] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #6 WholeStageCodegen - Project [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + Project [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] Filter [cr_item_sk,cr_order_number] - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] WholeStageCodegen - Project [d_year,sr_return_amt,sr_return_quantity,i_category_id,ss_ext_sales_price,i_brand_id,i_manufact_id,ss_quantity,i_class_id] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_quantity,ss_item_sk,i_manufact_id,d_year,i_category_id,ss_ext_sales_price,i_brand_id,i_class_id,ss_ticket_number] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,i_manufact_id,i_category_id,ss_ext_sales_price,i_brand_id,ss_sold_date_sk,i_class_id,ss_ticket_number] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_quantity,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ticket_number] + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sr_return_amt,sr_return_quantity,ss_ext_sales_price,ss_quantity] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] InputAdapter - ReusedExchange [i_manufact_id,i_category_id,i_item_sk,i_brand_id,i_class_id] [i_manufact_id,i_category_id,i_item_sk,i_brand_id,i_class_id] #4 + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #5 InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] - Filter [sr_ticket_number,sr_item_sk] - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + Project [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] + Filter [sr_item_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] WholeStageCodegen - Project [wr_return_quantity,d_year,ws_ext_sales_price,wr_return_amt,i_category_id,i_brand_id,i_manufact_id,i_class_id,ws_quantity] - BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - Project [i_manufact_id,d_year,ws_ext_sales_price,ws_quantity,ws_order_number,i_category_id,i_brand_id,i_class_id,ws_item_sk] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [i_manufact_id,ws_ext_sales_price,ws_quantity,ws_order_number,i_category_id,ws_sold_date_sk,i_brand_id,i_class_id,ws_item_sk] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_ext_sales_price,ws_quantity,ws_order_number,ws_sold_date_sk,ws_item_sk] + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,wr_return_amt,wr_return_quantity,ws_ext_sales_price,ws_quantity] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_quantity,ws_order_number,ws_sold_date_sk,ws_item_sk] [ws_ext_sales_price,ws_quantity,ws_order_number,ws_sold_date_sk,ws_item_sk] + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] InputAdapter - ReusedExchange [i_manufact_id,i_category_id,i_item_sk,i_brand_id,i_class_id] [i_manufact_id,i_category_id,i_item_sk,i_brand_id,i_class_id] #4 + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #5 InputAdapter BroadcastExchange #8 WholeStageCodegen - Project [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + Project [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] Filter [wr_item_sk,wr_order_number] - Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] InputAdapter BroadcastExchange #9 WholeStageCodegen - HashAggregate [sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint)),d_year,sum,i_class_id,i_category_id,i_manufact_id,i_brand_id,sum] [sum(UnscaledValue(sales_amt)),sales_amt,sum(cast(sales_cnt as bigint)),sales_cnt,sum,sum] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sum,sum,sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint))] [sales_amt,sales_cnt,sum,sum,sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint))] InputAdapter - Exchange [d_year,i_class_id,i_category_id,i_manufact_id,i_brand_id] #10 + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] #10 WholeStageCodegen - HashAggregate [sum,d_year,sales_amt,sum,sum,i_class_id,sales_cnt,i_category_id,i_manufact_id,i_brand_id,sum] [sum,sum,sum,sum] - HashAggregate [i_manufact_id,d_year,i_category_id,sales_cnt,i_class_id,sales_amt,i_brand_id] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt,sum,sum,sum,sum] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] InputAdapter - Exchange [i_manufact_id,d_year,i_category_id,sales_cnt,i_class_id,sales_amt,i_brand_id] #11 + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #11 WholeStageCodegen - HashAggregate [i_manufact_id,d_year,i_category_id,sales_cnt,i_class_id,sales_amt,i_brand_id] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] InputAdapter Union WholeStageCodegen - HashAggregate [i_manufact_id,d_year,i_category_id,sales_cnt,i_class_id,sales_amt,i_brand_id] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] InputAdapter - Exchange [i_manufact_id,d_year,i_category_id,sales_cnt,i_class_id,sales_amt,i_brand_id] #12 + Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #12 WholeStageCodegen - HashAggregate [i_manufact_id,d_year,i_category_id,sales_cnt,i_class_id,sales_amt,i_brand_id] + HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] InputAdapter Union WholeStageCodegen - Project [cr_return_quantity,i_manufact_id,cr_return_amount,d_year,i_category_id,i_class_id,cs_quantity,i_brand_id,cs_ext_sales_price] - BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_quantity,i_brand_id,i_manufact_id,i_category_id,d_year,cs_order_number,cs_item_sk,cs_ext_sales_price,i_class_id] + Project [cr_return_amount,cr_return_quantity,cs_ext_sales_price,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_quantity,i_brand_id,i_manufact_id,i_category_id,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_ext_sales_price,i_class_id] + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk,i_brand_id,i_category_id,i_class_id,i_manufact_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_ext_sales_price] [cs_quantity,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_ext_sales_price] + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] InputAdapter - ReusedExchange [i_brand_id,i_item_sk,i_manufact_id,i_category_id,i_class_id] [i_brand_id,i_item_sk,i_manufact_id,i_category_id,i_class_id] #4 + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 InputAdapter BroadcastExchange #13 WholeStageCodegen Project [d_date_sk,d_year] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter - ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #6 + ReusedExchange [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] #6 WholeStageCodegen - Project [i_manufact_id,d_year,i_category_id,sr_return_amt,sr_return_quantity,ss_ext_sales_price,ss_quantity,i_class_id,i_brand_id] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_quantity,ss_item_sk,i_brand_id,i_manufact_id,i_category_id,d_year,ss_ext_sales_price,i_class_id,ss_ticket_number] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,i_brand_id,i_manufact_id,i_category_id,ss_ext_sales_price,ss_sold_date_sk,i_class_id,ss_ticket_number] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_quantity,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ticket_number] + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sr_return_amt,sr_return_quantity,ss_ext_sales_price,ss_quantity] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ticket_number] [ss_quantity,ss_item_sk,ss_ext_sales_price,ss_sold_date_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] InputAdapter - ReusedExchange [i_brand_id,i_item_sk,i_manufact_id,i_category_id,i_class_id] [i_brand_id,i_item_sk,i_manufact_id,i_category_id,i_class_id] #4 + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #13 InputAdapter - ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #7 + ReusedExchange [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] #7 WholeStageCodegen - Project [wr_return_quantity,i_manufact_id,d_year,i_category_id,ws_ext_sales_price,wr_return_amt,i_class_id,i_brand_id,ws_quantity] - BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - Project [i_brand_id,ws_ext_sales_price,ws_quantity,i_manufact_id,i_category_id,d_year,ws_order_number,i_class_id,ws_item_sk] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [i_brand_id,ws_ext_sales_price,ws_quantity,i_manufact_id,i_category_id,ws_order_number,ws_sold_date_sk,i_class_id,ws_item_sk] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_ext_sales_price,ws_quantity,ws_order_number,ws_sold_date_sk,ws_item_sk] + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,wr_return_amt,wr_return_quantity,ws_ext_sales_price,ws_quantity] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_quantity,ws_order_number,ws_sold_date_sk,ws_item_sk] [ws_ext_sales_price,ws_quantity,ws_order_number,ws_sold_date_sk,ws_item_sk] + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] InputAdapter - ReusedExchange [i_brand_id,i_item_sk,i_manufact_id,i_category_id,i_class_id] [i_brand_id,i_item_sk,i_manufact_id,i_category_id,i_class_id] #4 + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #13 InputAdapter - ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #8 + ReusedExchange [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] #8 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt index e40976f52..5b7872ad7 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt @@ -1,53 +1,53 @@ -TakeOrderedAndProject [d_year,channel,sales_cnt,col_name,d_qoy,i_category,sales_amt] +TakeOrderedAndProject [channel,col_name,d_qoy,d_year,i_category,sales_amt,sales_cnt] WholeStageCodegen - HashAggregate [d_year,channel,sum,sum(UnscaledValue(ext_sales_price)),count,count(1),col_name,d_qoy,i_category] [sum,sum(UnscaledValue(ext_sales_price)),sales_cnt,count,count(1),sales_amt] + HashAggregate [channel,col_name,count,count(1),d_qoy,d_year,i_category,sum,sum(UnscaledValue(ext_sales_price))] [count,count(1),sales_amt,sales_cnt,sum,sum(UnscaledValue(ext_sales_price))] InputAdapter - Exchange [d_year,channel,col_name,d_qoy,i_category] #1 + Exchange [channel,col_name,d_qoy,d_year,i_category] #1 WholeStageCodegen - HashAggregate [d_year,count,channel,sum,ext_sales_price,count,col_name,d_qoy,i_category,sum] [count,sum,count,sum] + HashAggregate [channel,col_name,count,count,d_qoy,d_year,ext_sales_price,i_category,sum,sum] [count,count,sum,sum] InputAdapter Union WholeStageCodegen - Project [d_year,d_qoy,ss_store_sk,i_category,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,i_category] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price] - Filter [ss_store_sk,ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price] + Project [d_qoy,d_year,i_category,ss_ext_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_category,ss_ext_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [i_item_sk,i_category] + Project [i_category,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_category] [i_item_sk,i_category] + Scan parquet default.item [i_category,i_item_sk] [i_category,i_item_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [d_date_sk,d_year,d_qoy] + Project [d_date_sk,d_qoy,d_year] Filter [d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] WholeStageCodegen - Project [d_year,d_qoy,ws_ship_customer_sk,ws_ext_sales_price,i_category] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_ship_customer_sk,ws_ext_sales_price,i_category] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price] - Filter [ws_ship_customer_sk,ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price] [ws_sold_date_sk,ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price] + Project [d_qoy,d_year,i_category,ws_ext_sales_price,ws_ship_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_category,ws_ext_sales_price,ws_ship_customer_sk,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] [ws_ext_sales_price,ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] InputAdapter - ReusedExchange [i_item_sk,i_category] [i_item_sk,i_category] #2 + ReusedExchange [i_category,i_item_sk] [i_category,i_item_sk] #2 InputAdapter - ReusedExchange [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] #3 + ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #3 WholeStageCodegen - Project [d_year,d_qoy,i_category,cs_ship_addr_sk,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_ship_addr_sk,d_qoy,d_year,i_category] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_ship_addr_sk,cs_ext_sales_price,i_category] + Project [cs_ext_sales_price,cs_ship_addr_sk,cs_sold_date_sk,i_category] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_sold_date_sk,cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price] - Filter [cs_ship_addr_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price] [cs_sold_date_sk,cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] + Filter [cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] [cs_ext_sales_price,cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] InputAdapter - ReusedExchange [i_item_sk,i_category] [i_item_sk,i_category] #2 + ReusedExchange [i_category,i_item_sk] [i_category,i_item_sk] #2 InputAdapter - ReusedExchange [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] #3 + ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt index 1c29af5ef..ef07e38e8 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt @@ -1,34 +1,34 @@ -TakeOrderedAndProject [channel,profit,id,sales,returns] +TakeOrderedAndProject [channel,id,profit,returns,sales] WholeStageCodegen - HashAggregate [channel,sum,sum,id,sum(profit),spark_grouping_id,sum(sales),sum,sum(returns)] [sum,profit,sum,sum(profit),sales,sum(sales),sum,returns,sum(returns)] + HashAggregate [channel,id,spark_grouping_id,sum,sum,sum,sum(profit),sum(returns),sum(sales)] [profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] InputAdapter Exchange [channel,id,spark_grouping_id] #1 WholeStageCodegen - HashAggregate [profit,channel,sum,sum,sales,sum,sum,id,sum,spark_grouping_id,returns,sum] [sum,sum,sum,sum,sum,sum] - Expand [profit,channel,sales,id,returns] + HashAggregate [channel,id,profit,returns,sales,spark_grouping_id,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Expand [channel,id,profit,returns,sales] InputAdapter Union WholeStageCodegen - Project [profit,s_store_sk,sales,returns,profit_loss] + Project [profit,profit_loss,returns,s_store_sk,sales] BroadcastHashJoin [s_store_sk,s_store_sk] - HashAggregate [sum(UnscaledValue(ss_ext_sales_price)),sum,sum,s_store_sk,sum(UnscaledValue(ss_net_profit))] [sales,sum(UnscaledValue(ss_ext_sales_price)),sum,sum,sum(UnscaledValue(ss_net_profit)),profit] + HashAggregate [s_store_sk,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] [profit,sales,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] InputAdapter Exchange [s_store_sk] #2 WholeStageCodegen - HashAggregate [sum,sum,sum,sum,s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] - Project [ss_ext_sales_price,ss_net_profit,s_store_sk] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit,sum,sum,sum,sum] [sum,sum,sum,sum] + Project [s_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_ext_sales_price,ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + Scan parquet default.store_sales [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen @@ -38,24 +38,24 @@ TakeOrderedAndProject [channel,profit,id,sales,returns] InputAdapter BroadcastExchange #5 WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),s_store_sk,sum] [sum,sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),profit_loss,returns,sum] + HashAggregate [s_store_sk,sum,sum,sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(sr_return_amt))] [profit_loss,returns,sum,sum,sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(sr_return_amt))] InputAdapter Exchange [s_store_sk] #6 WholeStageCodegen - HashAggregate [sum,sum,sr_return_amt,s_store_sk,sum,sum,sr_net_loss] [sum,sum,sum,sum] - Project [sr_return_amt,sr_net_loss,s_store_sk] - BroadcastHashJoin [sr_store_sk,s_store_sk] - Project [sr_store_sk,sr_return_amt,sr_net_loss] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] + HashAggregate [s_store_sk,sr_net_loss,sr_return_amt,sum,sum,sum,sum] [sum,sum,sum,sum] + Project [s_store_sk,sr_net_loss,sr_return_amt] + BroadcastHashJoin [s_store_sk,sr_store_sk] + Project [sr_net_loss,sr_return_amt,sr_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] Filter [sr_returned_date_sk,sr_store_sk] - Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] + Scan parquet default.store_returns [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] InputAdapter BroadcastExchange #7 WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #8 WholeStageCodegen @@ -63,51 +63,51 @@ TakeOrderedAndProject [channel,profit,id,sales,returns] Filter [s_store_sk] Scan parquet default.store [s_store_sk] [s_store_sk] WholeStageCodegen - Project [sales,returns,profit,cs_call_center_sk,profit_loss] + Project [cs_call_center_sk,profit,profit_loss,returns,sales] InputAdapter BroadcastNestedLoopJoin BroadcastExchange #9 WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(cs_net_profit)),sum,sum(UnscaledValue(cs_ext_sales_price)),cs_call_center_sk] [sales,sum,profit,sum(UnscaledValue(cs_net_profit)),sum,sum(UnscaledValue(cs_ext_sales_price))] + HashAggregate [cs_call_center_sk,sum,sum,sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit))] [profit,sales,sum,sum,sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit))] InputAdapter Exchange [cs_call_center_sk] #10 WholeStageCodegen - HashAggregate [sum,sum,cs_net_profit,sum,cs_ext_sales_price,cs_call_center_sk,sum] [sum,sum,sum,sum] + HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,sum,sum,sum,sum] [sum,sum,sum,sum] Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + Scan parquet default.catalog_sales [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 WholeStageCodegen - HashAggregate [sum,sum,sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss))] [profit_loss,sum,returns,sum(UnscaledValue(cr_return_amount)),sum,sum(UnscaledValue(cr_net_loss))] + HashAggregate [sum,sum,sum(UnscaledValue(cr_net_loss)),sum(UnscaledValue(cr_return_amount))] [profit_loss,returns,sum,sum,sum(UnscaledValue(cr_net_loss)),sum(UnscaledValue(cr_return_amount))] InputAdapter Exchange #11 WholeStageCodegen - HashAggregate [cr_return_amount,sum,sum,sum,cr_net_loss,sum] [sum,sum,sum,sum] - Project [cr_return_amount,cr_net_loss] + HashAggregate [cr_net_loss,cr_return_amount,sum,sum,sum,sum] [sum,sum,sum,sum] + Project [cr_net_loss,cr_return_amount] BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Project [cr_returned_date_sk,cr_return_amount,cr_net_loss] + Project [cr_net_loss,cr_return_amount,cr_returned_date_sk] Filter [cr_returned_date_sk] - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_return_amount,cr_net_loss] [cr_returned_date_sk,cr_return_amount,cr_net_loss] + Scan parquet default.catalog_returns [cr_net_loss,cr_return_amount,cr_returned_date_sk] [cr_net_loss,cr_return_amount,cr_returned_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 WholeStageCodegen - Project [profit,returns,sales,wp_web_page_sk,profit_loss] + Project [profit,profit_loss,returns,sales,wp_web_page_sk] BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] - HashAggregate [sum(UnscaledValue(ws_ext_sales_price)),sum,sum(UnscaledValue(ws_net_profit)),sum,wp_web_page_sk] [sum(UnscaledValue(ws_ext_sales_price)),profit,sales,sum,sum(UnscaledValue(ws_net_profit)),sum] + HashAggregate [sum,sum,sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),wp_web_page_sk] [profit,sales,sum,sum,sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit))] InputAdapter Exchange [wp_web_page_sk] #12 WholeStageCodegen - HashAggregate [sum,sum,ws_ext_sales_price,ws_net_profit,sum,sum,wp_web_page_sk] [sum,sum,sum,sum] - Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] - Project [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + HashAggregate [sum,sum,sum,sum,wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] + Project [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] + Project [ws_ext_sales_price,ws_net_profit,ws_web_page_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_page_sk] Filter [ws_sold_date_sk,ws_web_page_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + Scan parquet default.web_sales [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_page_sk] [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_page_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter @@ -119,18 +119,18 @@ TakeOrderedAndProject [channel,profit,id,sales,returns] InputAdapter BroadcastExchange #14 WholeStageCodegen - HashAggregate [sum,wp_web_page_sk,sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),sum] [sum,returns,sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),profit_loss,sum] + HashAggregate [sum,sum,sum(UnscaledValue(wr_net_loss)),sum(UnscaledValue(wr_return_amt)),wp_web_page_sk] [profit_loss,returns,sum,sum,sum(UnscaledValue(wr_net_loss)),sum(UnscaledValue(wr_return_amt))] InputAdapter Exchange [wp_web_page_sk] #15 WholeStageCodegen - HashAggregate [sum,wp_web_page_sk,wr_return_amt,sum,wr_net_loss,sum,sum] [sum,sum,sum,sum] - Project [wr_return_amt,wr_net_loss,wp_web_page_sk] - BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk] - Project [wr_web_page_sk,wr_return_amt,wr_net_loss] - BroadcastHashJoin [wr_returned_date_sk,d_date_sk] - Project [wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] + HashAggregate [sum,sum,sum,sum,wp_web_page_sk,wr_net_loss,wr_return_amt] [sum,sum,sum,sum] + Project [wp_web_page_sk,wr_net_loss,wr_return_amt] + BroadcastHashJoin [wp_web_page_sk,wr_web_page_sk] + Project [wr_net_loss,wr_return_amt,wr_web_page_sk] + BroadcastHashJoin [d_date_sk,wr_returned_date_sk] + Project [wr_net_loss,wr_return_amt,wr_returned_date_sk,wr_web_page_sk] Filter [wr_returned_date_sk,wr_web_page_sk] - Scan parquet default.web_returns [wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] [wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] + Scan parquet default.web_returns [wr_net_loss,wr_return_amt,wr_returned_date_sk,wr_web_page_sk] [wr_net_loss,wr_return_amt,wr_returned_date_sk,wr_web_page_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #7 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt index 64b375073..fc8bc4f3f 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt @@ -1,56 +1,56 @@ -TakeOrderedAndProject [cs_qty,store_wholesale_cost,store_sales_price,ws_qty,ss_qty,ss_sp,other_chan_qty,store_qty,ratio,other_chan_sales_price,other_chan_wholesale_cost,ss_wc] +TakeOrderedAndProject [cs_qty,other_chan_qty,other_chan_sales_price,other_chan_wholesale_cost,ratio,ss_qty,ss_sp,ss_wc,store_qty,store_sales_price,store_wholesale_cost,ws_qty] WholeStageCodegen - Project [ws_wc,cs_qty,ws_qty,cs_sp,ss_qty,ss_sp,ws_sp,cs_wc,ss_wc] - BroadcastHashJoin [ss_item_sk,ss_customer_sk,cs_customer_sk,cs_item_sk,cs_sold_year,ss_sold_year] - Project [ss_customer_sk,ws_sp,ws_wc,ss_wc,ss_sold_year,ss_sp,ws_qty,ss_qty,ss_item_sk] - BroadcastHashJoin [ss_item_sk,ss_customer_sk,ws_sold_year,ws_item_sk,ws_customer_sk,ss_sold_year] - HashAggregate [d_year,ss_customer_sk,sum(UnscaledValue(ss_sales_price)),sum,sum,sum(cast(ss_quantity as bigint)),sum,sum(UnscaledValue(ss_wholesale_cost)),ss_item_sk] [sum(UnscaledValue(ss_sales_price)),sum,ss_wc,sum,sum(cast(ss_quantity as bigint)),ss_sold_year,ss_sp,ss_qty,sum,sum(UnscaledValue(ss_wholesale_cost))] + Project [cs_qty,cs_sp,cs_wc,ss_qty,ss_sp,ss_wc,ws_qty,ws_sp,ws_wc] + BroadcastHashJoin [cs_customer_sk,cs_item_sk,cs_sold_year,ss_customer_sk,ss_item_sk,ss_sold_year] + Project [ss_customer_sk,ss_item_sk,ss_qty,ss_sold_year,ss_sp,ss_wc,ws_qty,ws_sp,ws_wc] + BroadcastHashJoin [ss_customer_sk,ss_item_sk,ss_sold_year,ws_customer_sk,ws_item_sk,ws_sold_year] + HashAggregate [d_year,ss_customer_sk,ss_item_sk,sum,sum,sum,sum(UnscaledValue(ss_sales_price)),sum(UnscaledValue(ss_wholesale_cost)),sum(cast(ss_quantity as bigint))] [ss_qty,ss_sold_year,ss_sp,ss_wc,sum,sum,sum,sum(UnscaledValue(ss_sales_price)),sum(UnscaledValue(ss_wholesale_cost)),sum(cast(ss_quantity as bigint))] InputAdapter - Exchange [d_year,ss_item_sk,ss_customer_sk] #1 + Exchange [d_year,ss_customer_sk,ss_item_sk] #1 WholeStageCodegen - HashAggregate [d_year,sum,ss_wholesale_cost,ss_customer_sk,sum,sum,ss_sales_price,sum,sum,ss_quantity,sum,ss_item_sk] [sum,sum,sum,sum,sum,sum] - Project [ss_quantity,ss_item_sk,d_year,ss_customer_sk,ss_sales_price,ss_wholesale_cost] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost] + HashAggregate [d_year,ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_wholesale_cost,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [d_year,ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost] Filter [sr_ticket_number] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost,ss_ticket_number] - Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost,ss_ticket_number] [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost,ss_ticket_number] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_ticket_number,ss_wholesale_cost] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_ticket_number,ss_wholesale_cost] [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_ticket_number,ss_wholesale_cost] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [sr_item_sk,sr_ticket_number] - Filter [sr_ticket_number,sr_item_sk] + Filter [sr_item_sk,sr_ticket_number] Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk,d_year] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #4 WholeStageCodegen Filter [ws_qty] - HashAggregate [d_year,sum(cast(ws_quantity as bigint)),ws_item_sk,sum(UnscaledValue(ws_sales_price)),ws_bill_customer_sk,sum,sum,sum(UnscaledValue(ws_wholesale_cost)),sum] [sum(cast(ws_quantity as bigint)),sum(UnscaledValue(ws_sales_price)),ws_sp,ws_wc,sum,ws_sold_year,sum,sum(UnscaledValue(ws_wholesale_cost)),sum,ws_customer_sk,ws_qty] + HashAggregate [d_year,sum,sum,sum,sum(UnscaledValue(ws_sales_price)),sum(UnscaledValue(ws_wholesale_cost)),sum(cast(ws_quantity as bigint)),ws_bill_customer_sk,ws_item_sk] [sum,sum,sum,sum(UnscaledValue(ws_sales_price)),sum(UnscaledValue(ws_wholesale_cost)),sum(cast(ws_quantity as bigint)),ws_customer_sk,ws_qty,ws_sold_year,ws_sp,ws_wc] InputAdapter - Exchange [d_year,ws_item_sk,ws_bill_customer_sk] #5 + Exchange [d_year,ws_bill_customer_sk,ws_item_sk] #5 WholeStageCodegen - HashAggregate [d_year,ws_wholesale_cost,ws_item_sk,sum,ws_bill_customer_sk,sum,sum,sum,sum,sum,ws_sales_price,ws_quantity] [sum,sum,sum,sum,sum,sum] - Project [d_year,ws_quantity,ws_wholesale_cost,ws_bill_customer_sk,ws_sales_price,ws_item_sk] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_quantity,ws_wholesale_cost,ws_sold_date_sk,ws_bill_customer_sk,ws_sales_price,ws_item_sk] + HashAggregate [d_year,sum,sum,sum,sum,sum,sum,ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_wholesale_cost] [sum,sum,sum,sum,sum,sum] + Project [d_year,ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_wholesale_cost] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] Filter [wr_order_number] - BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - Project [ws_quantity,ws_wholesale_cost,ws_order_number,ws_sold_date_sk,ws_bill_customer_sk,ws_sales_price,ws_item_sk] - Filter [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] - Scan parquet default.web_sales [ws_quantity,ws_wholesale_cost,ws_order_number,ws_sold_date_sk,ws_bill_customer_sk,ws_sales_price,ws_item_sk] [ws_quantity,ws_wholesale_cost,ws_order_number,ws_sold_date_sk,ws_bill_customer_sk,ws_sales_price,ws_item_sk] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [ws_bill_customer_sk,ws_item_sk,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] + Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] [ws_bill_customer_sk,ws_item_sk,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [wr_item_sk,wr_order_number] - Filter [wr_order_number,wr_item_sk] + Filter [wr_item_sk,wr_order_number] Scan parquet default.web_returns [wr_item_sk,wr_order_number] [wr_item_sk,wr_order_number] InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 @@ -58,24 +58,24 @@ TakeOrderedAndProject [cs_qty,store_wholesale_cost,store_sales_price,ws_qty,ss_q BroadcastExchange #7 WholeStageCodegen Filter [cs_qty] - HashAggregate [d_year,sum,sum(UnscaledValue(cs_sales_price)),sum(cast(cs_quantity as bigint)),sum(UnscaledValue(cs_wholesale_cost)),cs_item_sk,cs_bill_customer_sk,sum,sum] [cs_qty,cs_wc,sum,sum(UnscaledValue(cs_sales_price)),sum(cast(cs_quantity as bigint)),cs_sold_year,cs_sp,sum(UnscaledValue(cs_wholesale_cost)),sum,cs_customer_sk,sum] + HashAggregate [cs_bill_customer_sk,cs_item_sk,d_year,sum,sum,sum,sum(UnscaledValue(cs_sales_price)),sum(UnscaledValue(cs_wholesale_cost)),sum(cast(cs_quantity as bigint))] [cs_customer_sk,cs_qty,cs_sold_year,cs_sp,cs_wc,sum,sum,sum,sum(UnscaledValue(cs_sales_price)),sum(UnscaledValue(cs_wholesale_cost)),sum(cast(cs_quantity as bigint))] InputAdapter - Exchange [d_year,cs_item_sk,cs_bill_customer_sk] #8 + Exchange [cs_bill_customer_sk,cs_item_sk,d_year] #8 WholeStageCodegen - HashAggregate [d_year,sum,sum,sum,cs_item_sk,cs_bill_customer_sk,sum,sum,cs_sales_price,cs_quantity,sum,cs_wholesale_cost] [sum,sum,sum,sum,sum,sum] - Project [cs_wholesale_cost,cs_quantity,d_year,cs_bill_customer_sk,cs_sales_price,cs_item_sk] + HashAggregate [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_wholesale_cost,d_year,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_wholesale_cost,d_year] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_wholesale_cost,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] Filter [cr_order_number] - BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_wholesale_cost,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] - Filter [cs_sold_date_sk,cs_item_sk,cs_bill_customer_sk] - Scan parquet default.catalog_sales [cs_wholesale_cost,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] [cs_wholesale_cost,cs_quantity,cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk,cs_order_number,cs_item_sk] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] InputAdapter BroadcastExchange #9 WholeStageCodegen Project [cr_item_sk,cr_order_number] - Filter [cr_order_number,cr_item_sk] + Filter [cr_item_sk,cr_order_number] Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] [cr_item_sk,cr_order_number] InputAdapter ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt index cee3e4c79..01ce92730 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt @@ -1,38 +1,38 @@ -TakeOrderedAndProject [profit,amt,substring(s_city, 1, 30),s_city,c_last_name,ss_ticket_number,c_first_name] +TakeOrderedAndProject [amt,c_first_name,c_last_name,profit,s_city,ss_ticket_number,substring(s_city, 1, 30)] WholeStageCodegen - Project [profit,amt,s_city,c_last_name,ss_ticket_number,c_first_name] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - HashAggregate [sum(UnscaledValue(ss_net_profit)),ss_customer_sk,sum(UnscaledValue(ss_coupon_amt)),sum,sum,s_city,ss_ticket_number,ss_addr_sk] [amt,sum(UnscaledValue(ss_net_profit)),profit,sum(UnscaledValue(ss_coupon_amt)),sum,sum] + Project [amt,c_first_name,c_last_name,profit,s_city,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + HashAggregate [s_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] [amt,profit,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] InputAdapter - Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city] #1 + Exchange [s_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #1 WholeStageCodegen - HashAggregate [sum,ss_customer_sk,ss_coupon_amt,sum,sum,sum,s_city,ss_ticket_number,ss_addr_sk,ss_net_profit] [sum,sum,sum,sum] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,s_city,ss_ticket_number] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,s_city,ss_hdemo_sk,ss_ticket_number] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_hdemo_sk,ss_ticket_number] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] - Filter [ss_sold_date_sk,ss_store_sk,ss_hdemo_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_store_sk,ss_customer_sk,ss_net_profit,ss_sold_date_sk,ss_hdemo_sk,ss_ticket_number] + HashAggregate [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number,sum,sum,sum,sum] [sum,sum,sum,sum] + Project [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [d_date_sk] - Filter [d_dow,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_dow] [d_date_sk,d_year,d_dow] + Filter [d_date_sk,d_dow,d_year] + Scan parquet default.date_dim [d_date_sk,d_dow,d_year] [d_date_sk,d_dow,d_year] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [s_store_sk,s_city] + Project [s_city,s_store_sk] Filter [s_number_employees,s_store_sk] - Scan parquet default.store [s_store_sk,s_number_employees,s_city] [s_store_sk,s_number_employees,s_city] + Scan parquet default.store [s_city,s_number_employees,s_store_sk] [s_city,s_number_employees,s_store_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [hd_demo_sk] - Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] InputAdapter BroadcastExchange #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt index 8ae1eb34e..482275fd7 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST], output=[s_store_name#1,sum(ss_net_profit)#2]) +- *(9) HashAggregate(keys=[s_store_name#1], functions=[sum(UnscaledValue(ss_net_profit#3))]) - +- Exchange hashpartitioning(s_store_name#1, 200) + +- Exchange hashpartitioning(s_store_name#1, 5) +- *(8) HashAggregate(keys=[s_store_name#1], functions=[partial_sum(UnscaledValue(ss_net_profit#3))]) +- *(8) Project [ss_net_profit#3, s_store_name#1] +- *(8) BroadcastHashJoin [substring(s_zip#4, 1, 2)], [substring(ca_zip#5, 1, 2)], Inner, BuildRight @@ -22,7 +22,7 @@ TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST], outpu : +- *(2) FileScan parquet default.store[s_store_sk#7,s_store_name#1,s_zip#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(substring(input[0, string, true], 1, 2))) +- *(7) HashAggregate(keys=[ca_zip#5], functions=[]) - +- Exchange hashpartitioning(ca_zip#5, 200) + +- Exchange hashpartitioning(ca_zip#5, 5) +- *(6) HashAggregate(keys=[ca_zip#5], functions=[]) +- *(6) BroadcastHashJoin [coalesce(ca_zip#5, )], [coalesce(ca_zip#12, )], LeftSemi, BuildRight, (ca_zip#5 <=> ca_zip#12) :- *(6) Project [substring(ca_zip#13, 1, 5) AS ca_zip#5] @@ -32,7 +32,7 @@ TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST], outpu +- *(5) Project [ca_zip#12] +- *(5) Filter (count(1)#14 > 10) +- *(5) HashAggregate(keys=[ca_zip#13], functions=[count(1)]) - +- Exchange hashpartitioning(ca_zip#13, 200) + +- Exchange hashpartitioning(ca_zip#13, 5) +- *(4) HashAggregate(keys=[ca_zip#13], functions=[partial_count(1)]) +- *(4) Project [ca_zip#13] +- *(4) BroadcastHashJoin [ca_address_sk#15], [c_current_addr_sk#16], Inner, BuildRight diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/simplified.txt index 855f6d67d..fd00e42e4 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/simplified.txt @@ -1,31 +1,31 @@ TakeOrderedAndProject [s_store_name,sum(ss_net_profit)] WholeStageCodegen - HashAggregate [s_store_name,sum,sum(UnscaledValue(ss_net_profit))] [sum(UnscaledValue(ss_net_profit)),sum(ss_net_profit),sum] + HashAggregate [s_store_name,sum,sum(UnscaledValue(ss_net_profit))] [sum,sum(UnscaledValue(ss_net_profit)),sum(ss_net_profit)] InputAdapter Exchange [s_store_name] #1 WholeStageCodegen HashAggregate [s_store_name,ss_net_profit,sum,sum] [sum,sum] - Project [ss_net_profit,s_store_name] - BroadcastHashJoin [s_zip,ca_zip] - Project [ss_net_profit,s_store_name,s_zip] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_store_sk,ss_net_profit] + Project [s_store_name,ss_net_profit] + BroadcastHashJoin [ca_zip,s_zip] + Project [s_store_name,s_zip,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_net_profit,ss_sold_date_sk,ss_store_sk] Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] [ss_sold_date_sk,ss_store_sk,ss_net_profit] + Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_net_profit,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [d_date_sk] - Filter [d_qoy,d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] [d_date_sk,d_year,d_qoy] + Filter [d_date_sk,d_qoy,d_year] + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [s_store_sk,s_store_name,s_zip] + Project [s_store_name,s_store_sk,s_zip] Filter [s_store_sk,s_zip] - Scan parquet default.store [s_store_sk,s_store_name,s_zip] [s_store_sk,s_store_name,s_zip] + Scan parquet default.store [s_store_name,s_store_sk,s_zip] [s_store_name,s_store_sk,s_zip] InputAdapter BroadcastExchange #4 WholeStageCodegen @@ -43,13 +43,13 @@ TakeOrderedAndProject [s_store_name,sum(ss_net_profit)] WholeStageCodegen Project [ca_zip] Filter [count(1)] - HashAggregate [ca_zip,count,count(1)] [count(1),ca_zip,count(1),count] + HashAggregate [ca_zip,count,count(1)] [ca_zip,count,count(1),count(1)] InputAdapter Exchange [ca_zip] #7 WholeStageCodegen HashAggregate [ca_zip,count,count] [count,count] Project [ca_zip] - BroadcastHashJoin [ca_address_sk,c_current_addr_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] Project [ca_address_sk,ca_zip] Filter [ca_address_sk] Scan parquet default.customer_address [ca_address_sk,ca_zip] [ca_address_sk,ca_zip] @@ -57,5 +57,5 @@ TakeOrderedAndProject [s_store_name,sum(ss_net_profit)] BroadcastExchange #8 WholeStageCodegen Project [c_current_addr_sk] - Filter [c_preferred_cust_flag,c_current_addr_sk] + Filter [c_current_addr_sk,c_preferred_cust_flag] Scan parquet default.customer [c_current_addr_sk,c_preferred_cust_flag] [c_current_addr_sk,c_preferred_cust_flag] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt index e16daec1c..28b1a009b 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt @@ -1,132 +1,132 @@ -TakeOrderedAndProject [profit,sales,id,channel,returns] +TakeOrderedAndProject [channel,id,profit,returns,sales] WholeStageCodegen - HashAggregate [sum,sum(sales),sum(returns),spark_grouping_id,id,channel,sum(profit),sum,sum] [profit,sum,sales,sum(sales),sum(returns),returns,sum(profit),sum,sum] + HashAggregate [channel,id,spark_grouping_id,sum,sum,sum,sum(profit),sum(returns),sum(sales)] [profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] InputAdapter Exchange [channel,id,spark_grouping_id] #1 WholeStageCodegen - HashAggregate [profit,sum,sum,returns,spark_grouping_id,sum,id,channel,sales,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Expand [profit,id,returns,sales,channel] + HashAggregate [channel,id,profit,returns,sales,spark_grouping_id,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Expand [channel,id,profit,returns,sales] InputAdapter Union WholeStageCodegen - HashAggregate [sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),s_store_id,sum,sum(UnscaledValue(ss_ext_sales_price)),sum] [id,sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sales,sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),profit,sum,returns,sum(UnscaledValue(ss_ext_sales_price)),channel,sum] + HashAggregate [s_store_id,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00))] [channel,id,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00))] InputAdapter Exchange [s_store_id] #2 WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sr_return_amt,s_store_id,ss_ext_sales_price,sum,sr_net_loss,ss_net_profit,sum] [sum,sum,sum,sum,sum,sum] - Project [s_store_id,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [s_store_id,ss_ext_sales_price,ss_net_profit,ss_promo_sk,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [s_store_id,ss_item_sk,ss_ext_sales_price,ss_net_profit,ss_promo_sk,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_promo_sk,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_promo_sk,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_promo_sk,ss_ticket_number] - Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk,ss_promo_sk] - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_promo_sk,ss_ticket_number] [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_promo_sk,ss_ticket_number] + HashAggregate [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit,ss_promo_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Filter [ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + Project [sr_item_sk,sr_net_loss,sr_return_amt,sr_ticket_number] Filter [sr_item_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + Scan parquet default.store_returns [sr_item_sk,sr_net_loss,sr_return_amt,sr_ticket_number] [sr_item_sk,sr_net_loss,sr_return_amt,sr_ticket_number] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [s_store_sk,s_store_id] + Project [s_store_id,s_store_sk] Filter [s_store_sk] - Scan parquet default.store [s_store_sk,s_store_id] [s_store_sk,s_store_id] + Scan parquet default.store [s_store_id,s_store_sk] [s_store_id,s_store_sk] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [i_item_sk] Filter [i_current_price,i_item_sk] - Scan parquet default.item [i_item_sk,i_current_price] [i_item_sk,i_current_price] + Scan parquet default.item [i_current_price,i_item_sk] [i_current_price,i_item_sk] InputAdapter BroadcastExchange #7 WholeStageCodegen Project [p_promo_sk] Filter [p_channel_tv,p_promo_sk] - Scan parquet default.promotion [p_promo_sk,p_channel_tv] [p_promo_sk,p_channel_tv] + Scan parquet default.promotion [p_channel_tv,p_promo_sk] [p_channel_tv,p_promo_sk] WholeStageCodegen - HashAggregate [sum(UnscaledValue(cs_ext_sales_price)),cp_catalog_page_id,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00))] [sum(UnscaledValue(cs_ext_sales_price)),sales,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),channel,sum,sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),returns,id,profit] + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00))] [channel,id,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00))] InputAdapter Exchange [cp_catalog_page_id] #8 WholeStageCodegen - HashAggregate [cp_catalog_page_id,sum,sum,sum,sum,cr_return_amount,sum,cs_net_profit,sum,cr_net_loss,cs_ext_sales_price] [sum,sum,sum,sum,sum,sum] - Project [cr_net_loss,cp_catalog_page_id,cr_return_amount,cs_net_profit,cs_ext_sales_price] + HashAggregate [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit] BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_promo_sk,cr_net_loss,cp_catalog_page_id,cr_return_amount,cs_net_profit,cs_ext_sales_price] + Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit,cs_promo_sk] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_promo_sk,cr_net_loss,cp_catalog_page_id,cr_return_amount,cs_item_sk,cs_net_profit,cs_ext_sales_price] - BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] - Project [cs_promo_sk,cr_net_loss,cs_catalog_page_sk,cr_return_amount,cs_item_sk,cs_net_profit,cs_ext_sales_price] + Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk] + BroadcastHashJoin [cp_catalog_page_sk,cs_catalog_page_sk] + Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_promo_sk,cr_net_loss,cs_catalog_page_sk,cs_sold_date_sk,cr_return_amount,cs_item_sk,cs_net_profit,cs_ext_sales_price] - BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] - Project [cs_promo_sk,cs_catalog_page_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit,cs_ext_sales_price] - Filter [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk] - Scan parquet default.catalog_sales [cs_promo_sk,cs_catalog_page_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit,cs_ext_sales_price] [cs_promo_sk,cs_catalog_page_sk,cs_sold_date_sk,cs_order_number,cs_item_sk,cs_net_profit,cs_ext_sales_price] + Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk,cs_sold_date_sk] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_order_number,cs_promo_sk,cs_sold_date_sk] + Filter [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_order_number,cs_promo_sk,cs_sold_date_sk] [cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_order_number,cs_promo_sk,cs_sold_date_sk] InputAdapter BroadcastExchange #9 WholeStageCodegen - Project [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + Project [cr_item_sk,cr_net_loss,cr_order_number,cr_return_amount] Filter [cr_item_sk,cr_order_number] - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + Scan parquet default.catalog_returns [cr_item_sk,cr_net_loss,cr_order_number,cr_return_amount] [cr_item_sk,cr_net_loss,cr_order_number,cr_return_amount] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #4 InputAdapter BroadcastExchange #10 WholeStageCodegen - Project [cp_catalog_page_sk,cp_catalog_page_id] + Project [cp_catalog_page_id,cp_catalog_page_sk] Filter [cp_catalog_page_sk] - Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] [cp_catalog_page_sk,cp_catalog_page_id] + Scan parquet default.catalog_page [cp_catalog_page_id,cp_catalog_page_sk] [cp_catalog_page_id,cp_catalog_page_sk] InputAdapter ReusedExchange [i_item_sk] [i_item_sk] #6 InputAdapter ReusedExchange [p_promo_sk] [p_promo_sk] #7 WholeStageCodegen - HashAggregate [web_site_id,sum,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ws_ext_sales_price)),sum,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00))] [sum,sales,id,returns,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ws_ext_sales_price)),profit,sum,sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),channel] + HashAggregate [sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),web_site_id] [channel,id,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00))] InputAdapter Exchange [web_site_id] #11 WholeStageCodegen - HashAggregate [web_site_id,sum,sum,sum,sum,ws_ext_sales_price,ws_net_profit,sum,wr_return_amt,wr_net_loss,sum] [sum,sum,sum,sum,sum,sum] - Project [web_site_id,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss] - BroadcastHashJoin [ws_promo_sk,p_promo_sk] - Project [web_site_id,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss,ws_promo_sk] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [web_site_id,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss,ws_promo_sk,ws_item_sk] - BroadcastHashJoin [ws_web_site_sk,web_site_sk] - Project [ws_web_site_sk,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss,ws_promo_sk,ws_item_sk] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_web_site_sk,wr_return_amt,ws_ext_sales_price,ws_net_profit,wr_net_loss,ws_sold_date_sk,ws_promo_sk,ws_item_sk] - BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] - Project [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_promo_sk,ws_item_sk] - Filter [ws_sold_date_sk,ws_web_site_sk,ws_item_sk,ws_promo_sk] - Scan parquet default.web_sales [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_promo_sk,ws_item_sk] [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_promo_sk,ws_item_sk] + HashAggregate [sum,sum,sum,sum,sum,sum,web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum,sum,sum] + Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [p_promo_sk,ws_promo_sk] + Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit,ws_promo_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk] + BroadcastHashJoin [web_site_sk,ws_web_site_sk] + Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk,ws_web_site_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_order_number,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + Filter [ws_item_sk,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_order_number,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] [ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_order_number,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] InputAdapter BroadcastExchange #12 WholeStageCodegen - Project [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + Project [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt] Filter [wr_item_sk,wr_order_number] - Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + Scan parquet default.web_returns [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt] [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #4 InputAdapter BroadcastExchange #13 WholeStageCodegen - Project [web_site_sk,web_site_id] + Project [web_site_id,web_site_sk] Filter [web_site_sk] - Scan parquet default.web_site [web_site_sk,web_site_id] [web_site_sk,web_site_id] + Scan parquet default.web_site [web_site_id,web_site_sk] [web_site_id,web_site_sk] InputAdapter ReusedExchange [i_item_sk] [i_item_sk] #6 InputAdapter diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/simplified.txt index 8c75e6308..f42772616 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/simplified.txt @@ -1,29 +1,29 @@ -TakeOrderedAndProject [ca_suite_number,ca_street_name,ca_city,ca_county,ctr_total_return,ca_street_number,c_salutation,ca_country,ca_zip,ca_street_type,ca_gmt_offset,c_customer_id,c_last_name,ca_location_type,c_first_name,ca_state] +TakeOrderedAndProject [c_customer_id,c_first_name,c_last_name,c_salutation,ca_city,ca_country,ca_county,ca_gmt_offset,ca_location_type,ca_state,ca_street_name,ca_street_number,ca_street_type,ca_suite_number,ca_zip,ctr_total_return] WholeStageCodegen - Project [ca_suite_number,ca_street_name,ca_city,ca_county,ctr_total_return,ca_street_number,c_salutation,ca_country,ca_zip,ca_street_type,ca_gmt_offset,c_customer_id,c_last_name,ca_location_type,c_first_name,ca_state] + Project [c_customer_id,c_first_name,c_last_name,c_salutation,ca_city,ca_country,ca_county,ca_gmt_offset,ca_location_type,ca_state,ca_street_name,ca_street_number,ca_street_type,ca_suite_number,ca_zip,ctr_total_return] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,c_customer_id,c_last_name,c_first_name,c_salutation,ctr_total_return] - BroadcastHashJoin [ctr_customer_sk,c_customer_sk] + Project [c_current_addr_sk,c_customer_id,c_first_name,c_last_name,c_salutation,ctr_total_return] + BroadcastHashJoin [c_customer_sk,ctr_customer_sk] Project [ctr_customer_sk,ctr_total_return] - BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + BroadcastHashJoin [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_state,ctr_state,ctr_total_return] Filter [ctr_total_return] - HashAggregate [cr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] [sum,sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_state,ctr_customer_sk,ctr_total_return] + HashAggregate [ca_state,cr_returning_customer_sk,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] [ctr_customer_sk,ctr_state,ctr_total_return,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] InputAdapter - Exchange [cr_returning_customer_sk,ca_state] #1 + Exchange [ca_state,cr_returning_customer_sk] #1 WholeStageCodegen - HashAggregate [sum,cr_returning_customer_sk,sum,cr_return_amt_inc_tax,ca_state] [sum,sum] - Project [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] - BroadcastHashJoin [cr_returning_addr_sk,ca_address_sk] - Project [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + HashAggregate [ca_state,cr_return_amt_inc_tax,cr_returning_customer_sk,sum,sum] [sum,sum] + Project [ca_state,cr_return_amt_inc_tax,cr_returning_customer_sk] + BroadcastHashJoin [ca_address_sk,cr_returning_addr_sk] + Project [cr_return_amt_inc_tax,cr_returning_addr_sk,cr_returning_customer_sk] BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Project [cr_returned_date_sk,cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + Project [cr_return_amt_inc_tax,cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] Filter [cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] [cr_returned_date_sk,cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + Scan parquet default.catalog_returns [cr_return_amt_inc_tax,cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] [cr_return_amt_inc_tax,cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #3 @@ -35,23 +35,23 @@ TakeOrderedAndProject [ca_suite_number,ca_street_name,ca_city,ca_county,ctr_tota BroadcastExchange #4 WholeStageCodegen Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] - HashAggregate [ctr_state,sum,count,avg(ctr_total_return)] [count,avg(ctr_total_return),(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),sum,ctr_state] + HashAggregate [avg(ctr_total_return),count,ctr_state,sum] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),avg(ctr_total_return),count,ctr_state,sum] InputAdapter Exchange [ctr_state] #5 WholeStageCodegen - HashAggregate [count,ctr_state,sum,count,sum,ctr_total_return] [sum,count,sum,count] - HashAggregate [cr_returning_customer_sk,ca_state,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] [sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_state,ctr_total_return,sum] + HashAggregate [count,count,ctr_state,ctr_total_return,sum,sum] [count,count,sum,sum] + HashAggregate [ca_state,cr_returning_customer_sk,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] [ctr_state,ctr_total_return,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] InputAdapter - Exchange [cr_returning_customer_sk,ca_state] #6 + Exchange [ca_state,cr_returning_customer_sk] #6 WholeStageCodegen - HashAggregate [cr_returning_customer_sk,sum,cr_return_amt_inc_tax,sum,ca_state] [sum,sum] - Project [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] - BroadcastHashJoin [cr_returning_addr_sk,ca_address_sk] - Project [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + HashAggregate [ca_state,cr_return_amt_inc_tax,cr_returning_customer_sk,sum,sum] [sum,sum] + Project [ca_state,cr_return_amt_inc_tax,cr_returning_customer_sk] + BroadcastHashJoin [ca_address_sk,cr_returning_addr_sk] + Project [cr_return_amt_inc_tax,cr_returning_addr_sk,cr_returning_customer_sk] BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Project [cr_returned_date_sk,cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + Project [cr_return_amt_inc_tax,cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] Filter [cr_returned_date_sk,cr_returning_addr_sk] - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] [cr_returned_date_sk,cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + Scan parquet default.catalog_returns [cr_return_amt_inc_tax,cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] [cr_return_amt_inc_tax,cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #2 InputAdapter @@ -59,12 +59,12 @@ TakeOrderedAndProject [ca_suite_number,ca_street_name,ca_city,ca_county,ctr_tota InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [c_current_addr_sk,c_customer_id,c_customer_sk,c_last_name,c_first_name,c_salutation] - Filter [c_customer_sk,c_current_addr_sk] - Scan parquet default.customer [c_current_addr_sk,c_customer_id,c_customer_sk,c_last_name,c_first_name,c_salutation] [c_current_addr_sk,c_customer_id,c_customer_sk,c_last_name,c_first_name,c_salutation] + Project [c_current_addr_sk,c_customer_id,c_customer_sk,c_first_name,c_last_name,c_salutation] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_current_addr_sk,c_customer_id,c_customer_sk,c_first_name,c_last_name,c_salutation] [c_current_addr_sk,c_customer_id,c_customer_sk,c_first_name,c_last_name,c_salutation] InputAdapter BroadcastExchange #8 WholeStageCodegen - Project [ca_state,ca_zip,ca_street_type,ca_location_type,ca_city,ca_address_sk,ca_county,ca_gmt_offset,ca_street_number,ca_country,ca_suite_number,ca_street_name] - Filter [ca_state,ca_address_sk] - Scan parquet default.customer_address [ca_state,ca_zip,ca_street_type,ca_location_type,ca_city,ca_address_sk,ca_county,ca_gmt_offset,ca_street_number,ca_country,ca_suite_number,ca_street_name] [ca_state,ca_zip,ca_street_type,ca_location_type,ca_city,ca_address_sk,ca_county,ca_gmt_offset,ca_street_number,ca_country,ca_suite_number,ca_street_name] + Project [ca_address_sk,ca_city,ca_country,ca_county,ca_gmt_offset,ca_location_type,ca_state,ca_street_name,ca_street_number,ca_street_type,ca_suite_number,ca_zip] + Filter [ca_address_sk,ca_state] + Scan parquet default.customer_address [ca_address_sk,ca_city,ca_country,ca_county,ca_gmt_offset,ca_location_type,ca_state,ca_street_name,ca_street_number,ca_street_type,ca_suite_number,ca_zip] [ca_address_sk,ca_city,ca_country,ca_county,ca_gmt_offset,ca_location_type,ca_state,ca_street_name,ca_street_number,ca_street_type,ca_suite_number,ca_zip] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt index e7c48e44c..e0e54c6c0 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt @@ -1,31 +1,31 @@ -TakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] +TakeOrderedAndProject [i_current_price,i_item_desc,i_item_id] WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,i_current_price] + HashAggregate [i_current_price,i_item_desc,i_item_id] InputAdapter - Exchange [i_item_id,i_item_desc,i_current_price] #1 + Exchange [i_current_price,i_item_desc,i_item_id] #1 WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,i_current_price] - Project [i_item_id,i_item_desc,i_current_price] + HashAggregate [i_current_price,i_item_desc,i_item_id] + Project [i_current_price,i_item_desc,i_item_id] BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_item_sk,i_item_id,i_item_desc,i_current_price] - BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [i_current_price,i_item_sk,inv_date_sk,i_item_desc,i_item_id] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk,inv_date_sk] BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [i_item_sk,i_item_id,i_item_desc,i_current_price] - Filter [i_current_price,i_manufact_id,i_item_sk] - Scan parquet default.item [i_current_price,i_manufact_id,i_item_sk,i_item_desc,i_item_id] [i_current_price,i_manufact_id,i_item_sk,i_item_desc,i_item_id] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk] + Filter [i_current_price,i_item_sk,i_manufact_id] + Scan parquet default.item [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [inv_date_sk,inv_item_sk] - Filter [inv_quantity_on_hand,inv_item_sk,inv_date_sk] + Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand] Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt index 027dc649e..94ffc5da9 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt @@ -1,35 +1,35 @@ -TakeOrderedAndProject [wr_item_qty,cr_item_qty,wr_dev,sr_dev,sr_item_qty,average,item_id,cr_dev] +TakeOrderedAndProject [average,cr_dev,cr_item_qty,item_id,sr_dev,sr_item_qty,wr_dev,wr_item_qty] WholeStageCodegen - Project [item_id,sr_item_qty,cr_item_qty,wr_item_qty] + Project [cr_item_qty,item_id,sr_item_qty,wr_item_qty] BroadcastHashJoin [item_id,item_id] - Project [item_id,sr_item_qty,cr_item_qty] + Project [cr_item_qty,item_id,sr_item_qty] BroadcastHashJoin [item_id,item_id] - HashAggregate [i_item_id,sum,sum(cast(sr_return_quantity as bigint))] [sum(cast(sr_return_quantity as bigint)),item_id,sr_item_qty,sum] + HashAggregate [i_item_id,sum,sum(cast(sr_return_quantity as bigint))] [item_id,sr_item_qty,sum,sum(cast(sr_return_quantity as bigint))] InputAdapter Exchange [i_item_id] #1 WholeStageCodegen HashAggregate [i_item_id,sr_return_quantity,sum,sum] [sum,sum] - Project [sr_return_quantity,i_item_id] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [sr_returned_date_sk,sr_return_quantity,i_item_id] - BroadcastHashJoin [sr_item_sk,i_item_sk] - Project [sr_returned_date_sk,sr_item_sk,sr_return_quantity] + Project [i_item_id,sr_return_quantity] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [i_item_id,sr_return_quantity,sr_returned_date_sk] + BroadcastHashJoin [i_item_sk,sr_item_sk] + Project [sr_item_sk,sr_return_quantity,sr_returned_date_sk] Filter [sr_item_sk,sr_returned_date_sk] - Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_return_quantity] [sr_returned_date_sk,sr_item_sk,sr_return_quantity] + Scan parquet default.store_returns [sr_item_sk,sr_return_quantity,sr_returned_date_sk] [sr_item_sk,sr_return_quantity,sr_returned_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [i_item_sk,i_item_id] - Filter [i_item_sk,i_item_id] - Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + Project [i_item_id,i_item_sk] + Filter [i_item_id,i_item_sk] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] BroadcastHashJoin [d_date,d_date] - Project [d_date_sk,d_date] + Project [d_date,d_date_sk] Filter [d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen @@ -45,50 +45,50 @@ TakeOrderedAndProject [wr_item_qty,cr_item_qty,wr_dev,sr_dev,sr_item_qty,average InputAdapter BroadcastExchange #6 WholeStageCodegen - HashAggregate [i_item_id,sum,sum(cast(cr_return_quantity as bigint))] [sum(cast(cr_return_quantity as bigint)),item_id,cr_item_qty,sum] + HashAggregate [i_item_id,sum,sum(cast(cr_return_quantity as bigint))] [cr_item_qty,item_id,sum,sum(cast(cr_return_quantity as bigint))] InputAdapter Exchange [i_item_id] #7 WholeStageCodegen - HashAggregate [i_item_id,cr_return_quantity,sum,sum] [sum,sum] + HashAggregate [cr_return_quantity,i_item_id,sum,sum] [sum,sum] Project [cr_return_quantity,i_item_id] BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Project [cr_returned_date_sk,cr_return_quantity,i_item_id] + Project [cr_return_quantity,cr_returned_date_sk,i_item_id] BroadcastHashJoin [cr_item_sk,i_item_sk] - Project [cr_returned_date_sk,cr_item_sk,cr_return_quantity] + Project [cr_item_sk,cr_return_quantity,cr_returned_date_sk] Filter [cr_item_sk,cr_returned_date_sk] - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_item_sk,cr_return_quantity] [cr_returned_date_sk,cr_item_sk,cr_return_quantity] + Scan parquet default.catalog_returns [cr_item_sk,cr_return_quantity,cr_returned_date_sk] [cr_item_sk,cr_return_quantity,cr_returned_date_sk] InputAdapter BroadcastExchange #8 WholeStageCodegen - Project [i_item_sk,i_item_id] - Filter [i_item_sk,i_item_id] - Scan parquet default.item [i_item_sk,i_item_id] [i_item_sk,i_item_id] + Project [i_item_id,i_item_sk] + Filter [i_item_id,i_item_sk] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] InputAdapter BroadcastExchange #9 WholeStageCodegen Project [d_date_sk] BroadcastHashJoin [d_date,d_date] - Project [d_date_sk,d_date] + Project [d_date,d_date_sk] Filter [d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter ReusedExchange [d_date] [d_date] #4 InputAdapter BroadcastExchange #10 WholeStageCodegen - HashAggregate [i_item_id,sum,sum(cast(wr_return_quantity as bigint))] [sum(cast(wr_return_quantity as bigint)),item_id,wr_item_qty,sum] + HashAggregate [i_item_id,sum,sum(cast(wr_return_quantity as bigint))] [item_id,sum,sum(cast(wr_return_quantity as bigint)),wr_item_qty] InputAdapter Exchange [i_item_id] #11 WholeStageCodegen - HashAggregate [i_item_id,wr_return_quantity,sum,sum] [sum,sum] - Project [wr_return_quantity,i_item_id] - BroadcastHashJoin [wr_returned_date_sk,d_date_sk] - Project [wr_returned_date_sk,wr_return_quantity,i_item_id] - BroadcastHashJoin [wr_item_sk,i_item_sk] - Project [wr_returned_date_sk,wr_item_sk,wr_return_quantity] + HashAggregate [i_item_id,sum,sum,wr_return_quantity] [sum,sum] + Project [i_item_id,wr_return_quantity] + BroadcastHashJoin [d_date_sk,wr_returned_date_sk] + Project [i_item_id,wr_return_quantity,wr_returned_date_sk] + BroadcastHashJoin [i_item_sk,wr_item_sk] + Project [wr_item_sk,wr_return_quantity,wr_returned_date_sk] Filter [wr_item_sk,wr_returned_date_sk] - Scan parquet default.web_returns [wr_returned_date_sk,wr_item_sk,wr_return_quantity] [wr_returned_date_sk,wr_item_sk,wr_return_quantity] + Scan parquet default.web_returns [wr_item_sk,wr_return_quantity,wr_returned_date_sk] [wr_item_sk,wr_return_quantity,wr_returned_date_sk] InputAdapter - ReusedExchange [i_item_sk,i_item_id] [i_item_sk,i_item_id] #2 + ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #2 InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #3 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt index 95001b56d..894b15298 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt @@ -1,23 +1,23 @@ TakeOrderedAndProject [c_customer_id,customer_id,customername] WholeStageCodegen - Project [c_customer_id,c_last_name,c_first_name] + Project [c_customer_id,c_first_name,c_last_name] BroadcastHashJoin [cd_demo_sk,sr_cdemo_sk] Project [c_customer_id,c_first_name,c_last_name,cd_demo_sk] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [cd_demo_sk,c_customer_id,hd_income_band_sk,c_last_name,c_first_name] + Project [c_customer_id,c_first_name,c_last_name,cd_demo_sk,hd_income_band_sk] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [cd_demo_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] + Project [c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name,cd_demo_sk] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] - Project [c_current_cdemo_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] + Project [c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_cdemo_sk,c_current_addr_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] - Scan parquet default.customer [c_current_cdemo_sk,c_current_addr_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] [c_current_cdemo_sk,c_current_addr_sk,c_customer_id,c_current_hdemo_sk,c_last_name,c_first_name] + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] InputAdapter BroadcastExchange #1 WholeStageCodegen Project [ca_address_sk] - Filter [ca_city,ca_address_sk] + Filter [ca_address_sk,ca_city] Scan parquet default.customer_address [ca_address_sk,ca_city] [ca_address_sk,ca_city] InputAdapter BroadcastExchange #2 @@ -35,7 +35,7 @@ TakeOrderedAndProject [c_customer_id,customer_id,customername] BroadcastExchange #4 WholeStageCodegen Project [ib_income_band_sk] - Filter [ib_lower_bound,ib_upper_bound,ib_income_band_sk] + Filter [ib_income_band_sk,ib_lower_bound,ib_upper_bound] Scan parquet default.income_band [ib_income_band_sk,ib_lower_bound,ib_upper_bound] [ib_income_band_sk,ib_lower_bound,ib_upper_bound] InputAdapter BroadcastExchange #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt index b9fdfc26f..86a427106 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt @@ -1,33 +1,33 @@ -TakeOrderedAndProject [avg(wr_fee),aggOrder,avg(ws_quantity),substring(r_reason_desc, 1, 20),avg(wr_refunded_cash)] +TakeOrderedAndProject [aggOrder,avg(wr_fee),avg(wr_refunded_cash),avg(ws_quantity),substring(r_reason_desc, 1, 20)] WholeStageCodegen - HashAggregate [r_reason_desc,count,sum,avg(UnscaledValue(wr_fee)),avg(UnscaledValue(wr_refunded_cash)),sum,count,count,avg(cast(ws_quantity as bigint)),sum] [count,avg(wr_fee),aggOrder,avg(ws_quantity),substring(r_reason_desc, 1, 20),sum,avg(UnscaledValue(wr_fee)),avg(wr_refunded_cash),avg(UnscaledValue(wr_refunded_cash)),sum,count,count,avg(cast(ws_quantity as bigint)),sum] + HashAggregate [avg(UnscaledValue(wr_fee)),avg(UnscaledValue(wr_refunded_cash)),avg(cast(ws_quantity as bigint)),count,count,count,r_reason_desc,sum,sum,sum] [aggOrder,avg(UnscaledValue(wr_fee)),avg(UnscaledValue(wr_refunded_cash)),avg(cast(ws_quantity as bigint)),avg(wr_fee),avg(wr_refunded_cash),avg(ws_quantity),count,count,count,substring(r_reason_desc, 1, 20),sum,sum,sum] InputAdapter Exchange [r_reason_desc] #1 WholeStageCodegen - HashAggregate [r_reason_desc,count,wr_refunded_cash,sum,count,wr_fee,sum,sum,sum,count,count,count,count,sum,ws_quantity,sum] [count,sum,count,sum,sum,sum,count,count,count,count,sum,sum] - Project [ws_quantity,wr_fee,wr_refunded_cash,r_reason_desc] - BroadcastHashJoin [wr_reason_sk,r_reason_sk] - Project [ws_quantity,wr_reason_sk,wr_fee,wr_refunded_cash] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [wr_reason_sk,ws_quantity,wr_refunded_cash,ws_sold_date_sk,wr_fee] - BroadcastHashJoin [wr_refunded_addr_sk,ca_address_sk,ca_state,ws_net_profit] - Project [wr_refunded_addr_sk,wr_reason_sk,ws_quantity,ws_net_profit,wr_refunded_cash,ws_sold_date_sk,wr_fee] - BroadcastHashJoin [cd_education_status,cd_demo_sk,wr_returning_cdemo_sk,cd_education_status,cd_marital_status,cd_marital_status] - Project [wr_refunded_addr_sk,wr_reason_sk,ws_quantity,ws_net_profit,cd_marital_status,wr_refunded_cash,ws_sold_date_sk,wr_fee,wr_returning_cdemo_sk,cd_education_status] - BroadcastHashJoin [cd_education_status,ws_sales_price,cd_demo_sk,cd_marital_status,wr_refunded_cdemo_sk] - Project [wr_refunded_addr_sk,wr_reason_sk,ws_quantity,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_cash,ws_sold_date_sk,wr_fee,wr_returning_cdemo_sk,ws_sales_price] - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] - Project [wr_refunded_addr_sk,wr_reason_sk,ws_web_page_sk,ws_quantity,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_cash,ws_sold_date_sk,wr_fee,wr_returning_cdemo_sk,ws_sales_price] - BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] - Project [ws_web_page_sk,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_sales_price,ws_item_sk] - Filter [ws_item_sk,ws_order_number,ws_web_page_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_web_page_sk,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_sales_price,ws_item_sk] [ws_web_page_sk,ws_quantity,ws_net_profit,ws_order_number,ws_sold_date_sk,ws_sales_price,ws_item_sk] + HashAggregate [count,count,count,count,count,count,r_reason_desc,sum,sum,sum,sum,sum,sum,wr_fee,wr_refunded_cash,ws_quantity] [count,count,count,count,count,count,sum,sum,sum,sum,sum,sum] + Project [r_reason_desc,wr_fee,wr_refunded_cash,ws_quantity] + BroadcastHashJoin [r_reason_sk,wr_reason_sk] + Project [wr_fee,wr_reason_sk,wr_refunded_cash,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [wr_fee,wr_reason_sk,wr_refunded_cash,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [ca_address_sk,ca_state,wr_refunded_addr_sk,ws_net_profit] + Project [wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,ws_net_profit,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cd_education_status,cd_education_status,cd_marital_status,cd_marital_status,wr_returning_cdemo_sk] + Project [cd_education_status,cd_marital_status,wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_returning_cdemo_sk,ws_net_profit,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cd_education_status,cd_marital_status,wr_refunded_cdemo_sk,ws_sales_price] + Project [wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,ws_net_profit,ws_quantity,ws_sales_price,ws_sold_date_sk] + BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] + Project [wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,ws_net_profit,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [ws_item_sk,ws_net_profit,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] + Filter [ws_item_sk,ws_order_number,ws_sold_date_sk,ws_web_page_sk] + Scan parquet default.web_sales [ws_item_sk,ws_net_profit,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] [ws_item_sk,ws_net_profit,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [wr_order_number,wr_refunded_addr_sk,wr_reason_sk,wr_refunded_cdemo_sk,wr_refunded_cash,wr_fee,wr_returning_cdemo_sk,wr_item_sk] - Filter [wr_item_sk,wr_reason_sk,wr_order_number,wr_returning_cdemo_sk,wr_refunded_addr_sk,wr_refunded_cdemo_sk] - Scan parquet default.web_returns [wr_order_number,wr_refunded_addr_sk,wr_reason_sk,wr_refunded_cdemo_sk,wr_refunded_cash,wr_fee,wr_returning_cdemo_sk,wr_item_sk] [wr_order_number,wr_refunded_addr_sk,wr_reason_sk,wr_refunded_cdemo_sk,wr_refunded_cash,wr_fee,wr_returning_cdemo_sk,wr_item_sk] + Project [wr_fee,wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] + Filter [wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] + Scan parquet default.web_returns [wr_fee,wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] [wr_fee,wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -37,30 +37,30 @@ TakeOrderedAndProject [avg(wr_fee),aggOrder,avg(ws_quantity),substring(r_reason_ InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [cd_demo_sk,cd_marital_status,cd_education_status] + Project [cd_demo_sk,cd_education_status,cd_marital_status] Filter [cd_demo_sk,cd_education_status,cd_marital_status] - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [cd_demo_sk,cd_marital_status,cd_education_status] - Filter [cd_education_status,cd_demo_sk,cd_marital_status] - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] + Project [cd_demo_sk,cd_education_status,cd_marital_status] + Filter [cd_demo_sk,cd_education_status,cd_marital_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [ca_address_sk,ca_state] - Filter [ca_country,ca_address_sk] - Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] [ca_address_sk,ca_state,ca_country] + Filter [ca_address_sk,ca_country] + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] [ca_address_sk,ca_country,ca_state] InputAdapter BroadcastExchange #7 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] + Filter [d_date_sk,d_year] Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #8 WholeStageCodegen - Project [r_reason_sk,r_reason_desc] + Project [r_reason_desc,r_reason_sk] Filter [r_reason_sk] - Scan parquet default.reason [r_reason_sk,r_reason_desc] [r_reason_sk,r_reason_desc] + Scan parquet default.reason [r_reason_desc,r_reason_sk] [r_reason_desc,r_reason_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt index 71aae564f..5ec040bda 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt @@ -1,35 +1,35 @@ -TakeOrderedAndProject [i_category,total_sum,rank_within_parent,lochierarchy,i_class] +TakeOrderedAndProject [i_category,i_class,lochierarchy,rank_within_parent,total_sum] WholeStageCodegen - Project [i_category,total_sum,rank_within_parent,lochierarchy,i_class] + Project [i_category,i_class,lochierarchy,rank_within_parent,total_sum] InputAdapter - Window [_w3,_w1,_w2] + Window [_w1,_w2,_w3] WholeStageCodegen Sort [_w1,_w2,_w3] InputAdapter Exchange [_w1,_w2] #1 WholeStageCodegen - HashAggregate [i_category,sum(UnscaledValue(ws_net_paid)),sum,i_class,spark_grouping_id] [_w1,total_sum,sum(UnscaledValue(ws_net_paid)),sum,_w2,lochierarchy,_w3] + HashAggregate [i_category,i_class,spark_grouping_id,sum,sum(UnscaledValue(ws_net_paid))] [_w1,_w2,_w3,lochierarchy,sum,sum(UnscaledValue(ws_net_paid)),total_sum] InputAdapter Exchange [i_category,i_class,spark_grouping_id] #2 WholeStageCodegen - HashAggregate [i_category,sum,ws_net_paid,i_class,sum,spark_grouping_id] [sum,sum] - Expand [ws_net_paid,i_category,i_class] - Project [ws_net_paid,i_category,i_class] - BroadcastHashJoin [ws_item_sk,i_item_sk] + HashAggregate [i_category,i_class,spark_grouping_id,sum,sum,ws_net_paid] [sum,sum] + Expand [i_category,i_class,ws_net_paid] + Project [i_category,i_class,ws_net_paid] + BroadcastHashJoin [i_item_sk,ws_item_sk] Project [ws_item_sk,ws_net_paid] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_net_paid] - Filter [ws_sold_date_sk,ws_item_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_net_paid] [ws_sold_date_sk,ws_item_sk,ws_net_paid] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_item_sk,ws_net_paid,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_sold_date_sk] [ws_item_sk,ws_net_paid,ws_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [d_date_sk] - Filter [d_month_seq,d_date_sk] + Filter [d_date_sk,d_month_seq] Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [i_item_sk,i_class,i_category] + Project [i_category,i_class,i_item_sk] Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_class,i_category] [i_item_sk,i_class,i_category] + Scan parquet default.item [i_category,i_class,i_item_sk] [i_category,i_class,i_item_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt index c5008bdd8..1a7672916 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt @@ -1,33 +1,33 @@ WholeStageCodegen - HashAggregate [count,count(1)] [count(1),count(1),count] + HashAggregate [count,count(1)] [count,count(1),count(1)] InputAdapter Exchange #1 WholeStageCodegen HashAggregate [count,count] [count,count] - HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_first_name,d_date,d_date,c_last_name,c_last_name,c_first_name] - HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - BroadcastHashJoin [d_date,c_last_name,c_first_name,c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] + HashAggregate [c_first_name,c_last_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #2 + Exchange [c_first_name,c_last_name,d_date] #2 WholeStageCodegen - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_customer_sk] - Filter [ss_sold_date_sk,ss_customer_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] [ss_sold_date_sk,ss_customer_sk] + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [d_date,ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] [d_date_sk,d_date,d_month_seq] + Project [d_date,d_date_sk] + Filter [d_date_sk,d_month_seq] + Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] [d_date,d_date_sk,d_month_seq] InputAdapter BroadcastExchange #4 WholeStageCodegen @@ -37,38 +37,38 @@ WholeStageCodegen InputAdapter BroadcastExchange #5 WholeStageCodegen - HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #6 + Exchange [c_first_name,c_last_name,d_date] #6 WholeStageCodegen - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] Project [cs_bill_customer_sk,d_date] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_bill_customer_sk] - Filter [cs_sold_date_sk,cs_bill_customer_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] [cs_sold_date_sk,cs_bill_customer_sk] + Project [cs_bill_customer_sk,cs_sold_date_sk] + Filter [cs_bill_customer_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] [cs_bill_customer_sk,cs_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk,d_date] [d_date_sk,d_date] #3 + ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #3 InputAdapter ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 InputAdapter BroadcastExchange #7 WholeStageCodegen - HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #8 + Exchange [c_first_name,c_last_name,d_date] #8 WholeStageCodegen - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_bill_customer_sk,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_bill_customer_sk] - Filter [ws_sold_date_sk,ws_bill_customer_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] [ws_sold_date_sk,ws_bill_customer_sk] + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Project [d_date,ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_customer_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk,d_date] [d_date_sk,d_date] #3 + ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #3 InputAdapter ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/simplified.txt index 0a4bce5c6..77bbef312 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/simplified.txt @@ -6,54 +6,54 @@ BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin WholeStageCodegen - HashAggregate [count,count(1)] [count(1),h8_30_to_9,count] + HashAggregate [count,count(1)] [count,count(1),h8_30_to_9] InputAdapter Exchange #1 WholeStageCodegen HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [hd_demo_sk] - Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [s_store_sk] Filter [s_store_name,s_store_sk] - Scan parquet default.store [s_store_sk,s_store_name] [s_store_sk,s_store_name] + Scan parquet default.store [s_store_name,s_store_sk] [s_store_name,s_store_sk] BroadcastExchange #5 WholeStageCodegen - HashAggregate [count,count(1)] [count(1),h9_to_9_30,count] + HashAggregate [count,count(1)] [count,count(1),h9_to_9_30] InputAdapter Exchange #6 WholeStageCodegen HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] InputAdapter ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 InputAdapter @@ -61,25 +61,25 @@ BroadcastNestedLoopJoin WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] InputAdapter ReusedExchange [s_store_sk] [s_store_sk] #4 BroadcastExchange #8 WholeStageCodegen - HashAggregate [count,count(1)] [count(1),h9_30_to_10,count] + HashAggregate [count,count(1)] [count,count(1),h9_30_to_10] InputAdapter Exchange #9 WholeStageCodegen HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] InputAdapter ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 InputAdapter @@ -87,25 +87,25 @@ BroadcastNestedLoopJoin WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] InputAdapter ReusedExchange [s_store_sk] [s_store_sk] #4 BroadcastExchange #11 WholeStageCodegen - HashAggregate [count,count(1)] [count(1),h10_to_10_30,count] + HashAggregate [count,count(1)] [count,count(1),h10_to_10_30] InputAdapter Exchange #12 WholeStageCodegen HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] InputAdapter ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 InputAdapter @@ -113,25 +113,25 @@ BroadcastNestedLoopJoin WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] InputAdapter ReusedExchange [s_store_sk] [s_store_sk] #4 BroadcastExchange #14 WholeStageCodegen - HashAggregate [count,count(1)] [count(1),h10_30_to_11,count] + HashAggregate [count,count(1)] [count,count(1),h10_30_to_11] InputAdapter Exchange #15 WholeStageCodegen HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] InputAdapter ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 InputAdapter @@ -139,25 +139,25 @@ BroadcastNestedLoopJoin WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] InputAdapter ReusedExchange [s_store_sk] [s_store_sk] #4 BroadcastExchange #17 WholeStageCodegen - HashAggregate [count,count(1)] [count(1),h11_to_11_30,count] + HashAggregate [count,count(1)] [count,count(1),h11_to_11_30] InputAdapter Exchange #18 WholeStageCodegen HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] InputAdapter ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 InputAdapter @@ -165,25 +165,25 @@ BroadcastNestedLoopJoin WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] InputAdapter ReusedExchange [s_store_sk] [s_store_sk] #4 BroadcastExchange #20 WholeStageCodegen - HashAggregate [count,count(1)] [count(1),h11_30_to_12,count] + HashAggregate [count,count(1)] [count,count(1),h11_30_to_12] InputAdapter Exchange #21 WholeStageCodegen HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] InputAdapter ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 InputAdapter @@ -191,25 +191,25 @@ BroadcastNestedLoopJoin WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] InputAdapter ReusedExchange [s_store_sk] [s_store_sk] #4 BroadcastExchange #23 WholeStageCodegen - HashAggregate [count,count(1)] [count(1),h12_to_12_30,count] + HashAggregate [count,count(1)] [count,count(1),h12_to_12_30] InputAdapter Exchange #24 WholeStageCodegen HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] InputAdapter ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 InputAdapter @@ -217,6 +217,6 @@ BroadcastNestedLoopJoin WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] InputAdapter ReusedExchange [s_store_sk] [s_store_sk] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt index 31fd0675f..33e1ef0af 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt @@ -1,43 +1,43 @@ -TakeOrderedAndProject [s_company_name,d_moy,sum_sales,i_brand,i_category,avg_monthly_sales,s_store_name,i_class] +TakeOrderedAndProject [avg_monthly_sales,d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum_sales] WholeStageCodegen - Project [s_company_name,d_moy,sum_sales,i_brand,avg_monthly_sales,i_category,i_class,s_store_name] + Project [avg_monthly_sales,d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum_sales] Filter [avg_monthly_sales,sum_sales] InputAdapter - Window [s_company_name,i_brand,i_category,s_store_name,_w0] + Window [_w0,i_brand,i_category,s_company_name,s_store_name] WholeStageCodegen - Sort [i_category,i_brand,s_store_name,s_company_name] + Sort [i_brand,i_category,s_company_name,s_store_name] InputAdapter - Exchange [i_category,i_brand,s_store_name,s_company_name] #1 + Exchange [i_brand,i_category,s_company_name,s_store_name] #1 WholeStageCodegen - HashAggregate [s_company_name,d_moy,i_brand,sum(UnscaledValue(ss_sales_price)),i_category,sum,i_class,s_store_name] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + HashAggregate [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] InputAdapter - Exchange [s_company_name,d_moy,i_brand,i_category,i_class,s_store_name] #2 + Exchange [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name] #2 WholeStageCodegen - HashAggregate [s_company_name,d_moy,i_brand,ss_sales_price,i_category,sum,sum,i_class,s_store_name] [sum,sum] - Project [i_class,s_store_name,s_company_name,d_moy,i_category,ss_sales_price,i_brand] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [i_class,d_moy,ss_store_sk,i_category,ss_sales_price,i_brand] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [i_class,ss_store_sk,i_category,ss_sales_price,ss_sold_date_sk,i_brand] + HashAggregate [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,ss_sales_price,sum,sum] [sum,sum] + Project [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,i_brand,i_category,i_class,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand,i_category,i_class,ss_sales_price,ss_sold_date_sk,ss_store_sk] BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_item_sk,i_brand,i_class,i_category] + Project [i_brand,i_category,i_class,i_item_sk] Filter [i_category,i_class,i_item_sk] - Scan parquet default.item [i_item_sk,i_brand,i_class,i_category] [i_item_sk,i_brand,i_class,i_category] + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk] [i_brand,i_category,i_class,i_item_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [d_date_sk,d_moy] - Filter [d_year,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [s_store_sk,s_store_name,s_company_name] + Project [s_company_name,s_store_name,s_store_sk] Filter [s_store_sk] - Scan parquet default.store [s_store_sk,s_store_name,s_company_name] [s_store_sk,s_store_name,s_company_name] + Scan parquet default.store [s_company_name,s_store_name,s_store_sk] [s_company_name,s_store_name,s_store_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt index 1851df752..f0bccc7ec 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt @@ -2,7 +2,7 @@ WholeStageCodegen Project Subquery #1 WholeStageCodegen - HashAggregate [count,count(1)] [count(1),count(1),count] + HashAggregate [count,count(1)] [count,count(1),count(1)] InputAdapter Exchange #1 WholeStageCodegen @@ -12,27 +12,27 @@ WholeStageCodegen Scan parquet default.store_sales [ss_quantity] [ss_quantity] Subquery #2 WholeStageCodegen - HashAggregate [sum,count,avg(UnscaledValue(ss_ext_discount_amt))] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] InputAdapter Exchange #2 WholeStageCodegen - HashAggregate [sum,ss_ext_discount_amt,sum,count,count] [sum,count,sum,count] + HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] Project [ss_ext_discount_amt] Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] Subquery #3 WholeStageCodegen - HashAggregate [sum,count,avg(UnscaledValue(ss_net_paid))] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] InputAdapter Exchange #3 WholeStageCodegen - HashAggregate [sum,count,ss_net_paid,sum,count] [sum,count,sum,count] + HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] Project [ss_net_paid] Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] + Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] Subquery #4 WholeStageCodegen - HashAggregate [count,count(1)] [count(1),count(1),count] + HashAggregate [count,count(1)] [count,count(1),count(1)] InputAdapter Exchange #4 WholeStageCodegen @@ -42,27 +42,27 @@ WholeStageCodegen Scan parquet default.store_sales [ss_quantity] [ss_quantity] Subquery #5 WholeStageCodegen - HashAggregate [sum,count,avg(UnscaledValue(ss_ext_discount_amt))] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] InputAdapter Exchange #5 WholeStageCodegen - HashAggregate [count,sum,count,ss_ext_discount_amt,sum] [sum,count,sum,count] + HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] Project [ss_ext_discount_amt] Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] Subquery #6 WholeStageCodegen - HashAggregate [sum,count,avg(UnscaledValue(ss_net_paid))] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] InputAdapter Exchange #6 WholeStageCodegen - HashAggregate [sum,sum,ss_net_paid,count,count] [sum,count,sum,count] + HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] Project [ss_net_paid] Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] + Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] Subquery #7 WholeStageCodegen - HashAggregate [count,count(1)] [count(1),count(1),count] + HashAggregate [count,count(1)] [count,count(1),count(1)] InputAdapter Exchange #7 WholeStageCodegen @@ -72,27 +72,27 @@ WholeStageCodegen Scan parquet default.store_sales [ss_quantity] [ss_quantity] Subquery #8 WholeStageCodegen - HashAggregate [sum,count,avg(UnscaledValue(ss_ext_discount_amt))] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] InputAdapter Exchange #8 WholeStageCodegen - HashAggregate [sum,ss_ext_discount_amt,count,count,sum] [sum,count,sum,count] + HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] Project [ss_ext_discount_amt] Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] Subquery #9 WholeStageCodegen - HashAggregate [sum,count,avg(UnscaledValue(ss_net_paid))] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] InputAdapter Exchange #9 WholeStageCodegen - HashAggregate [sum,count,count,sum,ss_net_paid] [sum,count,sum,count] + HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] Project [ss_net_paid] Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] + Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] Subquery #10 WholeStageCodegen - HashAggregate [count,count(1)] [count(1),count(1),count] + HashAggregate [count,count(1)] [count,count(1),count(1)] InputAdapter Exchange #10 WholeStageCodegen @@ -102,27 +102,27 @@ WholeStageCodegen Scan parquet default.store_sales [ss_quantity] [ss_quantity] Subquery #11 WholeStageCodegen - HashAggregate [sum,count,avg(UnscaledValue(ss_ext_discount_amt))] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] InputAdapter Exchange #11 WholeStageCodegen - HashAggregate [ss_ext_discount_amt,sum,count,count,sum] [sum,count,sum,count] + HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] Project [ss_ext_discount_amt] Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] Subquery #12 WholeStageCodegen - HashAggregate [sum,count,avg(UnscaledValue(ss_net_paid))] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] InputAdapter Exchange #12 WholeStageCodegen - HashAggregate [count,sum,ss_net_paid,count,sum] [sum,count,sum,count] + HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] Project [ss_net_paid] Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] + Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] Subquery #13 WholeStageCodegen - HashAggregate [count,count(1)] [count(1),count(1),count] + HashAggregate [count,count(1)] [count,count(1),count(1)] InputAdapter Exchange #13 WholeStageCodegen @@ -132,23 +132,23 @@ WholeStageCodegen Scan parquet default.store_sales [ss_quantity] [ss_quantity] Subquery #14 WholeStageCodegen - HashAggregate [sum,count,avg(UnscaledValue(ss_ext_discount_amt))] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] InputAdapter Exchange #14 WholeStageCodegen - HashAggregate [sum,ss_ext_discount_amt,sum,count,count] [sum,count,sum,count] + HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] Project [ss_ext_discount_amt] Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] [ss_quantity,ss_ext_discount_amt] + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] Subquery #15 WholeStageCodegen - HashAggregate [sum,count,avg(UnscaledValue(ss_net_paid))] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] InputAdapter Exchange #15 WholeStageCodegen - HashAggregate [count,ss_net_paid,sum,count,sum] [sum,count,sum,count] + HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] Project [ss_net_paid] Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity,ss_net_paid] [ss_quantity,ss_net_paid] + Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] Filter [r_reason_sk] Scan parquet default.reason [r_reason_sk] [r_reason_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt index c14580e27..1d0dc7ad2 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt @@ -4,54 +4,54 @@ TakeOrderedAndProject [am_pm_ratio] InputAdapter BroadcastNestedLoopJoin WholeStageCodegen - HashAggregate [count,count(1)] [count(1),amc,count] + HashAggregate [count,count(1)] [amc,count,count(1)] InputAdapter Exchange #1 WholeStageCodegen HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] Project [ws_web_page_sk] - BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + BroadcastHashJoin [t_time_sk,ws_sold_time_sk] Project [ws_sold_time_sk,ws_web_page_sk] - BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] - Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + BroadcastHashJoin [hd_demo_sk,ws_ship_hdemo_sk] + Project [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + Scan parquet default.web_sales [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [hd_demo_sk] - Filter [hd_dep_count,hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count] Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] [hd_demo_sk,hd_dep_count] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_time_sk] - Scan parquet default.time_dim [t_time_sk,t_hour] [t_time_sk,t_hour] + Scan parquet default.time_dim [t_hour,t_time_sk] [t_hour,t_time_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [wp_web_page_sk] Filter [wp_char_count,wp_web_page_sk] - Scan parquet default.web_page [wp_web_page_sk,wp_char_count] [wp_web_page_sk,wp_char_count] + Scan parquet default.web_page [wp_char_count,wp_web_page_sk] [wp_char_count,wp_web_page_sk] BroadcastExchange #5 WholeStageCodegen - HashAggregate [count,count(1)] [count(1),pmc,count] + HashAggregate [count,count(1)] [count,count(1),pmc] InputAdapter Exchange #6 WholeStageCodegen HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] Project [ws_web_page_sk] - BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + BroadcastHashJoin [t_time_sk,ws_sold_time_sk] Project [ws_sold_time_sk,ws_web_page_sk] - BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] - Project [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + BroadcastHashJoin [hd_demo_sk,ws_ship_hdemo_sk] + Project [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + Scan parquet default.web_sales [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] InputAdapter ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 InputAdapter @@ -59,6 +59,6 @@ TakeOrderedAndProject [am_pm_ratio] WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_time_sk] - Scan parquet default.time_dim [t_time_sk,t_hour] [t_time_sk,t_hour] + Scan parquet default.time_dim [t_hour,t_time_sk] [t_hour,t_time_sk] InputAdapter ReusedExchange [wp_web_page_sk] [wp_web_page_sk] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt index 4b2c97c9f..1030b9095 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt @@ -3,59 +3,59 @@ WholeStageCodegen InputAdapter Exchange [Returns_Loss] #1 WholeStageCodegen - HashAggregate [cc_call_center_id,sum(UnscaledValue(cr_net_loss)),sum,cc_name,cd_education_status,cc_manager,cd_marital_status] [sum(UnscaledValue(cr_net_loss)),sum,Manager,Call_Center_Name,Returns_Loss,Call_Center] + HashAggregate [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,sum,sum(UnscaledValue(cr_net_loss))] [Call_Center,Call_Center_Name,Manager,Returns_Loss,sum,sum(UnscaledValue(cr_net_loss))] InputAdapter - Exchange [cc_call_center_id,cc_name,cd_education_status,cc_manager,cd_marital_status] #2 + Exchange [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status] #2 WholeStageCodegen - HashAggregate [cc_call_center_id,sum,cc_name,cd_education_status,cc_manager,cr_net_loss,sum,cd_marital_status] [sum,sum] - Project [cr_net_loss,cd_marital_status,cc_call_center_id,cc_name,cc_manager,cd_education_status] + HashAggregate [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,cr_net_loss,sum,sum] [sum,sum] + Project [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,cr_net_loss] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [cr_net_loss,c_current_hdemo_sk,cd_marital_status,cc_call_center_id,cc_name,cc_manager,cd_education_status] + Project [c_current_hdemo_sk,cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,cr_net_loss] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] - Project [c_current_cdemo_sk,cr_net_loss,c_current_hdemo_sk,cc_call_center_id,cc_name,cc_manager] + Project [c_current_cdemo_sk,c_current_hdemo_sk,cc_call_center_id,cc_manager,cc_name,cr_net_loss] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_cdemo_sk,c_current_addr_sk,cr_net_loss,c_current_hdemo_sk,cc_call_center_id,cc_name,cc_manager] - BroadcastHashJoin [cr_returning_customer_sk,c_customer_sk] - Project [cr_returning_customer_sk,cr_net_loss,cc_call_center_id,cc_name,cc_manager] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cc_call_center_id,cc_manager,cc_name,cr_net_loss] + BroadcastHashJoin [c_customer_sk,cr_returning_customer_sk] + Project [cc_call_center_id,cc_manager,cc_name,cr_net_loss,cr_returning_customer_sk] BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Project [cr_returning_customer_sk,cr_net_loss,cc_call_center_id,cr_returned_date_sk,cc_name,cc_manager] + Project [cc_call_center_id,cc_manager,cc_name,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] BroadcastHashJoin [cc_call_center_sk,cr_call_center_sk] - Project [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] + Project [cc_call_center_id,cc_call_center_sk,cc_manager,cc_name] Filter [cc_call_center_sk] - Scan parquet default.call_center [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] + Scan parquet default.call_center [cc_call_center_id,cc_call_center_sk,cc_manager,cc_name] [cc_call_center_id,cc_call_center_sk,cc_manager,cc_name] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] + Project [cr_call_center_sk,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] Filter [cr_call_center_sk,cr_returned_date_sk,cr_returning_customer_sk] - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] [cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] + Scan parquet default.catalog_returns [cr_call_center_sk,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] [cr_call_center_sk,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #5 WholeStageCodegen - Project [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] - Filter [c_customer_sk,c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] - Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [ca_address_sk] - Filter [ca_gmt_offset,ca_address_sk] + Filter [ca_address_sk,ca_gmt_offset] Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] InputAdapter BroadcastExchange #7 WholeStageCodegen - Project [cd_demo_sk,cd_marital_status,cd_education_status] - Filter [cd_marital_status,cd_education_status,cd_demo_sk] - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] [cd_demo_sk,cd_marital_status,cd_education_status] + Project [cd_demo_sk,cd_education_status,cd_marital_status] + Filter [cd_demo_sk,cd_education_status,cd_marital_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] InputAdapter BroadcastExchange #8 WholeStageCodegen Project [hd_demo_sk] Filter [hd_buy_potential,hd_demo_sk] - Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] [hd_demo_sk,hd_buy_potential] + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk] [hd_buy_potential,hd_demo_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt index 6137607f6..cab3234b8 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt @@ -1,44 +1,44 @@ TakeOrderedAndProject [Excess Discount Amount ] WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(ws_ext_discount_amt))] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] + HashAggregate [sum,sum(UnscaledValue(ws_ext_discount_amt))] [Excess Discount Amount ,sum,sum(UnscaledValue(ws_ext_discount_amt))] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [ws_ext_discount_amt,sum,sum] [sum,sum] + HashAggregate [sum,sum,ws_ext_discount_amt] [sum,sum] Project [ws_ext_discount_amt] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_ext_discount_amt] - BroadcastHashJoin [i_item_sk,ws_item_sk,ws_ext_discount_amt,(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))] - Project [ws_sold_date_sk,ws_ext_discount_amt,i_item_sk] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] - Filter [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_ext_discount_amt,ws_sold_date_sk] + BroadcastHashJoin [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),i_item_sk,ws_ext_discount_amt,ws_item_sk] + Project [i_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] + Filter [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [i_item_sk] - Filter [i_manufact_id,i_item_sk] + Filter [i_item_sk,i_manufact_id] Scan parquet default.item [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] InputAdapter BroadcastExchange #3 WholeStageCodegen Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))] - HashAggregate [ws_item_sk,sum,count,avg(UnscaledValue(ws_ext_discount_amt))] [sum,(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),avg(UnscaledValue(ws_ext_discount_amt)),count,ws_item_sk] + HashAggregate [avg(UnscaledValue(ws_ext_discount_amt)),count,sum,ws_item_sk] [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),avg(UnscaledValue(ws_ext_discount_amt)),count,sum,ws_item_sk] InputAdapter Exchange [ws_item_sk] #4 WholeStageCodegen - HashAggregate [ws_ext_discount_amt,ws_item_sk,sum,sum,count,count] [sum,count,sum,count] - Project [ws_item_sk,ws_ext_discount_amt] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] - Filter [ws_sold_date_sk,ws_item_sk] - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] + HashAggregate [count,count,sum,sum,ws_ext_discount_amt,ws_item_sk] [count,count,sum,sum] + Project [ws_ext_discount_amt,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #5 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt index 5c7f07a1b..6b2505ff0 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt @@ -1,24 +1,24 @@ -TakeOrderedAndProject [sumsales,ss_customer_sk] +TakeOrderedAndProject [ss_customer_sk,sumsales] WholeStageCodegen - HashAggregate [ss_customer_sk,sum,sum(act_sales)] [sum(act_sales),sumsales,sum] + HashAggregate [ss_customer_sk,sum,sum(act_sales)] [sum,sum(act_sales),sumsales] InputAdapter Exchange [ss_customer_sk] #1 WholeStageCodegen - HashAggregate [ss_customer_sk,act_sales,sum,sum] [sum,sum] - Project [ss_customer_sk,sr_return_quantity,ss_quantity,ss_sales_price] - BroadcastHashJoin [sr_reason_sk,r_reason_sk] - Project [ss_quantity,sr_return_quantity,ss_customer_sk,sr_reason_sk,ss_sales_price] - BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - Scan parquet default.store_sales [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_ticket_number] [ss_quantity,ss_item_sk,ss_customer_sk,ss_sales_price,ss_ticket_number] + HashAggregate [act_sales,ss_customer_sk,sum,sum] [sum,sum] + Project [sr_return_quantity,ss_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [r_reason_sk,sr_reason_sk] + Project [sr_reason_sk,sr_return_quantity,ss_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_ticket_number] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] - Filter [sr_item_sk,sr_ticket_number,sr_reason_sk] - Scan parquet default.store_returns [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] + Project [sr_item_sk,sr_reason_sk,sr_return_quantity,sr_ticket_number] + Filter [sr_item_sk,sr_reason_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_item_sk,sr_reason_sk,sr_return_quantity,sr_ticket_number] [sr_item_sk,sr_reason_sk,sr_return_quantity,sr_ticket_number] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [r_reason_sk] Filter [r_reason_desc,r_reason_sk] - Scan parquet default.reason [r_reason_sk,r_reason_desc] [r_reason_sk,r_reason_desc] + Scan parquet default.reason [r_reason_desc,r_reason_sk] [r_reason_desc,r_reason_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt index cc709ac3c..c7a5e5d90 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt @@ -1,32 +1,32 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] +TakeOrderedAndProject [order count ,total net profit ,total shipping cost ] WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),count,sum] [sum,sum(UnscaledValue(ws_net_profit)),order count ,total net profit ,count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),count,sum,total shipping cost ] + HashAggregate [count,count(ws_order_number),sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] [count,count(ws_order_number),order count ,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),total net profit ,total shipping cost ] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [sum,count,sum,sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),ws_order_number,sum,sum] [sum,count,sum,sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum(UnscaledValue(ws_ext_ship_cost)),sum,count,sum] - HashAggregate [sum,sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),ws_order_number,sum,sum] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),sum,sum] + HashAggregate [count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_order_number] [count,count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + HashAggregate [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] InputAdapter Exchange [ws_order_number] #2 WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),ws_order_number,sum,ws_net_profit,ws_ext_ship_cost] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum(UnscaledValue(ws_ext_ship_cost)),sum,sum] - Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] - BroadcastHashJoin [ws_web_site_sk,web_site_sk] - Project [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] - BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] - Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number] - BroadcastHashJoin [ws_ship_date_sk,d_date_sk] - BroadcastHashJoin [ws_order_number,wr_order_number] - Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] + HashAggregate [sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_ext_ship_cost,ws_net_profit,ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number] + BroadcastHashJoin [web_site_sk,ws_web_site_sk] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_web_site_sk] + BroadcastHashJoin [ca_address_sk,ws_ship_addr_sk] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_web_site_sk] + BroadcastHashJoin [d_date_sk,ws_ship_date_sk] + BroadcastHashJoin [wr_order_number,ws_order_number] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] - Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk,ws_warehouse_sk] - Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] - Scan parquet default.web_sales [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk,ws_warehouse_sk] [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk,ws_warehouse_sk] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_warehouse_sk,ws_web_site_sk] + Filter [ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] + Scan parquet default.web_sales [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_warehouse_sk,ws_web_site_sk] [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_warehouse_sk,ws_web_site_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen - Project [ws_warehouse_sk,ws_order_number] - Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] [ws_warehouse_sk,ws_order_number] + Project [ws_order_number,ws_warehouse_sk] + Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen @@ -36,16 +36,16 @@ TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #6 WholeStageCodegen Project [ca_address_sk] - Filter [ca_state,ca_address_sk] + Filter [ca_address_sk,ca_state] Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] InputAdapter BroadcastExchange #7 WholeStageCodegen Project [web_site_sk] Filter [web_company_name,web_site_sk] - Scan parquet default.web_site [web_site_sk,web_company_name] [web_site_sk,web_company_name] + Scan parquet default.web_site [web_company_name,web_site_sk] [web_company_name,web_site_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt index 5b6f33ebd..343982d5a 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt @@ -1,40 +1,40 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] +TakeOrderedAndProject [order count ,total net profit ,total shipping cost ] WholeStageCodegen - HashAggregate [count(ws_order_number),sum(UnscaledValue(ws_net_profit)),count,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] [order count ,total shipping cost ,count(ws_order_number),sum(UnscaledValue(ws_net_profit)),count,sum,total net profit ,sum(UnscaledValue(ws_ext_ship_cost)),sum] + HashAggregate [count,count(ws_order_number),sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] [count,count(ws_order_number),order count ,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),total net profit ,total shipping cost ] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [sum,sum,count,count(ws_order_number),sum(UnscaledValue(ws_net_profit)),sum,ws_order_number,sum(UnscaledValue(ws_ext_ship_cost)),sum] [sum,sum,count,count(ws_order_number),sum(UnscaledValue(ws_net_profit)),count,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] - HashAggregate [sum,sum,sum(UnscaledValue(ws_net_profit)),sum,ws_order_number,sum(UnscaledValue(ws_ext_ship_cost)),sum] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] + HashAggregate [count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_order_number] [count,count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + HashAggregate [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] InputAdapter Exchange [ws_order_number] #2 WholeStageCodegen - HashAggregate [sum,sum,sum(UnscaledValue(ws_net_profit)),ws_order_number,sum(UnscaledValue(ws_ext_ship_cost)),ws_net_profit,ws_ext_ship_cost] [sum,sum,sum(UnscaledValue(ws_net_profit)),sum,sum(UnscaledValue(ws_ext_ship_cost)),sum] - Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] - BroadcastHashJoin [ws_web_site_sk,web_site_sk] - Project [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] - BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] - Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number] - BroadcastHashJoin [ws_ship_date_sk,d_date_sk] - BroadcastHashJoin [ws_order_number,wr_order_number] + HashAggregate [sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_ext_ship_cost,ws_net_profit,ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number] + BroadcastHashJoin [web_site_sk,ws_web_site_sk] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_web_site_sk] + BroadcastHashJoin [ca_address_sk,ws_ship_addr_sk] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_web_site_sk] + BroadcastHashJoin [d_date_sk,ws_ship_date_sk] + BroadcastHashJoin [wr_order_number,ws_order_number] BroadcastHashJoin [ws_order_number,ws_order_number] - Project [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] - Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] - Scan parquet default.web_sales [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] [ws_web_site_sk,ws_ship_addr_sk,ws_net_profit,ws_ext_ship_cost,ws_order_number,ws_ship_date_sk] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] + Filter [ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] + Scan parquet default.web_sales [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [ws_order_number] BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] - Project [ws_warehouse_sk,ws_order_number] + Project [ws_order_number,ws_warehouse_sk] Filter [ws_order_number,ws_warehouse_sk] - Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] [ws_warehouse_sk,ws_order_number] + Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [ws_warehouse_sk,ws_order_number] + Project [ws_order_number,ws_warehouse_sk] Filter [ws_order_number,ws_warehouse_sk] - Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] [ws_warehouse_sk,ws_order_number] + Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen @@ -48,26 +48,26 @@ TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] WholeStageCodegen Project [ws_order_number] BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] - Project [ws_warehouse_sk,ws_order_number] + Project [ws_order_number,ws_warehouse_sk] Filter [ws_order_number,ws_warehouse_sk] - Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] [ws_warehouse_sk,ws_order_number] + Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] InputAdapter - ReusedExchange [ws_warehouse_sk,ws_order_number] [ws_warehouse_sk,ws_order_number] #4 + ReusedExchange [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] #4 InputAdapter BroadcastExchange #7 WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #8 WholeStageCodegen Project [ca_address_sk] - Filter [ca_state,ca_address_sk] + Filter [ca_address_sk,ca_state] Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] InputAdapter BroadcastExchange #9 WholeStageCodegen Project [web_site_sk] Filter [web_company_name,web_site_sk] - Scan parquet default.web_site [web_site_sk,web_company_name] [web_site_sk,web_company_name] + Scan parquet default.web_site [web_company_name,web_site_sk] [web_company_name,web_site_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt index fada2566d..7f1a66829 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt @@ -1,34 +1,34 @@ TakeOrderedAndProject [count(1)] WholeStageCodegen - HashAggregate [count,count(1)] [count(1),count(1),count] + HashAggregate [count,count(1)] [count,count(1),count(1)] InputAdapter Exchange #1 WholeStageCodegen HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen Project [hd_demo_sk] - Filter [hd_dep_count,hd_demo_sk] + Filter [hd_demo_sk,hd_dep_count] Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] [hd_demo_sk,hd_dep_count] InputAdapter BroadcastExchange #3 WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] [t_time_sk,t_hour,t_minute] + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [s_store_sk] Filter [s_store_name,s_store_sk] - Scan parquet default.store [s_store_sk,s_store_name] [s_store_sk,s_store_name] + Scan parquet default.store [s_store_name,s_store_sk] [s_store_name,s_store_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt index a3cc25d23..40d75add7 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt @@ -1,13 +1,13 @@ CollectLimit WholeStageCodegen - HashAggregate [sum,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] [store_only,sum,catalog_only,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_and_catalog] + HashAggregate [sum,sum,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] [catalog_only,store_and_catalog,store_only,sum,sum,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] InputAdapter Exchange #1 WholeStageCodegen - HashAggregate [sum,sum,sum,customer_sk,sum,customer_sk,sum,sum] [sum,sum,sum,sum,sum,sum] + HashAggregate [customer_sk,customer_sk,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] Project [customer_sk,customer_sk] InputAdapter - SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + SortMergeJoin [customer_sk,customer_sk,item_sk,item_sk] WholeStageCodegen Sort [customer_sk,item_sk] InputAdapter @@ -18,16 +18,16 @@ CollectLimit Exchange [ss_customer_sk,ss_item_sk] #3 WholeStageCodegen HashAggregate [ss_customer_sk,ss_item_sk] - Project [ss_item_sk,ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + Project [ss_customer_sk,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_item_sk,ss_sold_date_sk] Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk] [ss_customer_sk,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen Project [d_date_sk] - Filter [d_month_seq,d_date_sk] + Filter [d_date_sk,d_month_seq] Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] WholeStageCodegen Sort [customer_sk,item_sk] @@ -41,8 +41,8 @@ CollectLimit HashAggregate [cs_bill_customer_sk,cs_item_sk] Project [cs_bill_customer_sk,cs_item_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] InputAdapter ReusedExchange [d_date_sk] [d_date_sk] #4 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt index be26ab7ec..914d8934b 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt @@ -1,10 +1,10 @@ WholeStageCodegen - Project [revenueratio,i_item_desc,itemrevenue,i_category,i_current_price,i_class] - Sort [revenueratio,i_item_id,i_item_desc,i_category,i_class] + Project [i_category,i_class,i_current_price,i_item_desc,itemrevenue,revenueratio] + Sort [i_category,i_class,i_item_desc,i_item_id,revenueratio] InputAdapter - Exchange [revenueratio,i_item_id,i_item_desc,i_category,i_class] #1 + Exchange [i_category,i_class,i_item_desc,i_item_id,revenueratio] #1 WholeStageCodegen - Project [i_item_id,i_item_desc,_w0,itemrevenue,i_category,_we0,i_current_price,i_class] + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] InputAdapter Window [_w1,i_class] WholeStageCodegen @@ -12,27 +12,27 @@ WholeStageCodegen InputAdapter Exchange [i_class] #2 WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,sum,sum(UnscaledValue(ss_ext_sales_price)),i_category,i_current_price,i_class] [sum,sum(UnscaledValue(ss_ext_sales_price)),_w0,itemrevenue,_w1] + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter - Exchange [i_item_id,i_item_desc,i_category,i_current_price,i_class] #3 + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #3 WholeStageCodegen - HashAggregate [i_item_id,i_item_desc,sum,i_category,ss_ext_sales_price,sum,i_current_price,i_class] [sum,sum] - Project [i_class,i_current_price,ss_ext_sales_price,i_category,i_item_desc,i_item_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [i_class,i_current_price,ss_ext_sales_price,i_category,ss_sold_date_sk,i_item_desc,i_item_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen - Project [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] Filter [i_category,i_item_sk] - Scan parquet default.item [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] [i_class,i_current_price,i_item_sk,i_category,i_item_desc,i_item_id] + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_date] [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt index 934bf3e47..4e22db2fe 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt @@ -1,27 +1,27 @@ -TakeOrderedAndProject [substring(w_warehouse_name, 1, 20),61 - 90 days ,cc_name,>120 days ,91 - 120 days ,30 days ,sm_type,31 - 60 days ] +TakeOrderedAndProject [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,cc_name,sm_type,substring(w_warehouse_name, 1, 20)] WholeStageCodegen - HashAggregate [sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum,cc_name,sum,sm_type,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum] [sum,substring(w_warehouse_name, 1, 20),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),61 - 90 days ,sum,>120 days ,sum,91 - 120 days ,30 days ,31 - 60 days ,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),sum] + HashAggregate [cc_name,sm_type,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint))] [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint))] InputAdapter - Exchange [substring(w_warehouse_name, 1, 20),sm_type,cc_name] #1 + Exchange [cc_name,sm_type,substring(w_warehouse_name, 1, 20)] #1 WholeStageCodegen - HashAggregate [sum,sum,sum,substring(w_warehouse_name, 1, 20),w_warehouse_name,sum,sum,cs_sold_date_sk,sum,cc_name,sum,sum,sm_type,cs_ship_date_sk,sum,sum] [sum,sum,sum,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum] - Project [cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name,cc_name] + HashAggregate [cc_name,cs_ship_date_sk,cs_sold_date_sk,sm_type,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_warehouse_name] [substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [cc_name,cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name] BroadcastHashJoin [cs_ship_date_sk,d_date_sk] - Project [cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name,cc_name] - BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] - Project [cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name,cs_call_center_sk] + Project [cc_name,cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name] + BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] + Project [cs_call_center_sk,cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name] BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] - Project [cs_ship_date_sk,cs_sold_date_sk,cs_ship_mode_sk,w_warehouse_name,cs_call_center_sk] + Project [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,w_warehouse_name] BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] - Project [cs_ship_date_sk,cs_warehouse_sk,cs_sold_date_sk,cs_ship_mode_sk,cs_call_center_sk] - Filter [cs_warehouse_sk,cs_ship_mode_sk,cs_call_center_sk,cs_ship_date_sk] - Scan parquet default.catalog_sales [cs_ship_date_sk,cs_warehouse_sk,cs_sold_date_sk,cs_ship_mode_sk,cs_call_center_sk] [cs_ship_date_sk,cs_warehouse_sk,cs_sold_date_sk,cs_ship_mode_sk,cs_call_center_sk] + Project [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,cs_warehouse_sk] + Filter [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_warehouse_sk] + Scan parquet default.catalog_sales [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,cs_warehouse_sk] [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,cs_warehouse_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen - Project [w_warehouse_sk,w_warehouse_name] + Project [w_warehouse_name,w_warehouse_sk] Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] [w_warehouse_sk,w_warehouse_name] + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen @@ -38,5 +38,5 @@ TakeOrderedAndProject [substring(w_warehouse_name, 1, 20),61 - 90 days ,cc_name, BroadcastExchange #5 WholeStageCodegen Project [d_date_sk] - Filter [d_month_seq,d_date_sk] + Filter [d_date_sk,d_month_seq] Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] From b70f00bbc7e9bfef77f9fde5603e98f62d91b8bc Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Wed, 10 Mar 2021 19:12:05 -0800 Subject: [PATCH 11/31] fix q49 --- .../approved-plans-v1_4/q49/explain.txt | 92 ++++++------- .../approved-plans-v1_4/q49/simplified.txt | 126 +++++++++--------- 2 files changed, 111 insertions(+), 107 deletions(-) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt index a714636a3..1b4779912 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt @@ -6,29 +6,30 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,return_rank# +- Union :- *(7) Project [web AS channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3] : +- *(7) Filter ((return_rank#2 <= 10) || (currency_rank#3 <= 10)) - : +- Window [rank(currency_ratio#6) windowspecdefinition(currency_ratio#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#3], [currency_ratio#6 ASC NULLS FIRST] - : +- *(6) Sort [currency_ratio#6 ASC NULLS FIRST], false, 0 - : +- Window [rank(return_ratio#5) windowspecdefinition(return_ratio#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#2], [return_ratio#5 ASC NULLS FIRST] - : +- *(5) Sort [return_ratio#5 ASC NULLS FIRST], false, 0 - : +- Exchange SinglePartition - : +- *(4) HashAggregate(keys=[ws_item_sk#7], functions=[sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), sum(cast(coalesce(ws_quantity#9, 0) as bigint)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) - : +- Exchange hashpartitioning(ws_item_sk#7, 5) - : +- *(3) HashAggregate(keys=[ws_item_sk#7], functions=[partial_sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), partial_sum(cast(coalesce(ws_quantity#9, 0) as bigint)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) - : +- *(3) Project [ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] - : +- *(3) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight - : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] - : : +- *(3) BroadcastHashJoin [cast(ws_order_number#14 as bigint), cast(ws_item_sk#7 as bigint)], [wr_order_number#15, wr_item_sk#16], Inner, BuildRight - : : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_order_number#14, ws_quantity#9, ws_net_paid#11] - : : : +- *(3) Filter ((((((((isnotnull(ws_net_profit#17) && isnotnull(ws_quantity#9)) && isnotnull(ws_net_paid#11)) && (ws_net_profit#17 > 1.00)) && (ws_net_paid#11 > 0.00)) && (ws_quantity#9 > 0)) && isnotnull(ws_order_number#14)) && isnotnull(ws_item_sk#7)) && isnotnull(ws_sold_date_sk#12)) - : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#7,ws_order_number#14,ws_quantity#9,ws_net_paid#11,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_quantity), IsNotNull(ws_net_paid), GreaterThan(ws_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(wr_item_sk#16)) && isnotnull(wr_order_number#15)) - : : +- *(1) FileScan parquet default.web_returns[wr_item_sk#16,wr_order_number#15,wr_return_quantity#8,wr_return_amt#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_item_sk), IsNotNull(..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#13] - : +- *(2) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#19)) && (d_year#18 = 2001)) && (d_moy#19 = 12)) && isnotnull(d_date_sk#13)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_year#18,d_moy#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)], ReadSchema: struct + : +- Window [rank(return_ratio#5) windowspecdefinition(return_ratio#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#2], [return_ratio#5 ASC NULLS FIRST] + : +- *(6) Sort [return_ratio#5 ASC NULLS FIRST], false, 0 + : +- *(6) Project [item#4, return_ratio#5, currency_rank#3] + : +- Window [rank(currency_ratio#6) windowspecdefinition(currency_ratio#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#3], [currency_ratio#6 ASC NULLS FIRST] + : +- *(5) Sort [currency_ratio#6 ASC NULLS FIRST], false, 0 + : +- Exchange SinglePartition + : +- *(4) HashAggregate(keys=[ws_item_sk#7], functions=[sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), sum(cast(coalesce(ws_quantity#9, 0) as bigint)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) + : +- Exchange hashpartitioning(ws_item_sk#7, 5) + : +- *(3) HashAggregate(keys=[ws_item_sk#7], functions=[partial_sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), partial_sum(cast(coalesce(ws_quantity#9, 0) as bigint)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) + : +- *(3) Project [ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] + : +- *(3) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] + : : +- *(3) BroadcastHashJoin [cast(ws_order_number#14 as bigint), cast(ws_item_sk#7 as bigint)], [wr_order_number#15, wr_item_sk#16], Inner, BuildRight + : : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_order_number#14, ws_quantity#9, ws_net_paid#11] + : : : +- *(3) Filter ((((((((isnotnull(ws_net_paid#11) && isnotnull(ws_quantity#9)) && isnotnull(ws_net_profit#17)) && (ws_net_profit#17 > 1.00)) && (ws_net_paid#11 > 0.00)) && (ws_quantity#9 > 0)) && isnotnull(ws_order_number#14)) && isnotnull(ws_item_sk#7)) && isnotnull(ws_sold_date_sk#12)) + : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#7,ws_order_number#14,ws_quantity#9,ws_net_paid#11,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_net_paid), IsNotNull(ws_quantity), IsNotNull(ws_net_profit), GreaterThan(ws_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(wr_order_number#15)) && isnotnull(wr_item_sk#16)) + : : +- *(1) FileScan parquet default.web_returns[wr_item_sk#16,wr_order_number#15,wr_return_quantity#8,wr_return_amt#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNot..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#13] + : +- *(2) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#19)) && (d_year#18 = 2001)) && (d_moy#19 = 12)) && isnotnull(d_date_sk#13)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_year#18,d_moy#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)], ReadSchema: struct :- *(14) Project [catalog AS channel#20, item#21, return_ratio#22, return_rank#23, currency_rank#24] : +- *(14) Filter ((return_rank#23 <= 10) || (currency_rank#24 <= 10)) : +- Window [rank(currency_ratio#25) windowspecdefinition(currency_ratio#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#24], [currency_ratio#25 ASC NULLS FIRST] @@ -44,8 +45,8 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,return_rank# : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] : : +- *(10) BroadcastHashJoin [cs_order_number#32, cs_item_sk#26], [cr_order_number#33, cr_item_sk#34], Inner, BuildRight : : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_order_number#32, cs_quantity#28, cs_net_paid#30] - : : : +- *(10) Filter ((((((((isnotnull(cs_net_profit#35) && isnotnull(cs_net_paid#30)) && isnotnull(cs_quantity#28)) && (cs_net_profit#35 > 1.00)) && (cs_net_paid#30 > 0.00)) && (cs_quantity#28 > 0)) && isnotnull(cs_item_sk#26)) && isnotnull(cs_order_number#32)) && isnotnull(cs_sold_date_sk#31)) - : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#26,cs_order_number#32,cs_quantity#28,cs_net_paid#30,cs_net_profit#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_net_profit), IsNotNull(cs_net_paid), IsNotNull(cs_quantity), GreaterThan(cs_net_pro..., ReadSchema: struct 1.00)) && (cs_net_paid#30 > 0.00)) && (cs_quantity#28 > 0)) && isnotnull(cs_item_sk#26)) && isnotnull(cs_order_number#32)) && isnotnull(cs_sold_date_sk#31)) + : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#26,cs_order_number#32,cs_quantity#28,cs_net_paid#30,cs_net_profit#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_net_paid), IsNotNull(cs_quantity), IsNotNull(cs_net_profit), GreaterThan(cs_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(cr_item_sk#34)) && isnotnull(cr_order_number#33)) @@ -53,23 +54,24 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,return_rank# : +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(21) Project [store AS channel#36, item#37, return_ratio#38, return_rank#39, currency_rank#40] +- *(21) Filter ((return_rank#39 <= 10) || (currency_rank#40 <= 10)) - +- Window [rank(currency_ratio#41) windowspecdefinition(currency_ratio#41 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#40], [currency_ratio#41 ASC NULLS FIRST] - +- *(20) Sort [currency_ratio#41 ASC NULLS FIRST], false, 0 - +- Window [rank(return_ratio#38) windowspecdefinition(return_ratio#38 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#39], [return_ratio#38 ASC NULLS FIRST] - +- *(19) Sort [return_ratio#38 ASC NULLS FIRST], false, 0 - +- Exchange SinglePartition - +- *(18) HashAggregate(keys=[ss_item_sk#42], functions=[sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), sum(cast(coalesce(ss_quantity#44, 0) as bigint)), sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) - +- Exchange hashpartitioning(ss_item_sk#42, 5) - +- *(17) HashAggregate(keys=[ss_item_sk#42], functions=[partial_sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), partial_sum(cast(coalesce(ss_quantity#44, 0) as bigint)), partial_sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) - +- *(17) Project [ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] - +- *(17) BroadcastHashJoin [ss_sold_date_sk#47], [d_date_sk#13], Inner, BuildRight - :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] - : +- *(17) BroadcastHashJoin [cast(ss_ticket_number#48 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#49, sr_item_sk#50], Inner, BuildRight - : :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_ticket_number#48, ss_quantity#44, ss_net_paid#46] - : : +- *(17) Filter ((((((((isnotnull(ss_quantity#44) && isnotnull(ss_net_paid#46)) && isnotnull(ss_net_profit#51)) && (ss_net_profit#51 > 1.00)) && (ss_net_paid#46 > 0.00)) && (ss_quantity#44 > 0)) && isnotnull(ss_ticket_number#48)) && isnotnull(ss_item_sk#42)) && isnotnull(ss_sold_date_sk#47)) - : : +- *(17) FileScan parquet default.store_sales[ss_sold_date_sk#47,ss_item_sk#42,ss_ticket_number#48,ss_quantity#44,ss_net_paid#46,ss_net_profit#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), IsNotNull(ss_net_paid), IsNotNull(ss_net_profit), GreaterThan(ss_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(sr_ticket_number#49)) && isnotnull(sr_item_sk#50)) - : +- *(15) FileScan parquet default.store_returns[sr_item_sk#50,sr_ticket_number#49,sr_return_quantity#43,sr_return_amt#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNo..., ReadSchema: struct - +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file + +- Window [rank(return_ratio#38) windowspecdefinition(return_ratio#38 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#39], [return_ratio#38 ASC NULLS FIRST] + +- *(20) Sort [return_ratio#38 ASC NULLS FIRST], false, 0 + +- *(20) Project [item#37, return_ratio#38, currency_rank#40] + +- Window [rank(currency_ratio#41) windowspecdefinition(currency_ratio#41 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#40], [currency_ratio#41 ASC NULLS FIRST] + +- *(19) Sort [currency_ratio#41 ASC NULLS FIRST], false, 0 + +- Exchange SinglePartition + +- *(18) HashAggregate(keys=[ss_item_sk#42], functions=[sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), sum(cast(coalesce(ss_quantity#44, 0) as bigint)), sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) + +- Exchange hashpartitioning(ss_item_sk#42, 5) + +- *(17) HashAggregate(keys=[ss_item_sk#42], functions=[partial_sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), partial_sum(cast(coalesce(ss_quantity#44, 0) as bigint)), partial_sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) + +- *(17) Project [ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] + +- *(17) BroadcastHashJoin [ss_sold_date_sk#47], [d_date_sk#13], Inner, BuildRight + :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] + : +- *(17) BroadcastHashJoin [cast(ss_ticket_number#48 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#49, sr_item_sk#50], Inner, BuildRight + : :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_ticket_number#48, ss_quantity#44, ss_net_paid#46] + : : +- *(17) Filter ((((((((isnotnull(ss_net_profit#51) && isnotnull(ss_quantity#44)) && isnotnull(ss_net_paid#46)) && (ss_net_profit#51 > 1.00)) && (ss_net_paid#46 > 0.00)) && (ss_quantity#44 > 0)) && isnotnull(ss_ticket_number#48)) && isnotnull(ss_item_sk#42)) && isnotnull(ss_sold_date_sk#47)) + : : +- *(17) FileScan parquet default.store_sales[ss_sold_date_sk#47,ss_item_sk#42,ss_ticket_number#48,ss_quantity#44,ss_net_paid#46,ss_net_profit#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_quantity), IsNotNull(ss_net_paid), GreaterThan(ss_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(sr_ticket_number#49)) && isnotnull(sr_item_sk#50)) + : +- *(15) FileScan parquet default.store_returns[sr_item_sk#50,sr_ticket_number#49,sr_return_quantity#43,sr_return_amt#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNo..., ReadSchema: struct + +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt index 7c9fedc27..623f3acb4 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt @@ -11,40 +11,41 @@ TakeOrderedAndProject [channel,currency_rank,item,return_rank,return_ratio] Project [currency_rank,item,return_rank,return_ratio] Filter [currency_rank,return_rank] InputAdapter - Window [currency_ratio] + Window [return_ratio] WholeStageCodegen - Sort [currency_ratio] - InputAdapter - Window [return_ratio] - WholeStageCodegen - Sort [return_ratio] - InputAdapter - Exchange #2 - WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),ws_item_sk] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00))] - InputAdapter - Exchange [ws_item_sk] #3 - WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sum,sum,sum,sum,wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Project [ws_item_sk,ws_net_paid,ws_order_number,ws_quantity,ws_sold_date_sk] - Filter [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] - Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] - Filter [wr_item_sk,wr_order_number,wr_return_amt] - Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Sort [return_ratio] + Project [currency_rank,item,return_ratio] + InputAdapter + Window [currency_ratio] + WholeStageCodegen + Sort [currency_ratio] + InputAdapter + Exchange #2 + WholeStageCodegen + HashAggregate [sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),ws_item_sk] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen + HashAggregate [sum,sum,sum,sum,sum,sum,sum,sum,wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [ws_item_sk,ws_net_paid,ws_order_number,ws_quantity,ws_sold_date_sk] + Filter [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] + Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] + Filter [wr_item_sk,wr_order_number,wr_return_amt] + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] WholeStageCodegen Project [currency_rank,item,return_rank,return_ratio] Filter [currency_rank,return_rank] @@ -83,33 +84,34 @@ TakeOrderedAndProject [channel,currency_rank,item,return_rank,return_ratio] Project [currency_rank,item,return_rank,return_ratio] Filter [currency_rank,return_rank] InputAdapter - Window [currency_ratio] + Window [return_ratio] WholeStageCodegen - Sort [currency_ratio] - InputAdapter - Window [return_ratio] - WholeStageCodegen - Sort [return_ratio] - InputAdapter - Exchange #9 - WholeStageCodegen - HashAggregate [ss_item_sk,sum,sum,sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00))] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00))] - InputAdapter - Exchange [ss_item_sk] #10 - WholeStageCodegen - HashAggregate [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity,ss_sold_date_sk] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_item_sk,ss_net_paid,ss_quantity,ss_sold_date_sk,ss_ticket_number] - Filter [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen - Project [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] - Filter [sr_item_sk,sr_return_amt,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #5 + Sort [return_ratio] + Project [currency_rank,item,return_ratio] + InputAdapter + Window [currency_ratio] + WholeStageCodegen + Sort [currency_ratio] + InputAdapter + Exchange #9 + WholeStageCodegen + HashAggregate [ss_item_sk,sum,sum,sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00))] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [ss_item_sk] #10 + WholeStageCodegen + HashAggregate [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_item_sk,ss_net_paid,ss_quantity,ss_sold_date_sk,ss_ticket_number] + Filter [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen + Project [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] + Filter [sr_item_sk,sr_return_amt,sr_ticket_number] + Scan parquet default.store_returns [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #5 From 7880f40f9c188ea36230bf4695ded9cdcbcb83e4 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Wed, 10 Mar 2021 19:15:02 -0800 Subject: [PATCH 12/31] updated instructions on how to run tests --- .../hyperspace/goldstandard/PlanStabilitySuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala index 08b18aaeb..6b1e4204e 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala @@ -57,22 +57,22 @@ import org.apache.spark.sql.execution.exchange.{Exchange, ReusedExchangeExec} * * To run the entire test suite: * {{{ - * build/sbt "sql/testOnly *PlanStabilitySuite" + * sbt "test:testOnly *PlanStabilitySuite" * }}} * * To run a single test file upon change: * {{{ - * build/sbt "sql/testOnly *PlanStabilitySuite -- -z (tpcds-v1.4/q49)" + * sbt "test:testOnly *PlanStabilitySuite -- -z (tpcds-v1.4/q49)" * }}} * * To re-generate golden files for entire suite, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *PlanStabilitySuite" + * SPARK_GENERATE_GOLDEN_FILES=1 sbt "test:testOnly *PlanStabilitySuite" * }}} * * To re-generate golden file for a single test, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *PlanStabilitySuite -- -z (tpcds-v1.4/q49)" + * SPARK_GENERATE_GOLDEN_FILES=1 sbt "test:testOnly *PlanStabilitySuite -- -z (tpcds-v1.4/q49)" * }}} */ // scalastyle:on filelinelengthchecker From 3dcd2d57e364c0f96826b1cd28b69558ee6ec62c Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Thu, 11 Mar 2021 16:17:25 -0800 Subject: [PATCH 13/31] test with updated plans for q47, q49 --- .../approved-plans-v1_4/q47/explain.txt | 85 +++++++------- .../approved-plans-v1_4/q47/simplified.txt | 103 +++++++++-------- .../approved-plans-v1_4/q49/explain.txt | 96 ++++++++-------- .../approved-plans-v1_4/q49/simplified.txt | 108 +++++++++--------- 4 files changed, 198 insertions(+), 194 deletions(-) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt index e7dcf37e4..47228048b 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt @@ -1,51 +1,52 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,s_store_name#3 ASC NULLS FIRST], output=[i_category#4,i_brand#5,s_store_name#3,s_company_name#6,d_year#7,d_moy#8,avg_monthly_sales#2,sum_sales#1,psum#9,nsum#10]) -+- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, avg_monthly_sales#2, sum_sales#1, sum_sales#11 AS psum#9, sum_sales#12 AS nsum#10] - +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, (rn#18 - 1)], Inner, BuildRight - :- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13, sum_sales#11] - : +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, (rn#23 + 1)], Inner, BuildRight - : :- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13] - : : +- *(22) Filter (((isnotnull(avg_monthly_sales#2) && (avg_monthly_sales#2 > 0.000000)) && (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000)) && isnotnull(rn#13)) - : : +- Window [avg(_w0#24) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7] - : : +- *(7) Filter (isnotnull(d_year#7) && (d_year#7 = 1999)) - : : +- Window [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#13], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] - : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#6 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, 5) - : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#25))]) - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 5) - : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#25))]) - : : +- *(4) Project [i_brand#5, i_category#4, ss_sales_price#25, d_year#7, d_moy#8, s_store_name#3, s_company_name#6] - : : +- *(4) BroadcastHashJoin [ss_store_sk#26], [s_store_sk#27], Inner, BuildRight - : : :- *(4) Project [i_brand#5, i_category#4, ss_store_sk#26, ss_sales_price#25, d_year#7, d_moy#8] - : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#28], [d_date_sk#29], Inner, BuildRight - : : : :- *(4) Project [i_brand#5, i_category#4, ss_sold_date_sk#28, ss_store_sk#26, ss_sales_price#25] - : : : : +- *(4) BroadcastHashJoin [i_item_sk#30], [ss_item_sk#31], Inner, BuildRight - : : : : :- *(4) Project [i_item_sk#30, i_brand#5, i_category#4] - : : : : : +- *(4) Filter ((isnotnull(i_item_sk#30) && isnotnull(i_brand#5)) && isnotnull(i_category#4)) - : : : : : +- *(4) FileScan parquet default.item[i_item_sk#30,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : : : +- *(1) Project [ss_sold_date_sk#28, ss_item_sk#31, ss_store_sk#26, ss_sales_price#25] - : : : : +- *(1) Filter ((isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#28)) && isnotnull(ss_store_sk#26)) - : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#28,ss_item_sk#31,ss_store_sk#26,ss_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [d_date_sk#29, d_year#7, d_moy#8] - : : : +- *(2) Filter ((((d_year#7 = 1999) || ((d_year#7 = 1998) && (d_moy#8 = 12))) || ((d_year#7 = 2000) && (d_moy#8 = 1))) && isnotnull(d_date_sk#29)) - : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#29,d_year#7,d_moy#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [s_store_sk#27, s_store_name#3, s_company_name#6] - : : +- *(3) Filter ((isnotnull(s_store_sk#27) && isnotnull(s_store_name#3)) && isnotnull(s_company_name#6)) - : : +- *(3) FileScan parquet default.store[s_store_sk#27,s_store_name#3,s_company_name#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)], ReadSchema: struct ++- *(23) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, avg_monthly_sales#2, sum_sales#1, sum_sales#11 AS psum#9, sum_sales#12 AS nsum#10] + +- *(23) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, (rn#18 - 1)], Inner, BuildRight + :- *(23) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13, sum_sales#11] + : +- *(23) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, (rn#23 + 1)], Inner, BuildRight + : :- *(23) Filter (((((isnotnull(d_year#7) && isnotnull(avg_monthly_sales#2)) && (d_year#7 = 1999)) && (avg_monthly_sales#2 > 0.000000)) && (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000)) && isnotnull(rn#13)) + : : +- Window [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#13], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] + : : +- *(8) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#6 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, 5) + : : +- *(7) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2] + : : +- Window [avg(_w0#24) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7] + : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#6 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST], false, 0 + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, 5) + : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#25))]) + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 5) + : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#25))]) + : : +- *(4) Project [i_brand#5, i_category#4, ss_sales_price#25, d_year#7, d_moy#8, s_store_name#3, s_company_name#6] + : : +- *(4) BroadcastHashJoin [ss_store_sk#26], [s_store_sk#27], Inner, BuildRight + : : :- *(4) Project [i_brand#5, i_category#4, ss_store_sk#26, ss_sales_price#25, d_year#7, d_moy#8] + : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#28], [d_date_sk#29], Inner, BuildRight + : : : :- *(4) Project [i_brand#5, i_category#4, ss_sold_date_sk#28, ss_store_sk#26, ss_sales_price#25] + : : : : +- *(4) BroadcastHashJoin [i_item_sk#30], [ss_item_sk#31], Inner, BuildRight + : : : : :- *(4) Project [i_item_sk#30, i_brand#5, i_category#4] + : : : : : +- *(4) Filter ((isnotnull(i_item_sk#30) && isnotnull(i_category#4)) && isnotnull(i_brand#5)) + : : : : : +- *(4) FileScan parquet default.item[i_item_sk#30,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : : +- *(1) Project [ss_sold_date_sk#28, ss_item_sk#31, ss_store_sk#26, ss_sales_price#25] + : : : : +- *(1) Filter ((isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#28)) && isnotnull(ss_store_sk#26)) + : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#28,ss_item_sk#31,ss_store_sk#26,ss_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [d_date_sk#29, d_year#7, d_moy#8] + : : : +- *(2) Filter ((((d_year#7 = 1999) || ((d_year#7 = 1998) && (d_moy#8 = 12))) || ((d_year#7 = 2000) && (d_moy#8 = 1))) && isnotnull(d_date_sk#29)) + : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#29,d_year#7,d_moy#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [s_store_sk#27, s_store_name#3, s_company_name#6] + : : +- *(3) Filter ((isnotnull(s_store_sk#27) && isnotnull(s_store_name#3)) && isnotnull(s_company_name#6)) + : : +- *(3) FileScan parquet default.store[s_store_sk#27,s_store_name#3,s_company_name#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] + 1))) - : +- *(14) Project [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#11, rn#23] - : +- *(14) Filter isnotnull(rn#23) + : +- *(15) Project [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#11, rn#23] + : +- *(15) Filter isnotnull(rn#23) : +- Window [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#23], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] - : +- *(13) Sort [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, s_store_name#21 ASC NULLS FIRST, s_company_name#22 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 + : +- *(14) Sort [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, s_store_name#21 ASC NULLS FIRST, s_company_name#22 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 5) - : +- *(12) HashAggregate(keys=[i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33], functions=[sum(UnscaledValue(ss_sales_price#25))]) + : +- *(13) HashAggregate(keys=[i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33], functions=[sum(UnscaledValue(ss_sales_price#25))]) : +- ReusedExchange [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33, sum#34], Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 5) +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] - 1))) - +- *(21) Project [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, sum_sales#12, rn#18] - +- *(21) Filter isnotnull(rn#18) + +- *(22) Project [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, sum_sales#12, rn#18] + +- *(22) Filter isnotnull(rn#18) +- Window [rank(d_year#35, d_moy#36) windowspecdefinition(i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#18], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17], [d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST] - +- *(20) Sort [i_category#14 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#16 ASC NULLS FIRST, s_company_name#17 ASC NULLS FIRST, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST], false, 0 + +- *(21) Sort [i_category#14 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#16 ASC NULLS FIRST, s_company_name#17 ASC NULLS FIRST, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST], false, 0 +- ReusedExchange [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35, d_moy#36, sum_sales#12], Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 5) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt index f0ed21cce..9eeaffdaa 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt @@ -4,53 +4,56 @@ TakeOrderedAndProject [avg_monthly_sales,d_moy,d_year,i_brand,i_category,nsum,ps BroadcastHashJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,rn,s_company_name,s_store_name,sum_sales,sum_sales] BroadcastHashJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] - Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] - Filter [avg_monthly_sales,rn,sum_sales] - InputAdapter - Window [_w0,d_year,i_brand,i_category,s_company_name,s_store_name] - WholeStageCodegen - Filter [d_year] - InputAdapter - Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] - WholeStageCodegen - Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] - InputAdapter - Exchange [i_brand,i_category,s_company_name,s_store_name] #1 - WholeStageCodegen - HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] - InputAdapter - Exchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] #2 - WholeStageCodegen - HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price,sum,sum] [sum,sum] - Project [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [d_moy,d_year,i_brand,i_category,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_brand,i_category,ss_sales_price,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_brand,i_category,i_item_sk] - Filter [i_brand,i_category,i_item_sk] - Scan parquet default.item [i_brand,i_category,i_item_sk] [i_brand,i_category,i_item_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk,d_moy,d_year] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [s_company_name,s_store_name,s_store_sk] - Filter [s_company_name,s_store_name,s_store_sk] - Scan parquet default.store [s_company_name,s_store_name,s_store_sk] [s_company_name,s_store_name,s_store_sk] + Filter [avg_monthly_sales,d_year,rn,sum_sales] + InputAdapter + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,s_company_name,s_store_name] #1 + WholeStageCodegen + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] + InputAdapter + Window [_w0,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen + Sort [d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [d_year,i_brand,i_category,s_company_name,s_store_name] #2 + WholeStageCodegen + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] #3 + WholeStageCodegen + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price,sum,sum] [sum,sum] + Project [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,d_year,i_brand,i_category,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand,i_category,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_brand,i_category,i_item_sk] + Filter [i_brand,i_category,i_item_sk] + Scan parquet default.item [i_brand,i_category,i_item_sk] [i_brand,i_category,i_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk,d_moy,d_year] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [s_company_name,s_store_name,s_store_sk] + Filter [s_company_name,s_store_name,s_store_sk] + Scan parquet default.store [s_company_name,s_store_name,s_store_sk] [s_company_name,s_store_name,s_store_sk] InputAdapter - BroadcastExchange #6 + BroadcastExchange #7 WholeStageCodegen Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] Filter [rn] @@ -59,13 +62,13 @@ TakeOrderedAndProject [avg_monthly_sales,d_moy,d_year,i_brand,i_category,nsum,ps WholeStageCodegen Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] InputAdapter - Exchange [i_brand,i_category,s_company_name,s_store_name] #7 + Exchange [i_brand,i_category,s_company_name,s_store_name] #8 WholeStageCodegen HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum,sum(UnscaledValue(ss_sales_price))] [sum,sum(UnscaledValue(ss_sales_price)),sum_sales] InputAdapter - ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] #2 + ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] #3 InputAdapter - BroadcastExchange #8 + BroadcastExchange #9 WholeStageCodegen Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] Filter [rn] @@ -74,4 +77,4 @@ TakeOrderedAndProject [avg_monthly_sales,d_moy,d_year,i_brand,i_category,nsum,ps WholeStageCodegen Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] InputAdapter - ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] #7 + ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] #8 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt index 1b4779912..2b0459139 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt @@ -6,52 +6,52 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,return_rank# +- Union :- *(7) Project [web AS channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3] : +- *(7) Filter ((return_rank#2 <= 10) || (currency_rank#3 <= 10)) - : +- Window [rank(return_ratio#5) windowspecdefinition(return_ratio#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#2], [return_ratio#5 ASC NULLS FIRST] - : +- *(6) Sort [return_ratio#5 ASC NULLS FIRST], false, 0 - : +- *(6) Project [item#4, return_ratio#5, currency_rank#3] - : +- Window [rank(currency_ratio#6) windowspecdefinition(currency_ratio#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#3], [currency_ratio#6 ASC NULLS FIRST] - : +- *(5) Sort [currency_ratio#6 ASC NULLS FIRST], false, 0 - : +- Exchange SinglePartition - : +- *(4) HashAggregate(keys=[ws_item_sk#7], functions=[sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), sum(cast(coalesce(ws_quantity#9, 0) as bigint)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) - : +- Exchange hashpartitioning(ws_item_sk#7, 5) - : +- *(3) HashAggregate(keys=[ws_item_sk#7], functions=[partial_sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), partial_sum(cast(coalesce(ws_quantity#9, 0) as bigint)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) - : +- *(3) Project [ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] - : +- *(3) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight - : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] - : : +- *(3) BroadcastHashJoin [cast(ws_order_number#14 as bigint), cast(ws_item_sk#7 as bigint)], [wr_order_number#15, wr_item_sk#16], Inner, BuildRight - : : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_order_number#14, ws_quantity#9, ws_net_paid#11] - : : : +- *(3) Filter ((((((((isnotnull(ws_net_paid#11) && isnotnull(ws_quantity#9)) && isnotnull(ws_net_profit#17)) && (ws_net_profit#17 > 1.00)) && (ws_net_paid#11 > 0.00)) && (ws_quantity#9 > 0)) && isnotnull(ws_order_number#14)) && isnotnull(ws_item_sk#7)) && isnotnull(ws_sold_date_sk#12)) - : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#7,ws_order_number#14,ws_quantity#9,ws_net_paid#11,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_net_paid), IsNotNull(ws_quantity), IsNotNull(ws_net_profit), GreaterThan(ws_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(wr_order_number#15)) && isnotnull(wr_item_sk#16)) - : : +- *(1) FileScan parquet default.web_returns[wr_item_sk#16,wr_order_number#15,wr_return_quantity#8,wr_return_amt#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNot..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#13] - : +- *(2) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#19)) && (d_year#18 = 2001)) && (d_moy#19 = 12)) && isnotnull(d_date_sk#13)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_year#18,d_moy#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)], ReadSchema: struct + : +- Window [rank(currency_ratio#6) windowspecdefinition(currency_ratio#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#3], [currency_ratio#6 ASC NULLS FIRST] + : +- *(6) Sort [currency_ratio#6 ASC NULLS FIRST], false, 0 + : +- Window [rank(return_ratio#5) windowspecdefinition(return_ratio#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#2], [return_ratio#5 ASC NULLS FIRST] + : +- *(5) Sort [return_ratio#5 ASC NULLS FIRST], false, 0 + : +- Exchange SinglePartition + : +- *(4) HashAggregate(keys=[ws_item_sk#7], functions=[sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), sum(cast(coalesce(ws_quantity#9, 0) as bigint)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) + : +- Exchange hashpartitioning(ws_item_sk#7, 5) + : +- *(3) HashAggregate(keys=[ws_item_sk#7], functions=[partial_sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), partial_sum(cast(coalesce(ws_quantity#9, 0) as bigint)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) + : +- *(3) Project [ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] + : +- *(3) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] + : : +- *(3) BroadcastHashJoin [cast(ws_order_number#14 as bigint), cast(ws_item_sk#7 as bigint)], [wr_order_number#15, wr_item_sk#16], Inner, BuildRight + : : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_order_number#14, ws_quantity#9, ws_net_paid#11] + : : : +- *(3) Filter ((((((((isnotnull(ws_net_profit#17) && isnotnull(ws_net_paid#11)) && isnotnull(ws_quantity#9)) && (ws_net_profit#17 > 1.00)) && (ws_net_paid#11 > 0.00)) && (ws_quantity#9 > 0)) && isnotnull(ws_order_number#14)) && isnotnull(ws_item_sk#7)) && isnotnull(ws_sold_date_sk#12)) + : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#7,ws_order_number#14,ws_quantity#9,ws_net_paid#11,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_net_paid), IsNotNull(ws_quantity), GreaterThan(ws_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(wr_item_sk#16)) && isnotnull(wr_order_number#15)) + : : +- *(1) FileScan parquet default.web_returns[wr_item_sk#16,wr_order_number#15,wr_return_quantity#8,wr_return_amt#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_item_sk), IsNotNull(..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#13] + : +- *(2) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#19)) && (d_year#18 = 2001)) && (d_moy#19 = 12)) && isnotnull(d_date_sk#13)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_year#18,d_moy#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)], ReadSchema: struct :- *(14) Project [catalog AS channel#20, item#21, return_ratio#22, return_rank#23, currency_rank#24] : +- *(14) Filter ((return_rank#23 <= 10) || (currency_rank#24 <= 10)) - : +- Window [rank(currency_ratio#25) windowspecdefinition(currency_ratio#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#24], [currency_ratio#25 ASC NULLS FIRST] - : +- *(13) Sort [currency_ratio#25 ASC NULLS FIRST], false, 0 - : +- Window [rank(return_ratio#22) windowspecdefinition(return_ratio#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#23], [return_ratio#22 ASC NULLS FIRST] - : +- *(12) Sort [return_ratio#22 ASC NULLS FIRST], false, 0 - : +- Exchange SinglePartition - : +- *(11) HashAggregate(keys=[cs_item_sk#26], functions=[sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), sum(cast(coalesce(cs_quantity#28, 0) as bigint)), sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) - : +- Exchange hashpartitioning(cs_item_sk#26, 5) - : +- *(10) HashAggregate(keys=[cs_item_sk#26], functions=[partial_sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#28, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) - : +- *(10) Project [cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] - : +- *(10) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#13], Inner, BuildRight - : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] - : : +- *(10) BroadcastHashJoin [cs_order_number#32, cs_item_sk#26], [cr_order_number#33, cr_item_sk#34], Inner, BuildRight - : : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_order_number#32, cs_quantity#28, cs_net_paid#30] - : : : +- *(10) Filter ((((((((isnotnull(cs_net_paid#30) && isnotnull(cs_quantity#28)) && isnotnull(cs_net_profit#35)) && (cs_net_profit#35 > 1.00)) && (cs_net_paid#30 > 0.00)) && (cs_quantity#28 > 0)) && isnotnull(cs_item_sk#26)) && isnotnull(cs_order_number#32)) && isnotnull(cs_sold_date_sk#31)) - : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#26,cs_order_number#32,cs_quantity#28,cs_net_paid#30,cs_net_profit#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_net_paid), IsNotNull(cs_quantity), IsNotNull(cs_net_profit), GreaterThan(cs_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(cr_item_sk#34)) && isnotnull(cr_order_number#33)) - : : +- *(8) FileScan parquet default.catalog_returns[cr_item_sk#34,cr_order_number#33,cr_return_quantity#27,cr_return_amount#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_item_sk), IsNo..., ReadSchema: struct - : +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- Window [rank(return_ratio#22) windowspecdefinition(return_ratio#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#23], [return_ratio#22 ASC NULLS FIRST] + : +- *(13) Sort [return_ratio#22 ASC NULLS FIRST], false, 0 + : +- *(13) Project [item#21, return_ratio#22, currency_rank#24] + : +- Window [rank(currency_ratio#25) windowspecdefinition(currency_ratio#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#24], [currency_ratio#25 ASC NULLS FIRST] + : +- *(12) Sort [currency_ratio#25 ASC NULLS FIRST], false, 0 + : +- Exchange SinglePartition + : +- *(11) HashAggregate(keys=[cs_item_sk#26], functions=[sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), sum(cast(coalesce(cs_quantity#28, 0) as bigint)), sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) + : +- Exchange hashpartitioning(cs_item_sk#26, 5) + : +- *(10) HashAggregate(keys=[cs_item_sk#26], functions=[partial_sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#28, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) + : +- *(10) Project [cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] + : +- *(10) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#13], Inner, BuildRight + : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] + : : +- *(10) BroadcastHashJoin [cs_order_number#32, cs_item_sk#26], [cr_order_number#33, cr_item_sk#34], Inner, BuildRight + : : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_order_number#32, cs_quantity#28, cs_net_paid#30] + : : : +- *(10) Filter ((((((((isnotnull(cs_net_paid#30) && isnotnull(cs_quantity#28)) && isnotnull(cs_net_profit#35)) && (cs_net_profit#35 > 1.00)) && (cs_net_paid#30 > 0.00)) && (cs_quantity#28 > 0)) && isnotnull(cs_item_sk#26)) && isnotnull(cs_order_number#32)) && isnotnull(cs_sold_date_sk#31)) + : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#26,cs_order_number#32,cs_quantity#28,cs_net_paid#30,cs_net_profit#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_net_paid), IsNotNull(cs_quantity), IsNotNull(cs_net_profit), GreaterThan(cs_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(cr_order_number#33)) && isnotnull(cr_item_sk#34)) + : : +- *(8) FileScan parquet default.catalog_returns[cr_item_sk#34,cr_order_number#33,cr_return_quantity#27,cr_return_amount#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number),..., ReadSchema: struct + : +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(21) Project [store AS channel#36, item#37, return_ratio#38, return_rank#39, currency_rank#40] +- *(21) Filter ((return_rank#39 <= 10) || (currency_rank#40 <= 10)) +- Window [rank(return_ratio#38) windowspecdefinition(return_ratio#38 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#39], [return_ratio#38 ASC NULLS FIRST] @@ -68,10 +68,10 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,return_rank# :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] : +- *(17) BroadcastHashJoin [cast(ss_ticket_number#48 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#49, sr_item_sk#50], Inner, BuildRight : :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_ticket_number#48, ss_quantity#44, ss_net_paid#46] - : : +- *(17) Filter ((((((((isnotnull(ss_net_profit#51) && isnotnull(ss_quantity#44)) && isnotnull(ss_net_paid#46)) && (ss_net_profit#51 > 1.00)) && (ss_net_paid#46 > 0.00)) && (ss_quantity#44 > 0)) && isnotnull(ss_ticket_number#48)) && isnotnull(ss_item_sk#42)) && isnotnull(ss_sold_date_sk#47)) - : : +- *(17) FileScan parquet default.store_sales[ss_sold_date_sk#47,ss_item_sk#42,ss_ticket_number#48,ss_quantity#44,ss_net_paid#46,ss_net_profit#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_quantity), IsNotNull(ss_net_paid), GreaterThan(ss_net_pro..., ReadSchema: struct 1.00)) && (ss_net_paid#46 > 0.00)) && (ss_quantity#44 > 0)) && isnotnull(ss_ticket_number#48)) && isnotnull(ss_item_sk#42)) && isnotnull(ss_sold_date_sk#47)) + : : +- *(17) FileScan parquet default.store_sales[ss_sold_date_sk#47,ss_item_sk#42,ss_ticket_number#48,ss_quantity#44,ss_net_paid#46,ss_net_profit#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_net_paid), IsNotNull(ss_net_profit), IsNotNull(ss_quantity), GreaterThan(ss_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(sr_ticket_number#49)) && isnotnull(sr_item_sk#50)) - : +- *(15) FileScan parquet default.store_returns[sr_item_sk#50,sr_ticket_number#49,sr_return_quantity#43,sr_return_amt#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNo..., ReadSchema: struct + : +- *(15) Filter (((isnotnull(sr_return_amt#45) && (sr_return_amt#45 > 10000.00)) && isnotnull(sr_item_sk#50)) && isnotnull(sr_ticket_number#49)) + : +- *(15) FileScan parquet default.store_returns[sr_item_sk#50,sr_ticket_number#49,sr_return_quantity#43,sr_return_amt#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_item_sk), IsNotNull(..., ReadSchema: struct +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt index 623f3acb4..238eb448c 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt @@ -7,6 +7,44 @@ TakeOrderedAndProject [channel,currency_rank,item,return_rank,return_ratio] HashAggregate [channel,currency_rank,item,return_rank,return_ratio] InputAdapter Union + WholeStageCodegen + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen + Sort [return_ratio] + InputAdapter + Exchange #2 + WholeStageCodegen + HashAggregate [sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),ws_item_sk] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen + HashAggregate [sum,sum,sum,sum,sum,sum,sum,sum,wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [ws_item_sk,ws_net_paid,ws_order_number,ws_quantity,ws_sold_date_sk] + Filter [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] + Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] + Filter [wr_item_sk,wr_order_number,wr_return_amt] + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] WholeStageCodegen Project [currency_rank,item,return_rank,return_ratio] Filter [currency_rank,return_rank] @@ -20,66 +58,28 @@ TakeOrderedAndProject [channel,currency_rank,item,return_rank,return_ratio] WholeStageCodegen Sort [currency_ratio] InputAdapter - Exchange #2 + Exchange #6 WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),ws_item_sk] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00))] + HashAggregate [cs_item_sk,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] InputAdapter - Exchange [ws_item_sk] #3 + Exchange [cs_item_sk] #7 WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sum,sum,sum,sum,wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Project [ws_item_sk,ws_net_paid,ws_order_number,ws_quantity,ws_sold_date_sk] - Filter [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] - Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] + HashAggregate [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_item_sk,cs_net_paid,cs_order_number,cs_quantity,cs_sold_date_sk] + Filter [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] InputAdapter - BroadcastExchange #4 + BroadcastExchange #8 WholeStageCodegen - Project [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] - Filter [wr_item_sk,wr_order_number,wr_return_amt] - Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] + Project [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] + Filter [cr_item_sk,cr_order_number,cr_return_amount] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - WholeStageCodegen - Project [currency_rank,item,return_rank,return_ratio] - Filter [currency_rank,return_rank] - InputAdapter - Window [currency_ratio] - WholeStageCodegen - Sort [currency_ratio] - InputAdapter - Window [return_ratio] - WholeStageCodegen - Sort [return_ratio] - InputAdapter - Exchange #6 - WholeStageCodegen - HashAggregate [cs_item_sk,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] - InputAdapter - Exchange [cs_item_sk] #7 - WholeStageCodegen - HashAggregate [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity,cs_sold_date_sk] - BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] - Project [cs_item_sk,cs_net_paid,cs_order_number,cs_quantity,cs_sold_date_sk] - Filter [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] - Filter [cr_item_sk,cr_order_number,cr_return_amount] - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #5 + ReusedExchange [d_date_sk] [d_date_sk] #5 WholeStageCodegen Project [currency_rank,item,return_rank,return_ratio] Filter [currency_rank,return_rank] From a7a1149a1ecf66820246f5ee2940182d8337dcb8 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Thu, 11 Mar 2021 17:36:04 -0800 Subject: [PATCH 14/31] update q47, 49 plans --- .../approved-plans-v1_4/q47/explain.txt | 85 +++++++------- .../approved-plans-v1_4/q47/simplified.txt | 103 ++++++++--------- .../approved-plans-v1_4/q49/explain.txt | 96 ++++++++-------- .../approved-plans-v1_4/q49/simplified.txt | 108 +++++++++--------- 4 files changed, 194 insertions(+), 198 deletions(-) diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt index 47228048b..e7dcf37e4 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt @@ -1,52 +1,51 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,s_store_name#3 ASC NULLS FIRST], output=[i_category#4,i_brand#5,s_store_name#3,s_company_name#6,d_year#7,d_moy#8,avg_monthly_sales#2,sum_sales#1,psum#9,nsum#10]) -+- *(23) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, avg_monthly_sales#2, sum_sales#1, sum_sales#11 AS psum#9, sum_sales#12 AS nsum#10] - +- *(23) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, (rn#18 - 1)], Inner, BuildRight - :- *(23) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13, sum_sales#11] - : +- *(23) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, (rn#23 + 1)], Inner, BuildRight - : :- *(23) Filter (((((isnotnull(d_year#7) && isnotnull(avg_monthly_sales#2)) && (d_year#7 = 1999)) && (avg_monthly_sales#2 > 0.000000)) && (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000)) && isnotnull(rn#13)) - : : +- Window [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#13], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] - : : +- *(8) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#6 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, 5) - : : +- *(7) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2] - : : +- Window [avg(_w0#24) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7] - : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#6 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST], false, 0 - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, 5) - : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#25))]) - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 5) - : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#25))]) - : : +- *(4) Project [i_brand#5, i_category#4, ss_sales_price#25, d_year#7, d_moy#8, s_store_name#3, s_company_name#6] - : : +- *(4) BroadcastHashJoin [ss_store_sk#26], [s_store_sk#27], Inner, BuildRight - : : :- *(4) Project [i_brand#5, i_category#4, ss_store_sk#26, ss_sales_price#25, d_year#7, d_moy#8] - : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#28], [d_date_sk#29], Inner, BuildRight - : : : :- *(4) Project [i_brand#5, i_category#4, ss_sold_date_sk#28, ss_store_sk#26, ss_sales_price#25] - : : : : +- *(4) BroadcastHashJoin [i_item_sk#30], [ss_item_sk#31], Inner, BuildRight - : : : : :- *(4) Project [i_item_sk#30, i_brand#5, i_category#4] - : : : : : +- *(4) Filter ((isnotnull(i_item_sk#30) && isnotnull(i_category#4)) && isnotnull(i_brand#5)) - : : : : : +- *(4) FileScan parquet default.item[i_item_sk#30,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : : : +- *(1) Project [ss_sold_date_sk#28, ss_item_sk#31, ss_store_sk#26, ss_sales_price#25] - : : : : +- *(1) Filter ((isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#28)) && isnotnull(ss_store_sk#26)) - : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#28,ss_item_sk#31,ss_store_sk#26,ss_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [d_date_sk#29, d_year#7, d_moy#8] - : : : +- *(2) Filter ((((d_year#7 = 1999) || ((d_year#7 = 1998) && (d_moy#8 = 12))) || ((d_year#7 = 2000) && (d_moy#8 = 1))) && isnotnull(d_date_sk#29)) - : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#29,d_year#7,d_moy#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [s_store_sk#27, s_store_name#3, s_company_name#6] - : : +- *(3) Filter ((isnotnull(s_store_sk#27) && isnotnull(s_store_name#3)) && isnotnull(s_company_name#6)) - : : +- *(3) FileScan parquet default.store[s_store_sk#27,s_store_name#3,s_company_name#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)], ReadSchema: struct ++- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, avg_monthly_sales#2, sum_sales#1, sum_sales#11 AS psum#9, sum_sales#12 AS nsum#10] + +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, (rn#18 - 1)], Inner, BuildRight + :- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13, sum_sales#11] + : +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, (rn#23 + 1)], Inner, BuildRight + : :- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13] + : : +- *(22) Filter (((isnotnull(avg_monthly_sales#2) && (avg_monthly_sales#2 > 0.000000)) && (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000)) && isnotnull(rn#13)) + : : +- Window [avg(_w0#24) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7] + : : +- *(7) Filter (isnotnull(d_year#7) && (d_year#7 = 1999)) + : : +- Window [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#13], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] + : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#6 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, 5) + : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#25))]) + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 5) + : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#25))]) + : : +- *(4) Project [i_brand#5, i_category#4, ss_sales_price#25, d_year#7, d_moy#8, s_store_name#3, s_company_name#6] + : : +- *(4) BroadcastHashJoin [ss_store_sk#26], [s_store_sk#27], Inner, BuildRight + : : :- *(4) Project [i_brand#5, i_category#4, ss_store_sk#26, ss_sales_price#25, d_year#7, d_moy#8] + : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#28], [d_date_sk#29], Inner, BuildRight + : : : :- *(4) Project [i_brand#5, i_category#4, ss_sold_date_sk#28, ss_store_sk#26, ss_sales_price#25] + : : : : +- *(4) BroadcastHashJoin [i_item_sk#30], [ss_item_sk#31], Inner, BuildRight + : : : : :- *(4) Project [i_item_sk#30, i_brand#5, i_category#4] + : : : : : +- *(4) Filter ((isnotnull(i_item_sk#30) && isnotnull(i_brand#5)) && isnotnull(i_category#4)) + : : : : : +- *(4) FileScan parquet default.item[i_item_sk#30,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : : +- *(1) Project [ss_sold_date_sk#28, ss_item_sk#31, ss_store_sk#26, ss_sales_price#25] + : : : : +- *(1) Filter ((isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#28)) && isnotnull(ss_store_sk#26)) + : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#28,ss_item_sk#31,ss_store_sk#26,ss_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [d_date_sk#29, d_year#7, d_moy#8] + : : : +- *(2) Filter ((((d_year#7 = 1999) || ((d_year#7 = 1998) && (d_moy#8 = 12))) || ((d_year#7 = 2000) && (d_moy#8 = 1))) && isnotnull(d_date_sk#29)) + : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#29,d_year#7,d_moy#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [s_store_sk#27, s_store_name#3, s_company_name#6] + : : +- *(3) Filter ((isnotnull(s_store_sk#27) && isnotnull(s_store_name#3)) && isnotnull(s_company_name#6)) + : : +- *(3) FileScan parquet default.store[s_store_sk#27,s_store_name#3,s_company_name#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] + 1))) - : +- *(15) Project [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#11, rn#23] - : +- *(15) Filter isnotnull(rn#23) + : +- *(14) Project [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#11, rn#23] + : +- *(14) Filter isnotnull(rn#23) : +- Window [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#23], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] - : +- *(14) Sort [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, s_store_name#21 ASC NULLS FIRST, s_company_name#22 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 + : +- *(13) Sort [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, s_store_name#21 ASC NULLS FIRST, s_company_name#22 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 5) - : +- *(13) HashAggregate(keys=[i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33], functions=[sum(UnscaledValue(ss_sales_price#25))]) + : +- *(12) HashAggregate(keys=[i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33], functions=[sum(UnscaledValue(ss_sales_price#25))]) : +- ReusedExchange [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33, sum#34], Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 5) +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] - 1))) - +- *(22) Project [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, sum_sales#12, rn#18] - +- *(22) Filter isnotnull(rn#18) + +- *(21) Project [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, sum_sales#12, rn#18] + +- *(21) Filter isnotnull(rn#18) +- Window [rank(d_year#35, d_moy#36) windowspecdefinition(i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#18], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17], [d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST] - +- *(21) Sort [i_category#14 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#16 ASC NULLS FIRST, s_company_name#17 ASC NULLS FIRST, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST], false, 0 + +- *(20) Sort [i_category#14 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#16 ASC NULLS FIRST, s_company_name#17 ASC NULLS FIRST, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST], false, 0 +- ReusedExchange [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35, d_moy#36, sum_sales#12], Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 5) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt index 9eeaffdaa..f0ed21cce 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt @@ -4,56 +4,53 @@ TakeOrderedAndProject [avg_monthly_sales,d_moy,d_year,i_brand,i_category,nsum,ps BroadcastHashJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,rn,s_company_name,s_store_name,sum_sales,sum_sales] BroadcastHashJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] - Filter [avg_monthly_sales,d_year,rn,sum_sales] - InputAdapter - Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] - WholeStageCodegen - Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] - InputAdapter - Exchange [i_brand,i_category,s_company_name,s_store_name] #1 - WholeStageCodegen - Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] - InputAdapter - Window [_w0,d_year,i_brand,i_category,s_company_name,s_store_name] - WholeStageCodegen - Sort [d_year,i_brand,i_category,s_company_name,s_store_name] - InputAdapter - Exchange [d_year,i_brand,i_category,s_company_name,s_store_name] #2 - WholeStageCodegen - HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] - InputAdapter - Exchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] #3 - WholeStageCodegen - HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price,sum,sum] [sum,sum] - Project [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [d_moy,d_year,i_brand,i_category,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_brand,i_category,ss_sales_price,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_brand,i_category,i_item_sk] - Filter [i_brand,i_category,i_item_sk] - Scan parquet default.item [i_brand,i_category,i_item_sk] [i_brand,i_category,i_item_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk,d_moy,d_year] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [s_company_name,s_store_name,s_store_sk] - Filter [s_company_name,s_store_name,s_store_sk] - Scan parquet default.store [s_company_name,s_store_name,s_store_sk] [s_company_name,s_store_name,s_store_sk] + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] + Filter [avg_monthly_sales,rn,sum_sales] + InputAdapter + Window [_w0,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen + Filter [d_year] + InputAdapter + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,s_company_name,s_store_name] #1 + WholeStageCodegen + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] #2 + WholeStageCodegen + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price,sum,sum] [sum,sum] + Project [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,d_year,i_brand,i_category,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand,i_category,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_brand,i_category,i_item_sk] + Filter [i_brand,i_category,i_item_sk] + Scan parquet default.item [i_brand,i_category,i_item_sk] [i_brand,i_category,i_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk,d_moy,d_year] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [s_company_name,s_store_name,s_store_sk] + Filter [s_company_name,s_store_name,s_store_sk] + Scan parquet default.store [s_company_name,s_store_name,s_store_sk] [s_company_name,s_store_name,s_store_sk] InputAdapter - BroadcastExchange #7 + BroadcastExchange #6 WholeStageCodegen Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] Filter [rn] @@ -62,13 +59,13 @@ TakeOrderedAndProject [avg_monthly_sales,d_moy,d_year,i_brand,i_category,nsum,ps WholeStageCodegen Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] InputAdapter - Exchange [i_brand,i_category,s_company_name,s_store_name] #8 + Exchange [i_brand,i_category,s_company_name,s_store_name] #7 WholeStageCodegen HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum,sum(UnscaledValue(ss_sales_price))] [sum,sum(UnscaledValue(ss_sales_price)),sum_sales] InputAdapter - ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] #3 + ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] #2 InputAdapter - BroadcastExchange #9 + BroadcastExchange #8 WholeStageCodegen Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] Filter [rn] @@ -77,4 +74,4 @@ TakeOrderedAndProject [avg_monthly_sales,d_moy,d_year,i_brand,i_category,nsum,ps WholeStageCodegen Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] InputAdapter - ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] #8 + ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] #7 diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt index 2b0459139..1b4779912 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt @@ -6,52 +6,52 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,return_rank# +- Union :- *(7) Project [web AS channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3] : +- *(7) Filter ((return_rank#2 <= 10) || (currency_rank#3 <= 10)) - : +- Window [rank(currency_ratio#6) windowspecdefinition(currency_ratio#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#3], [currency_ratio#6 ASC NULLS FIRST] - : +- *(6) Sort [currency_ratio#6 ASC NULLS FIRST], false, 0 - : +- Window [rank(return_ratio#5) windowspecdefinition(return_ratio#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#2], [return_ratio#5 ASC NULLS FIRST] - : +- *(5) Sort [return_ratio#5 ASC NULLS FIRST], false, 0 - : +- Exchange SinglePartition - : +- *(4) HashAggregate(keys=[ws_item_sk#7], functions=[sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), sum(cast(coalesce(ws_quantity#9, 0) as bigint)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) - : +- Exchange hashpartitioning(ws_item_sk#7, 5) - : +- *(3) HashAggregate(keys=[ws_item_sk#7], functions=[partial_sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), partial_sum(cast(coalesce(ws_quantity#9, 0) as bigint)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) - : +- *(3) Project [ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] - : +- *(3) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight - : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] - : : +- *(3) BroadcastHashJoin [cast(ws_order_number#14 as bigint), cast(ws_item_sk#7 as bigint)], [wr_order_number#15, wr_item_sk#16], Inner, BuildRight - : : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_order_number#14, ws_quantity#9, ws_net_paid#11] - : : : +- *(3) Filter ((((((((isnotnull(ws_net_profit#17) && isnotnull(ws_net_paid#11)) && isnotnull(ws_quantity#9)) && (ws_net_profit#17 > 1.00)) && (ws_net_paid#11 > 0.00)) && (ws_quantity#9 > 0)) && isnotnull(ws_order_number#14)) && isnotnull(ws_item_sk#7)) && isnotnull(ws_sold_date_sk#12)) - : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#7,ws_order_number#14,ws_quantity#9,ws_net_paid#11,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_net_paid), IsNotNull(ws_quantity), GreaterThan(ws_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(wr_item_sk#16)) && isnotnull(wr_order_number#15)) - : : +- *(1) FileScan parquet default.web_returns[wr_item_sk#16,wr_order_number#15,wr_return_quantity#8,wr_return_amt#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_item_sk), IsNotNull(..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#13] - : +- *(2) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#19)) && (d_year#18 = 2001)) && (d_moy#19 = 12)) && isnotnull(d_date_sk#13)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_year#18,d_moy#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)], ReadSchema: struct + : +- Window [rank(return_ratio#5) windowspecdefinition(return_ratio#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#2], [return_ratio#5 ASC NULLS FIRST] + : +- *(6) Sort [return_ratio#5 ASC NULLS FIRST], false, 0 + : +- *(6) Project [item#4, return_ratio#5, currency_rank#3] + : +- Window [rank(currency_ratio#6) windowspecdefinition(currency_ratio#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#3], [currency_ratio#6 ASC NULLS FIRST] + : +- *(5) Sort [currency_ratio#6 ASC NULLS FIRST], false, 0 + : +- Exchange SinglePartition + : +- *(4) HashAggregate(keys=[ws_item_sk#7], functions=[sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), sum(cast(coalesce(ws_quantity#9, 0) as bigint)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) + : +- Exchange hashpartitioning(ws_item_sk#7, 5) + : +- *(3) HashAggregate(keys=[ws_item_sk#7], functions=[partial_sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), partial_sum(cast(coalesce(ws_quantity#9, 0) as bigint)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) + : +- *(3) Project [ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] + : +- *(3) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] + : : +- *(3) BroadcastHashJoin [cast(ws_order_number#14 as bigint), cast(ws_item_sk#7 as bigint)], [wr_order_number#15, wr_item_sk#16], Inner, BuildRight + : : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_order_number#14, ws_quantity#9, ws_net_paid#11] + : : : +- *(3) Filter ((((((((isnotnull(ws_net_paid#11) && isnotnull(ws_quantity#9)) && isnotnull(ws_net_profit#17)) && (ws_net_profit#17 > 1.00)) && (ws_net_paid#11 > 0.00)) && (ws_quantity#9 > 0)) && isnotnull(ws_order_number#14)) && isnotnull(ws_item_sk#7)) && isnotnull(ws_sold_date_sk#12)) + : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#7,ws_order_number#14,ws_quantity#9,ws_net_paid#11,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_net_paid), IsNotNull(ws_quantity), IsNotNull(ws_net_profit), GreaterThan(ws_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(wr_order_number#15)) && isnotnull(wr_item_sk#16)) + : : +- *(1) FileScan parquet default.web_returns[wr_item_sk#16,wr_order_number#15,wr_return_quantity#8,wr_return_amt#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNot..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#13] + : +- *(2) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#19)) && (d_year#18 = 2001)) && (d_moy#19 = 12)) && isnotnull(d_date_sk#13)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_year#18,d_moy#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)], ReadSchema: struct :- *(14) Project [catalog AS channel#20, item#21, return_ratio#22, return_rank#23, currency_rank#24] : +- *(14) Filter ((return_rank#23 <= 10) || (currency_rank#24 <= 10)) - : +- Window [rank(return_ratio#22) windowspecdefinition(return_ratio#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#23], [return_ratio#22 ASC NULLS FIRST] - : +- *(13) Sort [return_ratio#22 ASC NULLS FIRST], false, 0 - : +- *(13) Project [item#21, return_ratio#22, currency_rank#24] - : +- Window [rank(currency_ratio#25) windowspecdefinition(currency_ratio#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#24], [currency_ratio#25 ASC NULLS FIRST] - : +- *(12) Sort [currency_ratio#25 ASC NULLS FIRST], false, 0 - : +- Exchange SinglePartition - : +- *(11) HashAggregate(keys=[cs_item_sk#26], functions=[sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), sum(cast(coalesce(cs_quantity#28, 0) as bigint)), sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) - : +- Exchange hashpartitioning(cs_item_sk#26, 5) - : +- *(10) HashAggregate(keys=[cs_item_sk#26], functions=[partial_sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#28, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) - : +- *(10) Project [cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] - : +- *(10) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#13], Inner, BuildRight - : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] - : : +- *(10) BroadcastHashJoin [cs_order_number#32, cs_item_sk#26], [cr_order_number#33, cr_item_sk#34], Inner, BuildRight - : : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_order_number#32, cs_quantity#28, cs_net_paid#30] - : : : +- *(10) Filter ((((((((isnotnull(cs_net_paid#30) && isnotnull(cs_quantity#28)) && isnotnull(cs_net_profit#35)) && (cs_net_profit#35 > 1.00)) && (cs_net_paid#30 > 0.00)) && (cs_quantity#28 > 0)) && isnotnull(cs_item_sk#26)) && isnotnull(cs_order_number#32)) && isnotnull(cs_sold_date_sk#31)) - : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#26,cs_order_number#32,cs_quantity#28,cs_net_paid#30,cs_net_profit#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_net_paid), IsNotNull(cs_quantity), IsNotNull(cs_net_profit), GreaterThan(cs_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(cr_order_number#33)) && isnotnull(cr_item_sk#34)) - : : +- *(8) FileScan parquet default.catalog_returns[cr_item_sk#34,cr_order_number#33,cr_return_quantity#27,cr_return_amount#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number),..., ReadSchema: struct - : +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- Window [rank(currency_ratio#25) windowspecdefinition(currency_ratio#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#24], [currency_ratio#25 ASC NULLS FIRST] + : +- *(13) Sort [currency_ratio#25 ASC NULLS FIRST], false, 0 + : +- Window [rank(return_ratio#22) windowspecdefinition(return_ratio#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#23], [return_ratio#22 ASC NULLS FIRST] + : +- *(12) Sort [return_ratio#22 ASC NULLS FIRST], false, 0 + : +- Exchange SinglePartition + : +- *(11) HashAggregate(keys=[cs_item_sk#26], functions=[sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), sum(cast(coalesce(cs_quantity#28, 0) as bigint)), sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) + : +- Exchange hashpartitioning(cs_item_sk#26, 5) + : +- *(10) HashAggregate(keys=[cs_item_sk#26], functions=[partial_sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#28, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) + : +- *(10) Project [cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] + : +- *(10) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#13], Inner, BuildRight + : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] + : : +- *(10) BroadcastHashJoin [cs_order_number#32, cs_item_sk#26], [cr_order_number#33, cr_item_sk#34], Inner, BuildRight + : : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_order_number#32, cs_quantity#28, cs_net_paid#30] + : : : +- *(10) Filter ((((((((isnotnull(cs_net_paid#30) && isnotnull(cs_quantity#28)) && isnotnull(cs_net_profit#35)) && (cs_net_profit#35 > 1.00)) && (cs_net_paid#30 > 0.00)) && (cs_quantity#28 > 0)) && isnotnull(cs_item_sk#26)) && isnotnull(cs_order_number#32)) && isnotnull(cs_sold_date_sk#31)) + : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#26,cs_order_number#32,cs_quantity#28,cs_net_paid#30,cs_net_profit#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_net_paid), IsNotNull(cs_quantity), IsNotNull(cs_net_profit), GreaterThan(cs_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(cr_item_sk#34)) && isnotnull(cr_order_number#33)) + : : +- *(8) FileScan parquet default.catalog_returns[cr_item_sk#34,cr_order_number#33,cr_return_quantity#27,cr_return_amount#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_item_sk), IsNo..., ReadSchema: struct + : +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(21) Project [store AS channel#36, item#37, return_ratio#38, return_rank#39, currency_rank#40] +- *(21) Filter ((return_rank#39 <= 10) || (currency_rank#40 <= 10)) +- Window [rank(return_ratio#38) windowspecdefinition(return_ratio#38 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#39], [return_ratio#38 ASC NULLS FIRST] @@ -68,10 +68,10 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,return_rank# :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] : +- *(17) BroadcastHashJoin [cast(ss_ticket_number#48 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#49, sr_item_sk#50], Inner, BuildRight : :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_ticket_number#48, ss_quantity#44, ss_net_paid#46] - : : +- *(17) Filter ((((((((isnotnull(ss_net_paid#46) && isnotnull(ss_net_profit#51)) && isnotnull(ss_quantity#44)) && (ss_net_profit#51 > 1.00)) && (ss_net_paid#46 > 0.00)) && (ss_quantity#44 > 0)) && isnotnull(ss_ticket_number#48)) && isnotnull(ss_item_sk#42)) && isnotnull(ss_sold_date_sk#47)) - : : +- *(17) FileScan parquet default.store_sales[ss_sold_date_sk#47,ss_item_sk#42,ss_ticket_number#48,ss_quantity#44,ss_net_paid#46,ss_net_profit#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_net_paid), IsNotNull(ss_net_profit), IsNotNull(ss_quantity), GreaterThan(ss_net_pro..., ReadSchema: struct 1.00)) && (ss_net_paid#46 > 0.00)) && (ss_quantity#44 > 0)) && isnotnull(ss_ticket_number#48)) && isnotnull(ss_item_sk#42)) && isnotnull(ss_sold_date_sk#47)) + : : +- *(17) FileScan parquet default.store_sales[ss_sold_date_sk#47,ss_item_sk#42,ss_ticket_number#48,ss_quantity#44,ss_net_paid#46,ss_net_profit#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_quantity), IsNotNull(ss_net_paid), GreaterThan(ss_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(sr_item_sk#50)) && isnotnull(sr_ticket_number#49)) - : +- *(15) FileScan parquet default.store_returns[sr_item_sk#50,sr_ticket_number#49,sr_return_quantity#43,sr_return_amt#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_item_sk), IsNotNull(..., ReadSchema: struct + : +- *(15) Filter (((isnotnull(sr_return_amt#45) && (sr_return_amt#45 > 10000.00)) && isnotnull(sr_ticket_number#49)) && isnotnull(sr_item_sk#50)) + : +- *(15) FileScan parquet default.store_returns[sr_item_sk#50,sr_ticket_number#49,sr_return_quantity#43,sr_return_amt#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNo..., ReadSchema: struct +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt index 238eb448c..623f3acb4 100644 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt @@ -7,44 +7,6 @@ TakeOrderedAndProject [channel,currency_rank,item,return_rank,return_ratio] HashAggregate [channel,currency_rank,item,return_rank,return_ratio] InputAdapter Union - WholeStageCodegen - Project [currency_rank,item,return_rank,return_ratio] - Filter [currency_rank,return_rank] - InputAdapter - Window [currency_ratio] - WholeStageCodegen - Sort [currency_ratio] - InputAdapter - Window [return_ratio] - WholeStageCodegen - Sort [return_ratio] - InputAdapter - Exchange #2 - WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),ws_item_sk] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00))] - InputAdapter - Exchange [ws_item_sk] #3 - WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sum,sum,sum,sum,wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Project [ws_item_sk,ws_net_paid,ws_order_number,ws_quantity,ws_sold_date_sk] - Filter [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] - Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] - Filter [wr_item_sk,wr_order_number,wr_return_amt] - Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] WholeStageCodegen Project [currency_rank,item,return_rank,return_ratio] Filter [currency_rank,return_rank] @@ -58,28 +20,66 @@ TakeOrderedAndProject [channel,currency_rank,item,return_rank,return_ratio] WholeStageCodegen Sort [currency_ratio] InputAdapter - Exchange #6 + Exchange #2 WholeStageCodegen - HashAggregate [cs_item_sk,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] + HashAggregate [sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),ws_item_sk] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00))] InputAdapter - Exchange [cs_item_sk] #7 + Exchange [ws_item_sk] #3 WholeStageCodegen - HashAggregate [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity,cs_sold_date_sk] - BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] - Project [cs_item_sk,cs_net_paid,cs_order_number,cs_quantity,cs_sold_date_sk] - Filter [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] + HashAggregate [sum,sum,sum,sum,sum,sum,sum,sum,wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [ws_item_sk,ws_net_paid,ws_order_number,ws_quantity,ws_sold_date_sk] + Filter [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] + Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] InputAdapter - BroadcastExchange #8 + BroadcastExchange #4 WholeStageCodegen - Project [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] - Filter [cr_item_sk,cr_order_number,cr_return_amount] - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] + Project [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] + Filter [wr_item_sk,wr_order_number,wr_return_amt] + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #5 + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + WholeStageCodegen + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen + Sort [return_ratio] + InputAdapter + Exchange #6 + WholeStageCodegen + HashAggregate [cs_item_sk,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [cs_item_sk] #7 + WholeStageCodegen + HashAggregate [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_item_sk,cs_net_paid,cs_order_number,cs_quantity,cs_sold_date_sk] + Filter [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] + Filter [cr_item_sk,cr_order_number,cr_return_amount] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #5 WholeStageCodegen Project [currency_rank,item,return_rank,return_ratio] Filter [currency_rank,return_rank] From dd295a9629518f96ab8d9a9ba1f763c6b5edc659 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Fri, 12 Mar 2021 11:13:44 -0800 Subject: [PATCH 15/31] remove rogue query --- .../approved-plans-v1_4/q49/explain.txt | 77 ----------- .../approved-plans-v1_4/q49/simplified.txt | 117 ---------------- src/test/resources/tpcds/q49.sql | 126 ------------------ 3 files changed, 320 deletions(-) delete mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt delete mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt delete mode 100644 src/test/resources/tpcds/q49.sql diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt deleted file mode 100644 index 1b4779912..000000000 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt +++ /dev/null @@ -1,77 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,return_rank#2 ASC NULLS FIRST,currency_rank#3 ASC NULLS FIRST], output=[channel#1,item#4,return_ratio#5,return_rank#2,currency_rank#3]) -+- *(23) HashAggregate(keys=[channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3], functions=[]) - +- Exchange hashpartitioning(channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3, 5) - +- *(22) HashAggregate(keys=[channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3], functions=[]) - +- Union - :- *(7) Project [web AS channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3] - : +- *(7) Filter ((return_rank#2 <= 10) || (currency_rank#3 <= 10)) - : +- Window [rank(return_ratio#5) windowspecdefinition(return_ratio#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#2], [return_ratio#5 ASC NULLS FIRST] - : +- *(6) Sort [return_ratio#5 ASC NULLS FIRST], false, 0 - : +- *(6) Project [item#4, return_ratio#5, currency_rank#3] - : +- Window [rank(currency_ratio#6) windowspecdefinition(currency_ratio#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#3], [currency_ratio#6 ASC NULLS FIRST] - : +- *(5) Sort [currency_ratio#6 ASC NULLS FIRST], false, 0 - : +- Exchange SinglePartition - : +- *(4) HashAggregate(keys=[ws_item_sk#7], functions=[sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), sum(cast(coalesce(ws_quantity#9, 0) as bigint)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) - : +- Exchange hashpartitioning(ws_item_sk#7, 5) - : +- *(3) HashAggregate(keys=[ws_item_sk#7], functions=[partial_sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), partial_sum(cast(coalesce(ws_quantity#9, 0) as bigint)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) - : +- *(3) Project [ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] - : +- *(3) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight - : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] - : : +- *(3) BroadcastHashJoin [cast(ws_order_number#14 as bigint), cast(ws_item_sk#7 as bigint)], [wr_order_number#15, wr_item_sk#16], Inner, BuildRight - : : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_order_number#14, ws_quantity#9, ws_net_paid#11] - : : : +- *(3) Filter ((((((((isnotnull(ws_net_paid#11) && isnotnull(ws_quantity#9)) && isnotnull(ws_net_profit#17)) && (ws_net_profit#17 > 1.00)) && (ws_net_paid#11 > 0.00)) && (ws_quantity#9 > 0)) && isnotnull(ws_order_number#14)) && isnotnull(ws_item_sk#7)) && isnotnull(ws_sold_date_sk#12)) - : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#7,ws_order_number#14,ws_quantity#9,ws_net_paid#11,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_net_paid), IsNotNull(ws_quantity), IsNotNull(ws_net_profit), GreaterThan(ws_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(wr_order_number#15)) && isnotnull(wr_item_sk#16)) - : : +- *(1) FileScan parquet default.web_returns[wr_item_sk#16,wr_order_number#15,wr_return_quantity#8,wr_return_amt#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNot..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#13] - : +- *(2) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#19)) && (d_year#18 = 2001)) && (d_moy#19 = 12)) && isnotnull(d_date_sk#13)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_year#18,d_moy#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)], ReadSchema: struct - :- *(14) Project [catalog AS channel#20, item#21, return_ratio#22, return_rank#23, currency_rank#24] - : +- *(14) Filter ((return_rank#23 <= 10) || (currency_rank#24 <= 10)) - : +- Window [rank(currency_ratio#25) windowspecdefinition(currency_ratio#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#24], [currency_ratio#25 ASC NULLS FIRST] - : +- *(13) Sort [currency_ratio#25 ASC NULLS FIRST], false, 0 - : +- Window [rank(return_ratio#22) windowspecdefinition(return_ratio#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#23], [return_ratio#22 ASC NULLS FIRST] - : +- *(12) Sort [return_ratio#22 ASC NULLS FIRST], false, 0 - : +- Exchange SinglePartition - : +- *(11) HashAggregate(keys=[cs_item_sk#26], functions=[sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), sum(cast(coalesce(cs_quantity#28, 0) as bigint)), sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) - : +- Exchange hashpartitioning(cs_item_sk#26, 5) - : +- *(10) HashAggregate(keys=[cs_item_sk#26], functions=[partial_sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#28, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) - : +- *(10) Project [cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] - : +- *(10) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#13], Inner, BuildRight - : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] - : : +- *(10) BroadcastHashJoin [cs_order_number#32, cs_item_sk#26], [cr_order_number#33, cr_item_sk#34], Inner, BuildRight - : : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_order_number#32, cs_quantity#28, cs_net_paid#30] - : : : +- *(10) Filter ((((((((isnotnull(cs_net_paid#30) && isnotnull(cs_quantity#28)) && isnotnull(cs_net_profit#35)) && (cs_net_profit#35 > 1.00)) && (cs_net_paid#30 > 0.00)) && (cs_quantity#28 > 0)) && isnotnull(cs_item_sk#26)) && isnotnull(cs_order_number#32)) && isnotnull(cs_sold_date_sk#31)) - : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#26,cs_order_number#32,cs_quantity#28,cs_net_paid#30,cs_net_profit#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_net_paid), IsNotNull(cs_quantity), IsNotNull(cs_net_profit), GreaterThan(cs_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(cr_item_sk#34)) && isnotnull(cr_order_number#33)) - : : +- *(8) FileScan parquet default.catalog_returns[cr_item_sk#34,cr_order_number#33,cr_return_quantity#27,cr_return_amount#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_item_sk), IsNo..., ReadSchema: struct - : +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(21) Project [store AS channel#36, item#37, return_ratio#38, return_rank#39, currency_rank#40] - +- *(21) Filter ((return_rank#39 <= 10) || (currency_rank#40 <= 10)) - +- Window [rank(return_ratio#38) windowspecdefinition(return_ratio#38 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#39], [return_ratio#38 ASC NULLS FIRST] - +- *(20) Sort [return_ratio#38 ASC NULLS FIRST], false, 0 - +- *(20) Project [item#37, return_ratio#38, currency_rank#40] - +- Window [rank(currency_ratio#41) windowspecdefinition(currency_ratio#41 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#40], [currency_ratio#41 ASC NULLS FIRST] - +- *(19) Sort [currency_ratio#41 ASC NULLS FIRST], false, 0 - +- Exchange SinglePartition - +- *(18) HashAggregate(keys=[ss_item_sk#42], functions=[sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), sum(cast(coalesce(ss_quantity#44, 0) as bigint)), sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) - +- Exchange hashpartitioning(ss_item_sk#42, 5) - +- *(17) HashAggregate(keys=[ss_item_sk#42], functions=[partial_sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), partial_sum(cast(coalesce(ss_quantity#44, 0) as bigint)), partial_sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) - +- *(17) Project [ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] - +- *(17) BroadcastHashJoin [ss_sold_date_sk#47], [d_date_sk#13], Inner, BuildRight - :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] - : +- *(17) BroadcastHashJoin [cast(ss_ticket_number#48 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#49, sr_item_sk#50], Inner, BuildRight - : :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_ticket_number#48, ss_quantity#44, ss_net_paid#46] - : : +- *(17) Filter ((((((((isnotnull(ss_net_profit#51) && isnotnull(ss_quantity#44)) && isnotnull(ss_net_paid#46)) && (ss_net_profit#51 > 1.00)) && (ss_net_paid#46 > 0.00)) && (ss_quantity#44 > 0)) && isnotnull(ss_ticket_number#48)) && isnotnull(ss_item_sk#42)) && isnotnull(ss_sold_date_sk#47)) - : : +- *(17) FileScan parquet default.store_sales[ss_sold_date_sk#47,ss_item_sk#42,ss_ticket_number#48,ss_quantity#44,ss_net_paid#46,ss_net_profit#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_quantity), IsNotNull(ss_net_paid), GreaterThan(ss_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(sr_ticket_number#49)) && isnotnull(sr_item_sk#50)) - : +- *(15) FileScan parquet default.store_returns[sr_item_sk#50,sr_ticket_number#49,sr_return_quantity#43,sr_return_amt#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNo..., ReadSchema: struct - +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt deleted file mode 100644 index 623f3acb4..000000000 --- a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt +++ /dev/null @@ -1,117 +0,0 @@ -TakeOrderedAndProject [channel,currency_rank,item,return_rank,return_ratio] - WholeStageCodegen - HashAggregate [channel,currency_rank,item,return_rank,return_ratio] - InputAdapter - Exchange [channel,currency_rank,item,return_rank,return_ratio] #1 - WholeStageCodegen - HashAggregate [channel,currency_rank,item,return_rank,return_ratio] - InputAdapter - Union - WholeStageCodegen - Project [currency_rank,item,return_rank,return_ratio] - Filter [currency_rank,return_rank] - InputAdapter - Window [return_ratio] - WholeStageCodegen - Sort [return_ratio] - Project [currency_rank,item,return_ratio] - InputAdapter - Window [currency_ratio] - WholeStageCodegen - Sort [currency_ratio] - InputAdapter - Exchange #2 - WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),ws_item_sk] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00))] - InputAdapter - Exchange [ws_item_sk] #3 - WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sum,sum,sum,sum,wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Project [ws_item_sk,ws_net_paid,ws_order_number,ws_quantity,ws_sold_date_sk] - Filter [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] - Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] - Filter [wr_item_sk,wr_order_number,wr_return_amt] - Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - WholeStageCodegen - Project [currency_rank,item,return_rank,return_ratio] - Filter [currency_rank,return_rank] - InputAdapter - Window [currency_ratio] - WholeStageCodegen - Sort [currency_ratio] - InputAdapter - Window [return_ratio] - WholeStageCodegen - Sort [return_ratio] - InputAdapter - Exchange #6 - WholeStageCodegen - HashAggregate [cs_item_sk,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] - InputAdapter - Exchange [cs_item_sk] #7 - WholeStageCodegen - HashAggregate [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity,cs_sold_date_sk] - BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] - Project [cs_item_sk,cs_net_paid,cs_order_number,cs_quantity,cs_sold_date_sk] - Filter [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] - Filter [cr_item_sk,cr_order_number,cr_return_amount] - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #5 - WholeStageCodegen - Project [currency_rank,item,return_rank,return_ratio] - Filter [currency_rank,return_rank] - InputAdapter - Window [return_ratio] - WholeStageCodegen - Sort [return_ratio] - Project [currency_rank,item,return_ratio] - InputAdapter - Window [currency_ratio] - WholeStageCodegen - Sort [currency_ratio] - InputAdapter - Exchange #9 - WholeStageCodegen - HashAggregate [ss_item_sk,sum,sum,sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00))] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00))] - InputAdapter - Exchange [ss_item_sk] #10 - WholeStageCodegen - HashAggregate [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity,ss_sold_date_sk] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_item_sk,ss_net_paid,ss_quantity,ss_sold_date_sk,ss_ticket_number] - Filter [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen - Project [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] - Filter [sr_item_sk,sr_return_amt,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #5 diff --git a/src/test/resources/tpcds/q49.sql b/src/test/resources/tpcds/q49.sql deleted file mode 100644 index 9568d8b92..000000000 --- a/src/test/resources/tpcds/q49.sql +++ /dev/null @@ -1,126 +0,0 @@ -SELECT - 'web' AS channel, - web.item, - web.return_ratio, - web.return_rank, - web.currency_rank -FROM ( - SELECT - item, - return_ratio, - currency_ratio, - rank() - OVER ( - ORDER BY return_ratio) AS return_rank, - rank() - OVER ( - ORDER BY currency_ratio) AS currency_rank - FROM - (SELECT - ws.ws_item_sk AS item, - (cast(sum(coalesce(wr.wr_return_quantity, 0)) AS DECIMAL(15, 4)) / - cast(sum(coalesce(ws.ws_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio, - (cast(sum(coalesce(wr.wr_return_amt, 0)) AS DECIMAL(15, 4)) / - cast(sum(coalesce(ws.ws_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio - FROM - web_sales ws LEFT OUTER JOIN web_returns wr - ON (ws.ws_order_number = wr.wr_order_number AND - ws.ws_item_sk = wr.wr_item_sk) - , date_dim - WHERE - wr.wr_return_amt > 10000 - AND ws.ws_net_profit > 1 - AND ws.ws_net_paid > 0 - AND ws.ws_quantity > 0 - AND ws_sold_date_sk = d_date_sk - AND d_year = 2001 - AND d_moy = 12 - GROUP BY ws.ws_item_sk - ) in_web - ) web -WHERE (web.return_rank <= 10 OR web.currency_rank <= 10) -UNION -SELECT - 'catalog' AS channel, - catalog.item, - catalog.return_ratio, - catalog.return_rank, - catalog.currency_rank -FROM ( - SELECT - item, - return_ratio, - currency_ratio, - rank() - OVER ( - ORDER BY return_ratio) AS return_rank, - rank() - OVER ( - ORDER BY currency_ratio) AS currency_rank - FROM - (SELECT - cs.cs_item_sk AS item, - (cast(sum(coalesce(cr.cr_return_quantity, 0)) AS DECIMAL(15, 4)) / - cast(sum(coalesce(cs.cs_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio, - (cast(sum(coalesce(cr.cr_return_amount, 0)) AS DECIMAL(15, 4)) / - cast(sum(coalesce(cs.cs_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio - FROM - catalog_sales cs LEFT OUTER JOIN catalog_returns cr - ON (cs.cs_order_number = cr.cr_order_number AND - cs.cs_item_sk = cr.cr_item_sk) - , date_dim - WHERE - cr.cr_return_amount > 10000 - AND cs.cs_net_profit > 1 - AND cs.cs_net_paid > 0 - AND cs.cs_quantity > 0 - AND cs_sold_date_sk = d_date_sk - AND d_year = 2001 - AND d_moy = 12 - GROUP BY cs.cs_item_sk - ) in_cat - ) catalog -WHERE (catalog.return_rank <= 10 OR catalog.currency_rank <= 10) -UNION -SELECT - 'store' AS channel, - store.item, - store.return_ratio, - store.return_rank, - store.currency_rank -FROM ( - SELECT - item, - return_ratio, - currency_ratio, - rank() - OVER ( - ORDER BY return_ratio) AS return_rank, - rank() - OVER ( - ORDER BY currency_ratio) AS currency_rank - FROM - (SELECT - sts.ss_item_sk AS item, - (cast(sum(coalesce(sr.sr_return_quantity, 0)) AS DECIMAL(15, 4)) / - cast(sum(coalesce(sts.ss_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio, - (cast(sum(coalesce(sr.sr_return_amt, 0)) AS DECIMAL(15, 4)) / - cast(sum(coalesce(sts.ss_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio - FROM - store_sales sts LEFT OUTER JOIN store_returns sr - ON (sts.ss_ticket_number = sr.sr_ticket_number AND sts.ss_item_sk = sr.sr_item_sk) - , date_dim - WHERE - sr.sr_return_amt > 10000 - AND sts.ss_net_profit > 1 - AND sts.ss_net_paid > 0 - AND sts.ss_quantity > 0 - AND ss_sold_date_sk = d_date_sk - AND d_year = 2001 - AND d_moy = 12 - GROUP BY sts.ss_item_sk - ) in_store - ) store -WHERE (store.return_rank <= 10 OR store.currency_rank <= 10) -ORDER BY 1, 4, 5 -LIMIT 100 From f47fbd3500fff30a86d68bae7f1f3cbaea2ea8e4 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Fri, 12 Mar 2021 11:34:33 -0800 Subject: [PATCH 16/31] remove q49 --- .../com/microsoft/hyperspace/goldstandard/TPCDSBase.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala index 6c48e37b9..8708187ab 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala @@ -36,13 +36,14 @@ trait TPCDSBase extends SparkFunSuite with SparkInvolvedSuite { val conf = SQLConf.get - // The TPCDS queries below are based on v1.4 + // The TPCDS queries below are based on v1.4. + // TODO: Fix bulid pipeline for q49 and reenable q49. val tpcdsQueries = Seq( "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14a", "q14b", "q15", "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23a", "q23b", "q24a", "q24b", "q25", "q26", "q27", "q28", "q29", "q30", "q31", "q32", "q33", "q34", "q35", "q36", "q37", "q38", "q39a", "q39b", "q40", - "q41", "q42", "q43", "q44", "q45", "q46", "q47", "q48", "q49", "q50", + "q41", "q42", "q43", "q44", "q45", "q46", "q47", "q48", /*"q49"*/, "q50", "q51", "q52", "q53", "q54", "q55", "q56", "q57", "q58", "q59", "q60", "q61", "q62", "q63", "q64", "q65", "q66", "q67", "q68", "q69", "q70", "q71", "q72", "q73", "q74", "q75", "q76", "q77", "q78", "q79", "q80", From d15a4e8c9ac4b8a3cd5050f650e0530ff4296693 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Fri, 12 Mar 2021 12:07:15 -0800 Subject: [PATCH 17/31] fix scalastyle --- .../scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala index 8708187ab..4be946432 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala @@ -43,7 +43,7 @@ trait TPCDSBase extends SparkFunSuite with SparkInvolvedSuite { "q12", "q13", "q14a", "q14b", "q15", "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23a", "q23b", "q24a", "q24b", "q25", "q26", "q27", "q28", "q29", "q30", "q31", "q32", "q33", "q34", "q35", "q36", "q37", "q38", "q39a", "q39b", "q40", - "q41", "q42", "q43", "q44", "q45", "q46", "q47", "q48", /*"q49"*/, "q50", + "q41", "q42", "q43", "q44", "q45", "q46", "q47", "q48", "q50", "q51", "q52", "q53", "q54", "q55", "q56", "q57", "q58", "q59", "q60", "q61", "q62", "q63", "q64", "q65", "q66", "q67", "q68", "q69", "q70", "q71", "q72", "q73", "q74", "q75", "q76", "q77", "q78", "q79", "q80", From 4c511fb7370cfea54e34cf9b688daa396a92aece Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Fri, 12 Mar 2021 12:41:36 -0800 Subject: [PATCH 18/31] add query files for q49.sql --- .../approved-plans-v1_4/q49/explain.txt | 77 +++++++++++ .../approved-plans-v1_4/q49/simplified.txt | 117 ++++++++++++++++ src/test/resources/tpcds/q49.sql | 126 ++++++++++++++++++ 3 files changed, 320 insertions(+) create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt create mode 100644 src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt create mode 100644 src/test/resources/tpcds/q49.sql diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt new file mode 100644 index 000000000..2b0459139 --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt @@ -0,0 +1,77 @@ +== Physical Plan == +TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,return_rank#2 ASC NULLS FIRST,currency_rank#3 ASC NULLS FIRST], output=[channel#1,item#4,return_ratio#5,return_rank#2,currency_rank#3]) ++- *(23) HashAggregate(keys=[channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3], functions=[]) + +- Exchange hashpartitioning(channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3, 5) + +- *(22) HashAggregate(keys=[channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3], functions=[]) + +- Union + :- *(7) Project [web AS channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3] + : +- *(7) Filter ((return_rank#2 <= 10) || (currency_rank#3 <= 10)) + : +- Window [rank(currency_ratio#6) windowspecdefinition(currency_ratio#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#3], [currency_ratio#6 ASC NULLS FIRST] + : +- *(6) Sort [currency_ratio#6 ASC NULLS FIRST], false, 0 + : +- Window [rank(return_ratio#5) windowspecdefinition(return_ratio#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#2], [return_ratio#5 ASC NULLS FIRST] + : +- *(5) Sort [return_ratio#5 ASC NULLS FIRST], false, 0 + : +- Exchange SinglePartition + : +- *(4) HashAggregate(keys=[ws_item_sk#7], functions=[sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), sum(cast(coalesce(ws_quantity#9, 0) as bigint)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) + : +- Exchange hashpartitioning(ws_item_sk#7, 5) + : +- *(3) HashAggregate(keys=[ws_item_sk#7], functions=[partial_sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), partial_sum(cast(coalesce(ws_quantity#9, 0) as bigint)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) + : +- *(3) Project [ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] + : +- *(3) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] + : : +- *(3) BroadcastHashJoin [cast(ws_order_number#14 as bigint), cast(ws_item_sk#7 as bigint)], [wr_order_number#15, wr_item_sk#16], Inner, BuildRight + : : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_order_number#14, ws_quantity#9, ws_net_paid#11] + : : : +- *(3) Filter ((((((((isnotnull(ws_net_profit#17) && isnotnull(ws_net_paid#11)) && isnotnull(ws_quantity#9)) && (ws_net_profit#17 > 1.00)) && (ws_net_paid#11 > 0.00)) && (ws_quantity#9 > 0)) && isnotnull(ws_order_number#14)) && isnotnull(ws_item_sk#7)) && isnotnull(ws_sold_date_sk#12)) + : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#7,ws_order_number#14,ws_quantity#9,ws_net_paid#11,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_net_paid), IsNotNull(ws_quantity), GreaterThan(ws_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(wr_item_sk#16)) && isnotnull(wr_order_number#15)) + : : +- *(1) FileScan parquet default.web_returns[wr_item_sk#16,wr_order_number#15,wr_return_quantity#8,wr_return_amt#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_item_sk), IsNotNull(..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#13] + : +- *(2) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#19)) && (d_year#18 = 2001)) && (d_moy#19 = 12)) && isnotnull(d_date_sk#13)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_year#18,d_moy#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)], ReadSchema: struct + :- *(14) Project [catalog AS channel#20, item#21, return_ratio#22, return_rank#23, currency_rank#24] + : +- *(14) Filter ((return_rank#23 <= 10) || (currency_rank#24 <= 10)) + : +- Window [rank(return_ratio#22) windowspecdefinition(return_ratio#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#23], [return_ratio#22 ASC NULLS FIRST] + : +- *(13) Sort [return_ratio#22 ASC NULLS FIRST], false, 0 + : +- *(13) Project [item#21, return_ratio#22, currency_rank#24] + : +- Window [rank(currency_ratio#25) windowspecdefinition(currency_ratio#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#24], [currency_ratio#25 ASC NULLS FIRST] + : +- *(12) Sort [currency_ratio#25 ASC NULLS FIRST], false, 0 + : +- Exchange SinglePartition + : +- *(11) HashAggregate(keys=[cs_item_sk#26], functions=[sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), sum(cast(coalesce(cs_quantity#28, 0) as bigint)), sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) + : +- Exchange hashpartitioning(cs_item_sk#26, 5) + : +- *(10) HashAggregate(keys=[cs_item_sk#26], functions=[partial_sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#28, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) + : +- *(10) Project [cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] + : +- *(10) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#13], Inner, BuildRight + : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] + : : +- *(10) BroadcastHashJoin [cs_order_number#32, cs_item_sk#26], [cr_order_number#33, cr_item_sk#34], Inner, BuildRight + : : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_order_number#32, cs_quantity#28, cs_net_paid#30] + : : : +- *(10) Filter ((((((((isnotnull(cs_net_paid#30) && isnotnull(cs_quantity#28)) && isnotnull(cs_net_profit#35)) && (cs_net_profit#35 > 1.00)) && (cs_net_paid#30 > 0.00)) && (cs_quantity#28 > 0)) && isnotnull(cs_item_sk#26)) && isnotnull(cs_order_number#32)) && isnotnull(cs_sold_date_sk#31)) + : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#26,cs_order_number#32,cs_quantity#28,cs_net_paid#30,cs_net_profit#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_net_paid), IsNotNull(cs_quantity), IsNotNull(cs_net_profit), GreaterThan(cs_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(cr_order_number#33)) && isnotnull(cr_item_sk#34)) + : : +- *(8) FileScan parquet default.catalog_returns[cr_item_sk#34,cr_order_number#33,cr_return_quantity#27,cr_return_amount#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number),..., ReadSchema: struct + : +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(21) Project [store AS channel#36, item#37, return_ratio#38, return_rank#39, currency_rank#40] + +- *(21) Filter ((return_rank#39 <= 10) || (currency_rank#40 <= 10)) + +- Window [rank(return_ratio#38) windowspecdefinition(return_ratio#38 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#39], [return_ratio#38 ASC NULLS FIRST] + +- *(20) Sort [return_ratio#38 ASC NULLS FIRST], false, 0 + +- *(20) Project [item#37, return_ratio#38, currency_rank#40] + +- Window [rank(currency_ratio#41) windowspecdefinition(currency_ratio#41 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#40], [currency_ratio#41 ASC NULLS FIRST] + +- *(19) Sort [currency_ratio#41 ASC NULLS FIRST], false, 0 + +- Exchange SinglePartition + +- *(18) HashAggregate(keys=[ss_item_sk#42], functions=[sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), sum(cast(coalesce(ss_quantity#44, 0) as bigint)), sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) + +- Exchange hashpartitioning(ss_item_sk#42, 5) + +- *(17) HashAggregate(keys=[ss_item_sk#42], functions=[partial_sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), partial_sum(cast(coalesce(ss_quantity#44, 0) as bigint)), partial_sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) + +- *(17) Project [ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] + +- *(17) BroadcastHashJoin [ss_sold_date_sk#47], [d_date_sk#13], Inner, BuildRight + :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] + : +- *(17) BroadcastHashJoin [cast(ss_ticket_number#48 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#49, sr_item_sk#50], Inner, BuildRight + : :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_ticket_number#48, ss_quantity#44, ss_net_paid#46] + : : +- *(17) Filter ((((((((isnotnull(ss_net_paid#46) && isnotnull(ss_net_profit#51)) && isnotnull(ss_quantity#44)) && (ss_net_profit#51 > 1.00)) && (ss_net_paid#46 > 0.00)) && (ss_quantity#44 > 0)) && isnotnull(ss_ticket_number#48)) && isnotnull(ss_item_sk#42)) && isnotnull(ss_sold_date_sk#47)) + : : +- *(17) FileScan parquet default.store_sales[ss_sold_date_sk#47,ss_item_sk#42,ss_ticket_number#48,ss_quantity#44,ss_net_paid#46,ss_net_profit#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_net_paid), IsNotNull(ss_net_profit), IsNotNull(ss_quantity), GreaterThan(ss_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(sr_item_sk#50)) && isnotnull(sr_ticket_number#49)) + : +- *(15) FileScan parquet default.store_returns[sr_item_sk#50,sr_ticket_number#49,sr_return_quantity#43,sr_return_amt#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_item_sk), IsNotNull(..., ReadSchema: struct + +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt new file mode 100644 index 000000000..238eb448c --- /dev/null +++ b/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt @@ -0,0 +1,117 @@ +TakeOrderedAndProject [channel,currency_rank,item,return_rank,return_ratio] + WholeStageCodegen + HashAggregate [channel,currency_rank,item,return_rank,return_ratio] + InputAdapter + Exchange [channel,currency_rank,item,return_rank,return_ratio] #1 + WholeStageCodegen + HashAggregate [channel,currency_rank,item,return_rank,return_ratio] + InputAdapter + Union + WholeStageCodegen + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen + Sort [return_ratio] + InputAdapter + Exchange #2 + WholeStageCodegen + HashAggregate [sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),ws_item_sk] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen + HashAggregate [sum,sum,sum,sum,sum,sum,sum,sum,wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [ws_item_sk,ws_net_paid,ws_order_number,ws_quantity,ws_sold_date_sk] + Filter [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] + Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] + Filter [wr_item_sk,wr_order_number,wr_return_amt] + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + WholeStageCodegen + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] + InputAdapter + Window [return_ratio] + WholeStageCodegen + Sort [return_ratio] + Project [currency_rank,item,return_ratio] + InputAdapter + Window [currency_ratio] + WholeStageCodegen + Sort [currency_ratio] + InputAdapter + Exchange #6 + WholeStageCodegen + HashAggregate [cs_item_sk,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [cs_item_sk] #7 + WholeStageCodegen + HashAggregate [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity,cs_sold_date_sk] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_item_sk,cs_net_paid,cs_order_number,cs_quantity,cs_sold_date_sk] + Filter [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] + Filter [cr_item_sk,cr_order_number,cr_return_amount] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #5 + WholeStageCodegen + Project [currency_rank,item,return_rank,return_ratio] + Filter [currency_rank,return_rank] + InputAdapter + Window [return_ratio] + WholeStageCodegen + Sort [return_ratio] + Project [currency_rank,item,return_ratio] + InputAdapter + Window [currency_ratio] + WholeStageCodegen + Sort [currency_ratio] + InputAdapter + Exchange #9 + WholeStageCodegen + HashAggregate [ss_item_sk,sum,sum,sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00))] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00))] + InputAdapter + Exchange [ss_item_sk] #10 + WholeStageCodegen + HashAggregate [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_item_sk,ss_net_paid,ss_quantity,ss_sold_date_sk,ss_ticket_number] + Filter [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen + Project [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] + Filter [sr_item_sk,sr_return_amt,sr_ticket_number] + Scan parquet default.store_returns [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #5 diff --git a/src/test/resources/tpcds/q49.sql b/src/test/resources/tpcds/q49.sql new file mode 100644 index 000000000..9568d8b92 --- /dev/null +++ b/src/test/resources/tpcds/q49.sql @@ -0,0 +1,126 @@ +SELECT + 'web' AS channel, + web.item, + web.return_ratio, + web.return_rank, + web.currency_rank +FROM ( + SELECT + item, + return_ratio, + currency_ratio, + rank() + OVER ( + ORDER BY return_ratio) AS return_rank, + rank() + OVER ( + ORDER BY currency_ratio) AS currency_rank + FROM + (SELECT + ws.ws_item_sk AS item, + (cast(sum(coalesce(wr.wr_return_quantity, 0)) AS DECIMAL(15, 4)) / + cast(sum(coalesce(ws.ws_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio, + (cast(sum(coalesce(wr.wr_return_amt, 0)) AS DECIMAL(15, 4)) / + cast(sum(coalesce(ws.ws_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio + FROM + web_sales ws LEFT OUTER JOIN web_returns wr + ON (ws.ws_order_number = wr.wr_order_number AND + ws.ws_item_sk = wr.wr_item_sk) + , date_dim + WHERE + wr.wr_return_amt > 10000 + AND ws.ws_net_profit > 1 + AND ws.ws_net_paid > 0 + AND ws.ws_quantity > 0 + AND ws_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 12 + GROUP BY ws.ws_item_sk + ) in_web + ) web +WHERE (web.return_rank <= 10 OR web.currency_rank <= 10) +UNION +SELECT + 'catalog' AS channel, + catalog.item, + catalog.return_ratio, + catalog.return_rank, + catalog.currency_rank +FROM ( + SELECT + item, + return_ratio, + currency_ratio, + rank() + OVER ( + ORDER BY return_ratio) AS return_rank, + rank() + OVER ( + ORDER BY currency_ratio) AS currency_rank + FROM + (SELECT + cs.cs_item_sk AS item, + (cast(sum(coalesce(cr.cr_return_quantity, 0)) AS DECIMAL(15, 4)) / + cast(sum(coalesce(cs.cs_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio, + (cast(sum(coalesce(cr.cr_return_amount, 0)) AS DECIMAL(15, 4)) / + cast(sum(coalesce(cs.cs_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio + FROM + catalog_sales cs LEFT OUTER JOIN catalog_returns cr + ON (cs.cs_order_number = cr.cr_order_number AND + cs.cs_item_sk = cr.cr_item_sk) + , date_dim + WHERE + cr.cr_return_amount > 10000 + AND cs.cs_net_profit > 1 + AND cs.cs_net_paid > 0 + AND cs.cs_quantity > 0 + AND cs_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 12 + GROUP BY cs.cs_item_sk + ) in_cat + ) catalog +WHERE (catalog.return_rank <= 10 OR catalog.currency_rank <= 10) +UNION +SELECT + 'store' AS channel, + store.item, + store.return_ratio, + store.return_rank, + store.currency_rank +FROM ( + SELECT + item, + return_ratio, + currency_ratio, + rank() + OVER ( + ORDER BY return_ratio) AS return_rank, + rank() + OVER ( + ORDER BY currency_ratio) AS currency_rank + FROM + (SELECT + sts.ss_item_sk AS item, + (cast(sum(coalesce(sr.sr_return_quantity, 0)) AS DECIMAL(15, 4)) / + cast(sum(coalesce(sts.ss_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio, + (cast(sum(coalesce(sr.sr_return_amt, 0)) AS DECIMAL(15, 4)) / + cast(sum(coalesce(sts.ss_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio + FROM + store_sales sts LEFT OUTER JOIN store_returns sr + ON (sts.ss_ticket_number = sr.sr_ticket_number AND sts.ss_item_sk = sr.sr_item_sk) + , date_dim + WHERE + sr.sr_return_amt > 10000 + AND sts.ss_net_profit > 1 + AND sts.ss_net_paid > 0 + AND sts.ss_quantity > 0 + AND ss_sold_date_sk = d_date_sk + AND d_year = 2001 + AND d_moy = 12 + GROUP BY sts.ss_item_sk + ) in_store + ) store +WHERE (store.return_rank <= 10 OR store.currency_rank <= 10) +ORDER BY 1, 4, 5 +LIMIT 100 From e48c71390b237cf1f6ac33abc1f8f0e70468cf61 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Fri, 12 Mar 2021 13:32:47 -0800 Subject: [PATCH 19/31] restructuring --- src/test/resources/tpcds/{ => queries}/q1.sql | 0 src/test/resources/tpcds/{ => queries}/q10.sql | 0 src/test/resources/tpcds/{ => queries}/q11.sql | 0 src/test/resources/tpcds/{ => queries}/q12.sql | 0 src/test/resources/tpcds/{ => queries}/q13.sql | 0 src/test/resources/tpcds/{ => queries}/q14a.sql | 0 src/test/resources/tpcds/{ => queries}/q14b.sql | 0 src/test/resources/tpcds/{ => queries}/q15.sql | 0 src/test/resources/tpcds/{ => queries}/q16.sql | 0 src/test/resources/tpcds/{ => queries}/q17.sql | 0 src/test/resources/tpcds/{ => queries}/q18.sql | 0 src/test/resources/tpcds/{ => queries}/q19.sql | 0 src/test/resources/tpcds/{ => queries}/q2.sql | 0 src/test/resources/tpcds/{ => queries}/q20.sql | 0 src/test/resources/tpcds/{ => queries}/q21.sql | 0 src/test/resources/tpcds/{ => queries}/q22.sql | 0 src/test/resources/tpcds/{ => queries}/q23a.sql | 0 src/test/resources/tpcds/{ => queries}/q23b.sql | 0 src/test/resources/tpcds/{ => queries}/q24a.sql | 0 src/test/resources/tpcds/{ => queries}/q24b.sql | 0 src/test/resources/tpcds/{ => queries}/q25.sql | 0 src/test/resources/tpcds/{ => queries}/q26.sql | 0 src/test/resources/tpcds/{ => queries}/q27.sql | 0 src/test/resources/tpcds/{ => queries}/q28.sql | 0 src/test/resources/tpcds/{ => queries}/q29.sql | 0 src/test/resources/tpcds/{ => queries}/q3.sql | 0 src/test/resources/tpcds/{ => queries}/q30.sql | 0 src/test/resources/tpcds/{ => queries}/q31.sql | 0 src/test/resources/tpcds/{ => queries}/q32.sql | 0 src/test/resources/tpcds/{ => queries}/q33.sql | 0 src/test/resources/tpcds/{ => queries}/q34.sql | 0 src/test/resources/tpcds/{ => queries}/q35.sql | 0 src/test/resources/tpcds/{ => queries}/q36.sql | 0 src/test/resources/tpcds/{ => queries}/q37.sql | 0 src/test/resources/tpcds/{ => queries}/q38.sql | 0 src/test/resources/tpcds/{ => queries}/q39a.sql | 0 src/test/resources/tpcds/{ => queries}/q39b.sql | 0 src/test/resources/tpcds/{ => queries}/q4.sql | 0 src/test/resources/tpcds/{ => queries}/q40.sql | 0 src/test/resources/tpcds/{ => queries}/q41.sql | 0 src/test/resources/tpcds/{ => queries}/q42.sql | 0 src/test/resources/tpcds/{ => queries}/q43.sql | 0 src/test/resources/tpcds/{ => queries}/q44.sql | 0 src/test/resources/tpcds/{ => queries}/q45.sql | 0 src/test/resources/tpcds/{ => queries}/q46.sql | 0 src/test/resources/tpcds/{ => queries}/q47.sql | 0 src/test/resources/tpcds/{ => queries}/q48.sql | 0 src/test/resources/tpcds/{ => queries}/q49.sql | 0 src/test/resources/tpcds/{ => queries}/q5.sql | 0 src/test/resources/tpcds/{ => queries}/q50.sql | 0 src/test/resources/tpcds/{ => queries}/q51.sql | 0 src/test/resources/tpcds/{ => queries}/q52.sql | 0 src/test/resources/tpcds/{ => queries}/q53.sql | 0 src/test/resources/tpcds/{ => queries}/q54.sql | 0 src/test/resources/tpcds/{ => queries}/q55.sql | 0 src/test/resources/tpcds/{ => queries}/q56.sql | 0 src/test/resources/tpcds/{ => queries}/q57.sql | 0 src/test/resources/tpcds/{ => queries}/q58.sql | 0 src/test/resources/tpcds/{ => queries}/q59.sql | 0 src/test/resources/tpcds/{ => queries}/q6.sql | 0 src/test/resources/tpcds/{ => queries}/q60.sql | 0 src/test/resources/tpcds/{ => queries}/q61.sql | 0 src/test/resources/tpcds/{ => queries}/q62.sql | 0 src/test/resources/tpcds/{ => queries}/q63.sql | 0 src/test/resources/tpcds/{ => queries}/q64.sql | 0 src/test/resources/tpcds/{ => queries}/q65.sql | 0 src/test/resources/tpcds/{ => queries}/q66.sql | 0 src/test/resources/tpcds/{ => queries}/q67.sql | 0 src/test/resources/tpcds/{ => queries}/q68.sql | 0 src/test/resources/tpcds/{ => queries}/q69.sql | 0 src/test/resources/tpcds/{ => queries}/q7.sql | 0 src/test/resources/tpcds/{ => queries}/q70.sql | 0 src/test/resources/tpcds/{ => queries}/q71.sql | 0 src/test/resources/tpcds/{ => queries}/q72.sql | 0 src/test/resources/tpcds/{ => queries}/q73.sql | 0 src/test/resources/tpcds/{ => queries}/q74.sql | 0 src/test/resources/tpcds/{ => queries}/q75.sql | 0 src/test/resources/tpcds/{ => queries}/q76.sql | 0 src/test/resources/tpcds/{ => queries}/q77.sql | 0 src/test/resources/tpcds/{ => queries}/q78.sql | 0 src/test/resources/tpcds/{ => queries}/q79.sql | 0 src/test/resources/tpcds/{ => queries}/q8.sql | 0 src/test/resources/tpcds/{ => queries}/q80.sql | 0 src/test/resources/tpcds/{ => queries}/q81.sql | 0 src/test/resources/tpcds/{ => queries}/q82.sql | 0 src/test/resources/tpcds/{ => queries}/q83.sql | 0 src/test/resources/tpcds/{ => queries}/q84.sql | 0 src/test/resources/tpcds/{ => queries}/q85.sql | 0 src/test/resources/tpcds/{ => queries}/q86.sql | 0 src/test/resources/tpcds/{ => queries}/q87.sql | 0 src/test/resources/tpcds/{ => queries}/q88.sql | 0 src/test/resources/tpcds/{ => queries}/q89.sql | 0 src/test/resources/tpcds/{ => queries}/q9.sql | 0 src/test/resources/tpcds/{ => queries}/q90.sql | 0 src/test/resources/tpcds/{ => queries}/q91.sql | 0 src/test/resources/tpcds/{ => queries}/q92.sql | 0 src/test/resources/tpcds/{ => queries}/q93.sql | 0 src/test/resources/tpcds/{ => queries}/q94.sql | 0 src/test/resources/tpcds/{ => queries}/q95.sql | 0 src/test/resources/tpcds/{ => queries}/q96.sql | 0 src/test/resources/tpcds/{ => queries}/q97.sql | 0 src/test/resources/tpcds/{ => queries}/q98.sql | 0 src/test/resources/tpcds/{ => queries}/q99.sql | 0 .../spark-2.4}/approved-plans-v1_4/q1/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q1/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q10/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q10/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q11/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q11/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q12/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q12/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q13/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q13/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q14a/explain.txt | 0 .../approved-plans-v1_4/q14a/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q14b/explain.txt | 0 .../approved-plans-v1_4/q14b/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q15/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q15/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q16/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q16/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q17/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q17/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q18/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q18/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q19/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q19/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q2/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q2/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q20/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q20/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q21/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q21/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q22/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q22/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q23a/explain.txt | 0 .../approved-plans-v1_4/q23a/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q23b/explain.txt | 0 .../approved-plans-v1_4/q23b/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q24a/explain.txt | 0 .../approved-plans-v1_4/q24a/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q24b/explain.txt | 0 .../approved-plans-v1_4/q24b/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q25/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q25/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q26/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q26/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q27/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q27/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q28/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q28/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q29/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q29/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q3/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q3/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q30/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q30/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q31/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q31/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q32/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q32/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q33/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q33/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q34/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q34/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q35/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q35/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q36/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q36/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q37/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q37/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q38/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q38/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q39a/explain.txt | 0 .../approved-plans-v1_4/q39a/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q39b/explain.txt | 0 .../approved-plans-v1_4/q39b/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q4/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q4/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q40/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q40/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q41/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q41/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q42/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q42/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q43/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q43/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q44/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q44/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q45/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q45/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q46/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q46/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q47/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q47/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q48/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q48/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q49/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q49/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q5/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q5/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q50/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q50/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q51/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q51/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q52/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q52/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q53/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q53/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q54/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q54/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q55/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q55/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q56/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q56/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q57/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q57/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q58/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q58/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q59/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q59/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q6/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q6/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q60/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q60/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q61/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q61/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q62/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q62/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q63/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q63/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q64/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q64/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q65/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q65/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q66/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q66/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q67/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q67/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q68/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q68/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q69/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q69/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q7/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q7/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q70/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q70/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q71/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q71/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q72/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q72/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q73/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q73/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q74/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q74/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q75/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q75/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q76/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q76/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q77/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q77/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q78/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q78/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q79/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q79/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q8/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q8/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q80/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q80/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q81/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q81/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q82/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q82/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q83/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q83/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q84/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q84/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q85/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q85/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q86/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q86/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q87/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q87/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q88/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q88/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q89/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q89/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q9/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q9/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q90/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q90/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q91/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q91/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q92/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q92/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q93/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q93/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q94/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q94/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q95/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q95/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q96/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q96/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q97/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q97/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q98/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q98/simplified.txt | 0 .../spark-2.4}/approved-plans-v1_4/q99/explain.txt | 0 .../spark-2.4}/approved-plans-v1_4/q99/simplified.txt | 0 .../hyperspace/goldstandard/PlanStabilitySuite.scala | 11 +++++++---- 310 files changed, 7 insertions(+), 4 deletions(-) rename src/test/resources/tpcds/{ => queries}/q1.sql (100%) rename src/test/resources/tpcds/{ => queries}/q10.sql (100%) rename src/test/resources/tpcds/{ => queries}/q11.sql (100%) rename src/test/resources/tpcds/{ => queries}/q12.sql (100%) rename src/test/resources/tpcds/{ => queries}/q13.sql (100%) rename src/test/resources/tpcds/{ => queries}/q14a.sql (100%) rename src/test/resources/tpcds/{ => queries}/q14b.sql (100%) rename src/test/resources/tpcds/{ => queries}/q15.sql (100%) rename src/test/resources/tpcds/{ => queries}/q16.sql (100%) rename src/test/resources/tpcds/{ => queries}/q17.sql (100%) rename src/test/resources/tpcds/{ => queries}/q18.sql (100%) rename src/test/resources/tpcds/{ => queries}/q19.sql (100%) rename src/test/resources/tpcds/{ => queries}/q2.sql (100%) rename src/test/resources/tpcds/{ => queries}/q20.sql (100%) rename src/test/resources/tpcds/{ => queries}/q21.sql (100%) rename src/test/resources/tpcds/{ => queries}/q22.sql (100%) rename src/test/resources/tpcds/{ => queries}/q23a.sql (100%) rename src/test/resources/tpcds/{ => queries}/q23b.sql (100%) rename src/test/resources/tpcds/{ => queries}/q24a.sql (100%) rename src/test/resources/tpcds/{ => queries}/q24b.sql (100%) rename src/test/resources/tpcds/{ => queries}/q25.sql (100%) rename src/test/resources/tpcds/{ => queries}/q26.sql (100%) rename src/test/resources/tpcds/{ => queries}/q27.sql (100%) rename src/test/resources/tpcds/{ => queries}/q28.sql (100%) rename src/test/resources/tpcds/{ => queries}/q29.sql (100%) rename src/test/resources/tpcds/{ => queries}/q3.sql (100%) rename src/test/resources/tpcds/{ => queries}/q30.sql (100%) rename src/test/resources/tpcds/{ => queries}/q31.sql (100%) rename src/test/resources/tpcds/{ => queries}/q32.sql (100%) rename src/test/resources/tpcds/{ => queries}/q33.sql (100%) rename src/test/resources/tpcds/{ => queries}/q34.sql (100%) rename src/test/resources/tpcds/{ => queries}/q35.sql (100%) rename src/test/resources/tpcds/{ => queries}/q36.sql (100%) rename src/test/resources/tpcds/{ => queries}/q37.sql (100%) rename src/test/resources/tpcds/{ => queries}/q38.sql (100%) rename src/test/resources/tpcds/{ => queries}/q39a.sql (100%) rename src/test/resources/tpcds/{ => queries}/q39b.sql (100%) rename src/test/resources/tpcds/{ => queries}/q4.sql (100%) rename src/test/resources/tpcds/{ => queries}/q40.sql (100%) rename src/test/resources/tpcds/{ => queries}/q41.sql (100%) rename src/test/resources/tpcds/{ => queries}/q42.sql (100%) rename src/test/resources/tpcds/{ => queries}/q43.sql (100%) rename src/test/resources/tpcds/{ => queries}/q44.sql (100%) rename src/test/resources/tpcds/{ => queries}/q45.sql (100%) rename src/test/resources/tpcds/{ => queries}/q46.sql (100%) rename src/test/resources/tpcds/{ => queries}/q47.sql (100%) rename src/test/resources/tpcds/{ => queries}/q48.sql (100%) rename src/test/resources/tpcds/{ => queries}/q49.sql (100%) rename src/test/resources/tpcds/{ => queries}/q5.sql (100%) rename src/test/resources/tpcds/{ => queries}/q50.sql (100%) rename src/test/resources/tpcds/{ => queries}/q51.sql (100%) rename src/test/resources/tpcds/{ => queries}/q52.sql (100%) rename src/test/resources/tpcds/{ => queries}/q53.sql (100%) rename src/test/resources/tpcds/{ => queries}/q54.sql (100%) rename src/test/resources/tpcds/{ => queries}/q55.sql (100%) rename src/test/resources/tpcds/{ => queries}/q56.sql (100%) rename src/test/resources/tpcds/{ => queries}/q57.sql (100%) rename src/test/resources/tpcds/{ => queries}/q58.sql (100%) rename src/test/resources/tpcds/{ => queries}/q59.sql (100%) rename src/test/resources/tpcds/{ => queries}/q6.sql (100%) rename src/test/resources/tpcds/{ => queries}/q60.sql (100%) rename src/test/resources/tpcds/{ => queries}/q61.sql (100%) rename src/test/resources/tpcds/{ => queries}/q62.sql (100%) rename src/test/resources/tpcds/{ => queries}/q63.sql (100%) rename src/test/resources/tpcds/{ => queries}/q64.sql (100%) rename src/test/resources/tpcds/{ => queries}/q65.sql (100%) rename src/test/resources/tpcds/{ => queries}/q66.sql (100%) rename src/test/resources/tpcds/{ => queries}/q67.sql (100%) rename src/test/resources/tpcds/{ => queries}/q68.sql (100%) rename src/test/resources/tpcds/{ => queries}/q69.sql (100%) rename src/test/resources/tpcds/{ => queries}/q7.sql (100%) rename src/test/resources/tpcds/{ => queries}/q70.sql (100%) rename src/test/resources/tpcds/{ => queries}/q71.sql (100%) rename src/test/resources/tpcds/{ => queries}/q72.sql (100%) rename src/test/resources/tpcds/{ => queries}/q73.sql (100%) rename src/test/resources/tpcds/{ => queries}/q74.sql (100%) rename src/test/resources/tpcds/{ => queries}/q75.sql (100%) rename src/test/resources/tpcds/{ => queries}/q76.sql (100%) rename src/test/resources/tpcds/{ => queries}/q77.sql (100%) rename src/test/resources/tpcds/{ => queries}/q78.sql (100%) rename src/test/resources/tpcds/{ => queries}/q79.sql (100%) rename src/test/resources/tpcds/{ => queries}/q8.sql (100%) rename src/test/resources/tpcds/{ => queries}/q80.sql (100%) rename src/test/resources/tpcds/{ => queries}/q81.sql (100%) rename src/test/resources/tpcds/{ => queries}/q82.sql (100%) rename src/test/resources/tpcds/{ => queries}/q83.sql (100%) rename src/test/resources/tpcds/{ => queries}/q84.sql (100%) rename src/test/resources/tpcds/{ => queries}/q85.sql (100%) rename src/test/resources/tpcds/{ => queries}/q86.sql (100%) rename src/test/resources/tpcds/{ => queries}/q87.sql (100%) rename src/test/resources/tpcds/{ => queries}/q88.sql (100%) rename src/test/resources/tpcds/{ => queries}/q89.sql (100%) rename src/test/resources/tpcds/{ => queries}/q9.sql (100%) rename src/test/resources/tpcds/{ => queries}/q90.sql (100%) rename src/test/resources/tpcds/{ => queries}/q91.sql (100%) rename src/test/resources/tpcds/{ => queries}/q92.sql (100%) rename src/test/resources/tpcds/{ => queries}/q93.sql (100%) rename src/test/resources/tpcds/{ => queries}/q94.sql (100%) rename src/test/resources/tpcds/{ => queries}/q95.sql (100%) rename src/test/resources/tpcds/{ => queries}/q96.sql (100%) rename src/test/resources/tpcds/{ => queries}/q97.sql (100%) rename src/test/resources/tpcds/{ => queries}/q98.sql (100%) rename src/test/resources/tpcds/{ => queries}/q99.sql (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q1/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q1/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q10/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q10/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q11/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q11/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q12/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q12/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q13/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q13/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q14a/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q14a/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q14b/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q14b/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q15/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q15/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q16/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q16/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q17/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q17/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q18/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q18/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q19/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q19/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q2/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q2/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q20/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q20/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q21/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q21/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q22/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q22/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q23a/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q23a/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q23b/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q23b/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q24a/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q24a/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q24b/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q24b/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q25/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q25/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q26/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q26/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q27/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q27/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q28/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q28/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q29/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q29/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q3/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q3/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q30/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q30/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q31/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q31/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q32/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q32/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q33/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q33/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q34/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q34/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q35/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q35/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q36/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q36/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q37/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q37/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q38/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q38/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q39a/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q39a/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q39b/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q39b/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q4/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q4/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q40/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q40/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q41/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q41/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q42/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q42/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q43/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q43/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q44/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q44/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q45/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q45/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q46/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q46/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q47/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q47/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q48/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q48/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q49/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q49/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q5/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q5/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q50/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q50/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q51/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q51/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q52/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q52/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q53/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q53/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q54/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q54/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q55/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q55/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q56/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q56/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q57/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q57/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q58/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q58/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q59/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q59/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q6/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q6/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q60/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q60/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q61/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q61/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q62/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q62/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q63/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q63/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q64/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q64/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q65/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q65/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q66/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q66/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q67/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q67/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q68/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q68/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q69/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q69/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q7/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q7/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q70/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q70/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q71/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q71/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q72/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q72/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q73/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q73/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q74/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q74/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q75/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q75/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q76/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q76/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q77/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q77/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q78/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q78/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q79/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q79/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q8/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q8/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q80/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q80/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q81/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q81/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q82/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q82/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q83/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q83/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q84/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q84/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q85/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q85/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q86/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q86/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q87/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q87/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q88/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q88/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q89/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q89/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q9/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q9/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q90/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q90/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q91/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q91/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q92/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q92/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q93/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q93/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q94/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q94/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q95/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q95/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q96/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q96/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q97/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q97/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q98/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q98/simplified.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q99/explain.txt (100%) rename src/test/resources/{tpcds-plan-stability => tpcds/spark-2.4}/approved-plans-v1_4/q99/simplified.txt (100%) diff --git a/src/test/resources/tpcds/q1.sql b/src/test/resources/tpcds/queries/q1.sql similarity index 100% rename from src/test/resources/tpcds/q1.sql rename to src/test/resources/tpcds/queries/q1.sql diff --git a/src/test/resources/tpcds/q10.sql b/src/test/resources/tpcds/queries/q10.sql similarity index 100% rename from src/test/resources/tpcds/q10.sql rename to src/test/resources/tpcds/queries/q10.sql diff --git a/src/test/resources/tpcds/q11.sql b/src/test/resources/tpcds/queries/q11.sql similarity index 100% rename from src/test/resources/tpcds/q11.sql rename to src/test/resources/tpcds/queries/q11.sql diff --git a/src/test/resources/tpcds/q12.sql b/src/test/resources/tpcds/queries/q12.sql similarity index 100% rename from src/test/resources/tpcds/q12.sql rename to src/test/resources/tpcds/queries/q12.sql diff --git a/src/test/resources/tpcds/q13.sql b/src/test/resources/tpcds/queries/q13.sql similarity index 100% rename from src/test/resources/tpcds/q13.sql rename to src/test/resources/tpcds/queries/q13.sql diff --git a/src/test/resources/tpcds/q14a.sql b/src/test/resources/tpcds/queries/q14a.sql similarity index 100% rename from src/test/resources/tpcds/q14a.sql rename to src/test/resources/tpcds/queries/q14a.sql diff --git a/src/test/resources/tpcds/q14b.sql b/src/test/resources/tpcds/queries/q14b.sql similarity index 100% rename from src/test/resources/tpcds/q14b.sql rename to src/test/resources/tpcds/queries/q14b.sql diff --git a/src/test/resources/tpcds/q15.sql b/src/test/resources/tpcds/queries/q15.sql similarity index 100% rename from src/test/resources/tpcds/q15.sql rename to src/test/resources/tpcds/queries/q15.sql diff --git a/src/test/resources/tpcds/q16.sql b/src/test/resources/tpcds/queries/q16.sql similarity index 100% rename from src/test/resources/tpcds/q16.sql rename to src/test/resources/tpcds/queries/q16.sql diff --git a/src/test/resources/tpcds/q17.sql b/src/test/resources/tpcds/queries/q17.sql similarity index 100% rename from src/test/resources/tpcds/q17.sql rename to src/test/resources/tpcds/queries/q17.sql diff --git a/src/test/resources/tpcds/q18.sql b/src/test/resources/tpcds/queries/q18.sql similarity index 100% rename from src/test/resources/tpcds/q18.sql rename to src/test/resources/tpcds/queries/q18.sql diff --git a/src/test/resources/tpcds/q19.sql b/src/test/resources/tpcds/queries/q19.sql similarity index 100% rename from src/test/resources/tpcds/q19.sql rename to src/test/resources/tpcds/queries/q19.sql diff --git a/src/test/resources/tpcds/q2.sql b/src/test/resources/tpcds/queries/q2.sql similarity index 100% rename from src/test/resources/tpcds/q2.sql rename to src/test/resources/tpcds/queries/q2.sql diff --git a/src/test/resources/tpcds/q20.sql b/src/test/resources/tpcds/queries/q20.sql similarity index 100% rename from src/test/resources/tpcds/q20.sql rename to src/test/resources/tpcds/queries/q20.sql diff --git a/src/test/resources/tpcds/q21.sql b/src/test/resources/tpcds/queries/q21.sql similarity index 100% rename from src/test/resources/tpcds/q21.sql rename to src/test/resources/tpcds/queries/q21.sql diff --git a/src/test/resources/tpcds/q22.sql b/src/test/resources/tpcds/queries/q22.sql similarity index 100% rename from src/test/resources/tpcds/q22.sql rename to src/test/resources/tpcds/queries/q22.sql diff --git a/src/test/resources/tpcds/q23a.sql b/src/test/resources/tpcds/queries/q23a.sql similarity index 100% rename from src/test/resources/tpcds/q23a.sql rename to src/test/resources/tpcds/queries/q23a.sql diff --git a/src/test/resources/tpcds/q23b.sql b/src/test/resources/tpcds/queries/q23b.sql similarity index 100% rename from src/test/resources/tpcds/q23b.sql rename to src/test/resources/tpcds/queries/q23b.sql diff --git a/src/test/resources/tpcds/q24a.sql b/src/test/resources/tpcds/queries/q24a.sql similarity index 100% rename from src/test/resources/tpcds/q24a.sql rename to src/test/resources/tpcds/queries/q24a.sql diff --git a/src/test/resources/tpcds/q24b.sql b/src/test/resources/tpcds/queries/q24b.sql similarity index 100% rename from src/test/resources/tpcds/q24b.sql rename to src/test/resources/tpcds/queries/q24b.sql diff --git a/src/test/resources/tpcds/q25.sql b/src/test/resources/tpcds/queries/q25.sql similarity index 100% rename from src/test/resources/tpcds/q25.sql rename to src/test/resources/tpcds/queries/q25.sql diff --git a/src/test/resources/tpcds/q26.sql b/src/test/resources/tpcds/queries/q26.sql similarity index 100% rename from src/test/resources/tpcds/q26.sql rename to src/test/resources/tpcds/queries/q26.sql diff --git a/src/test/resources/tpcds/q27.sql b/src/test/resources/tpcds/queries/q27.sql similarity index 100% rename from src/test/resources/tpcds/q27.sql rename to src/test/resources/tpcds/queries/q27.sql diff --git a/src/test/resources/tpcds/q28.sql b/src/test/resources/tpcds/queries/q28.sql similarity index 100% rename from src/test/resources/tpcds/q28.sql rename to src/test/resources/tpcds/queries/q28.sql diff --git a/src/test/resources/tpcds/q29.sql b/src/test/resources/tpcds/queries/q29.sql similarity index 100% rename from src/test/resources/tpcds/q29.sql rename to src/test/resources/tpcds/queries/q29.sql diff --git a/src/test/resources/tpcds/q3.sql b/src/test/resources/tpcds/queries/q3.sql similarity index 100% rename from src/test/resources/tpcds/q3.sql rename to src/test/resources/tpcds/queries/q3.sql diff --git a/src/test/resources/tpcds/q30.sql b/src/test/resources/tpcds/queries/q30.sql similarity index 100% rename from src/test/resources/tpcds/q30.sql rename to src/test/resources/tpcds/queries/q30.sql diff --git a/src/test/resources/tpcds/q31.sql b/src/test/resources/tpcds/queries/q31.sql similarity index 100% rename from src/test/resources/tpcds/q31.sql rename to src/test/resources/tpcds/queries/q31.sql diff --git a/src/test/resources/tpcds/q32.sql b/src/test/resources/tpcds/queries/q32.sql similarity index 100% rename from src/test/resources/tpcds/q32.sql rename to src/test/resources/tpcds/queries/q32.sql diff --git a/src/test/resources/tpcds/q33.sql b/src/test/resources/tpcds/queries/q33.sql similarity index 100% rename from src/test/resources/tpcds/q33.sql rename to src/test/resources/tpcds/queries/q33.sql diff --git a/src/test/resources/tpcds/q34.sql b/src/test/resources/tpcds/queries/q34.sql similarity index 100% rename from src/test/resources/tpcds/q34.sql rename to src/test/resources/tpcds/queries/q34.sql diff --git a/src/test/resources/tpcds/q35.sql b/src/test/resources/tpcds/queries/q35.sql similarity index 100% rename from src/test/resources/tpcds/q35.sql rename to src/test/resources/tpcds/queries/q35.sql diff --git a/src/test/resources/tpcds/q36.sql b/src/test/resources/tpcds/queries/q36.sql similarity index 100% rename from src/test/resources/tpcds/q36.sql rename to src/test/resources/tpcds/queries/q36.sql diff --git a/src/test/resources/tpcds/q37.sql b/src/test/resources/tpcds/queries/q37.sql similarity index 100% rename from src/test/resources/tpcds/q37.sql rename to src/test/resources/tpcds/queries/q37.sql diff --git a/src/test/resources/tpcds/q38.sql b/src/test/resources/tpcds/queries/q38.sql similarity index 100% rename from src/test/resources/tpcds/q38.sql rename to src/test/resources/tpcds/queries/q38.sql diff --git a/src/test/resources/tpcds/q39a.sql b/src/test/resources/tpcds/queries/q39a.sql similarity index 100% rename from src/test/resources/tpcds/q39a.sql rename to src/test/resources/tpcds/queries/q39a.sql diff --git a/src/test/resources/tpcds/q39b.sql b/src/test/resources/tpcds/queries/q39b.sql similarity index 100% rename from src/test/resources/tpcds/q39b.sql rename to src/test/resources/tpcds/queries/q39b.sql diff --git a/src/test/resources/tpcds/q4.sql b/src/test/resources/tpcds/queries/q4.sql similarity index 100% rename from src/test/resources/tpcds/q4.sql rename to src/test/resources/tpcds/queries/q4.sql diff --git a/src/test/resources/tpcds/q40.sql b/src/test/resources/tpcds/queries/q40.sql similarity index 100% rename from src/test/resources/tpcds/q40.sql rename to src/test/resources/tpcds/queries/q40.sql diff --git a/src/test/resources/tpcds/q41.sql b/src/test/resources/tpcds/queries/q41.sql similarity index 100% rename from src/test/resources/tpcds/q41.sql rename to src/test/resources/tpcds/queries/q41.sql diff --git a/src/test/resources/tpcds/q42.sql b/src/test/resources/tpcds/queries/q42.sql similarity index 100% rename from src/test/resources/tpcds/q42.sql rename to src/test/resources/tpcds/queries/q42.sql diff --git a/src/test/resources/tpcds/q43.sql b/src/test/resources/tpcds/queries/q43.sql similarity index 100% rename from src/test/resources/tpcds/q43.sql rename to src/test/resources/tpcds/queries/q43.sql diff --git a/src/test/resources/tpcds/q44.sql b/src/test/resources/tpcds/queries/q44.sql similarity index 100% rename from src/test/resources/tpcds/q44.sql rename to src/test/resources/tpcds/queries/q44.sql diff --git a/src/test/resources/tpcds/q45.sql b/src/test/resources/tpcds/queries/q45.sql similarity index 100% rename from src/test/resources/tpcds/q45.sql rename to src/test/resources/tpcds/queries/q45.sql diff --git a/src/test/resources/tpcds/q46.sql b/src/test/resources/tpcds/queries/q46.sql similarity index 100% rename from src/test/resources/tpcds/q46.sql rename to src/test/resources/tpcds/queries/q46.sql diff --git a/src/test/resources/tpcds/q47.sql b/src/test/resources/tpcds/queries/q47.sql similarity index 100% rename from src/test/resources/tpcds/q47.sql rename to src/test/resources/tpcds/queries/q47.sql diff --git a/src/test/resources/tpcds/q48.sql b/src/test/resources/tpcds/queries/q48.sql similarity index 100% rename from src/test/resources/tpcds/q48.sql rename to src/test/resources/tpcds/queries/q48.sql diff --git a/src/test/resources/tpcds/q49.sql b/src/test/resources/tpcds/queries/q49.sql similarity index 100% rename from src/test/resources/tpcds/q49.sql rename to src/test/resources/tpcds/queries/q49.sql diff --git a/src/test/resources/tpcds/q5.sql b/src/test/resources/tpcds/queries/q5.sql similarity index 100% rename from src/test/resources/tpcds/q5.sql rename to src/test/resources/tpcds/queries/q5.sql diff --git a/src/test/resources/tpcds/q50.sql b/src/test/resources/tpcds/queries/q50.sql similarity index 100% rename from src/test/resources/tpcds/q50.sql rename to src/test/resources/tpcds/queries/q50.sql diff --git a/src/test/resources/tpcds/q51.sql b/src/test/resources/tpcds/queries/q51.sql similarity index 100% rename from src/test/resources/tpcds/q51.sql rename to src/test/resources/tpcds/queries/q51.sql diff --git a/src/test/resources/tpcds/q52.sql b/src/test/resources/tpcds/queries/q52.sql similarity index 100% rename from src/test/resources/tpcds/q52.sql rename to src/test/resources/tpcds/queries/q52.sql diff --git a/src/test/resources/tpcds/q53.sql b/src/test/resources/tpcds/queries/q53.sql similarity index 100% rename from src/test/resources/tpcds/q53.sql rename to src/test/resources/tpcds/queries/q53.sql diff --git a/src/test/resources/tpcds/q54.sql b/src/test/resources/tpcds/queries/q54.sql similarity index 100% rename from src/test/resources/tpcds/q54.sql rename to src/test/resources/tpcds/queries/q54.sql diff --git a/src/test/resources/tpcds/q55.sql b/src/test/resources/tpcds/queries/q55.sql similarity index 100% rename from src/test/resources/tpcds/q55.sql rename to src/test/resources/tpcds/queries/q55.sql diff --git a/src/test/resources/tpcds/q56.sql b/src/test/resources/tpcds/queries/q56.sql similarity index 100% rename from src/test/resources/tpcds/q56.sql rename to src/test/resources/tpcds/queries/q56.sql diff --git a/src/test/resources/tpcds/q57.sql b/src/test/resources/tpcds/queries/q57.sql similarity index 100% rename from src/test/resources/tpcds/q57.sql rename to src/test/resources/tpcds/queries/q57.sql diff --git a/src/test/resources/tpcds/q58.sql b/src/test/resources/tpcds/queries/q58.sql similarity index 100% rename from src/test/resources/tpcds/q58.sql rename to src/test/resources/tpcds/queries/q58.sql diff --git a/src/test/resources/tpcds/q59.sql b/src/test/resources/tpcds/queries/q59.sql similarity index 100% rename from src/test/resources/tpcds/q59.sql rename to src/test/resources/tpcds/queries/q59.sql diff --git a/src/test/resources/tpcds/q6.sql b/src/test/resources/tpcds/queries/q6.sql similarity index 100% rename from src/test/resources/tpcds/q6.sql rename to src/test/resources/tpcds/queries/q6.sql diff --git a/src/test/resources/tpcds/q60.sql b/src/test/resources/tpcds/queries/q60.sql similarity index 100% rename from src/test/resources/tpcds/q60.sql rename to src/test/resources/tpcds/queries/q60.sql diff --git a/src/test/resources/tpcds/q61.sql b/src/test/resources/tpcds/queries/q61.sql similarity index 100% rename from src/test/resources/tpcds/q61.sql rename to src/test/resources/tpcds/queries/q61.sql diff --git a/src/test/resources/tpcds/q62.sql b/src/test/resources/tpcds/queries/q62.sql similarity index 100% rename from src/test/resources/tpcds/q62.sql rename to src/test/resources/tpcds/queries/q62.sql diff --git a/src/test/resources/tpcds/q63.sql b/src/test/resources/tpcds/queries/q63.sql similarity index 100% rename from src/test/resources/tpcds/q63.sql rename to src/test/resources/tpcds/queries/q63.sql diff --git a/src/test/resources/tpcds/q64.sql b/src/test/resources/tpcds/queries/q64.sql similarity index 100% rename from src/test/resources/tpcds/q64.sql rename to src/test/resources/tpcds/queries/q64.sql diff --git a/src/test/resources/tpcds/q65.sql b/src/test/resources/tpcds/queries/q65.sql similarity index 100% rename from src/test/resources/tpcds/q65.sql rename to src/test/resources/tpcds/queries/q65.sql diff --git a/src/test/resources/tpcds/q66.sql b/src/test/resources/tpcds/queries/q66.sql similarity index 100% rename from src/test/resources/tpcds/q66.sql rename to src/test/resources/tpcds/queries/q66.sql diff --git a/src/test/resources/tpcds/q67.sql b/src/test/resources/tpcds/queries/q67.sql similarity index 100% rename from src/test/resources/tpcds/q67.sql rename to src/test/resources/tpcds/queries/q67.sql diff --git a/src/test/resources/tpcds/q68.sql b/src/test/resources/tpcds/queries/q68.sql similarity index 100% rename from src/test/resources/tpcds/q68.sql rename to src/test/resources/tpcds/queries/q68.sql diff --git a/src/test/resources/tpcds/q69.sql b/src/test/resources/tpcds/queries/q69.sql similarity index 100% rename from src/test/resources/tpcds/q69.sql rename to src/test/resources/tpcds/queries/q69.sql diff --git a/src/test/resources/tpcds/q7.sql b/src/test/resources/tpcds/queries/q7.sql similarity index 100% rename from src/test/resources/tpcds/q7.sql rename to src/test/resources/tpcds/queries/q7.sql diff --git a/src/test/resources/tpcds/q70.sql b/src/test/resources/tpcds/queries/q70.sql similarity index 100% rename from src/test/resources/tpcds/q70.sql rename to src/test/resources/tpcds/queries/q70.sql diff --git a/src/test/resources/tpcds/q71.sql b/src/test/resources/tpcds/queries/q71.sql similarity index 100% rename from src/test/resources/tpcds/q71.sql rename to src/test/resources/tpcds/queries/q71.sql diff --git a/src/test/resources/tpcds/q72.sql b/src/test/resources/tpcds/queries/q72.sql similarity index 100% rename from src/test/resources/tpcds/q72.sql rename to src/test/resources/tpcds/queries/q72.sql diff --git a/src/test/resources/tpcds/q73.sql b/src/test/resources/tpcds/queries/q73.sql similarity index 100% rename from src/test/resources/tpcds/q73.sql rename to src/test/resources/tpcds/queries/q73.sql diff --git a/src/test/resources/tpcds/q74.sql b/src/test/resources/tpcds/queries/q74.sql similarity index 100% rename from src/test/resources/tpcds/q74.sql rename to src/test/resources/tpcds/queries/q74.sql diff --git a/src/test/resources/tpcds/q75.sql b/src/test/resources/tpcds/queries/q75.sql similarity index 100% rename from src/test/resources/tpcds/q75.sql rename to src/test/resources/tpcds/queries/q75.sql diff --git a/src/test/resources/tpcds/q76.sql b/src/test/resources/tpcds/queries/q76.sql similarity index 100% rename from src/test/resources/tpcds/q76.sql rename to src/test/resources/tpcds/queries/q76.sql diff --git a/src/test/resources/tpcds/q77.sql b/src/test/resources/tpcds/queries/q77.sql similarity index 100% rename from src/test/resources/tpcds/q77.sql rename to src/test/resources/tpcds/queries/q77.sql diff --git a/src/test/resources/tpcds/q78.sql b/src/test/resources/tpcds/queries/q78.sql similarity index 100% rename from src/test/resources/tpcds/q78.sql rename to src/test/resources/tpcds/queries/q78.sql diff --git a/src/test/resources/tpcds/q79.sql b/src/test/resources/tpcds/queries/q79.sql similarity index 100% rename from src/test/resources/tpcds/q79.sql rename to src/test/resources/tpcds/queries/q79.sql diff --git a/src/test/resources/tpcds/q8.sql b/src/test/resources/tpcds/queries/q8.sql similarity index 100% rename from src/test/resources/tpcds/q8.sql rename to src/test/resources/tpcds/queries/q8.sql diff --git a/src/test/resources/tpcds/q80.sql b/src/test/resources/tpcds/queries/q80.sql similarity index 100% rename from src/test/resources/tpcds/q80.sql rename to src/test/resources/tpcds/queries/q80.sql diff --git a/src/test/resources/tpcds/q81.sql b/src/test/resources/tpcds/queries/q81.sql similarity index 100% rename from src/test/resources/tpcds/q81.sql rename to src/test/resources/tpcds/queries/q81.sql diff --git a/src/test/resources/tpcds/q82.sql b/src/test/resources/tpcds/queries/q82.sql similarity index 100% rename from src/test/resources/tpcds/q82.sql rename to src/test/resources/tpcds/queries/q82.sql diff --git a/src/test/resources/tpcds/q83.sql b/src/test/resources/tpcds/queries/q83.sql similarity index 100% rename from src/test/resources/tpcds/q83.sql rename to src/test/resources/tpcds/queries/q83.sql diff --git a/src/test/resources/tpcds/q84.sql b/src/test/resources/tpcds/queries/q84.sql similarity index 100% rename from src/test/resources/tpcds/q84.sql rename to src/test/resources/tpcds/queries/q84.sql diff --git a/src/test/resources/tpcds/q85.sql b/src/test/resources/tpcds/queries/q85.sql similarity index 100% rename from src/test/resources/tpcds/q85.sql rename to src/test/resources/tpcds/queries/q85.sql diff --git a/src/test/resources/tpcds/q86.sql b/src/test/resources/tpcds/queries/q86.sql similarity index 100% rename from src/test/resources/tpcds/q86.sql rename to src/test/resources/tpcds/queries/q86.sql diff --git a/src/test/resources/tpcds/q87.sql b/src/test/resources/tpcds/queries/q87.sql similarity index 100% rename from src/test/resources/tpcds/q87.sql rename to src/test/resources/tpcds/queries/q87.sql diff --git a/src/test/resources/tpcds/q88.sql b/src/test/resources/tpcds/queries/q88.sql similarity index 100% rename from src/test/resources/tpcds/q88.sql rename to src/test/resources/tpcds/queries/q88.sql diff --git a/src/test/resources/tpcds/q89.sql b/src/test/resources/tpcds/queries/q89.sql similarity index 100% rename from src/test/resources/tpcds/q89.sql rename to src/test/resources/tpcds/queries/q89.sql diff --git a/src/test/resources/tpcds/q9.sql b/src/test/resources/tpcds/queries/q9.sql similarity index 100% rename from src/test/resources/tpcds/q9.sql rename to src/test/resources/tpcds/queries/q9.sql diff --git a/src/test/resources/tpcds/q90.sql b/src/test/resources/tpcds/queries/q90.sql similarity index 100% rename from src/test/resources/tpcds/q90.sql rename to src/test/resources/tpcds/queries/q90.sql diff --git a/src/test/resources/tpcds/q91.sql b/src/test/resources/tpcds/queries/q91.sql similarity index 100% rename from src/test/resources/tpcds/q91.sql rename to src/test/resources/tpcds/queries/q91.sql diff --git a/src/test/resources/tpcds/q92.sql b/src/test/resources/tpcds/queries/q92.sql similarity index 100% rename from src/test/resources/tpcds/q92.sql rename to src/test/resources/tpcds/queries/q92.sql diff --git a/src/test/resources/tpcds/q93.sql b/src/test/resources/tpcds/queries/q93.sql similarity index 100% rename from src/test/resources/tpcds/q93.sql rename to src/test/resources/tpcds/queries/q93.sql diff --git a/src/test/resources/tpcds/q94.sql b/src/test/resources/tpcds/queries/q94.sql similarity index 100% rename from src/test/resources/tpcds/q94.sql rename to src/test/resources/tpcds/queries/q94.sql diff --git a/src/test/resources/tpcds/q95.sql b/src/test/resources/tpcds/queries/q95.sql similarity index 100% rename from src/test/resources/tpcds/q95.sql rename to src/test/resources/tpcds/queries/q95.sql diff --git a/src/test/resources/tpcds/q96.sql b/src/test/resources/tpcds/queries/q96.sql similarity index 100% rename from src/test/resources/tpcds/q96.sql rename to src/test/resources/tpcds/queries/q96.sql diff --git a/src/test/resources/tpcds/q97.sql b/src/test/resources/tpcds/queries/q97.sql similarity index 100% rename from src/test/resources/tpcds/q97.sql rename to src/test/resources/tpcds/queries/q97.sql diff --git a/src/test/resources/tpcds/q98.sql b/src/test/resources/tpcds/queries/q98.sql similarity index 100% rename from src/test/resources/tpcds/q98.sql rename to src/test/resources/tpcds/queries/q98.sql diff --git a/src/test/resources/tpcds/q99.sql b/src/test/resources/tpcds/queries/q99.sql similarity index 100% rename from src/test/resources/tpcds/q99.sql rename to src/test/resources/tpcds/queries/q99.sql diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q11/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q11/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q11/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q11/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23b/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23b/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23b/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23b/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q35/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q35/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q35/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q35/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q74/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q78/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/simplified.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/explain.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/explain.txt diff --git a/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/simplified.txt similarity index 100% rename from src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt rename to src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/simplified.txt diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala index 6b1e4204e..e9227e097 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala @@ -92,7 +92,7 @@ trait PlanStabilitySuite extends TPCDSBase with Logging { protected val baseResourcePath = { // use the same way as `SQLQueryTestSuite` to get the resource path - java.nio.file.Paths.get("src", "test", "resources", "tpcds-plan-stability").toFile + java.nio.file.Paths.get("src", "test", "resources", "tpcds").toFile } private val referenceRegex = "#\\d+".r @@ -274,13 +274,16 @@ trait PlanStabilitySuite extends TPCDSBase with Logging { } } -class TPCDSV1_4_PlanStabilitySuite extends PlanStabilitySuite { +/** + * Spark Only Suite. + */ +class TPCDSV1_4_SparkPlanStabilitySuite extends PlanStabilitySuite { override val goldenFilePath: String = - new File(baseResourcePath, s"approved-plans-v1_4").getAbsolutePath + new File(baseResourcePath, "spark-2.4/approved-plans-v1_4").getAbsolutePath tpcdsQueries.foreach { q => test(s"check simplified (tpcds-v1.4/$q)") { - testQuery("tpcds", q) + testQuery("tpcds/queries", q) } } } From 6c5a2f751762ddab793aad8bd03c844a6d9871e0 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Fri, 12 Mar 2021 19:46:42 -0800 Subject: [PATCH 20/31] cleanup before rebase --- src/test/resources/tpcds/queries/q1.sql | 19 -- src/test/resources/tpcds/queries/q10.sql | 57 ---- src/test/resources/tpcds/queries/q11.sql | 68 ----- src/test/resources/tpcds/queries/q12.sql | 22 -- src/test/resources/tpcds/queries/q13.sql | 49 ---- src/test/resources/tpcds/queries/q14a.sql | 120 -------- src/test/resources/tpcds/queries/q14b.sql | 95 ------- src/test/resources/tpcds/queries/q15.sql | 15 - src/test/resources/tpcds/queries/q16.sql | 23 -- src/test/resources/tpcds/queries/q17.sql | 33 --- src/test/resources/tpcds/queries/q18.sql | 28 -- src/test/resources/tpcds/queries/q19.sql | 19 -- src/test/resources/tpcds/queries/q2.sql | 81 ------ src/test/resources/tpcds/queries/q20.sql | 18 -- src/test/resources/tpcds/queries/q21.sql | 25 -- src/test/resources/tpcds/queries/q22.sql | 14 - src/test/resources/tpcds/queries/q23a.sql | 53 ---- src/test/resources/tpcds/queries/q23b.sql | 68 ----- src/test/resources/tpcds/queries/q24a.sql | 34 --- src/test/resources/tpcds/queries/q24b.sql | 34 --- src/test/resources/tpcds/queries/q25.sql | 33 --- src/test/resources/tpcds/queries/q26.sql | 19 -- src/test/resources/tpcds/queries/q27.sql | 21 -- src/test/resources/tpcds/queries/q28.sql | 56 ---- src/test/resources/tpcds/queries/q29.sql | 32 --- src/test/resources/tpcds/queries/q3.sql | 13 - src/test/resources/tpcds/queries/q30.sql | 35 --- src/test/resources/tpcds/queries/q31.sql | 60 ---- src/test/resources/tpcds/queries/q32.sql | 15 - src/test/resources/tpcds/queries/q33.sql | 65 ----- src/test/resources/tpcds/queries/q34.sql | 32 --- src/test/resources/tpcds/queries/q35.sql | 46 --- src/test/resources/tpcds/queries/q36.sql | 26 -- src/test/resources/tpcds/queries/q37.sql | 15 - src/test/resources/tpcds/queries/q38.sql | 30 -- src/test/resources/tpcds/queries/q39a.sql | 47 --- src/test/resources/tpcds/queries/q39b.sql | 48 ---- src/test/resources/tpcds/queries/q4.sql | 120 -------- src/test/resources/tpcds/queries/q40.sql | 25 -- src/test/resources/tpcds/queries/q41.sql | 49 ---- src/test/resources/tpcds/queries/q42.sql | 18 -- src/test/resources/tpcds/queries/q43.sql | 33 --- src/test/resources/tpcds/queries/q44.sql | 46 --- src/test/resources/tpcds/queries/q45.sql | 21 -- src/test/resources/tpcds/queries/q46.sql | 32 --- src/test/resources/tpcds/queries/q47.sql | 63 ----- src/test/resources/tpcds/queries/q48.sql | 63 ----- src/test/resources/tpcds/queries/q49.sql | 126 --------- src/test/resources/tpcds/queries/q5.sql | 131 --------- src/test/resources/tpcds/queries/q50.sql | 47 --- src/test/resources/tpcds/queries/q51.sql | 55 ---- src/test/resources/tpcds/queries/q52.sql | 14 - src/test/resources/tpcds/queries/q53.sql | 30 -- src/test/resources/tpcds/queries/q54.sql | 61 ---- src/test/resources/tpcds/queries/q55.sql | 13 - src/test/resources/tpcds/queries/q56.sql | 65 ----- src/test/resources/tpcds/queries/q57.sql | 56 ---- src/test/resources/tpcds/queries/q58.sql | 59 ---- src/test/resources/tpcds/queries/q59.sql | 75 ----- src/test/resources/tpcds/queries/q6.sql | 21 -- src/test/resources/tpcds/queries/q60.sql | 62 ---- src/test/resources/tpcds/queries/q61.sql | 33 --- src/test/resources/tpcds/queries/q62.sql | 35 --- src/test/resources/tpcds/queries/q63.sql | 31 -- src/test/resources/tpcds/queries/q64.sql | 92 ------ src/test/resources/tpcds/queries/q65.sql | 33 --- src/test/resources/tpcds/queries/q66.sql | 240 ---------------- src/test/resources/tpcds/queries/q67.sql | 38 --- src/test/resources/tpcds/queries/q68.sql | 34 --- src/test/resources/tpcds/queries/q69.sql | 38 --- src/test/resources/tpcds/queries/q7.sql | 19 -- src/test/resources/tpcds/queries/q70.sql | 38 --- src/test/resources/tpcds/queries/q71.sql | 44 --- src/test/resources/tpcds/queries/q72.sql | 33 --- src/test/resources/tpcds/queries/q73.sql | 30 -- src/test/resources/tpcds/queries/q74.sql | 58 ---- src/test/resources/tpcds/queries/q75.sql | 76 ----- src/test/resources/tpcds/queries/q76.sql | 47 --- src/test/resources/tpcds/queries/q77.sql | 100 ------- src/test/resources/tpcds/queries/q78.sql | 64 ----- src/test/resources/tpcds/queries/q79.sql | 27 -- src/test/resources/tpcds/queries/q8.sql | 87 ------ src/test/resources/tpcds/queries/q80.sql | 94 ------ src/test/resources/tpcds/queries/q81.sql | 38 --- src/test/resources/tpcds/queries/q82.sql | 15 - src/test/resources/tpcds/queries/q83.sql | 56 ---- src/test/resources/tpcds/queries/q84.sql | 19 -- src/test/resources/tpcds/queries/q85.sql | 82 ------ src/test/resources/tpcds/queries/q86.sql | 24 -- src/test/resources/tpcds/queries/q87.sql | 28 -- src/test/resources/tpcds/queries/q88.sql | 122 -------- src/test/resources/tpcds/queries/q89.sql | 30 -- src/test/resources/tpcds/queries/q9.sql | 48 ---- src/test/resources/tpcds/queries/q90.sql | 19 -- src/test/resources/tpcds/queries/q91.sql | 23 -- src/test/resources/tpcds/queries/q92.sql | 16 -- src/test/resources/tpcds/queries/q93.sql | 19 -- src/test/resources/tpcds/queries/q94.sql | 23 -- src/test/resources/tpcds/queries/q95.sql | 29 -- src/test/resources/tpcds/queries/q96.sql | 11 - src/test/resources/tpcds/queries/q97.sql | 30 -- src/test/resources/tpcds/queries/q98.sql | 21 -- src/test/resources/tpcds/queries/q99.sql | 34 --- .../approved-plans-v1_4/q1/explain.txt | 43 --- .../approved-plans-v1_4/q1/simplified.txt | 58 ---- .../approved-plans-v1_4/q10/explain.txt | 49 ---- .../approved-plans-v1_4/q10/simplified.txt | 65 ----- .../approved-plans-v1_4/q11/explain.txt | 81 ------ .../approved-plans-v1_4/q11/simplified.txt | 109 ------- .../approved-plans-v1_4/q12/explain.txt | 24 -- .../approved-plans-v1_4/q12/simplified.txt | 34 --- .../approved-plans-v1_4/q13/explain.txt | 37 --- .../approved-plans-v1_4/q13/simplified.txt | 49 ---- .../approved-plans-v1_4/q14a/explain.txt | 192 ------------- .../approved-plans-v1_4/q14a/simplified.txt | 267 ------------------ .../approved-plans-v1_4/q14b/explain.txt | 166 ----------- .../approved-plans-v1_4/q14b/simplified.txt | 226 --------------- .../approved-plans-v1_4/q15/explain.txt | 26 -- .../approved-plans-v1_4/q15/simplified.txt | 34 --- .../approved-plans-v1_4/q16/explain.txt | 37 --- .../approved-plans-v1_4/q16/simplified.txt | 51 ---- .../approved-plans-v1_4/q17/explain.txt | 50 ---- .../approved-plans-v1_4/q17/simplified.txt | 66 ----- .../approved-plans-v1_4/q18/explain.txt | 45 --- .../approved-plans-v1_4/q18/simplified.txt | 59 ---- .../approved-plans-v1_4/q19/explain.txt | 38 --- .../approved-plans-v1_4/q19/simplified.txt | 50 ---- .../approved-plans-v1_4/q2/explain.txt | 36 --- .../approved-plans-v1_4/q2/simplified.txt | 52 ---- .../approved-plans-v1_4/q20/explain.txt | 24 -- .../approved-plans-v1_4/q20/simplified.txt | 34 --- .../approved-plans-v1_4/q21/explain.txt | 27 -- .../approved-plans-v1_4/q21/simplified.txt | 35 --- .../approved-plans-v1_4/q22/explain.txt | 27 -- .../approved-plans-v1_4/q22/simplified.txt | 35 --- .../approved-plans-v1_4/q23a/explain.txt | 89 ------ .../approved-plans-v1_4/q23a/simplified.txt | 122 -------- .../approved-plans-v1_4/q23b/explain.txt | 101 ------- .../approved-plans-v1_4/q23b/simplified.txt | 138 --------- .../approved-plans-v1_4/q24a/explain.txt | 82 ------ .../approved-plans-v1_4/q24a/simplified.txt | 111 -------- .../approved-plans-v1_4/q24b/explain.txt | 82 ------ .../approved-plans-v1_4/q24b/simplified.txt | 111 -------- .../approved-plans-v1_4/q25/explain.txt | 50 ---- .../approved-plans-v1_4/q25/simplified.txt | 66 ----- .../approved-plans-v1_4/q26/explain.txt | 32 --- .../approved-plans-v1_4/q26/simplified.txt | 42 --- .../approved-plans-v1_4/q27/explain.txt | 33 --- .../approved-plans-v1_4/q27/simplified.txt | 43 --- .../approved-plans-v1_4/q28/explain.txt | 66 ----- .../approved-plans-v1_4/q28/simplified.txt | 95 ------- .../approved-plans-v1_4/q29/explain.txt | 50 ---- .../approved-plans-v1_4/q29/simplified.txt | 66 ----- .../approved-plans-v1_4/q3/explain.txt | 20 -- .../approved-plans-v1_4/q3/simplified.txt | 26 -- .../approved-plans-v1_4/q30/explain.txt | 52 ---- .../approved-plans-v1_4/q30/simplified.txt | 70 ----- .../approved-plans-v1_4/q31/explain.txt | 100 ------- .../approved-plans-v1_4/q31/simplified.txt | 140 --------- .../approved-plans-v1_4/q32/explain.txt | 30 -- .../approved-plans-v1_4/q32/simplified.txt | 39 --- .../approved-plans-v1_4/q33/explain.txt | 65 ----- .../approved-plans-v1_4/q33/simplified.txt | 91 ------ .../approved-plans-v1_4/q34/explain.txt | 34 --- .../approved-plans-v1_4/q34/simplified.txt | 46 --- .../approved-plans-v1_4/q35/explain.txt | 49 ---- .../approved-plans-v1_4/q35/simplified.txt | 65 ----- .../approved-plans-v1_4/q36/explain.txt | 31 -- .../approved-plans-v1_4/q36/simplified.txt | 43 --- .../approved-plans-v1_4/q37/explain.txt | 26 -- .../approved-plans-v1_4/q37/simplified.txt | 34 --- .../approved-plans-v1_4/q38/explain.txt | 55 ---- .../approved-plans-v1_4/q38/simplified.txt | 75 ----- .../approved-plans-v1_4/q39a/explain.txt | 51 ---- .../approved-plans-v1_4/q39a/simplified.txt | 69 ----- .../approved-plans-v1_4/q39b/explain.txt | 51 ---- .../approved-plans-v1_4/q39b/simplified.txt | 69 ----- .../approved-plans-v1_4/q4/explain.txt | 124 -------- .../approved-plans-v1_4/q4/simplified.txt | 165 ----------- .../approved-plans-v1_4/q40/explain.txt | 32 --- .../approved-plans-v1_4/q40/simplified.txt | 42 --- .../approved-plans-v1_4/q41/explain.txt | 19 -- .../approved-plans-v1_4/q41/simplified.txt | 25 -- .../approved-plans-v1_4/q42/explain.txt | 20 -- .../approved-plans-v1_4/q42/simplified.txt | 26 -- .../approved-plans-v1_4/q43/explain.txt | 20 -- .../approved-plans-v1_4/q43/simplified.txt | 26 -- .../approved-plans-v1_4/q44/explain.txt | 50 ---- .../approved-plans-v1_4/q44/simplified.txt | 72 ----- .../approved-plans-v1_4/q45/explain.txt | 39 --- .../approved-plans-v1_4/q45/simplified.txt | 51 ---- .../approved-plans-v1_4/q46/explain.txt | 41 --- .../approved-plans-v1_4/q46/simplified.txt | 54 ---- .../approved-plans-v1_4/q47/explain.txt | 51 ---- .../approved-plans-v1_4/q47/simplified.txt | 77 ----- .../approved-plans-v1_4/q48/explain.txt | 31 -- .../approved-plans-v1_4/q48/simplified.txt | 41 --- .../approved-plans-v1_4/q49/explain.txt | 77 ----- .../approved-plans-v1_4/q49/simplified.txt | 117 -------- .../approved-plans-v1_4/q5/explain.txt | 76 ----- .../approved-plans-v1_4/q5/simplified.txt | 110 -------- .../approved-plans-v1_4/q50/explain.txt | 32 --- .../approved-plans-v1_4/q50/simplified.txt | 42 --- .../approved-plans-v1_4/q51/explain.txt | 41 --- .../approved-plans-v1_4/q51/simplified.txt | 67 ----- .../approved-plans-v1_4/q52/explain.txt | 20 -- .../approved-plans-v1_4/q52/simplified.txt | 26 -- .../approved-plans-v1_4/q53/explain.txt | 31 -- .../approved-plans-v1_4/q53/simplified.txt | 43 --- .../approved-plans-v1_4/q54/explain.txt | 88 ------ .../approved-plans-v1_4/q54/simplified.txt | 123 -------- .../approved-plans-v1_4/q55/explain.txt | 20 -- .../approved-plans-v1_4/q55/simplified.txt | 26 -- .../approved-plans-v1_4/q56/explain.txt | 65 ----- .../approved-plans-v1_4/q56/simplified.txt | 91 ------ .../approved-plans-v1_4/q57/explain.txt | 51 ---- .../approved-plans-v1_4/q57/simplified.txt | 77 ----- .../approved-plans-v1_4/q58/explain.txt | 101 ------- .../approved-plans-v1_4/q58/simplified.txt | 133 --------- .../approved-plans-v1_4/q59/explain.txt | 43 --- .../approved-plans-v1_4/q59/simplified.txt | 58 ---- .../approved-plans-v1_4/q6/explain.txt | 58 ---- .../approved-plans-v1_4/q6/simplified.txt | 78 ----- .../approved-plans-v1_4/q60/explain.txt | 65 ----- .../approved-plans-v1_4/q60/simplified.txt | 91 ------ .../approved-plans-v1_4/q61/explain.txt | 68 ----- .../approved-plans-v1_4/q61/simplified.txt | 92 ------ .../approved-plans-v1_4/q62/explain.txt | 32 --- .../approved-plans-v1_4/q62/simplified.txt | 42 --- .../approved-plans-v1_4/q63/explain.txt | 31 -- .../approved-plans-v1_4/q63/simplified.txt | 43 --- .../approved-plans-v1_4/q64/explain.txt | 170 ----------- .../approved-plans-v1_4/q64/simplified.txt | 229 --------------- .../approved-plans-v1_4/q65/explain.txt | 42 --- .../approved-plans-v1_4/q65/simplified.txt | 57 ---- .../approved-plans-v1_4/q66/explain.txt | 54 ---- .../approved-plans-v1_4/q66/simplified.txt | 75 ----- .../approved-plans-v1_4/q67/explain.txt | 31 -- .../approved-plans-v1_4/q67/simplified.txt | 43 --- .../approved-plans-v1_4/q68/explain.txt | 41 --- .../approved-plans-v1_4/q68/simplified.txt | 54 ---- .../approved-plans-v1_4/q69/explain.txt | 48 ---- .../approved-plans-v1_4/q69/simplified.txt | 64 ----- .../approved-plans-v1_4/q7/explain.txt | 32 --- .../approved-plans-v1_4/q7/simplified.txt | 42 --- .../approved-plans-v1_4/q70/explain.txt | 46 --- .../approved-plans-v1_4/q70/simplified.txt | 65 ----- .../approved-plans-v1_4/q71/explain.txt | 40 --- .../approved-plans-v1_4/q71/simplified.txt | 56 ---- .../approved-plans-v1_4/q72/explain.txt | 68 ----- .../approved-plans-v1_4/q72/simplified.txt | 90 ------ .../approved-plans-v1_4/q73/explain.txt | 34 --- .../approved-plans-v1_4/q73/simplified.txt | 46 --- .../approved-plans-v1_4/q74/explain.txt | 80 ------ .../approved-plans-v1_4/q74/simplified.txt | 108 ------- .../approved-plans-v1_4/q75/explain.txt | 117 -------- .../approved-plans-v1_4/q75/simplified.txt | 167 ----------- .../approved-plans-v1_4/q76/explain.txt | 39 --- .../approved-plans-v1_4/q76/simplified.txt | 53 ---- .../approved-plans-v1_4/q77/explain.txt | 100 ------- .../approved-plans-v1_4/q77/simplified.txt | 141 --------- .../approved-plans-v1_4/q78/explain.txt | 61 ---- .../approved-plans-v1_4/q78/simplified.txt | 81 ------ .../approved-plans-v1_4/q79/explain.txt | 32 --- .../approved-plans-v1_4/q79/simplified.txt | 42 --- .../approved-plans-v1_4/q8/explain.txt | 45 --- .../approved-plans-v1_4/q8/simplified.txt | 61 ---- .../approved-plans-v1_4/q80/explain.txt | 97 ------- .../approved-plans-v1_4/q80/simplified.txt | 133 --------- .../approved-plans-v1_4/q81/explain.txt | 52 ---- .../approved-plans-v1_4/q81/simplified.txt | 70 ----- .../approved-plans-v1_4/q82/explain.txt | 26 -- .../approved-plans-v1_4/q82/simplified.txt | 34 --- .../approved-plans-v1_4/q83/explain.txt | 69 ----- .../approved-plans-v1_4/q83/simplified.txt | 94 ------ .../approved-plans-v1_4/q84/explain.txt | 35 --- .../approved-plans-v1_4/q84/simplified.txt | 45 --- .../approved-plans-v1_4/q85/explain.txt | 50 ---- .../approved-plans-v1_4/q85/simplified.txt | 66 ----- .../approved-plans-v1_4/q86/explain.txt | 25 -- .../approved-plans-v1_4/q86/simplified.txt | 35 --- .../approved-plans-v1_4/q87/explain.txt | 54 ---- .../approved-plans-v1_4/q87/simplified.txt | 74 ----- .../approved-plans-v1_4/q88/explain.txt | 165 ----------- .../approved-plans-v1_4/q88/simplified.txt | 222 --------------- .../approved-plans-v1_4/q89/explain.txt | 31 -- .../approved-plans-v1_4/q89/simplified.txt | 43 --- .../approved-plans-v1_4/q9/explain.txt | 109 ------- .../approved-plans-v1_4/q9/simplified.txt | 154 ---------- .../approved-plans-v1_4/q90/explain.txt | 47 --- .../approved-plans-v1_4/q90/simplified.txt | 64 ----- .../approved-plans-v1_4/q91/explain.txt | 45 --- .../approved-plans-v1_4/q91/simplified.txt | 61 ---- .../approved-plans-v1_4/q92/explain.txt | 33 --- .../approved-plans-v1_4/q92/simplified.txt | 44 --- .../approved-plans-v1_4/q93/explain.txt | 18 -- .../approved-plans-v1_4/q93/simplified.txt | 24 -- .../approved-plans-v1_4/q94/explain.txt | 37 --- .../approved-plans-v1_4/q94/simplified.txt | 51 ---- .../approved-plans-v1_4/q95/explain.txt | 54 ---- .../approved-plans-v1_4/q95/simplified.txt | 73 ----- .../approved-plans-v1_4/q96/explain.txt | 26 -- .../approved-plans-v1_4/q96/simplified.txt | 34 --- .../approved-plans-v1_4/q97/explain.txt | 32 --- .../approved-plans-v1_4/q97/simplified.txt | 48 ---- .../approved-plans-v1_4/q98/explain.txt | 26 -- .../approved-plans-v1_4/q98/simplified.txt | 38 --- .../approved-plans-v1_4/q99/explain.txt | 32 --- .../approved-plans-v1_4/q99/simplified.txt | 42 --- 309 files changed, 17797 deletions(-) delete mode 100644 src/test/resources/tpcds/queries/q1.sql delete mode 100644 src/test/resources/tpcds/queries/q10.sql delete mode 100644 src/test/resources/tpcds/queries/q11.sql delete mode 100644 src/test/resources/tpcds/queries/q12.sql delete mode 100644 src/test/resources/tpcds/queries/q13.sql delete mode 100644 src/test/resources/tpcds/queries/q14a.sql delete mode 100644 src/test/resources/tpcds/queries/q14b.sql delete mode 100644 src/test/resources/tpcds/queries/q15.sql delete mode 100644 src/test/resources/tpcds/queries/q16.sql delete mode 100644 src/test/resources/tpcds/queries/q17.sql delete mode 100644 src/test/resources/tpcds/queries/q18.sql delete mode 100644 src/test/resources/tpcds/queries/q19.sql delete mode 100644 src/test/resources/tpcds/queries/q2.sql delete mode 100644 src/test/resources/tpcds/queries/q20.sql delete mode 100644 src/test/resources/tpcds/queries/q21.sql delete mode 100644 src/test/resources/tpcds/queries/q22.sql delete mode 100644 src/test/resources/tpcds/queries/q23a.sql delete mode 100644 src/test/resources/tpcds/queries/q23b.sql delete mode 100644 src/test/resources/tpcds/queries/q24a.sql delete mode 100644 src/test/resources/tpcds/queries/q24b.sql delete mode 100644 src/test/resources/tpcds/queries/q25.sql delete mode 100644 src/test/resources/tpcds/queries/q26.sql delete mode 100644 src/test/resources/tpcds/queries/q27.sql delete mode 100644 src/test/resources/tpcds/queries/q28.sql delete mode 100644 src/test/resources/tpcds/queries/q29.sql delete mode 100644 src/test/resources/tpcds/queries/q3.sql delete mode 100644 src/test/resources/tpcds/queries/q30.sql delete mode 100644 src/test/resources/tpcds/queries/q31.sql delete mode 100644 src/test/resources/tpcds/queries/q32.sql delete mode 100644 src/test/resources/tpcds/queries/q33.sql delete mode 100644 src/test/resources/tpcds/queries/q34.sql delete mode 100644 src/test/resources/tpcds/queries/q35.sql delete mode 100644 src/test/resources/tpcds/queries/q36.sql delete mode 100644 src/test/resources/tpcds/queries/q37.sql delete mode 100644 src/test/resources/tpcds/queries/q38.sql delete mode 100644 src/test/resources/tpcds/queries/q39a.sql delete mode 100644 src/test/resources/tpcds/queries/q39b.sql delete mode 100644 src/test/resources/tpcds/queries/q4.sql delete mode 100644 src/test/resources/tpcds/queries/q40.sql delete mode 100644 src/test/resources/tpcds/queries/q41.sql delete mode 100644 src/test/resources/tpcds/queries/q42.sql delete mode 100644 src/test/resources/tpcds/queries/q43.sql delete mode 100644 src/test/resources/tpcds/queries/q44.sql delete mode 100644 src/test/resources/tpcds/queries/q45.sql delete mode 100644 src/test/resources/tpcds/queries/q46.sql delete mode 100644 src/test/resources/tpcds/queries/q47.sql delete mode 100644 src/test/resources/tpcds/queries/q48.sql delete mode 100644 src/test/resources/tpcds/queries/q49.sql delete mode 100644 src/test/resources/tpcds/queries/q5.sql delete mode 100644 src/test/resources/tpcds/queries/q50.sql delete mode 100644 src/test/resources/tpcds/queries/q51.sql delete mode 100644 src/test/resources/tpcds/queries/q52.sql delete mode 100644 src/test/resources/tpcds/queries/q53.sql delete mode 100644 src/test/resources/tpcds/queries/q54.sql delete mode 100644 src/test/resources/tpcds/queries/q55.sql delete mode 100644 src/test/resources/tpcds/queries/q56.sql delete mode 100644 src/test/resources/tpcds/queries/q57.sql delete mode 100644 src/test/resources/tpcds/queries/q58.sql delete mode 100644 src/test/resources/tpcds/queries/q59.sql delete mode 100644 src/test/resources/tpcds/queries/q6.sql delete mode 100644 src/test/resources/tpcds/queries/q60.sql delete mode 100644 src/test/resources/tpcds/queries/q61.sql delete mode 100644 src/test/resources/tpcds/queries/q62.sql delete mode 100644 src/test/resources/tpcds/queries/q63.sql delete mode 100644 src/test/resources/tpcds/queries/q64.sql delete mode 100644 src/test/resources/tpcds/queries/q65.sql delete mode 100644 src/test/resources/tpcds/queries/q66.sql delete mode 100644 src/test/resources/tpcds/queries/q67.sql delete mode 100644 src/test/resources/tpcds/queries/q68.sql delete mode 100644 src/test/resources/tpcds/queries/q69.sql delete mode 100644 src/test/resources/tpcds/queries/q7.sql delete mode 100644 src/test/resources/tpcds/queries/q70.sql delete mode 100644 src/test/resources/tpcds/queries/q71.sql delete mode 100644 src/test/resources/tpcds/queries/q72.sql delete mode 100644 src/test/resources/tpcds/queries/q73.sql delete mode 100644 src/test/resources/tpcds/queries/q74.sql delete mode 100644 src/test/resources/tpcds/queries/q75.sql delete mode 100644 src/test/resources/tpcds/queries/q76.sql delete mode 100644 src/test/resources/tpcds/queries/q77.sql delete mode 100644 src/test/resources/tpcds/queries/q78.sql delete mode 100644 src/test/resources/tpcds/queries/q79.sql delete mode 100644 src/test/resources/tpcds/queries/q8.sql delete mode 100644 src/test/resources/tpcds/queries/q80.sql delete mode 100644 src/test/resources/tpcds/queries/q81.sql delete mode 100644 src/test/resources/tpcds/queries/q82.sql delete mode 100644 src/test/resources/tpcds/queries/q83.sql delete mode 100644 src/test/resources/tpcds/queries/q84.sql delete mode 100644 src/test/resources/tpcds/queries/q85.sql delete mode 100644 src/test/resources/tpcds/queries/q86.sql delete mode 100644 src/test/resources/tpcds/queries/q87.sql delete mode 100644 src/test/resources/tpcds/queries/q88.sql delete mode 100644 src/test/resources/tpcds/queries/q89.sql delete mode 100644 src/test/resources/tpcds/queries/q9.sql delete mode 100644 src/test/resources/tpcds/queries/q90.sql delete mode 100644 src/test/resources/tpcds/queries/q91.sql delete mode 100644 src/test/resources/tpcds/queries/q92.sql delete mode 100644 src/test/resources/tpcds/queries/q93.sql delete mode 100644 src/test/resources/tpcds/queries/q94.sql delete mode 100644 src/test/resources/tpcds/queries/q95.sql delete mode 100644 src/test/resources/tpcds/queries/q96.sql delete mode 100644 src/test/resources/tpcds/queries/q97.sql delete mode 100644 src/test/resources/tpcds/queries/q98.sql delete mode 100644 src/test/resources/tpcds/queries/q99.sql delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q11/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q11/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23b/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23b/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q35/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q35/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/simplified.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/simplified.txt diff --git a/src/test/resources/tpcds/queries/q1.sql b/src/test/resources/tpcds/queries/q1.sql deleted file mode 100644 index 4d20faad8..000000000 --- a/src/test/resources/tpcds/queries/q1.sql +++ /dev/null @@ -1,19 +0,0 @@ -WITH customer_total_return AS -( SELECT - sr_customer_sk AS ctr_customer_sk, - sr_store_sk AS ctr_store_sk, - sum(sr_return_amt) AS ctr_total_return - FROM store_returns, date_dim - WHERE sr_returned_date_sk = d_date_sk AND d_year = 2000 - GROUP BY sr_customer_sk, sr_store_sk) -SELECT c_customer_id -FROM customer_total_return ctr1, store, customer -WHERE ctr1.ctr_total_return > - (SELECT avg(ctr_total_return) * 1.2 - FROM customer_total_return ctr2 - WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk) - AND s_store_sk = ctr1.ctr_store_sk - AND s_state = 'TN' - AND ctr1.ctr_customer_sk = c_customer_sk -ORDER BY c_customer_id -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q10.sql b/src/test/resources/tpcds/queries/q10.sql deleted file mode 100644 index 5500e1aea..000000000 --- a/src/test/resources/tpcds/queries/q10.sql +++ /dev/null @@ -1,57 +0,0 @@ -SELECT - cd_gender, - cd_marital_status, - cd_education_status, - count(*) cnt1, - cd_purchase_estimate, - count(*) cnt2, - cd_credit_rating, - count(*) cnt3, - cd_dep_count, - count(*) cnt4, - cd_dep_employed_count, - count(*) cnt5, - cd_dep_college_count, - count(*) cnt6 -FROM - customer c, customer_address ca, customer_demographics -WHERE - c.c_current_addr_sk = ca.ca_address_sk AND - ca_county IN ('Rush County', 'Toole County', 'Jefferson County', - 'Dona Ana County', 'La Porte County') AND - cd_demo_sk = c.c_current_cdemo_sk AND - exists(SELECT * - FROM store_sales, date_dim - WHERE c.c_customer_sk = ss_customer_sk AND - ss_sold_date_sk = d_date_sk AND - d_year = 2002 AND - d_moy BETWEEN 1 AND 1 + 3) AND - (exists(SELECT * - FROM web_sales, date_dim - WHERE c.c_customer_sk = ws_bill_customer_sk AND - ws_sold_date_sk = d_date_sk AND - d_year = 2002 AND - d_moy BETWEEN 1 AND 1 + 3) OR - exists(SELECT * - FROM catalog_sales, date_dim - WHERE c.c_customer_sk = cs_ship_customer_sk AND - cs_sold_date_sk = d_date_sk AND - d_year = 2002 AND - d_moy BETWEEN 1 AND 1 + 3)) -GROUP BY cd_gender, - cd_marital_status, - cd_education_status, - cd_purchase_estimate, - cd_credit_rating, - cd_dep_count, - cd_dep_employed_count, - cd_dep_college_count -ORDER BY cd_gender, - cd_marital_status, - cd_education_status, - cd_purchase_estimate, - cd_credit_rating, - cd_dep_count, - cd_dep_employed_count, - cd_dep_college_count -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q11.sql b/src/test/resources/tpcds/queries/q11.sql deleted file mode 100644 index 3618fb14f..000000000 --- a/src/test/resources/tpcds/queries/q11.sql +++ /dev/null @@ -1,68 +0,0 @@ -WITH year_total AS ( - SELECT - c_customer_id customer_id, - c_first_name customer_first_name, - c_last_name customer_last_name, - c_preferred_cust_flag customer_preferred_cust_flag, - c_birth_country customer_birth_country, - c_login customer_login, - c_email_address customer_email_address, - d_year dyear, - sum(ss_ext_list_price - ss_ext_discount_amt) year_total, - 's' sale_type - FROM customer, store_sales, date_dim - WHERE c_customer_sk = ss_customer_sk - AND ss_sold_date_sk = d_date_sk - GROUP BY c_customer_id - , c_first_name - , c_last_name - , d_year - , c_preferred_cust_flag - , c_birth_country - , c_login - , c_email_address - , d_year - UNION ALL - SELECT - c_customer_id customer_id, - c_first_name customer_first_name, - c_last_name customer_last_name, - c_preferred_cust_flag customer_preferred_cust_flag, - c_birth_country customer_birth_country, - c_login customer_login, - c_email_address customer_email_address, - d_year dyear, - sum(ws_ext_list_price - ws_ext_discount_amt) year_total, - 'w' sale_type - FROM customer, web_sales, date_dim - WHERE c_customer_sk = ws_bill_customer_sk - AND ws_sold_date_sk = d_date_sk - GROUP BY - c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_country, - c_login, c_email_address, d_year) -SELECT t_s_secyear.customer_preferred_cust_flag -FROM year_total t_s_firstyear - , year_total t_s_secyear - , year_total t_w_firstyear - , year_total t_w_secyear -WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id - AND t_s_firstyear.customer_id = t_w_secyear.customer_id - AND t_s_firstyear.customer_id = t_w_firstyear.customer_id - AND t_s_firstyear.sale_type = 's' - AND t_w_firstyear.sale_type = 'w' - AND t_s_secyear.sale_type = 's' - AND t_w_secyear.sale_type = 'w' - AND t_s_firstyear.dyear = 2001 - AND t_s_secyear.dyear = 2001 + 1 - AND t_w_firstyear.dyear = 2001 - AND t_w_secyear.dyear = 2001 + 1 - AND t_s_firstyear.year_total > 0 - AND t_w_firstyear.year_total > 0 - AND CASE WHEN t_w_firstyear.year_total > 0 - THEN t_w_secyear.year_total / t_w_firstyear.year_total - ELSE NULL END - > CASE WHEN t_s_firstyear.year_total > 0 - THEN t_s_secyear.year_total / t_s_firstyear.year_total - ELSE NULL END -ORDER BY t_s_secyear.customer_preferred_cust_flag -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q12.sql b/src/test/resources/tpcds/queries/q12.sql deleted file mode 100644 index 0382737f5..000000000 --- a/src/test/resources/tpcds/queries/q12.sql +++ /dev/null @@ -1,22 +0,0 @@ -SELECT - i_item_desc, - i_category, - i_class, - i_current_price, - sum(ws_ext_sales_price) AS itemrevenue, - sum(ws_ext_sales_price) * 100 / sum(sum(ws_ext_sales_price)) - OVER - (PARTITION BY i_class) AS revenueratio -FROM - web_sales, item, date_dim -WHERE - ws_item_sk = i_item_sk - AND i_category IN ('Sports', 'Books', 'Home') - AND ws_sold_date_sk = d_date_sk - AND d_date BETWEEN cast('1999-02-22' AS DATE) - AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) -GROUP BY - i_item_id, i_item_desc, i_category, i_class, i_current_price -ORDER BY - i_category, i_class, i_item_id, i_item_desc, revenueratio -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q13.sql b/src/test/resources/tpcds/queries/q13.sql deleted file mode 100644 index 32dc9e260..000000000 --- a/src/test/resources/tpcds/queries/q13.sql +++ /dev/null @@ -1,49 +0,0 @@ -SELECT - avg(ss_quantity), - avg(ss_ext_sales_price), - avg(ss_ext_wholesale_cost), - sum(ss_ext_wholesale_cost) -FROM store_sales - , store - , customer_demographics - , household_demographics - , customer_address - , date_dim -WHERE s_store_sk = ss_store_sk - AND ss_sold_date_sk = d_date_sk AND d_year = 2001 - AND ((ss_hdemo_sk = hd_demo_sk - AND cd_demo_sk = ss_cdemo_sk - AND cd_marital_status = 'M' - AND cd_education_status = 'Advanced Degree' - AND ss_sales_price BETWEEN 100.00 AND 150.00 - AND hd_dep_count = 3 -) OR - (ss_hdemo_sk = hd_demo_sk - AND cd_demo_sk = ss_cdemo_sk - AND cd_marital_status = 'S' - AND cd_education_status = 'College' - AND ss_sales_price BETWEEN 50.00 AND 100.00 - AND hd_dep_count = 1 - ) OR - (ss_hdemo_sk = hd_demo_sk - AND cd_demo_sk = ss_cdemo_sk - AND cd_marital_status = 'W' - AND cd_education_status = '2 yr Degree' - AND ss_sales_price BETWEEN 150.00 AND 200.00 - AND hd_dep_count = 1 - )) - AND ((ss_addr_sk = ca_address_sk - AND ca_country = 'United States' - AND ca_state IN ('TX', 'OH', 'TX') - AND ss_net_profit BETWEEN 100 AND 200 -) OR - (ss_addr_sk = ca_address_sk - AND ca_country = 'United States' - AND ca_state IN ('OR', 'NM', 'KY') - AND ss_net_profit BETWEEN 150 AND 300 - ) OR - (ss_addr_sk = ca_address_sk - AND ca_country = 'United States' - AND ca_state IN ('VA', 'TX', 'MS') - AND ss_net_profit BETWEEN 50 AND 250 - )) diff --git a/src/test/resources/tpcds/queries/q14a.sql b/src/test/resources/tpcds/queries/q14a.sql deleted file mode 100644 index 954ddd41b..000000000 --- a/src/test/resources/tpcds/queries/q14a.sql +++ /dev/null @@ -1,120 +0,0 @@ -WITH cross_items AS -(SELECT i_item_sk ss_item_sk - FROM item, - (SELECT - iss.i_brand_id brand_id, - iss.i_class_id class_id, - iss.i_category_id category_id - FROM store_sales, item iss, date_dim d1 - WHERE ss_item_sk = iss.i_item_sk - AND ss_sold_date_sk = d1.d_date_sk - AND d1.d_year BETWEEN 1999 AND 1999 + 2 - INTERSECT - SELECT - ics.i_brand_id, - ics.i_class_id, - ics.i_category_id - FROM catalog_sales, item ics, date_dim d2 - WHERE cs_item_sk = ics.i_item_sk - AND cs_sold_date_sk = d2.d_date_sk - AND d2.d_year BETWEEN 1999 AND 1999 + 2 - INTERSECT - SELECT - iws.i_brand_id, - iws.i_class_id, - iws.i_category_id - FROM web_sales, item iws, date_dim d3 - WHERE ws_item_sk = iws.i_item_sk - AND ws_sold_date_sk = d3.d_date_sk - AND d3.d_year BETWEEN 1999 AND 1999 + 2) x - WHERE i_brand_id = brand_id - AND i_class_id = class_id - AND i_category_id = category_id -), - avg_sales AS - (SELECT avg(quantity * list_price) average_sales - FROM ( - SELECT - ss_quantity quantity, - ss_list_price list_price - FROM store_sales, date_dim - WHERE ss_sold_date_sk = d_date_sk - AND d_year BETWEEN 1999 AND 2001 - UNION ALL - SELECT - cs_quantity quantity, - cs_list_price list_price - FROM catalog_sales, date_dim - WHERE cs_sold_date_sk = d_date_sk - AND d_year BETWEEN 1999 AND 1999 + 2 - UNION ALL - SELECT - ws_quantity quantity, - ws_list_price list_price - FROM web_sales, date_dim - WHERE ws_sold_date_sk = d_date_sk - AND d_year BETWEEN 1999 AND 1999 + 2) x) -SELECT - channel, - i_brand_id, - i_class_id, - i_category_id, - sum(sales), - sum(number_sales) -FROM ( - SELECT - 'store' channel, - i_brand_id, - i_class_id, - i_category_id, - sum(ss_quantity * ss_list_price) sales, - count(*) number_sales - FROM store_sales, item, date_dim - WHERE ss_item_sk IN (SELECT ss_item_sk - FROM cross_items) - AND ss_item_sk = i_item_sk - AND ss_sold_date_sk = d_date_sk - AND d_year = 1999 + 2 - AND d_moy = 11 - GROUP BY i_brand_id, i_class_id, i_category_id - HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales - FROM avg_sales) - UNION ALL - SELECT - 'catalog' channel, - i_brand_id, - i_class_id, - i_category_id, - sum(cs_quantity * cs_list_price) sales, - count(*) number_sales - FROM catalog_sales, item, date_dim - WHERE cs_item_sk IN (SELECT ss_item_sk - FROM cross_items) - AND cs_item_sk = i_item_sk - AND cs_sold_date_sk = d_date_sk - AND d_year = 1999 + 2 - AND d_moy = 11 - GROUP BY i_brand_id, i_class_id, i_category_id - HAVING sum(cs_quantity * cs_list_price) > (SELECT average_sales FROM avg_sales) - UNION ALL - SELECT - 'web' channel, - i_brand_id, - i_class_id, - i_category_id, - sum(ws_quantity * ws_list_price) sales, - count(*) number_sales - FROM web_sales, item, date_dim - WHERE ws_item_sk IN (SELECT ss_item_sk - FROM cross_items) - AND ws_item_sk = i_item_sk - AND ws_sold_date_sk = d_date_sk - AND d_year = 1999 + 2 - AND d_moy = 11 - GROUP BY i_brand_id, i_class_id, i_category_id - HAVING sum(ws_quantity * ws_list_price) > (SELECT average_sales - FROM avg_sales) - ) y -GROUP BY ROLLUP (channel, i_brand_id, i_class_id, i_category_id) -ORDER BY channel, i_brand_id, i_class_id, i_category_id -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q14b.sql b/src/test/resources/tpcds/queries/q14b.sql deleted file mode 100644 index 929a8484b..000000000 --- a/src/test/resources/tpcds/queries/q14b.sql +++ /dev/null @@ -1,95 +0,0 @@ -WITH cross_items AS -(SELECT i_item_sk ss_item_sk - FROM item, - (SELECT - iss.i_brand_id brand_id, - iss.i_class_id class_id, - iss.i_category_id category_id - FROM store_sales, item iss, date_dim d1 - WHERE ss_item_sk = iss.i_item_sk - AND ss_sold_date_sk = d1.d_date_sk - AND d1.d_year BETWEEN 1999 AND 1999 + 2 - INTERSECT - SELECT - ics.i_brand_id, - ics.i_class_id, - ics.i_category_id - FROM catalog_sales, item ics, date_dim d2 - WHERE cs_item_sk = ics.i_item_sk - AND cs_sold_date_sk = d2.d_date_sk - AND d2.d_year BETWEEN 1999 AND 1999 + 2 - INTERSECT - SELECT - iws.i_brand_id, - iws.i_class_id, - iws.i_category_id - FROM web_sales, item iws, date_dim d3 - WHERE ws_item_sk = iws.i_item_sk - AND ws_sold_date_sk = d3.d_date_sk - AND d3.d_year BETWEEN 1999 AND 1999 + 2) x - WHERE i_brand_id = brand_id - AND i_class_id = class_id - AND i_category_id = category_id -), - avg_sales AS - (SELECT avg(quantity * list_price) average_sales - FROM (SELECT - ss_quantity quantity, - ss_list_price list_price - FROM store_sales, date_dim - WHERE ss_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2 - UNION ALL - SELECT - cs_quantity quantity, - cs_list_price list_price - FROM catalog_sales, date_dim - WHERE cs_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2 - UNION ALL - SELECT - ws_quantity quantity, - ws_list_price list_price - FROM web_sales, date_dim - WHERE ws_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2) x) -SELECT * -FROM - (SELECT - 'store' channel, - i_brand_id, - i_class_id, - i_category_id, - sum(ss_quantity * ss_list_price) sales, - count(*) number_sales - FROM store_sales, item, date_dim - WHERE ss_item_sk IN (SELECT ss_item_sk - FROM cross_items) - AND ss_item_sk = i_item_sk - AND ss_sold_date_sk = d_date_sk - AND d_week_seq = (SELECT d_week_seq - FROM date_dim - WHERE d_year = 1999 + 1 AND d_moy = 12 AND d_dom = 11) - GROUP BY i_brand_id, i_class_id, i_category_id - HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales - FROM avg_sales)) this_year, - (SELECT - 'store' channel, - i_brand_id, - i_class_id, - i_category_id, - sum(ss_quantity * ss_list_price) sales, - count(*) number_sales - FROM store_sales, item, date_dim - WHERE ss_item_sk IN (SELECT ss_item_sk - FROM cross_items) - AND ss_item_sk = i_item_sk - AND ss_sold_date_sk = d_date_sk - AND d_week_seq = (SELECT d_week_seq - FROM date_dim - WHERE d_year = 1999 AND d_moy = 12 AND d_dom = 11) - GROUP BY i_brand_id, i_class_id, i_category_id - HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales - FROM avg_sales)) last_year -WHERE this_year.i_brand_id = last_year.i_brand_id - AND this_year.i_class_id = last_year.i_class_id - AND this_year.i_category_id = last_year.i_category_id -ORDER BY this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q15.sql b/src/test/resources/tpcds/queries/q15.sql deleted file mode 100644 index b8182e23b..000000000 --- a/src/test/resources/tpcds/queries/q15.sql +++ /dev/null @@ -1,15 +0,0 @@ -SELECT - ca_zip, - sum(cs_sales_price) -FROM catalog_sales, customer, customer_address, date_dim -WHERE cs_bill_customer_sk = c_customer_sk - AND c_current_addr_sk = ca_address_sk - AND (substr(ca_zip, 1, 5) IN ('85669', '86197', '88274', '83405', '86475', - '85392', '85460', '80348', '81792') - OR ca_state IN ('CA', 'WA', 'GA') - OR cs_sales_price > 500) - AND cs_sold_date_sk = d_date_sk - AND d_qoy = 2 AND d_year = 2001 -GROUP BY ca_zip -ORDER BY ca_zip -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q16.sql b/src/test/resources/tpcds/queries/q16.sql deleted file mode 100644 index 732ad0d84..000000000 --- a/src/test/resources/tpcds/queries/q16.sql +++ /dev/null @@ -1,23 +0,0 @@ -SELECT - count(DISTINCT cs_order_number) AS `order count `, - sum(cs_ext_ship_cost) AS `total shipping cost `, - sum(cs_net_profit) AS `total net profit ` -FROM - catalog_sales cs1, date_dim, customer_address, call_center -WHERE - d_date BETWEEN '2002-02-01' AND (CAST('2002-02-01' AS DATE) + INTERVAL 60 days) - AND cs1.cs_ship_date_sk = d_date_sk - AND cs1.cs_ship_addr_sk = ca_address_sk - AND ca_state = 'GA' - AND cs1.cs_call_center_sk = cc_call_center_sk - AND cc_county IN - ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County') - AND EXISTS(SELECT * - FROM catalog_sales cs2 - WHERE cs1.cs_order_number = cs2.cs_order_number - AND cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) - AND NOT EXISTS(SELECT * - FROM catalog_returns cr1 - WHERE cs1.cs_order_number = cr1.cr_order_number) -ORDER BY count(DISTINCT cs_order_number) -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q17.sql b/src/test/resources/tpcds/queries/q17.sql deleted file mode 100644 index 4d647f795..000000000 --- a/src/test/resources/tpcds/queries/q17.sql +++ /dev/null @@ -1,33 +0,0 @@ -SELECT - i_item_id, - i_item_desc, - s_state, - count(ss_quantity) AS store_sales_quantitycount, - avg(ss_quantity) AS store_sales_quantityave, - stddev_samp(ss_quantity) AS store_sales_quantitystdev, - stddev_samp(ss_quantity) / avg(ss_quantity) AS store_sales_quantitycov, - count(sr_return_quantity) as_store_returns_quantitycount, - avg(sr_return_quantity) as_store_returns_quantityave, - stddev_samp(sr_return_quantity) as_store_returns_quantitystdev, - stddev_samp(sr_return_quantity) / avg(sr_return_quantity) AS store_returns_quantitycov, - count(cs_quantity) AS catalog_sales_quantitycount, - avg(cs_quantity) AS catalog_sales_quantityave, - stddev_samp(cs_quantity) / avg(cs_quantity) AS catalog_sales_quantitystdev, - stddev_samp(cs_quantity) / avg(cs_quantity) AS catalog_sales_quantitycov -FROM store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, store, item -WHERE d1.d_quarter_name = '2001Q1' - AND d1.d_date_sk = ss_sold_date_sk - AND i_item_sk = ss_item_sk - AND s_store_sk = ss_store_sk - AND ss_customer_sk = sr_customer_sk - AND ss_item_sk = sr_item_sk - AND ss_ticket_number = sr_ticket_number - AND sr_returned_date_sk = d2.d_date_sk - AND d2.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3') - AND sr_customer_sk = cs_bill_customer_sk - AND sr_item_sk = cs_item_sk - AND cs_sold_date_sk = d3.d_date_sk - AND d3.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3') -GROUP BY i_item_id, i_item_desc, s_state -ORDER BY i_item_id, i_item_desc, s_state -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q18.sql b/src/test/resources/tpcds/queries/q18.sql deleted file mode 100644 index 4055c80fd..000000000 --- a/src/test/resources/tpcds/queries/q18.sql +++ /dev/null @@ -1,28 +0,0 @@ -SELECT - i_item_id, - ca_country, - ca_state, - ca_county, - avg(cast(cs_quantity AS DECIMAL(12, 2))) agg1, - avg(cast(cs_list_price AS DECIMAL(12, 2))) agg2, - avg(cast(cs_coupon_amt AS DECIMAL(12, 2))) agg3, - avg(cast(cs_sales_price AS DECIMAL(12, 2))) agg4, - avg(cast(cs_net_profit AS DECIMAL(12, 2))) agg5, - avg(cast(c_birth_year AS DECIMAL(12, 2))) agg6, - avg(cast(cd1.cd_dep_count AS DECIMAL(12, 2))) agg7 -FROM catalog_sales, customer_demographics cd1, - customer_demographics cd2, customer, customer_address, date_dim, item -WHERE cs_sold_date_sk = d_date_sk AND - cs_item_sk = i_item_sk AND - cs_bill_cdemo_sk = cd1.cd_demo_sk AND - cs_bill_customer_sk = c_customer_sk AND - cd1.cd_gender = 'F' AND - cd1.cd_education_status = 'Unknown' AND - c_current_cdemo_sk = cd2.cd_demo_sk AND - c_current_addr_sk = ca_address_sk AND - c_birth_month IN (1, 6, 8, 9, 12, 2) AND - d_year = 1998 AND - ca_state IN ('MS', 'IN', 'ND', 'OK', 'NM', 'VA', 'MS') -GROUP BY ROLLUP (i_item_id, ca_country, ca_state, ca_county) -ORDER BY ca_country, ca_state, ca_county, i_item_id -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q19.sql b/src/test/resources/tpcds/queries/q19.sql deleted file mode 100644 index e38ab7f26..000000000 --- a/src/test/resources/tpcds/queries/q19.sql +++ /dev/null @@ -1,19 +0,0 @@ -SELECT - i_brand_id brand_id, - i_brand brand, - i_manufact_id, - i_manufact, - sum(ss_ext_sales_price) ext_price -FROM date_dim, store_sales, item, customer, customer_address, store -WHERE d_date_sk = ss_sold_date_sk - AND ss_item_sk = i_item_sk - AND i_manager_id = 8 - AND d_moy = 11 - AND d_year = 1998 - AND ss_customer_sk = c_customer_sk - AND c_current_addr_sk = ca_address_sk - AND substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5) - AND ss_store_sk = s_store_sk -GROUP BY i_brand, i_brand_id, i_manufact_id, i_manufact -ORDER BY ext_price DESC, brand, brand_id, i_manufact_id, i_manufact -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q2.sql b/src/test/resources/tpcds/queries/q2.sql deleted file mode 100644 index 52c0e90c4..000000000 --- a/src/test/resources/tpcds/queries/q2.sql +++ /dev/null @@ -1,81 +0,0 @@ -WITH wscs AS -( SELECT - sold_date_sk, - sales_price - FROM (SELECT - ws_sold_date_sk sold_date_sk, - ws_ext_sales_price sales_price - FROM web_sales) x - UNION ALL - (SELECT - cs_sold_date_sk sold_date_sk, - cs_ext_sales_price sales_price - FROM catalog_sales)), - wswscs AS - ( SELECT - d_week_seq, - sum(CASE WHEN (d_day_name = 'Sunday') - THEN sales_price - ELSE NULL END) - sun_sales, - sum(CASE WHEN (d_day_name = 'Monday') - THEN sales_price - ELSE NULL END) - mon_sales, - sum(CASE WHEN (d_day_name = 'Tuesday') - THEN sales_price - ELSE NULL END) - tue_sales, - sum(CASE WHEN (d_day_name = 'Wednesday') - THEN sales_price - ELSE NULL END) - wed_sales, - sum(CASE WHEN (d_day_name = 'Thursday') - THEN sales_price - ELSE NULL END) - thu_sales, - sum(CASE WHEN (d_day_name = 'Friday') - THEN sales_price - ELSE NULL END) - fri_sales, - sum(CASE WHEN (d_day_name = 'Saturday') - THEN sales_price - ELSE NULL END) - sat_sales - FROM wscs, date_dim - WHERE d_date_sk = sold_date_sk - GROUP BY d_week_seq) -SELECT - d_week_seq1, - round(sun_sales1 / sun_sales2, 2), - round(mon_sales1 / mon_sales2, 2), - round(tue_sales1 / tue_sales2, 2), - round(wed_sales1 / wed_sales2, 2), - round(thu_sales1 / thu_sales2, 2), - round(fri_sales1 / fri_sales2, 2), - round(sat_sales1 / sat_sales2, 2) -FROM - (SELECT - wswscs.d_week_seq d_week_seq1, - sun_sales sun_sales1, - mon_sales mon_sales1, - tue_sales tue_sales1, - wed_sales wed_sales1, - thu_sales thu_sales1, - fri_sales fri_sales1, - sat_sales sat_sales1 - FROM wswscs, date_dim - WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001) y, - (SELECT - wswscs.d_week_seq d_week_seq2, - sun_sales sun_sales2, - mon_sales mon_sales2, - tue_sales tue_sales2, - wed_sales wed_sales2, - thu_sales thu_sales2, - fri_sales fri_sales2, - sat_sales sat_sales2 - FROM wswscs, date_dim - WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001 + 1) z -WHERE d_week_seq1 = d_week_seq2 - 53 -ORDER BY d_week_seq1 diff --git a/src/test/resources/tpcds/queries/q20.sql b/src/test/resources/tpcds/queries/q20.sql deleted file mode 100644 index 7ac6c7a75..000000000 --- a/src/test/resources/tpcds/queries/q20.sql +++ /dev/null @@ -1,18 +0,0 @@ -SELECT - i_item_desc, - i_category, - i_class, - i_current_price, - sum(cs_ext_sales_price) AS itemrevenue, - sum(cs_ext_sales_price) * 100 / sum(sum(cs_ext_sales_price)) - OVER - (PARTITION BY i_class) AS revenueratio -FROM catalog_sales, item, date_dim -WHERE cs_item_sk = i_item_sk - AND i_category IN ('Sports', 'Books', 'Home') - AND cs_sold_date_sk = d_date_sk - AND d_date BETWEEN cast('1999-02-22' AS DATE) -AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) -GROUP BY i_item_id, i_item_desc, i_category, i_class, i_current_price -ORDER BY i_category, i_class, i_item_id, i_item_desc, revenueratio -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q21.sql b/src/test/resources/tpcds/queries/q21.sql deleted file mode 100644 index 550881143..000000000 --- a/src/test/resources/tpcds/queries/q21.sql +++ /dev/null @@ -1,25 +0,0 @@ -SELECT * -FROM ( - SELECT - w_warehouse_name, - i_item_id, - sum(CASE WHEN (cast(d_date AS DATE) < cast('2000-03-11' AS DATE)) - THEN inv_quantity_on_hand - ELSE 0 END) AS inv_before, - sum(CASE WHEN (cast(d_date AS DATE) >= cast('2000-03-11' AS DATE)) - THEN inv_quantity_on_hand - ELSE 0 END) AS inv_after - FROM inventory, warehouse, item, date_dim - WHERE i_current_price BETWEEN 0.99 AND 1.49 - AND i_item_sk = inv_item_sk - AND inv_warehouse_sk = w_warehouse_sk - AND inv_date_sk = d_date_sk - AND d_date BETWEEN (cast('2000-03-11' AS DATE) - INTERVAL 30 days) - AND (cast('2000-03-11' AS DATE) + INTERVAL 30 days) - GROUP BY w_warehouse_name, i_item_id) x -WHERE (CASE WHEN inv_before > 0 - THEN inv_after / inv_before - ELSE NULL - END) BETWEEN 2.0 / 3.0 AND 3.0 / 2.0 -ORDER BY w_warehouse_name, i_item_id -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q22.sql b/src/test/resources/tpcds/queries/q22.sql deleted file mode 100644 index add3b41f7..000000000 --- a/src/test/resources/tpcds/queries/q22.sql +++ /dev/null @@ -1,14 +0,0 @@ -SELECT - i_product_name, - i_brand, - i_class, - i_category, - avg(inv_quantity_on_hand) qoh -FROM inventory, date_dim, item, warehouse -WHERE inv_date_sk = d_date_sk - AND inv_item_sk = i_item_sk - AND inv_warehouse_sk = w_warehouse_sk - AND d_month_seq BETWEEN 1200 AND 1200 + 11 -GROUP BY ROLLUP (i_product_name, i_brand, i_class, i_category) -ORDER BY qoh, i_product_name, i_brand, i_class, i_category -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q23a.sql b/src/test/resources/tpcds/queries/q23a.sql deleted file mode 100644 index 37791f643..000000000 --- a/src/test/resources/tpcds/queries/q23a.sql +++ /dev/null @@ -1,53 +0,0 @@ -WITH frequent_ss_items AS -(SELECT - substr(i_item_desc, 1, 30) itemdesc, - i_item_sk item_sk, - d_date solddate, - count(*) cnt - FROM store_sales, date_dim, item - WHERE ss_sold_date_sk = d_date_sk - AND ss_item_sk = i_item_sk - AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) - GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date - HAVING count(*) > 4), - max_store_sales AS - (SELECT max(csales) tpcds_cmax - FROM (SELECT - c_customer_sk, - sum(ss_quantity * ss_sales_price) csales - FROM store_sales, customer, date_dim - WHERE ss_customer_sk = c_customer_sk - AND ss_sold_date_sk = d_date_sk - AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) - GROUP BY c_customer_sk) x), - best_ss_customer AS - (SELECT - c_customer_sk, - sum(ss_quantity * ss_sales_price) ssales - FROM store_sales, customer - WHERE ss_customer_sk = c_customer_sk - GROUP BY c_customer_sk - HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) * - (SELECT * - FROM max_store_sales)) -SELECT sum(sales) -FROM ((SELECT cs_quantity * cs_list_price sales -FROM catalog_sales, date_dim -WHERE d_year = 2000 - AND d_moy = 2 - AND cs_sold_date_sk = d_date_sk - AND cs_item_sk IN (SELECT item_sk -FROM frequent_ss_items) - AND cs_bill_customer_sk IN (SELECT c_customer_sk -FROM best_ss_customer)) - UNION ALL - (SELECT ws_quantity * ws_list_price sales - FROM web_sales, date_dim - WHERE d_year = 2000 - AND d_moy = 2 - AND ws_sold_date_sk = d_date_sk - AND ws_item_sk IN (SELECT item_sk - FROM frequent_ss_items) - AND ws_bill_customer_sk IN (SELECT c_customer_sk - FROM best_ss_customer))) y -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q23b.sql b/src/test/resources/tpcds/queries/q23b.sql deleted file mode 100644 index 01150197a..000000000 --- a/src/test/resources/tpcds/queries/q23b.sql +++ /dev/null @@ -1,68 +0,0 @@ -WITH frequent_ss_items AS -(SELECT - substr(i_item_desc, 1, 30) itemdesc, - i_item_sk item_sk, - d_date solddate, - count(*) cnt - FROM store_sales, date_dim, item - WHERE ss_sold_date_sk = d_date_sk - AND ss_item_sk = i_item_sk - AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) - GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date - HAVING count(*) > 4), - max_store_sales AS - (SELECT max(csales) tpcds_cmax - FROM (SELECT - c_customer_sk, - sum(ss_quantity * ss_sales_price) csales - FROM store_sales, customer, date_dim - WHERE ss_customer_sk = c_customer_sk - AND ss_sold_date_sk = d_date_sk - AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) - GROUP BY c_customer_sk) x), - best_ss_customer AS - (SELECT - c_customer_sk, - sum(ss_quantity * ss_sales_price) ssales - FROM store_sales - , customer - WHERE ss_customer_sk = c_customer_sk - GROUP BY c_customer_sk - HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) * - (SELECT * - FROM max_store_sales)) -SELECT - c_last_name, - c_first_name, - sales -FROM ((SELECT - c_last_name, - c_first_name, - sum(cs_quantity * cs_list_price) sales -FROM catalog_sales, customer, date_dim -WHERE d_year = 2000 - AND d_moy = 2 - AND cs_sold_date_sk = d_date_sk - AND cs_item_sk IN (SELECT item_sk -FROM frequent_ss_items) - AND cs_bill_customer_sk IN (SELECT c_customer_sk -FROM best_ss_customer) - AND cs_bill_customer_sk = c_customer_sk -GROUP BY c_last_name, c_first_name) - UNION ALL - (SELECT - c_last_name, - c_first_name, - sum(ws_quantity * ws_list_price) sales - FROM web_sales, customer, date_dim - WHERE d_year = 2000 - AND d_moy = 2 - AND ws_sold_date_sk = d_date_sk - AND ws_item_sk IN (SELECT item_sk - FROM frequent_ss_items) - AND ws_bill_customer_sk IN (SELECT c_customer_sk - FROM best_ss_customer) - AND ws_bill_customer_sk = c_customer_sk - GROUP BY c_last_name, c_first_name)) y -ORDER BY c_last_name, c_first_name, sales -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q24a.sql b/src/test/resources/tpcds/queries/q24a.sql deleted file mode 100644 index bcc189486..000000000 --- a/src/test/resources/tpcds/queries/q24a.sql +++ /dev/null @@ -1,34 +0,0 @@ -WITH ssales AS -(SELECT - c_last_name, - c_first_name, - s_store_name, - ca_state, - s_state, - i_color, - i_current_price, - i_manager_id, - i_units, - i_size, - sum(ss_net_paid) netpaid - FROM store_sales, store_returns, store, item, customer, customer_address - WHERE ss_ticket_number = sr_ticket_number - AND ss_item_sk = sr_item_sk - AND ss_customer_sk = c_customer_sk - AND ss_item_sk = i_item_sk - AND ss_store_sk = s_store_sk - AND c_birth_country = upper(ca_country) - AND s_zip = ca_zip - AND s_market_id = 8 - GROUP BY c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, - i_current_price, i_manager_id, i_units, i_size) -SELECT - c_last_name, - c_first_name, - s_store_name, - sum(netpaid) paid -FROM ssales -WHERE i_color = 'pale' -GROUP BY c_last_name, c_first_name, s_store_name -HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid) -FROM ssales) diff --git a/src/test/resources/tpcds/queries/q24b.sql b/src/test/resources/tpcds/queries/q24b.sql deleted file mode 100644 index 830eb670b..000000000 --- a/src/test/resources/tpcds/queries/q24b.sql +++ /dev/null @@ -1,34 +0,0 @@ -WITH ssales AS -(SELECT - c_last_name, - c_first_name, - s_store_name, - ca_state, - s_state, - i_color, - i_current_price, - i_manager_id, - i_units, - i_size, - sum(ss_net_paid) netpaid - FROM store_sales, store_returns, store, item, customer, customer_address - WHERE ss_ticket_number = sr_ticket_number - AND ss_item_sk = sr_item_sk - AND ss_customer_sk = c_customer_sk - AND ss_item_sk = i_item_sk - AND ss_store_sk = s_store_sk - AND c_birth_country = upper(ca_country) - AND s_zip = ca_zip - AND s_market_id = 8 - GROUP BY c_last_name, c_first_name, s_store_name, ca_state, s_state, - i_color, i_current_price, i_manager_id, i_units, i_size) -SELECT - c_last_name, - c_first_name, - s_store_name, - sum(netpaid) paid -FROM ssales -WHERE i_color = 'chiffon' -GROUP BY c_last_name, c_first_name, s_store_name -HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid) -FROM ssales) diff --git a/src/test/resources/tpcds/queries/q25.sql b/src/test/resources/tpcds/queries/q25.sql deleted file mode 100644 index a4d78a3c5..000000000 --- a/src/test/resources/tpcds/queries/q25.sql +++ /dev/null @@ -1,33 +0,0 @@ -SELECT - i_item_id, - i_item_desc, - s_store_id, - s_store_name, - sum(ss_net_profit) AS store_sales_profit, - sum(sr_net_loss) AS store_returns_loss, - sum(cs_net_profit) AS catalog_sales_profit -FROM - store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, - store, item -WHERE - d1.d_moy = 4 - AND d1.d_year = 2001 - AND d1.d_date_sk = ss_sold_date_sk - AND i_item_sk = ss_item_sk - AND s_store_sk = ss_store_sk - AND ss_customer_sk = sr_customer_sk - AND ss_item_sk = sr_item_sk - AND ss_ticket_number = sr_ticket_number - AND sr_returned_date_sk = d2.d_date_sk - AND d2.d_moy BETWEEN 4 AND 10 - AND d2.d_year = 2001 - AND sr_customer_sk = cs_bill_customer_sk - AND sr_item_sk = cs_item_sk - AND cs_sold_date_sk = d3.d_date_sk - AND d3.d_moy BETWEEN 4 AND 10 - AND d3.d_year = 2001 -GROUP BY - i_item_id, i_item_desc, s_store_id, s_store_name -ORDER BY - i_item_id, i_item_desc, s_store_id, s_store_name -LIMIT 100 \ No newline at end of file diff --git a/src/test/resources/tpcds/queries/q26.sql b/src/test/resources/tpcds/queries/q26.sql deleted file mode 100644 index 6d395a1d7..000000000 --- a/src/test/resources/tpcds/queries/q26.sql +++ /dev/null @@ -1,19 +0,0 @@ -SELECT - i_item_id, - avg(cs_quantity) agg1, - avg(cs_list_price) agg2, - avg(cs_coupon_amt) agg3, - avg(cs_sales_price) agg4 -FROM catalog_sales, customer_demographics, date_dim, item, promotion -WHERE cs_sold_date_sk = d_date_sk AND - cs_item_sk = i_item_sk AND - cs_bill_cdemo_sk = cd_demo_sk AND - cs_promo_sk = p_promo_sk AND - cd_gender = 'M' AND - cd_marital_status = 'S' AND - cd_education_status = 'College' AND - (p_channel_email = 'N' OR p_channel_event = 'N') AND - d_year = 2000 -GROUP BY i_item_id -ORDER BY i_item_id -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q27.sql b/src/test/resources/tpcds/queries/q27.sql deleted file mode 100644 index b0e2fd95f..000000000 --- a/src/test/resources/tpcds/queries/q27.sql +++ /dev/null @@ -1,21 +0,0 @@ -SELECT - i_item_id, - s_state, - grouping(s_state) g_state, - avg(ss_quantity) agg1, - avg(ss_list_price) agg2, - avg(ss_coupon_amt) agg3, - avg(ss_sales_price) agg4 -FROM store_sales, customer_demographics, date_dim, store, item -WHERE ss_sold_date_sk = d_date_sk AND - ss_item_sk = i_item_sk AND - ss_store_sk = s_store_sk AND - ss_cdemo_sk = cd_demo_sk AND - cd_gender = 'M' AND - cd_marital_status = 'S' AND - cd_education_status = 'College' AND - d_year = 2002 AND - s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN') -GROUP BY ROLLUP (i_item_id, s_state) -ORDER BY i_item_id, s_state -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q28.sql b/src/test/resources/tpcds/queries/q28.sql deleted file mode 100644 index f34c2bb0e..000000000 --- a/src/test/resources/tpcds/queries/q28.sql +++ /dev/null @@ -1,56 +0,0 @@ -SELECT * -FROM (SELECT - avg(ss_list_price) B1_LP, - count(ss_list_price) B1_CNT, - count(DISTINCT ss_list_price) B1_CNTD -FROM store_sales -WHERE ss_quantity BETWEEN 0 AND 5 - AND (ss_list_price BETWEEN 8 AND 8 + 10 - OR ss_coupon_amt BETWEEN 459 AND 459 + 1000 - OR ss_wholesale_cost BETWEEN 57 AND 57 + 20)) B1, - (SELECT - avg(ss_list_price) B2_LP, - count(ss_list_price) B2_CNT, - count(DISTINCT ss_list_price) B2_CNTD - FROM store_sales - WHERE ss_quantity BETWEEN 6 AND 10 - AND (ss_list_price BETWEEN 90 AND 90 + 10 - OR ss_coupon_amt BETWEEN 2323 AND 2323 + 1000 - OR ss_wholesale_cost BETWEEN 31 AND 31 + 20)) B2, - (SELECT - avg(ss_list_price) B3_LP, - count(ss_list_price) B3_CNT, - count(DISTINCT ss_list_price) B3_CNTD - FROM store_sales - WHERE ss_quantity BETWEEN 11 AND 15 - AND (ss_list_price BETWEEN 142 AND 142 + 10 - OR ss_coupon_amt BETWEEN 12214 AND 12214 + 1000 - OR ss_wholesale_cost BETWEEN 79 AND 79 + 20)) B3, - (SELECT - avg(ss_list_price) B4_LP, - count(ss_list_price) B4_CNT, - count(DISTINCT ss_list_price) B4_CNTD - FROM store_sales - WHERE ss_quantity BETWEEN 16 AND 20 - AND (ss_list_price BETWEEN 135 AND 135 + 10 - OR ss_coupon_amt BETWEEN 6071 AND 6071 + 1000 - OR ss_wholesale_cost BETWEEN 38 AND 38 + 20)) B4, - (SELECT - avg(ss_list_price) B5_LP, - count(ss_list_price) B5_CNT, - count(DISTINCT ss_list_price) B5_CNTD - FROM store_sales - WHERE ss_quantity BETWEEN 21 AND 25 - AND (ss_list_price BETWEEN 122 AND 122 + 10 - OR ss_coupon_amt BETWEEN 836 AND 836 + 1000 - OR ss_wholesale_cost BETWEEN 17 AND 17 + 20)) B5, - (SELECT - avg(ss_list_price) B6_LP, - count(ss_list_price) B6_CNT, - count(DISTINCT ss_list_price) B6_CNTD - FROM store_sales - WHERE ss_quantity BETWEEN 26 AND 30 - AND (ss_list_price BETWEEN 154 AND 154 + 10 - OR ss_coupon_amt BETWEEN 7326 AND 7326 + 1000 - OR ss_wholesale_cost BETWEEN 7 AND 7 + 20)) B6 -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q29.sql b/src/test/resources/tpcds/queries/q29.sql deleted file mode 100644 index 3f1fd553f..000000000 --- a/src/test/resources/tpcds/queries/q29.sql +++ /dev/null @@ -1,32 +0,0 @@ -SELECT - i_item_id, - i_item_desc, - s_store_id, - s_store_name, - sum(ss_quantity) AS store_sales_quantity, - sum(sr_return_quantity) AS store_returns_quantity, - sum(cs_quantity) AS catalog_sales_quantity -FROM - store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, - date_dim d3, store, item -WHERE - d1.d_moy = 9 - AND d1.d_year = 1999 - AND d1.d_date_sk = ss_sold_date_sk - AND i_item_sk = ss_item_sk - AND s_store_sk = ss_store_sk - AND ss_customer_sk = sr_customer_sk - AND ss_item_sk = sr_item_sk - AND ss_ticket_number = sr_ticket_number - AND sr_returned_date_sk = d2.d_date_sk - AND d2.d_moy BETWEEN 9 AND 9 + 3 - AND d2.d_year = 1999 - AND sr_customer_sk = cs_bill_customer_sk - AND sr_item_sk = cs_item_sk - AND cs_sold_date_sk = d3.d_date_sk - AND d3.d_year IN (1999, 1999 + 1, 1999 + 2) -GROUP BY - i_item_id, i_item_desc, s_store_id, s_store_name -ORDER BY - i_item_id, i_item_desc, s_store_id, s_store_name -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q3.sql b/src/test/resources/tpcds/queries/q3.sql deleted file mode 100644 index 181509df9..000000000 --- a/src/test/resources/tpcds/queries/q3.sql +++ /dev/null @@ -1,13 +0,0 @@ -SELECT - dt.d_year, - item.i_brand_id brand_id, - item.i_brand brand, - SUM(ss_ext_sales_price) sum_agg -FROM date_dim dt, store_sales, item -WHERE dt.d_date_sk = store_sales.ss_sold_date_sk - AND store_sales.ss_item_sk = item.i_item_sk - AND item.i_manufact_id = 128 - AND dt.d_moy = 11 -GROUP BY dt.d_year, item.i_brand, item.i_brand_id -ORDER BY dt.d_year, sum_agg DESC, brand_id -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q30.sql b/src/test/resources/tpcds/queries/q30.sql deleted file mode 100644 index 986bef566..000000000 --- a/src/test/resources/tpcds/queries/q30.sql +++ /dev/null @@ -1,35 +0,0 @@ -WITH customer_total_return AS -(SELECT - wr_returning_customer_sk AS ctr_customer_sk, - ca_state AS ctr_state, - sum(wr_return_amt) AS ctr_total_return - FROM web_returns, date_dim, customer_address - WHERE wr_returned_date_sk = d_date_sk - AND d_year = 2002 - AND wr_returning_addr_sk = ca_address_sk - GROUP BY wr_returning_customer_sk, ca_state) -SELECT - c_customer_id, - c_salutation, - c_first_name, - c_last_name, - c_preferred_cust_flag, - c_birth_day, - c_birth_month, - c_birth_year, - c_birth_country, - c_login, - c_email_address, - c_last_review_date, - ctr_total_return -FROM customer_total_return ctr1, customer_address, customer -WHERE ctr1.ctr_total_return > (SELECT avg(ctr_total_return) * 1.2 -FROM customer_total_return ctr2 -WHERE ctr1.ctr_state = ctr2.ctr_state) - AND ca_address_sk = c_current_addr_sk - AND ca_state = 'GA' - AND ctr1.ctr_customer_sk = c_customer_sk -ORDER BY c_customer_id, c_salutation, c_first_name, c_last_name, c_preferred_cust_flag - , c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address - , c_last_review_date, ctr_total_return -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q31.sql b/src/test/resources/tpcds/queries/q31.sql deleted file mode 100644 index 3e543d543..000000000 --- a/src/test/resources/tpcds/queries/q31.sql +++ /dev/null @@ -1,60 +0,0 @@ -WITH ss AS -(SELECT - ca_county, - d_qoy, - d_year, - sum(ss_ext_sales_price) AS store_sales - FROM store_sales, date_dim, customer_address - WHERE ss_sold_date_sk = d_date_sk - AND ss_addr_sk = ca_address_sk - GROUP BY ca_county, d_qoy, d_year), - ws AS - (SELECT - ca_county, - d_qoy, - d_year, - sum(ws_ext_sales_price) AS web_sales - FROM web_sales, date_dim, customer_address - WHERE ws_sold_date_sk = d_date_sk - AND ws_bill_addr_sk = ca_address_sk - GROUP BY ca_county, d_qoy, d_year) -SELECT - ss1.ca_county, - ss1.d_year, - ws2.web_sales / ws1.web_sales web_q1_q2_increase, - ss2.store_sales / ss1.store_sales store_q1_q2_increase, - ws3.web_sales / ws2.web_sales web_q2_q3_increase, - ss3.store_sales / ss2.store_sales store_q2_q3_increase -FROM - ss ss1, ss ss2, ss ss3, ws ws1, ws ws2, ws ws3 -WHERE - ss1.d_qoy = 1 - AND ss1.d_year = 2000 - AND ss1.ca_county = ss2.ca_county - AND ss2.d_qoy = 2 - AND ss2.d_year = 2000 - AND ss2.ca_county = ss3.ca_county - AND ss3.d_qoy = 3 - AND ss3.d_year = 2000 - AND ss1.ca_county = ws1.ca_county - AND ws1.d_qoy = 1 - AND ws1.d_year = 2000 - AND ws1.ca_county = ws2.ca_county - AND ws2.d_qoy = 2 - AND ws2.d_year = 2000 - AND ws1.ca_county = ws3.ca_county - AND ws3.d_qoy = 3 - AND ws3.d_year = 2000 - AND CASE WHEN ws1.web_sales > 0 - THEN ws2.web_sales / ws1.web_sales - ELSE NULL END - > CASE WHEN ss1.store_sales > 0 - THEN ss2.store_sales / ss1.store_sales - ELSE NULL END - AND CASE WHEN ws2.web_sales > 0 - THEN ws3.web_sales / ws2.web_sales - ELSE NULL END - > CASE WHEN ss2.store_sales > 0 - THEN ss3.store_sales / ss2.store_sales - ELSE NULL END -ORDER BY ss1.ca_county diff --git a/src/test/resources/tpcds/queries/q32.sql b/src/test/resources/tpcds/queries/q32.sql deleted file mode 100644 index a6f59ecb8..000000000 --- a/src/test/resources/tpcds/queries/q32.sql +++ /dev/null @@ -1,15 +0,0 @@ -SELECT 1 AS `excess discount amount ` -FROM - catalog_sales, item, date_dim -WHERE - i_manufact_id = 977 - AND i_item_sk = cs_item_sk - AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + interval 90 days) - AND d_date_sk = cs_sold_date_sk - AND cs_ext_discount_amt > ( - SELECT 1.3 * avg(cs_ext_discount_amt) - FROM catalog_sales, date_dim - WHERE cs_item_sk = i_item_sk - AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + interval 90 days) - AND d_date_sk = cs_sold_date_sk) -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q33.sql b/src/test/resources/tpcds/queries/q33.sql deleted file mode 100644 index d24856aa5..000000000 --- a/src/test/resources/tpcds/queries/q33.sql +++ /dev/null @@ -1,65 +0,0 @@ -WITH ss AS ( - SELECT - i_manufact_id, - sum(ss_ext_sales_price) total_sales - FROM - store_sales, date_dim, customer_address, item - WHERE - i_manufact_id IN (SELECT i_manufact_id - FROM item - WHERE i_category IN ('Electronics')) - AND ss_item_sk = i_item_sk - AND ss_sold_date_sk = d_date_sk - AND d_year = 1998 - AND d_moy = 5 - AND ss_addr_sk = ca_address_sk - AND ca_gmt_offset = -5 - GROUP BY i_manufact_id), cs AS -(SELECT - i_manufact_id, - sum(cs_ext_sales_price) total_sales - FROM catalog_sales, date_dim, customer_address, item - WHERE - i_manufact_id IN ( - SELECT i_manufact_id - FROM item - WHERE - i_category IN ('Electronics')) - AND cs_item_sk = i_item_sk - AND cs_sold_date_sk = d_date_sk - AND d_year = 1998 - AND d_moy = 5 - AND cs_bill_addr_sk = ca_address_sk - AND ca_gmt_offset = -5 - GROUP BY i_manufact_id), - ws AS ( - SELECT - i_manufact_id, - sum(ws_ext_sales_price) total_sales - FROM - web_sales, date_dim, customer_address, item - WHERE - i_manufact_id IN (SELECT i_manufact_id - FROM item - WHERE i_category IN ('Electronics')) - AND ws_item_sk = i_item_sk - AND ws_sold_date_sk = d_date_sk - AND d_year = 1998 - AND d_moy = 5 - AND ws_bill_addr_sk = ca_address_sk - AND ca_gmt_offset = -5 - GROUP BY i_manufact_id) -SELECT - i_manufact_id, - sum(total_sales) total_sales -FROM (SELECT * - FROM ss - UNION ALL - SELECT * - FROM cs - UNION ALL - SELECT * - FROM ws) tmp1 -GROUP BY i_manufact_id -ORDER BY total_sales -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q34.sql b/src/test/resources/tpcds/queries/q34.sql deleted file mode 100644 index 33396bf16..000000000 --- a/src/test/resources/tpcds/queries/q34.sql +++ /dev/null @@ -1,32 +0,0 @@ -SELECT - c_last_name, - c_first_name, - c_salutation, - c_preferred_cust_flag, - ss_ticket_number, - cnt -FROM - (SELECT - ss_ticket_number, - ss_customer_sk, - count(*) cnt - FROM store_sales, date_dim, store, household_demographics - WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk - AND store_sales.ss_store_sk = store.s_store_sk - AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk - AND (date_dim.d_dom BETWEEN 1 AND 3 OR date_dim.d_dom BETWEEN 25 AND 28) - AND (household_demographics.hd_buy_potential = '>10000' OR - household_demographics.hd_buy_potential = 'unknown') - AND household_demographics.hd_vehicle_count > 0 - AND (CASE WHEN household_demographics.hd_vehicle_count > 0 - THEN household_demographics.hd_dep_count / household_demographics.hd_vehicle_count - ELSE NULL - END) > 1.2 - AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) - AND store.s_county IN - ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', - 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County') - GROUP BY ss_ticket_number, ss_customer_sk) dn, customer -WHERE ss_customer_sk = c_customer_sk - AND cnt BETWEEN 15 AND 20 -ORDER BY c_last_name, c_first_name, c_salutation, c_preferred_cust_flag DESC diff --git a/src/test/resources/tpcds/queries/q35.sql b/src/test/resources/tpcds/queries/q35.sql deleted file mode 100644 index cfe4342d8..000000000 --- a/src/test/resources/tpcds/queries/q35.sql +++ /dev/null @@ -1,46 +0,0 @@ -SELECT - ca_state, - cd_gender, - cd_marital_status, - count(*) cnt1, - min(cd_dep_count), - max(cd_dep_count), - avg(cd_dep_count), - cd_dep_employed_count, - count(*) cnt2, - min(cd_dep_employed_count), - max(cd_dep_employed_count), - avg(cd_dep_employed_count), - cd_dep_college_count, - count(*) cnt3, - min(cd_dep_college_count), - max(cd_dep_college_count), - avg(cd_dep_college_count) -FROM - customer c, customer_address ca, customer_demographics -WHERE - c.c_current_addr_sk = ca.ca_address_sk AND - cd_demo_sk = c.c_current_cdemo_sk AND - exists(SELECT * - FROM store_sales, date_dim - WHERE c.c_customer_sk = ss_customer_sk AND - ss_sold_date_sk = d_date_sk AND - d_year = 2002 AND - d_qoy < 4) AND - (exists(SELECT * - FROM web_sales, date_dim - WHERE c.c_customer_sk = ws_bill_customer_sk AND - ws_sold_date_sk = d_date_sk AND - d_year = 2002 AND - d_qoy < 4) OR - exists(SELECT * - FROM catalog_sales, date_dim - WHERE c.c_customer_sk = cs_ship_customer_sk AND - cs_sold_date_sk = d_date_sk AND - d_year = 2002 AND - d_qoy < 4)) -GROUP BY ca_state, cd_gender, cd_marital_status, cd_dep_count, - cd_dep_employed_count, cd_dep_college_count -ORDER BY ca_state, cd_gender, cd_marital_status, cd_dep_count, - cd_dep_employed_count, cd_dep_college_count -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q36.sql b/src/test/resources/tpcds/queries/q36.sql deleted file mode 100644 index a8f93df76..000000000 --- a/src/test/resources/tpcds/queries/q36.sql +++ /dev/null @@ -1,26 +0,0 @@ -SELECT - sum(ss_net_profit) / sum(ss_ext_sales_price) AS gross_margin, - i_category, - i_class, - grouping(i_category) + grouping(i_class) AS lochierarchy, - rank() - OVER ( - PARTITION BY grouping(i_category) + grouping(i_class), - CASE WHEN grouping(i_class) = 0 - THEN i_category END - ORDER BY sum(ss_net_profit) / sum(ss_ext_sales_price) ASC) AS rank_within_parent -FROM - store_sales, date_dim d1, item, store -WHERE - d1.d_year = 2001 - AND d1.d_date_sk = ss_sold_date_sk - AND i_item_sk = ss_item_sk - AND s_store_sk = ss_store_sk - AND s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN') -GROUP BY ROLLUP (i_category, i_class) -ORDER BY - lochierarchy DESC - , CASE WHEN lochierarchy = 0 - THEN i_category END - , rank_within_parent -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q37.sql b/src/test/resources/tpcds/queries/q37.sql deleted file mode 100644 index 11b3821fa..000000000 --- a/src/test/resources/tpcds/queries/q37.sql +++ /dev/null @@ -1,15 +0,0 @@ -SELECT - i_item_id, - i_item_desc, - i_current_price -FROM item, inventory, date_dim, catalog_sales -WHERE i_current_price BETWEEN 68 AND 68 + 30 - AND inv_item_sk = i_item_sk - AND d_date_sk = inv_date_sk - AND d_date BETWEEN cast('2000-02-01' AS DATE) AND (cast('2000-02-01' AS DATE) + INTERVAL 60 days) - AND i_manufact_id IN (677, 940, 694, 808) - AND inv_quantity_on_hand BETWEEN 100 AND 500 - AND cs_item_sk = i_item_sk -GROUP BY i_item_id, i_item_desc, i_current_price -ORDER BY i_item_id -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q38.sql b/src/test/resources/tpcds/queries/q38.sql deleted file mode 100644 index 1c8d53ee2..000000000 --- a/src/test/resources/tpcds/queries/q38.sql +++ /dev/null @@ -1,30 +0,0 @@ -SELECT count(*) -FROM ( - SELECT DISTINCT - c_last_name, - c_first_name, - d_date - FROM store_sales, date_dim, customer - WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk - AND store_sales.ss_customer_sk = customer.c_customer_sk - AND d_month_seq BETWEEN 1200 AND 1200 + 11 - INTERSECT - SELECT DISTINCT - c_last_name, - c_first_name, - d_date - FROM catalog_sales, date_dim, customer - WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk - AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk - AND d_month_seq BETWEEN 1200 AND 1200 + 11 - INTERSECT - SELECT DISTINCT - c_last_name, - c_first_name, - d_date - FROM web_sales, date_dim, customer - WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk - AND web_sales.ws_bill_customer_sk = customer.c_customer_sk - AND d_month_seq BETWEEN 1200 AND 1200 + 11 - ) hot_cust -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q39a.sql b/src/test/resources/tpcds/queries/q39a.sql deleted file mode 100644 index 9fc4c1701..000000000 --- a/src/test/resources/tpcds/queries/q39a.sql +++ /dev/null @@ -1,47 +0,0 @@ -WITH inv AS -(SELECT - w_warehouse_name, - w_warehouse_sk, - i_item_sk, - d_moy, - stdev, - mean, - CASE mean - WHEN 0 - THEN NULL - ELSE stdev / mean END cov - FROM (SELECT - w_warehouse_name, - w_warehouse_sk, - i_item_sk, - d_moy, - stddev_samp(inv_quantity_on_hand) stdev, - avg(inv_quantity_on_hand) mean - FROM inventory, item, warehouse, date_dim - WHERE inv_item_sk = i_item_sk - AND inv_warehouse_sk = w_warehouse_sk - AND inv_date_sk = d_date_sk - AND d_year = 2001 - GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy) foo - WHERE CASE mean - WHEN 0 - THEN 0 - ELSE stdev / mean END > 1) -SELECT - inv1.w_warehouse_sk, - inv1.i_item_sk, - inv1.d_moy, - inv1.mean, - inv1.cov, - inv2.w_warehouse_sk, - inv2.i_item_sk, - inv2.d_moy, - inv2.mean, - inv2.cov -FROM inv inv1, inv inv2 -WHERE inv1.i_item_sk = inv2.i_item_sk - AND inv1.w_warehouse_sk = inv2.w_warehouse_sk - AND inv1.d_moy = 1 - AND inv2.d_moy = 1 + 1 -ORDER BY inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov - , inv2.d_moy, inv2.mean, inv2.cov diff --git a/src/test/resources/tpcds/queries/q39b.sql b/src/test/resources/tpcds/queries/q39b.sql deleted file mode 100644 index 6f8493029..000000000 --- a/src/test/resources/tpcds/queries/q39b.sql +++ /dev/null @@ -1,48 +0,0 @@ -WITH inv AS -(SELECT - w_warehouse_name, - w_warehouse_sk, - i_item_sk, - d_moy, - stdev, - mean, - CASE mean - WHEN 0 - THEN NULL - ELSE stdev / mean END cov - FROM (SELECT - w_warehouse_name, - w_warehouse_sk, - i_item_sk, - d_moy, - stddev_samp(inv_quantity_on_hand) stdev, - avg(inv_quantity_on_hand) mean - FROM inventory, item, warehouse, date_dim - WHERE inv_item_sk = i_item_sk - AND inv_warehouse_sk = w_warehouse_sk - AND inv_date_sk = d_date_sk - AND d_year = 2001 - GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy) foo - WHERE CASE mean - WHEN 0 - THEN 0 - ELSE stdev / mean END > 1) -SELECT - inv1.w_warehouse_sk, - inv1.i_item_sk, - inv1.d_moy, - inv1.mean, - inv1.cov, - inv2.w_warehouse_sk, - inv2.i_item_sk, - inv2.d_moy, - inv2.mean, - inv2.cov -FROM inv inv1, inv inv2 -WHERE inv1.i_item_sk = inv2.i_item_sk - AND inv1.w_warehouse_sk = inv2.w_warehouse_sk - AND inv1.d_moy = 1 - AND inv2.d_moy = 1 + 1 - AND inv1.cov > 1.5 -ORDER BY inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov - , inv2.d_moy, inv2.mean, inv2.cov diff --git a/src/test/resources/tpcds/queries/q4.sql b/src/test/resources/tpcds/queries/q4.sql deleted file mode 100644 index b9f27fbc9..000000000 --- a/src/test/resources/tpcds/queries/q4.sql +++ /dev/null @@ -1,120 +0,0 @@ -WITH year_total AS ( - SELECT - c_customer_id customer_id, - c_first_name customer_first_name, - c_last_name customer_last_name, - c_preferred_cust_flag customer_preferred_cust_flag, - c_birth_country customer_birth_country, - c_login customer_login, - c_email_address customer_email_address, - d_year dyear, - sum(((ss_ext_list_price - ss_ext_wholesale_cost - ss_ext_discount_amt) + - ss_ext_sales_price) / 2) year_total, - 's' sale_type - FROM customer, store_sales, date_dim - WHERE c_customer_sk = ss_customer_sk AND ss_sold_date_sk = d_date_sk - GROUP BY c_customer_id, - c_first_name, - c_last_name, - c_preferred_cust_flag, - c_birth_country, - c_login, - c_email_address, - d_year - UNION ALL - SELECT - c_customer_id customer_id, - c_first_name customer_first_name, - c_last_name customer_last_name, - c_preferred_cust_flag customer_preferred_cust_flag, - c_birth_country customer_birth_country, - c_login customer_login, - c_email_address customer_email_address, - d_year dyear, - sum((((cs_ext_list_price - cs_ext_wholesale_cost - cs_ext_discount_amt) + - cs_ext_sales_price) / 2)) year_total, - 'c' sale_type - FROM customer, catalog_sales, date_dim - WHERE c_customer_sk = cs_bill_customer_sk AND cs_sold_date_sk = d_date_sk - GROUP BY c_customer_id, - c_first_name, - c_last_name, - c_preferred_cust_flag, - c_birth_country, - c_login, - c_email_address, - d_year - UNION ALL - SELECT - c_customer_id customer_id, - c_first_name customer_first_name, - c_last_name customer_last_name, - c_preferred_cust_flag customer_preferred_cust_flag, - c_birth_country customer_birth_country, - c_login customer_login, - c_email_address customer_email_address, - d_year dyear, - sum((((ws_ext_list_price - ws_ext_wholesale_cost - ws_ext_discount_amt) + ws_ext_sales_price) / - 2)) year_total, - 'w' sale_type - FROM customer, web_sales, date_dim - WHERE c_customer_sk = ws_bill_customer_sk AND ws_sold_date_sk = d_date_sk - GROUP BY c_customer_id, - c_first_name, - c_last_name, - c_preferred_cust_flag, - c_birth_country, - c_login, - c_email_address, - d_year) -SELECT - t_s_secyear.customer_id, - t_s_secyear.customer_first_name, - t_s_secyear.customer_last_name, - t_s_secyear.customer_preferred_cust_flag, - t_s_secyear.customer_birth_country, - t_s_secyear.customer_login, - t_s_secyear.customer_email_address -FROM year_total t_s_firstyear, year_total t_s_secyear, year_total t_c_firstyear, - year_total t_c_secyear, year_total t_w_firstyear, year_total t_w_secyear -WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id - AND t_s_firstyear.customer_id = t_c_secyear.customer_id - AND t_s_firstyear.customer_id = t_c_firstyear.customer_id - AND t_s_firstyear.customer_id = t_w_firstyear.customer_id - AND t_s_firstyear.customer_id = t_w_secyear.customer_id - AND t_s_firstyear.sale_type = 's' - AND t_c_firstyear.sale_type = 'c' - AND t_w_firstyear.sale_type = 'w' - AND t_s_secyear.sale_type = 's' - AND t_c_secyear.sale_type = 'c' - AND t_w_secyear.sale_type = 'w' - AND t_s_firstyear.dyear = 2001 - AND t_s_secyear.dyear = 2001 + 1 - AND t_c_firstyear.dyear = 2001 - AND t_c_secyear.dyear = 2001 + 1 - AND t_w_firstyear.dyear = 2001 - AND t_w_secyear.dyear = 2001 + 1 - AND t_s_firstyear.year_total > 0 - AND t_c_firstyear.year_total > 0 - AND t_w_firstyear.year_total > 0 - AND CASE WHEN t_c_firstyear.year_total > 0 - THEN t_c_secyear.year_total / t_c_firstyear.year_total - ELSE NULL END - > CASE WHEN t_s_firstyear.year_total > 0 - THEN t_s_secyear.year_total / t_s_firstyear.year_total - ELSE NULL END - AND CASE WHEN t_c_firstyear.year_total > 0 - THEN t_c_secyear.year_total / t_c_firstyear.year_total - ELSE NULL END - > CASE WHEN t_w_firstyear.year_total > 0 - THEN t_w_secyear.year_total / t_w_firstyear.year_total - ELSE NULL END -ORDER BY - t_s_secyear.customer_id, - t_s_secyear.customer_first_name, - t_s_secyear.customer_last_name, - t_s_secyear.customer_preferred_cust_flag, - t_s_secyear.customer_birth_country, - t_s_secyear.customer_login, - t_s_secyear.customer_email_address -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q40.sql b/src/test/resources/tpcds/queries/q40.sql deleted file mode 100644 index 66d8b73ac..000000000 --- a/src/test/resources/tpcds/queries/q40.sql +++ /dev/null @@ -1,25 +0,0 @@ -SELECT - w_state, - i_item_id, - sum(CASE WHEN (cast(d_date AS DATE) < cast('2000-03-11' AS DATE)) - THEN cs_sales_price - coalesce(cr_refunded_cash, 0) - ELSE 0 END) AS sales_before, - sum(CASE WHEN (cast(d_date AS DATE) >= cast('2000-03-11' AS DATE)) - THEN cs_sales_price - coalesce(cr_refunded_cash, 0) - ELSE 0 END) AS sales_after -FROM - catalog_sales - LEFT OUTER JOIN catalog_returns ON - (cs_order_number = cr_order_number - AND cs_item_sk = cr_item_sk) - , warehouse, item, date_dim -WHERE - i_current_price BETWEEN 0.99 AND 1.49 - AND i_item_sk = cs_item_sk - AND cs_warehouse_sk = w_warehouse_sk - AND cs_sold_date_sk = d_date_sk - AND d_date BETWEEN (cast('2000-03-11' AS DATE) - INTERVAL 30 days) - AND (cast('2000-03-11' AS DATE) + INTERVAL 30 days) -GROUP BY w_state, i_item_id -ORDER BY w_state, i_item_id -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q41.sql b/src/test/resources/tpcds/queries/q41.sql deleted file mode 100644 index 25e317e0e..000000000 --- a/src/test/resources/tpcds/queries/q41.sql +++ /dev/null @@ -1,49 +0,0 @@ -SELECT DISTINCT (i_product_name) -FROM item i1 -WHERE i_manufact_id BETWEEN 738 AND 738 + 40 - AND (SELECT count(*) AS item_cnt -FROM item -WHERE (i_manufact = i1.i_manufact AND - ((i_category = 'Women' AND - (i_color = 'powder' OR i_color = 'khaki') AND - (i_units = 'Ounce' OR i_units = 'Oz') AND - (i_size = 'medium' OR i_size = 'extra large') - ) OR - (i_category = 'Women' AND - (i_color = 'brown' OR i_color = 'honeydew') AND - (i_units = 'Bunch' OR i_units = 'Ton') AND - (i_size = 'N/A' OR i_size = 'small') - ) OR - (i_category = 'Men' AND - (i_color = 'floral' OR i_color = 'deep') AND - (i_units = 'N/A' OR i_units = 'Dozen') AND - (i_size = 'petite' OR i_size = 'large') - ) OR - (i_category = 'Men' AND - (i_color = 'light' OR i_color = 'cornflower') AND - (i_units = 'Box' OR i_units = 'Pound') AND - (i_size = 'medium' OR i_size = 'extra large') - ))) OR - (i_manufact = i1.i_manufact AND - ((i_category = 'Women' AND - (i_color = 'midnight' OR i_color = 'snow') AND - (i_units = 'Pallet' OR i_units = 'Gross') AND - (i_size = 'medium' OR i_size = 'extra large') - ) OR - (i_category = 'Women' AND - (i_color = 'cyan' OR i_color = 'papaya') AND - (i_units = 'Cup' OR i_units = 'Dram') AND - (i_size = 'N/A' OR i_size = 'small') - ) OR - (i_category = 'Men' AND - (i_color = 'orange' OR i_color = 'frosted') AND - (i_units = 'Each' OR i_units = 'Tbl') AND - (i_size = 'petite' OR i_size = 'large') - ) OR - (i_category = 'Men' AND - (i_color = 'forest' OR i_color = 'ghost') AND - (i_units = 'Lb' OR i_units = 'Bundle') AND - (i_size = 'medium' OR i_size = 'extra large') - )))) > 0 -ORDER BY i_product_name -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q42.sql b/src/test/resources/tpcds/queries/q42.sql deleted file mode 100644 index 4d2e71760..000000000 --- a/src/test/resources/tpcds/queries/q42.sql +++ /dev/null @@ -1,18 +0,0 @@ -SELECT - dt.d_year, - item.i_category_id, - item.i_category, - sum(ss_ext_sales_price) -FROM date_dim dt, store_sales, item -WHERE dt.d_date_sk = store_sales.ss_sold_date_sk - AND store_sales.ss_item_sk = item.i_item_sk - AND item.i_manager_id = 1 - AND dt.d_moy = 11 - AND dt.d_year = 2000 -GROUP BY dt.d_year - , item.i_category_id - , item.i_category -ORDER BY sum(ss_ext_sales_price) DESC, dt.d_year - , item.i_category_id - , item.i_category -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q43.sql b/src/test/resources/tpcds/queries/q43.sql deleted file mode 100644 index 45411772c..000000000 --- a/src/test/resources/tpcds/queries/q43.sql +++ /dev/null @@ -1,33 +0,0 @@ -SELECT - s_store_name, - s_store_id, - sum(CASE WHEN (d_day_name = 'Sunday') - THEN ss_sales_price - ELSE NULL END) sun_sales, - sum(CASE WHEN (d_day_name = 'Monday') - THEN ss_sales_price - ELSE NULL END) mon_sales, - sum(CASE WHEN (d_day_name = 'Tuesday') - THEN ss_sales_price - ELSE NULL END) tue_sales, - sum(CASE WHEN (d_day_name = 'Wednesday') - THEN ss_sales_price - ELSE NULL END) wed_sales, - sum(CASE WHEN (d_day_name = 'Thursday') - THEN ss_sales_price - ELSE NULL END) thu_sales, - sum(CASE WHEN (d_day_name = 'Friday') - THEN ss_sales_price - ELSE NULL END) fri_sales, - sum(CASE WHEN (d_day_name = 'Saturday') - THEN ss_sales_price - ELSE NULL END) sat_sales -FROM date_dim, store_sales, store -WHERE d_date_sk = ss_sold_date_sk AND - s_store_sk = ss_store_sk AND - s_gmt_offset = -5 AND - d_year = 2000 -GROUP BY s_store_name, s_store_id -ORDER BY s_store_name, s_store_id, sun_sales, mon_sales, tue_sales, wed_sales, - thu_sales, fri_sales, sat_sales -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q44.sql b/src/test/resources/tpcds/queries/q44.sql deleted file mode 100644 index 379e60478..000000000 --- a/src/test/resources/tpcds/queries/q44.sql +++ /dev/null @@ -1,46 +0,0 @@ -SELECT - asceding.rnk, - i1.i_product_name best_performing, - i2.i_product_name worst_performing -FROM (SELECT * -FROM (SELECT - item_sk, - rank() - OVER ( - ORDER BY rank_col ASC) rnk -FROM (SELECT - ss_item_sk item_sk, - avg(ss_net_profit) rank_col -FROM store_sales ss1 -WHERE ss_store_sk = 4 -GROUP BY ss_item_sk -HAVING avg(ss_net_profit) > 0.9 * (SELECT avg(ss_net_profit) rank_col -FROM store_sales -WHERE ss_store_sk = 4 - AND ss_addr_sk IS NULL -GROUP BY ss_store_sk)) V1) V11 -WHERE rnk < 11) asceding, - (SELECT * - FROM (SELECT - item_sk, - rank() - OVER ( - ORDER BY rank_col DESC) rnk - FROM (SELECT - ss_item_sk item_sk, - avg(ss_net_profit) rank_col - FROM store_sales ss1 - WHERE ss_store_sk = 4 - GROUP BY ss_item_sk - HAVING avg(ss_net_profit) > 0.9 * (SELECT avg(ss_net_profit) rank_col - FROM store_sales - WHERE ss_store_sk = 4 - AND ss_addr_sk IS NULL - GROUP BY ss_store_sk)) V2) V21 - WHERE rnk < 11) descending, - item i1, item i2 -WHERE asceding.rnk = descending.rnk - AND i1.i_item_sk = asceding.item_sk - AND i2.i_item_sk = descending.item_sk -ORDER BY asceding.rnk -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q45.sql b/src/test/resources/tpcds/queries/q45.sql deleted file mode 100644 index 907438f19..000000000 --- a/src/test/resources/tpcds/queries/q45.sql +++ /dev/null @@ -1,21 +0,0 @@ -SELECT - ca_zip, - ca_city, - sum(ws_sales_price) -FROM web_sales, customer, customer_address, date_dim, item -WHERE ws_bill_customer_sk = c_customer_sk - AND c_current_addr_sk = ca_address_sk - AND ws_item_sk = i_item_sk - AND (substr(ca_zip, 1, 5) IN - ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') - OR - i_item_id IN (SELECT i_item_id - FROM item - WHERE i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) - ) -) - AND ws_sold_date_sk = d_date_sk - AND d_qoy = 2 AND d_year = 2001 -GROUP BY ca_zip, ca_city -ORDER BY ca_zip, ca_city -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q46.sql b/src/test/resources/tpcds/queries/q46.sql deleted file mode 100644 index 0911677df..000000000 --- a/src/test/resources/tpcds/queries/q46.sql +++ /dev/null @@ -1,32 +0,0 @@ -SELECT - c_last_name, - c_first_name, - ca_city, - bought_city, - ss_ticket_number, - amt, - profit -FROM - (SELECT - ss_ticket_number, - ss_customer_sk, - ca_city bought_city, - sum(ss_coupon_amt) amt, - sum(ss_net_profit) profit - FROM store_sales, date_dim, store, household_demographics, customer_address - WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk - AND store_sales.ss_store_sk = store.s_store_sk - AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk - AND store_sales.ss_addr_sk = customer_address.ca_address_sk - AND (household_demographics.hd_dep_count = 4 OR - household_demographics.hd_vehicle_count = 3) - AND date_dim.d_dow IN (6, 0) - AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) - AND store.s_city IN ('Fairview', 'Midway', 'Fairview', 'Fairview', 'Fairview') - GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn, customer, - customer_address current_addr -WHERE ss_customer_sk = c_customer_sk - AND customer.c_current_addr_sk = current_addr.ca_address_sk - AND current_addr.ca_city <> bought_city -ORDER BY c_last_name, c_first_name, ca_city, bought_city, ss_ticket_number -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q47.sql b/src/test/resources/tpcds/queries/q47.sql deleted file mode 100644 index cfc37a4ce..000000000 --- a/src/test/resources/tpcds/queries/q47.sql +++ /dev/null @@ -1,63 +0,0 @@ -WITH v1 AS ( - SELECT - i_category, - i_brand, - s_store_name, - s_company_name, - d_year, - d_moy, - sum(ss_sales_price) sum_sales, - avg(sum(ss_sales_price)) - OVER - (PARTITION BY i_category, i_brand, - s_store_name, s_company_name, d_year) - avg_monthly_sales, - rank() - OVER - (PARTITION BY i_category, i_brand, - s_store_name, s_company_name - ORDER BY d_year, d_moy) rn - FROM item, store_sales, date_dim, store - WHERE ss_item_sk = i_item_sk AND - ss_sold_date_sk = d_date_sk AND - ss_store_sk = s_store_sk AND - ( - d_year = 1999 OR - (d_year = 1999 - 1 AND d_moy = 12) OR - (d_year = 1999 + 1 AND d_moy = 1) - ) - GROUP BY i_category, i_brand, - s_store_name, s_company_name, - d_year, d_moy), - v2 AS ( - SELECT - v1.i_category, - v1.i_brand, - v1.s_store_name, - v1.s_company_name, - v1.d_year, - v1.d_moy, - v1.avg_monthly_sales, - v1.sum_sales, - v1_lag.sum_sales psum, - v1_lead.sum_sales nsum - FROM v1, v1 v1_lag, v1 v1_lead - WHERE v1.i_category = v1_lag.i_category AND - v1.i_category = v1_lead.i_category AND - v1.i_brand = v1_lag.i_brand AND - v1.i_brand = v1_lead.i_brand AND - v1.s_store_name = v1_lag.s_store_name AND - v1.s_store_name = v1_lead.s_store_name AND - v1.s_company_name = v1_lag.s_company_name AND - v1.s_company_name = v1_lead.s_company_name AND - v1.rn = v1_lag.rn + 1 AND - v1.rn = v1_lead.rn - 1) -SELECT * -FROM v2 -WHERE d_year = 1999 AND - avg_monthly_sales > 0 AND - CASE WHEN avg_monthly_sales > 0 - THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales - ELSE NULL END > 0.1 -ORDER BY sum_sales - avg_monthly_sales, 3 -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q48.sql b/src/test/resources/tpcds/queries/q48.sql deleted file mode 100644 index fdb9f38e2..000000000 --- a/src/test/resources/tpcds/queries/q48.sql +++ /dev/null @@ -1,63 +0,0 @@ -SELECT sum(ss_quantity) -FROM store_sales, store, customer_demographics, customer_address, date_dim -WHERE s_store_sk = ss_store_sk - AND ss_sold_date_sk = d_date_sk AND d_year = 2001 - AND - ( - ( - cd_demo_sk = ss_cdemo_sk - AND - cd_marital_status = 'M' - AND - cd_education_status = '4 yr Degree' - AND - ss_sales_price BETWEEN 100.00 AND 150.00 - ) - OR - ( - cd_demo_sk = ss_cdemo_sk - AND - cd_marital_status = 'D' - AND - cd_education_status = '2 yr Degree' - AND - ss_sales_price BETWEEN 50.00 AND 100.00 - ) - OR - ( - cd_demo_sk = ss_cdemo_sk - AND - cd_marital_status = 'S' - AND - cd_education_status = 'College' - AND - ss_sales_price BETWEEN 150.00 AND 200.00 - ) - ) - AND - ( - ( - ss_addr_sk = ca_address_sk - AND - ca_country = 'United States' - AND - ca_state IN ('CO', 'OH', 'TX') - AND ss_net_profit BETWEEN 0 AND 2000 - ) - OR - (ss_addr_sk = ca_address_sk - AND - ca_country = 'United States' - AND - ca_state IN ('OR', 'MN', 'KY') - AND ss_net_profit BETWEEN 150 AND 3000 - ) - OR - (ss_addr_sk = ca_address_sk - AND - ca_country = 'United States' - AND - ca_state IN ('VA', 'CA', 'MS') - AND ss_net_profit BETWEEN 50 AND 25000 - ) - ) diff --git a/src/test/resources/tpcds/queries/q49.sql b/src/test/resources/tpcds/queries/q49.sql deleted file mode 100644 index 9568d8b92..000000000 --- a/src/test/resources/tpcds/queries/q49.sql +++ /dev/null @@ -1,126 +0,0 @@ -SELECT - 'web' AS channel, - web.item, - web.return_ratio, - web.return_rank, - web.currency_rank -FROM ( - SELECT - item, - return_ratio, - currency_ratio, - rank() - OVER ( - ORDER BY return_ratio) AS return_rank, - rank() - OVER ( - ORDER BY currency_ratio) AS currency_rank - FROM - (SELECT - ws.ws_item_sk AS item, - (cast(sum(coalesce(wr.wr_return_quantity, 0)) AS DECIMAL(15, 4)) / - cast(sum(coalesce(ws.ws_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio, - (cast(sum(coalesce(wr.wr_return_amt, 0)) AS DECIMAL(15, 4)) / - cast(sum(coalesce(ws.ws_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio - FROM - web_sales ws LEFT OUTER JOIN web_returns wr - ON (ws.ws_order_number = wr.wr_order_number AND - ws.ws_item_sk = wr.wr_item_sk) - , date_dim - WHERE - wr.wr_return_amt > 10000 - AND ws.ws_net_profit > 1 - AND ws.ws_net_paid > 0 - AND ws.ws_quantity > 0 - AND ws_sold_date_sk = d_date_sk - AND d_year = 2001 - AND d_moy = 12 - GROUP BY ws.ws_item_sk - ) in_web - ) web -WHERE (web.return_rank <= 10 OR web.currency_rank <= 10) -UNION -SELECT - 'catalog' AS channel, - catalog.item, - catalog.return_ratio, - catalog.return_rank, - catalog.currency_rank -FROM ( - SELECT - item, - return_ratio, - currency_ratio, - rank() - OVER ( - ORDER BY return_ratio) AS return_rank, - rank() - OVER ( - ORDER BY currency_ratio) AS currency_rank - FROM - (SELECT - cs.cs_item_sk AS item, - (cast(sum(coalesce(cr.cr_return_quantity, 0)) AS DECIMAL(15, 4)) / - cast(sum(coalesce(cs.cs_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio, - (cast(sum(coalesce(cr.cr_return_amount, 0)) AS DECIMAL(15, 4)) / - cast(sum(coalesce(cs.cs_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio - FROM - catalog_sales cs LEFT OUTER JOIN catalog_returns cr - ON (cs.cs_order_number = cr.cr_order_number AND - cs.cs_item_sk = cr.cr_item_sk) - , date_dim - WHERE - cr.cr_return_amount > 10000 - AND cs.cs_net_profit > 1 - AND cs.cs_net_paid > 0 - AND cs.cs_quantity > 0 - AND cs_sold_date_sk = d_date_sk - AND d_year = 2001 - AND d_moy = 12 - GROUP BY cs.cs_item_sk - ) in_cat - ) catalog -WHERE (catalog.return_rank <= 10 OR catalog.currency_rank <= 10) -UNION -SELECT - 'store' AS channel, - store.item, - store.return_ratio, - store.return_rank, - store.currency_rank -FROM ( - SELECT - item, - return_ratio, - currency_ratio, - rank() - OVER ( - ORDER BY return_ratio) AS return_rank, - rank() - OVER ( - ORDER BY currency_ratio) AS currency_rank - FROM - (SELECT - sts.ss_item_sk AS item, - (cast(sum(coalesce(sr.sr_return_quantity, 0)) AS DECIMAL(15, 4)) / - cast(sum(coalesce(sts.ss_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio, - (cast(sum(coalesce(sr.sr_return_amt, 0)) AS DECIMAL(15, 4)) / - cast(sum(coalesce(sts.ss_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio - FROM - store_sales sts LEFT OUTER JOIN store_returns sr - ON (sts.ss_ticket_number = sr.sr_ticket_number AND sts.ss_item_sk = sr.sr_item_sk) - , date_dim - WHERE - sr.sr_return_amt > 10000 - AND sts.ss_net_profit > 1 - AND sts.ss_net_paid > 0 - AND sts.ss_quantity > 0 - AND ss_sold_date_sk = d_date_sk - AND d_year = 2001 - AND d_moy = 12 - GROUP BY sts.ss_item_sk - ) in_store - ) store -WHERE (store.return_rank <= 10 OR store.currency_rank <= 10) -ORDER BY 1, 4, 5 -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q5.sql b/src/test/resources/tpcds/queries/q5.sql deleted file mode 100644 index b87cf3a44..000000000 --- a/src/test/resources/tpcds/queries/q5.sql +++ /dev/null @@ -1,131 +0,0 @@ -WITH ssr AS -( SELECT - s_store_id, - sum(sales_price) AS sales, - sum(profit) AS profit, - sum(return_amt) AS RETURNS, - sum(net_loss) AS profit_loss - FROM - (SELECT - ss_store_sk AS store_sk, - ss_sold_date_sk AS date_sk, - ss_ext_sales_price AS sales_price, - ss_net_profit AS profit, - cast(0 AS DECIMAL(7, 2)) AS return_amt, - cast(0 AS DECIMAL(7, 2)) AS net_loss - FROM store_sales - UNION ALL - SELECT - sr_store_sk AS store_sk, - sr_returned_date_sk AS date_sk, - cast(0 AS DECIMAL(7, 2)) AS sales_price, - cast(0 AS DECIMAL(7, 2)) AS profit, - sr_return_amt AS return_amt, - sr_net_loss AS net_loss - FROM store_returns) - salesreturns, date_dim, store - WHERE date_sk = d_date_sk - AND d_date BETWEEN cast('2000-08-23' AS DATE) - AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days)) - AND store_sk = s_store_sk - GROUP BY s_store_id), - csr AS - ( SELECT - cp_catalog_page_id, - sum(sales_price) AS sales, - sum(profit) AS profit, - sum(return_amt) AS RETURNS, - sum(net_loss) AS profit_loss - FROM - (SELECT - cs_catalog_page_sk AS page_sk, - cs_sold_date_sk AS date_sk, - cs_ext_sales_price AS sales_price, - cs_net_profit AS profit, - cast(0 AS DECIMAL(7, 2)) AS return_amt, - cast(0 AS DECIMAL(7, 2)) AS net_loss - FROM catalog_sales - UNION ALL - SELECT - cr_catalog_page_sk AS page_sk, - cr_returned_date_sk AS date_sk, - cast(0 AS DECIMAL(7, 2)) AS sales_price, - cast(0 AS DECIMAL(7, 2)) AS profit, - cr_return_amount AS return_amt, - cr_net_loss AS net_loss - FROM catalog_returns - ) salesreturns, date_dim, catalog_page - WHERE date_sk = d_date_sk - AND d_date BETWEEN cast('2000-08-23' AS DATE) - AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days)) - AND page_sk = cp_catalog_page_sk - GROUP BY cp_catalog_page_id) - , - wsr AS - ( SELECT - web_site_id, - sum(sales_price) AS sales, - sum(profit) AS profit, - sum(return_amt) AS RETURNS, - sum(net_loss) AS profit_loss - FROM - (SELECT - ws_web_site_sk AS wsr_web_site_sk, - ws_sold_date_sk AS date_sk, - ws_ext_sales_price AS sales_price, - ws_net_profit AS profit, - cast(0 AS DECIMAL(7, 2)) AS return_amt, - cast(0 AS DECIMAL(7, 2)) AS net_loss - FROM web_sales - UNION ALL - SELECT - ws_web_site_sk AS wsr_web_site_sk, - wr_returned_date_sk AS date_sk, - cast(0 AS DECIMAL(7, 2)) AS sales_price, - cast(0 AS DECIMAL(7, 2)) AS profit, - wr_return_amt AS return_amt, - wr_net_loss AS net_loss - FROM web_returns - LEFT OUTER JOIN web_sales ON - (wr_item_sk = ws_item_sk - AND wr_order_number = ws_order_number) - ) salesreturns, date_dim, web_site - WHERE date_sk = d_date_sk - AND d_date BETWEEN cast('2000-08-23' AS DATE) - AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days)) - AND wsr_web_site_sk = web_site_sk - GROUP BY web_site_id) -SELECT - channel, - id, - sum(sales) AS sales, - sum(returns) AS returns, - sum(profit) AS profit -FROM - (SELECT - 'store channel' AS channel, - concat('store', s_store_id) AS id, - sales, - returns, - (profit - profit_loss) AS profit - FROM ssr - UNION ALL - SELECT - 'catalog channel' AS channel, - concat('catalog_page', cp_catalog_page_id) AS id, - sales, - returns, - (profit - profit_loss) AS profit - FROM csr - UNION ALL - SELECT - 'web channel' AS channel, - concat('web_site', web_site_id) AS id, - sales, - returns, - (profit - profit_loss) AS profit - FROM wsr - ) x -GROUP BY ROLLUP (channel, id) -ORDER BY channel, id -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q50.sql b/src/test/resources/tpcds/queries/q50.sql deleted file mode 100644 index f1d4b1544..000000000 --- a/src/test/resources/tpcds/queries/q50.sql +++ /dev/null @@ -1,47 +0,0 @@ -SELECT - s_store_name, - s_company_id, - s_street_number, - s_street_name, - s_street_type, - s_suite_number, - s_city, - s_county, - s_state, - s_zip, - sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk <= 30) - THEN 1 - ELSE 0 END) AS `30 days `, - sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 30) AND - (sr_returned_date_sk - ss_sold_date_sk <= 60) - THEN 1 - ELSE 0 END) AS `31 - 60 days `, - sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 60) AND - (sr_returned_date_sk - ss_sold_date_sk <= 90) - THEN 1 - ELSE 0 END) AS `61 - 90 days `, - sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 90) AND - (sr_returned_date_sk - ss_sold_date_sk <= 120) - THEN 1 - ELSE 0 END) AS `91 - 120 days `, - sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 120) - THEN 1 - ELSE 0 END) AS `>120 days ` -FROM - store_sales, store_returns, store, date_dim d1, date_dim d2 -WHERE - d2.d_year = 2001 - AND d2.d_moy = 8 - AND ss_ticket_number = sr_ticket_number - AND ss_item_sk = sr_item_sk - AND ss_sold_date_sk = d1.d_date_sk - AND sr_returned_date_sk = d2.d_date_sk - AND ss_customer_sk = sr_customer_sk - AND ss_store_sk = s_store_sk -GROUP BY - s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, - s_suite_number, s_city, s_county, s_state, s_zip -ORDER BY - s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, - s_suite_number, s_city, s_county, s_state, s_zip -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q51.sql b/src/test/resources/tpcds/queries/q51.sql deleted file mode 100644 index 62b003eb6..000000000 --- a/src/test/resources/tpcds/queries/q51.sql +++ /dev/null @@ -1,55 +0,0 @@ -WITH web_v1 AS ( - SELECT - ws_item_sk item_sk, - d_date, - sum(sum(ws_sales_price)) - OVER (PARTITION BY ws_item_sk - ORDER BY d_date - ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) cume_sales - FROM web_sales, date_dim - WHERE ws_sold_date_sk = d_date_sk - AND d_month_seq BETWEEN 1200 AND 1200 + 11 - AND ws_item_sk IS NOT NULL - GROUP BY ws_item_sk, d_date), - store_v1 AS ( - SELECT - ss_item_sk item_sk, - d_date, - sum(sum(ss_sales_price)) - OVER (PARTITION BY ss_item_sk - ORDER BY d_date - ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) cume_sales - FROM store_sales, date_dim - WHERE ss_sold_date_sk = d_date_sk - AND d_month_seq BETWEEN 1200 AND 1200 + 11 - AND ss_item_sk IS NOT NULL - GROUP BY ss_item_sk, d_date) -SELECT * -FROM (SELECT - item_sk, - d_date, - web_sales, - store_sales, - max(web_sales) - OVER (PARTITION BY item_sk - ORDER BY d_date - ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) web_cumulative, - max(store_sales) - OVER (PARTITION BY item_sk - ORDER BY d_date - ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) store_cumulative -FROM (SELECT - CASE WHEN web.item_sk IS NOT NULL - THEN web.item_sk - ELSE store.item_sk END item_sk, - CASE WHEN web.d_date IS NOT NULL - THEN web.d_date - ELSE store.d_date END d_date, - web.cume_sales web_sales, - store.cume_sales store_sales -FROM web_v1 web FULL OUTER JOIN store_v1 store ON (web.item_sk = store.item_sk - AND web.d_date = store.d_date) - ) x) y -WHERE web_cumulative > store_cumulative -ORDER BY item_sk, d_date -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q52.sql b/src/test/resources/tpcds/queries/q52.sql deleted file mode 100644 index 467d1ae05..000000000 --- a/src/test/resources/tpcds/queries/q52.sql +++ /dev/null @@ -1,14 +0,0 @@ -SELECT - dt.d_year, - item.i_brand_id brand_id, - item.i_brand brand, - sum(ss_ext_sales_price) ext_price -FROM date_dim dt, store_sales, item -WHERE dt.d_date_sk = store_sales.ss_sold_date_sk - AND store_sales.ss_item_sk = item.i_item_sk - AND item.i_manager_id = 1 - AND dt.d_moy = 11 - AND dt.d_year = 2000 -GROUP BY dt.d_year, item.i_brand, item.i_brand_id -ORDER BY dt.d_year, ext_price DESC, brand_id -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q53.sql b/src/test/resources/tpcds/queries/q53.sql deleted file mode 100644 index b42c68dcf..000000000 --- a/src/test/resources/tpcds/queries/q53.sql +++ /dev/null @@ -1,30 +0,0 @@ -SELECT * -FROM - (SELECT - i_manufact_id, - sum(ss_sales_price) sum_sales, - avg(sum(ss_sales_price)) - OVER (PARTITION BY i_manufact_id) avg_quarterly_sales - FROM item, store_sales, date_dim, store - WHERE ss_item_sk = i_item_sk AND - ss_sold_date_sk = d_date_sk AND - ss_store_sk = s_store_sk AND - d_month_seq IN (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, - 1200 + 7, 1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11) AND - ((i_category IN ('Books', 'Children', 'Electronics') AND - i_class IN ('personal', 'portable', 'reference', 'self-help') AND - i_brand IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', - 'exportiunivamalg #9', 'scholaramalgamalg #9')) - OR - (i_category IN ('Women', 'Music', 'Men') AND - i_class IN ('accessories', 'classical', 'fragrances', 'pants') AND - i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', - 'importoamalg #1'))) - GROUP BY i_manufact_id, d_qoy) tmp1 -WHERE CASE WHEN avg_quarterly_sales > 0 - THEN abs(sum_sales - avg_quarterly_sales) / avg_quarterly_sales - ELSE NULL END > 0.1 -ORDER BY avg_quarterly_sales, - sum_sales, - i_manufact_id -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q54.sql b/src/test/resources/tpcds/queries/q54.sql deleted file mode 100644 index 897237fb6..000000000 --- a/src/test/resources/tpcds/queries/q54.sql +++ /dev/null @@ -1,61 +0,0 @@ -WITH my_customers AS ( - SELECT DISTINCT - c_customer_sk, - c_current_addr_sk - FROM - (SELECT - cs_sold_date_sk sold_date_sk, - cs_bill_customer_sk customer_sk, - cs_item_sk item_sk - FROM catalog_sales - UNION ALL - SELECT - ws_sold_date_sk sold_date_sk, - ws_bill_customer_sk customer_sk, - ws_item_sk item_sk - FROM web_sales - ) cs_or_ws_sales, - item, - date_dim, - customer - WHERE sold_date_sk = d_date_sk - AND item_sk = i_item_sk - AND i_category = 'Women' - AND i_class = 'maternity' - AND c_customer_sk = cs_or_ws_sales.customer_sk - AND d_moy = 12 - AND d_year = 1998 -) - , my_revenue AS ( - SELECT - c_customer_sk, - sum(ss_ext_sales_price) AS revenue - FROM my_customers, - store_sales, - customer_address, - store, - date_dim - WHERE c_current_addr_sk = ca_address_sk - AND ca_county = s_county - AND ca_state = s_state - AND ss_sold_date_sk = d_date_sk - AND c_customer_sk = ss_customer_sk - AND d_month_seq BETWEEN (SELECT DISTINCT d_month_seq + 1 - FROM date_dim - WHERE d_year = 1998 AND d_moy = 12) - AND (SELECT DISTINCT d_month_seq + 3 - FROM date_dim - WHERE d_year = 1998 AND d_moy = 12) - GROUP BY c_customer_sk -) - , segments AS -(SELECT cast((revenue / 50) AS INT) AS segment - FROM my_revenue) -SELECT - segment, - count(*) AS num_customers, - segment * 50 AS segment_base -FROM segments -GROUP BY segment -ORDER BY segment, num_customers -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q55.sql b/src/test/resources/tpcds/queries/q55.sql deleted file mode 100644 index bc5d888c9..000000000 --- a/src/test/resources/tpcds/queries/q55.sql +++ /dev/null @@ -1,13 +0,0 @@ -SELECT - i_brand_id brand_id, - i_brand brand, - sum(ss_ext_sales_price) ext_price -FROM date_dim, store_sales, item -WHERE d_date_sk = ss_sold_date_sk - AND ss_item_sk = i_item_sk - AND i_manager_id = 28 - AND d_moy = 11 - AND d_year = 1999 -GROUP BY i_brand, i_brand_id -ORDER BY ext_price DESC, brand_id -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q56.sql b/src/test/resources/tpcds/queries/q56.sql deleted file mode 100644 index 2fa1738dc..000000000 --- a/src/test/resources/tpcds/queries/q56.sql +++ /dev/null @@ -1,65 +0,0 @@ -WITH ss AS ( - SELECT - i_item_id, - sum(ss_ext_sales_price) total_sales - FROM - store_sales, date_dim, customer_address, item - WHERE - i_item_id IN (SELECT i_item_id - FROM item - WHERE i_color IN ('slate', 'blanched', 'burnished')) - AND ss_item_sk = i_item_sk - AND ss_sold_date_sk = d_date_sk - AND d_year = 2001 - AND d_moy = 2 - AND ss_addr_sk = ca_address_sk - AND ca_gmt_offset = -5 - GROUP BY i_item_id), - cs AS ( - SELECT - i_item_id, - sum(cs_ext_sales_price) total_sales - FROM - catalog_sales, date_dim, customer_address, item - WHERE - i_item_id IN (SELECT i_item_id - FROM item - WHERE i_color IN ('slate', 'blanched', 'burnished')) - AND cs_item_sk = i_item_sk - AND cs_sold_date_sk = d_date_sk - AND d_year = 2001 - AND d_moy = 2 - AND cs_bill_addr_sk = ca_address_sk - AND ca_gmt_offset = -5 - GROUP BY i_item_id), - ws AS ( - SELECT - i_item_id, - sum(ws_ext_sales_price) total_sales - FROM - web_sales, date_dim, customer_address, item - WHERE - i_item_id IN (SELECT i_item_id - FROM item - WHERE i_color IN ('slate', 'blanched', 'burnished')) - AND ws_item_sk = i_item_sk - AND ws_sold_date_sk = d_date_sk - AND d_year = 2001 - AND d_moy = 2 - AND ws_bill_addr_sk = ca_address_sk - AND ca_gmt_offset = -5 - GROUP BY i_item_id) -SELECT - i_item_id, - sum(total_sales) total_sales -FROM (SELECT * - FROM ss - UNION ALL - SELECT * - FROM cs - UNION ALL - SELECT * - FROM ws) tmp1 -GROUP BY i_item_id -ORDER BY total_sales -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q57.sql b/src/test/resources/tpcds/queries/q57.sql deleted file mode 100644 index cf70d4b90..000000000 --- a/src/test/resources/tpcds/queries/q57.sql +++ /dev/null @@ -1,56 +0,0 @@ -WITH v1 AS ( - SELECT - i_category, - i_brand, - cc_name, - d_year, - d_moy, - sum(cs_sales_price) sum_sales, - avg(sum(cs_sales_price)) - OVER - (PARTITION BY i_category, i_brand, cc_name, d_year) - avg_monthly_sales, - rank() - OVER - (PARTITION BY i_category, i_brand, cc_name - ORDER BY d_year, d_moy) rn - FROM item, catalog_sales, date_dim, call_center - WHERE cs_item_sk = i_item_sk AND - cs_sold_date_sk = d_date_sk AND - cc_call_center_sk = cs_call_center_sk AND - ( - d_year = 1999 OR - (d_year = 1999 - 1 AND d_moy = 12) OR - (d_year = 1999 + 1 AND d_moy = 1) - ) - GROUP BY i_category, i_brand, - cc_name, d_year, d_moy), - v2 AS ( - SELECT - v1.i_category, - v1.i_brand, - v1.cc_name, - v1.d_year, - v1.d_moy, - v1.avg_monthly_sales, - v1.sum_sales, - v1_lag.sum_sales psum, - v1_lead.sum_sales nsum - FROM v1, v1 v1_lag, v1 v1_lead - WHERE v1.i_category = v1_lag.i_category AND - v1.i_category = v1_lead.i_category AND - v1.i_brand = v1_lag.i_brand AND - v1.i_brand = v1_lead.i_brand AND - v1.cc_name = v1_lag.cc_name AND - v1.cc_name = v1_lead.cc_name AND - v1.rn = v1_lag.rn + 1 AND - v1.rn = v1_lead.rn - 1) -SELECT * -FROM v2 -WHERE d_year = 1999 AND - avg_monthly_sales > 0 AND - CASE WHEN avg_monthly_sales > 0 - THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales - ELSE NULL END > 0.1 -ORDER BY sum_sales - avg_monthly_sales, 3 -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q58.sql b/src/test/resources/tpcds/queries/q58.sql deleted file mode 100644 index 5f63f33dc..000000000 --- a/src/test/resources/tpcds/queries/q58.sql +++ /dev/null @@ -1,59 +0,0 @@ -WITH ss_items AS -(SELECT - i_item_id item_id, - sum(ss_ext_sales_price) ss_item_rev - FROM store_sales, item, date_dim - WHERE ss_item_sk = i_item_sk - AND d_date IN (SELECT d_date - FROM date_dim - WHERE d_week_seq = (SELECT d_week_seq - FROM date_dim - WHERE d_date = '2000-01-03')) - AND ss_sold_date_sk = d_date_sk - GROUP BY i_item_id), - cs_items AS - (SELECT - i_item_id item_id, - sum(cs_ext_sales_price) cs_item_rev - FROM catalog_sales, item, date_dim - WHERE cs_item_sk = i_item_sk - AND d_date IN (SELECT d_date - FROM date_dim - WHERE d_week_seq = (SELECT d_week_seq - FROM date_dim - WHERE d_date = '2000-01-03')) - AND cs_sold_date_sk = d_date_sk - GROUP BY i_item_id), - ws_items AS - (SELECT - i_item_id item_id, - sum(ws_ext_sales_price) ws_item_rev - FROM web_sales, item, date_dim - WHERE ws_item_sk = i_item_sk - AND d_date IN (SELECT d_date - FROM date_dim - WHERE d_week_seq = (SELECT d_week_seq - FROM date_dim - WHERE d_date = '2000-01-03')) - AND ws_sold_date_sk = d_date_sk - GROUP BY i_item_id) -SELECT - ss_items.item_id, - ss_item_rev, - ss_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 ss_dev, - cs_item_rev, - cs_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 cs_dev, - ws_item_rev, - ws_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 ws_dev, - (ss_item_rev + cs_item_rev + ws_item_rev) / 3 average -FROM ss_items, cs_items, ws_items -WHERE ss_items.item_id = cs_items.item_id - AND ss_items.item_id = ws_items.item_id - AND ss_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev - AND ss_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev - AND cs_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev - AND cs_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev - AND ws_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev - AND ws_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev -ORDER BY item_id, ss_item_rev -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q59.sql b/src/test/resources/tpcds/queries/q59.sql deleted file mode 100644 index 3cef20276..000000000 --- a/src/test/resources/tpcds/queries/q59.sql +++ /dev/null @@ -1,75 +0,0 @@ -WITH wss AS -(SELECT - d_week_seq, - ss_store_sk, - sum(CASE WHEN (d_day_name = 'Sunday') - THEN ss_sales_price - ELSE NULL END) sun_sales, - sum(CASE WHEN (d_day_name = 'Monday') - THEN ss_sales_price - ELSE NULL END) mon_sales, - sum(CASE WHEN (d_day_name = 'Tuesday') - THEN ss_sales_price - ELSE NULL END) tue_sales, - sum(CASE WHEN (d_day_name = 'Wednesday') - THEN ss_sales_price - ELSE NULL END) wed_sales, - sum(CASE WHEN (d_day_name = 'Thursday') - THEN ss_sales_price - ELSE NULL END) thu_sales, - sum(CASE WHEN (d_day_name = 'Friday') - THEN ss_sales_price - ELSE NULL END) fri_sales, - sum(CASE WHEN (d_day_name = 'Saturday') - THEN ss_sales_price - ELSE NULL END) sat_sales - FROM store_sales, date_dim - WHERE d_date_sk = ss_sold_date_sk - GROUP BY d_week_seq, ss_store_sk -) -SELECT - s_store_name1, - s_store_id1, - d_week_seq1, - sun_sales1 / sun_sales2, - mon_sales1 / mon_sales2, - tue_sales1 / tue_sales2, - wed_sales1 / wed_sales2, - thu_sales1 / thu_sales2, - fri_sales1 / fri_sales2, - sat_sales1 / sat_sales2 -FROM - (SELECT - s_store_name s_store_name1, - wss.d_week_seq d_week_seq1, - s_store_id s_store_id1, - sun_sales sun_sales1, - mon_sales mon_sales1, - tue_sales tue_sales1, - wed_sales wed_sales1, - thu_sales thu_sales1, - fri_sales fri_sales1, - sat_sales sat_sales1 - FROM wss, store, date_dim d - WHERE d.d_week_seq = wss.d_week_seq AND - ss_store_sk = s_store_sk AND - d_month_seq BETWEEN 1212 AND 1212 + 11) y, - (SELECT - s_store_name s_store_name2, - wss.d_week_seq d_week_seq2, - s_store_id s_store_id2, - sun_sales sun_sales2, - mon_sales mon_sales2, - tue_sales tue_sales2, - wed_sales wed_sales2, - thu_sales thu_sales2, - fri_sales fri_sales2, - sat_sales sat_sales2 - FROM wss, store, date_dim d - WHERE d.d_week_seq = wss.d_week_seq AND - ss_store_sk = s_store_sk AND - d_month_seq BETWEEN 1212 + 12 AND 1212 + 23) x -WHERE s_store_id1 = s_store_id2 - AND d_week_seq1 = d_week_seq2 - 52 -ORDER BY s_store_name1, s_store_id1, d_week_seq1 -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q6.sql b/src/test/resources/tpcds/queries/q6.sql deleted file mode 100644 index f0f5cf05a..000000000 --- a/src/test/resources/tpcds/queries/q6.sql +++ /dev/null @@ -1,21 +0,0 @@ -SELECT - a.ca_state state, - count(*) cnt -FROM - customer_address a, customer c, store_sales s, date_dim d, item i -WHERE a.ca_address_sk = c.c_current_addr_sk - AND c.c_customer_sk = s.ss_customer_sk - AND s.ss_sold_date_sk = d.d_date_sk - AND s.ss_item_sk = i.i_item_sk - AND d.d_month_seq = - (SELECT DISTINCT (d_month_seq) - FROM date_dim - WHERE d_year = 2000 AND d_moy = 1) - AND i.i_current_price > 1.2 * - (SELECT avg(j.i_current_price) - FROM item j - WHERE j.i_category = i.i_category) -GROUP BY a.ca_state -HAVING count(*) >= 10 -ORDER BY cnt -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q60.sql b/src/test/resources/tpcds/queries/q60.sql deleted file mode 100644 index 41b963f44..000000000 --- a/src/test/resources/tpcds/queries/q60.sql +++ /dev/null @@ -1,62 +0,0 @@ -WITH ss AS ( - SELECT - i_item_id, - sum(ss_ext_sales_price) total_sales - FROM store_sales, date_dim, customer_address, item - WHERE - i_item_id IN (SELECT i_item_id - FROM item - WHERE i_category IN ('Music')) - AND ss_item_sk = i_item_sk - AND ss_sold_date_sk = d_date_sk - AND d_year = 1998 - AND d_moy = 9 - AND ss_addr_sk = ca_address_sk - AND ca_gmt_offset = -5 - GROUP BY i_item_id), - cs AS ( - SELECT - i_item_id, - sum(cs_ext_sales_price) total_sales - FROM catalog_sales, date_dim, customer_address, item - WHERE - i_item_id IN (SELECT i_item_id - FROM item - WHERE i_category IN ('Music')) - AND cs_item_sk = i_item_sk - AND cs_sold_date_sk = d_date_sk - AND d_year = 1998 - AND d_moy = 9 - AND cs_bill_addr_sk = ca_address_sk - AND ca_gmt_offset = -5 - GROUP BY i_item_id), - ws AS ( - SELECT - i_item_id, - sum(ws_ext_sales_price) total_sales - FROM web_sales, date_dim, customer_address, item - WHERE - i_item_id IN (SELECT i_item_id - FROM item - WHERE i_category IN ('Music')) - AND ws_item_sk = i_item_sk - AND ws_sold_date_sk = d_date_sk - AND d_year = 1998 - AND d_moy = 9 - AND ws_bill_addr_sk = ca_address_sk - AND ca_gmt_offset = -5 - GROUP BY i_item_id) -SELECT - i_item_id, - sum(total_sales) total_sales -FROM (SELECT * - FROM ss - UNION ALL - SELECT * - FROM cs - UNION ALL - SELECT * - FROM ws) tmp1 -GROUP BY i_item_id -ORDER BY i_item_id, total_sales -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q61.sql b/src/test/resources/tpcds/queries/q61.sql deleted file mode 100644 index b0a872b4b..000000000 --- a/src/test/resources/tpcds/queries/q61.sql +++ /dev/null @@ -1,33 +0,0 @@ -SELECT - promotions, - total, - cast(promotions AS DECIMAL(15, 4)) / cast(total AS DECIMAL(15, 4)) * 100 -FROM - (SELECT sum(ss_ext_sales_price) promotions - FROM store_sales, store, promotion, date_dim, customer, customer_address, item - WHERE ss_sold_date_sk = d_date_sk - AND ss_store_sk = s_store_sk - AND ss_promo_sk = p_promo_sk - AND ss_customer_sk = c_customer_sk - AND ca_address_sk = c_current_addr_sk - AND ss_item_sk = i_item_sk - AND ca_gmt_offset = -5 - AND i_category = 'Jewelry' - AND (p_channel_dmail = 'Y' OR p_channel_email = 'Y' OR p_channel_tv = 'Y') - AND s_gmt_offset = -5 - AND d_year = 1998 - AND d_moy = 11) promotional_sales, - (SELECT sum(ss_ext_sales_price) total - FROM store_sales, store, date_dim, customer, customer_address, item - WHERE ss_sold_date_sk = d_date_sk - AND ss_store_sk = s_store_sk - AND ss_customer_sk = c_customer_sk - AND ca_address_sk = c_current_addr_sk - AND ss_item_sk = i_item_sk - AND ca_gmt_offset = -5 - AND i_category = 'Jewelry' - AND s_gmt_offset = -5 - AND d_year = 1998 - AND d_moy = 11) all_sales -ORDER BY promotions, total -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q62.sql b/src/test/resources/tpcds/queries/q62.sql deleted file mode 100644 index 8a414f154..000000000 --- a/src/test/resources/tpcds/queries/q62.sql +++ /dev/null @@ -1,35 +0,0 @@ -SELECT - substr(w_warehouse_name, 1, 20), - sm_type, - web_name, - sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk <= 30) - THEN 1 - ELSE 0 END) AS `30 days `, - sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 30) AND - (ws_ship_date_sk - ws_sold_date_sk <= 60) - THEN 1 - ELSE 0 END) AS `31 - 60 days `, - sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 60) AND - (ws_ship_date_sk - ws_sold_date_sk <= 90) - THEN 1 - ELSE 0 END) AS `61 - 90 days `, - sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 90) AND - (ws_ship_date_sk - ws_sold_date_sk <= 120) - THEN 1 - ELSE 0 END) AS `91 - 120 days `, - sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 120) - THEN 1 - ELSE 0 END) AS `>120 days ` -FROM - web_sales, warehouse, ship_mode, web_site, date_dim -WHERE - d_month_seq BETWEEN 1200 AND 1200 + 11 - AND ws_ship_date_sk = d_date_sk - AND ws_warehouse_sk = w_warehouse_sk - AND ws_ship_mode_sk = sm_ship_mode_sk - AND ws_web_site_sk = web_site_sk -GROUP BY - substr(w_warehouse_name, 1, 20), sm_type, web_name -ORDER BY - substr(w_warehouse_name, 1, 20), sm_type, web_name -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q63.sql b/src/test/resources/tpcds/queries/q63.sql deleted file mode 100644 index ef6867e0a..000000000 --- a/src/test/resources/tpcds/queries/q63.sql +++ /dev/null @@ -1,31 +0,0 @@ -SELECT * -FROM (SELECT - i_manager_id, - sum(ss_sales_price) sum_sales, - avg(sum(ss_sales_price)) - OVER (PARTITION BY i_manager_id) avg_monthly_sales -FROM item - , store_sales - , date_dim - , store -WHERE ss_item_sk = i_item_sk - AND ss_sold_date_sk = d_date_sk - AND ss_store_sk = s_store_sk - AND d_month_seq IN (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, 1200 + 7, - 1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11) - AND ((i_category IN ('Books', 'Children', 'Electronics') - AND i_class IN ('personal', 'portable', 'refernece', 'self-help') - AND i_brand IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', - 'exportiunivamalg #9', 'scholaramalgamalg #9')) - OR (i_category IN ('Women', 'Music', 'Men') - AND i_class IN ('accessories', 'classical', 'fragrances', 'pants') - AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', - 'importoamalg #1'))) -GROUP BY i_manager_id, d_moy) tmp1 -WHERE CASE WHEN avg_monthly_sales > 0 - THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales - ELSE NULL END > 0.1 -ORDER BY i_manager_id - , avg_monthly_sales - , sum_sales -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q64.sql b/src/test/resources/tpcds/queries/q64.sql deleted file mode 100644 index 8ec1d31b6..000000000 --- a/src/test/resources/tpcds/queries/q64.sql +++ /dev/null @@ -1,92 +0,0 @@ -WITH cs_ui AS -(SELECT - cs_item_sk, - sum(cs_ext_list_price) AS sale, - sum(cr_refunded_cash + cr_reversed_charge + cr_store_credit) AS refund - FROM catalog_sales - , catalog_returns - WHERE cs_item_sk = cr_item_sk - AND cs_order_number = cr_order_number - GROUP BY cs_item_sk - HAVING sum(cs_ext_list_price) > 2 * sum(cr_refunded_cash + cr_reversed_charge + cr_store_credit)), - cross_sales AS - (SELECT - i_product_name product_name, - i_item_sk item_sk, - s_store_name store_name, - s_zip store_zip, - ad1.ca_street_number b_street_number, - ad1.ca_street_name b_streen_name, - ad1.ca_city b_city, - ad1.ca_zip b_zip, - ad2.ca_street_number c_street_number, - ad2.ca_street_name c_street_name, - ad2.ca_city c_city, - ad2.ca_zip c_zip, - d1.d_year AS syear, - d2.d_year AS fsyear, - d3.d_year s2year, - count(*) cnt, - sum(ss_wholesale_cost) s1, - sum(ss_list_price) s2, - sum(ss_coupon_amt) s3 - FROM store_sales, store_returns, cs_ui, date_dim d1, date_dim d2, date_dim d3, - store, customer, customer_demographics cd1, customer_demographics cd2, - promotion, household_demographics hd1, household_demographics hd2, - customer_address ad1, customer_address ad2, income_band ib1, income_band ib2, item - WHERE ss_store_sk = s_store_sk AND - ss_sold_date_sk = d1.d_date_sk AND - ss_customer_sk = c_customer_sk AND - ss_cdemo_sk = cd1.cd_demo_sk AND - ss_hdemo_sk = hd1.hd_demo_sk AND - ss_addr_sk = ad1.ca_address_sk AND - ss_item_sk = i_item_sk AND - ss_item_sk = sr_item_sk AND - ss_ticket_number = sr_ticket_number AND - ss_item_sk = cs_ui.cs_item_sk AND - c_current_cdemo_sk = cd2.cd_demo_sk AND - c_current_hdemo_sk = hd2.hd_demo_sk AND - c_current_addr_sk = ad2.ca_address_sk AND - c_first_sales_date_sk = d2.d_date_sk AND - c_first_shipto_date_sk = d3.d_date_sk AND - ss_promo_sk = p_promo_sk AND - hd1.hd_income_band_sk = ib1.ib_income_band_sk AND - hd2.hd_income_band_sk = ib2.ib_income_band_sk AND - cd1.cd_marital_status <> cd2.cd_marital_status AND - i_color IN ('purple', 'burlywood', 'indian', 'spring', 'floral', 'medium') AND - i_current_price BETWEEN 64 AND 64 + 10 AND - i_current_price BETWEEN 64 + 1 AND 64 + 15 - GROUP BY i_product_name, i_item_sk, s_store_name, s_zip, ad1.ca_street_number, - ad1.ca_street_name, ad1.ca_city, ad1.ca_zip, ad2.ca_street_number, - ad2.ca_street_name, ad2.ca_city, ad2.ca_zip, d1.d_year, d2.d_year, d3.d_year - ) -SELECT - cs1.product_name, - cs1.store_name, - cs1.store_zip, - cs1.b_street_number, - cs1.b_streen_name, - cs1.b_city, - cs1.b_zip, - cs1.c_street_number, - cs1.c_street_name, - cs1.c_city, - cs1.c_zip, - cs1.syear, - cs1.cnt, - cs1.s1, - cs1.s2, - cs1.s3, - cs2.s1, - cs2.s2, - cs2.s3, - cs2.syear, - cs2.cnt -FROM cross_sales cs1, cross_sales cs2 -WHERE cs1.item_sk = cs2.item_sk AND - cs1.syear = 1999 AND - cs2.syear = 1999 + 1 AND - cs2.cnt <= cs1.cnt AND - cs1.store_name = cs2.store_name AND - cs1.store_zip = cs2.store_zip -ORDER BY cs1.product_name, cs1.store_name, cs2.cnt diff --git a/src/test/resources/tpcds/queries/q65.sql b/src/test/resources/tpcds/queries/q65.sql deleted file mode 100644 index aad04be1b..000000000 --- a/src/test/resources/tpcds/queries/q65.sql +++ /dev/null @@ -1,33 +0,0 @@ -SELECT - s_store_name, - i_item_desc, - sc.revenue, - i_current_price, - i_wholesale_cost, - i_brand -FROM store, item, - (SELECT - ss_store_sk, - avg(revenue) AS ave - FROM - (SELECT - ss_store_sk, - ss_item_sk, - sum(ss_sales_price) AS revenue - FROM store_sales, date_dim - WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1176 AND 1176 + 11 - GROUP BY ss_store_sk, ss_item_sk) sa - GROUP BY ss_store_sk) sb, - (SELECT - ss_store_sk, - ss_item_sk, - sum(ss_sales_price) AS revenue - FROM store_sales, date_dim - WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1176 AND 1176 + 11 - GROUP BY ss_store_sk, ss_item_sk) sc -WHERE sb.ss_store_sk = sc.ss_store_sk AND - sc.revenue <= 0.1 * sb.ave AND - s_store_sk = sc.ss_store_sk AND - i_item_sk = sc.ss_item_sk -ORDER BY s_store_name, i_item_desc -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q66.sql b/src/test/resources/tpcds/queries/q66.sql deleted file mode 100644 index f826b4164..000000000 --- a/src/test/resources/tpcds/queries/q66.sql +++ /dev/null @@ -1,240 +0,0 @@ -SELECT - w_warehouse_name, - w_warehouse_sq_ft, - w_city, - w_county, - w_state, - w_country, - ship_carriers, - year, - sum(jan_sales) AS jan_sales, - sum(feb_sales) AS feb_sales, - sum(mar_sales) AS mar_sales, - sum(apr_sales) AS apr_sales, - sum(may_sales) AS may_sales, - sum(jun_sales) AS jun_sales, - sum(jul_sales) AS jul_sales, - sum(aug_sales) AS aug_sales, - sum(sep_sales) AS sep_sales, - sum(oct_sales) AS oct_sales, - sum(nov_sales) AS nov_sales, - sum(dec_sales) AS dec_sales, - sum(jan_sales / w_warehouse_sq_ft) AS jan_sales_per_sq_foot, - sum(feb_sales / w_warehouse_sq_ft) AS feb_sales_per_sq_foot, - sum(mar_sales / w_warehouse_sq_ft) AS mar_sales_per_sq_foot, - sum(apr_sales / w_warehouse_sq_ft) AS apr_sales_per_sq_foot, - sum(may_sales / w_warehouse_sq_ft) AS may_sales_per_sq_foot, - sum(jun_sales / w_warehouse_sq_ft) AS jun_sales_per_sq_foot, - sum(jul_sales / w_warehouse_sq_ft) AS jul_sales_per_sq_foot, - sum(aug_sales / w_warehouse_sq_ft) AS aug_sales_per_sq_foot, - sum(sep_sales / w_warehouse_sq_ft) AS sep_sales_per_sq_foot, - sum(oct_sales / w_warehouse_sq_ft) AS oct_sales_per_sq_foot, - sum(nov_sales / w_warehouse_sq_ft) AS nov_sales_per_sq_foot, - sum(dec_sales / w_warehouse_sq_ft) AS dec_sales_per_sq_foot, - sum(jan_net) AS jan_net, - sum(feb_net) AS feb_net, - sum(mar_net) AS mar_net, - sum(apr_net) AS apr_net, - sum(may_net) AS may_net, - sum(jun_net) AS jun_net, - sum(jul_net) AS jul_net, - sum(aug_net) AS aug_net, - sum(sep_net) AS sep_net, - sum(oct_net) AS oct_net, - sum(nov_net) AS nov_net, - sum(dec_net) AS dec_net -FROM ( - (SELECT - w_warehouse_name, - w_warehouse_sq_ft, - w_city, - w_county, - w_state, - w_country, - concat('DHL', ',', 'BARIAN') AS ship_carriers, - d_year AS year, - sum(CASE WHEN d_moy = 1 - THEN ws_ext_sales_price * ws_quantity - ELSE 0 END) AS jan_sales, - sum(CASE WHEN d_moy = 2 - THEN ws_ext_sales_price * ws_quantity - ELSE 0 END) AS feb_sales, - sum(CASE WHEN d_moy = 3 - THEN ws_ext_sales_price * ws_quantity - ELSE 0 END) AS mar_sales, - sum(CASE WHEN d_moy = 4 - THEN ws_ext_sales_price * ws_quantity - ELSE 0 END) AS apr_sales, - sum(CASE WHEN d_moy = 5 - THEN ws_ext_sales_price * ws_quantity - ELSE 0 END) AS may_sales, - sum(CASE WHEN d_moy = 6 - THEN ws_ext_sales_price * ws_quantity - ELSE 0 END) AS jun_sales, - sum(CASE WHEN d_moy = 7 - THEN ws_ext_sales_price * ws_quantity - ELSE 0 END) AS jul_sales, - sum(CASE WHEN d_moy = 8 - THEN ws_ext_sales_price * ws_quantity - ELSE 0 END) AS aug_sales, - sum(CASE WHEN d_moy = 9 - THEN ws_ext_sales_price * ws_quantity - ELSE 0 END) AS sep_sales, - sum(CASE WHEN d_moy = 10 - THEN ws_ext_sales_price * ws_quantity - ELSE 0 END) AS oct_sales, - sum(CASE WHEN d_moy = 11 - THEN ws_ext_sales_price * ws_quantity - ELSE 0 END) AS nov_sales, - sum(CASE WHEN d_moy = 12 - THEN ws_ext_sales_price * ws_quantity - ELSE 0 END) AS dec_sales, - sum(CASE WHEN d_moy = 1 - THEN ws_net_paid * ws_quantity - ELSE 0 END) AS jan_net, - sum(CASE WHEN d_moy = 2 - THEN ws_net_paid * ws_quantity - ELSE 0 END) AS feb_net, - sum(CASE WHEN d_moy = 3 - THEN ws_net_paid * ws_quantity - ELSE 0 END) AS mar_net, - sum(CASE WHEN d_moy = 4 - THEN ws_net_paid * ws_quantity - ELSE 0 END) AS apr_net, - sum(CASE WHEN d_moy = 5 - THEN ws_net_paid * ws_quantity - ELSE 0 END) AS may_net, - sum(CASE WHEN d_moy = 6 - THEN ws_net_paid * ws_quantity - ELSE 0 END) AS jun_net, - sum(CASE WHEN d_moy = 7 - THEN ws_net_paid * ws_quantity - ELSE 0 END) AS jul_net, - sum(CASE WHEN d_moy = 8 - THEN ws_net_paid * ws_quantity - ELSE 0 END) AS aug_net, - sum(CASE WHEN d_moy = 9 - THEN ws_net_paid * ws_quantity - ELSE 0 END) AS sep_net, - sum(CASE WHEN d_moy = 10 - THEN ws_net_paid * ws_quantity - ELSE 0 END) AS oct_net, - sum(CASE WHEN d_moy = 11 - THEN ws_net_paid * ws_quantity - ELSE 0 END) AS nov_net, - sum(CASE WHEN d_moy = 12 - THEN ws_net_paid * ws_quantity - ELSE 0 END) AS dec_net - FROM - web_sales, warehouse, date_dim, time_dim, ship_mode - WHERE - ws_warehouse_sk = w_warehouse_sk - AND ws_sold_date_sk = d_date_sk - AND ws_sold_time_sk = t_time_sk - AND ws_ship_mode_sk = sm_ship_mode_sk - AND d_year = 2001 - AND t_time BETWEEN 30838 AND 30838 + 28800 - AND sm_carrier IN ('DHL', 'BARIAN') - GROUP BY - w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, d_year) - UNION ALL - (SELECT - w_warehouse_name, - w_warehouse_sq_ft, - w_city, - w_county, - w_state, - w_country, - concat('DHL', ',', 'BARIAN') AS ship_carriers, - d_year AS year, - sum(CASE WHEN d_moy = 1 - THEN cs_sales_price * cs_quantity - ELSE 0 END) AS jan_sales, - sum(CASE WHEN d_moy = 2 - THEN cs_sales_price * cs_quantity - ELSE 0 END) AS feb_sales, - sum(CASE WHEN d_moy = 3 - THEN cs_sales_price * cs_quantity - ELSE 0 END) AS mar_sales, - sum(CASE WHEN d_moy = 4 - THEN cs_sales_price * cs_quantity - ELSE 0 END) AS apr_sales, - sum(CASE WHEN d_moy = 5 - THEN cs_sales_price * cs_quantity - ELSE 0 END) AS may_sales, - sum(CASE WHEN d_moy = 6 - THEN cs_sales_price * cs_quantity - ELSE 0 END) AS jun_sales, - sum(CASE WHEN d_moy = 7 - THEN cs_sales_price * cs_quantity - ELSE 0 END) AS jul_sales, - sum(CASE WHEN d_moy = 8 - THEN cs_sales_price * cs_quantity - ELSE 0 END) AS aug_sales, - sum(CASE WHEN d_moy = 9 - THEN cs_sales_price * cs_quantity - ELSE 0 END) AS sep_sales, - sum(CASE WHEN d_moy = 10 - THEN cs_sales_price * cs_quantity - ELSE 0 END) AS oct_sales, - sum(CASE WHEN d_moy = 11 - THEN cs_sales_price * cs_quantity - ELSE 0 END) AS nov_sales, - sum(CASE WHEN d_moy = 12 - THEN cs_sales_price * cs_quantity - ELSE 0 END) AS dec_sales, - sum(CASE WHEN d_moy = 1 - THEN cs_net_paid_inc_tax * cs_quantity - ELSE 0 END) AS jan_net, - sum(CASE WHEN d_moy = 2 - THEN cs_net_paid_inc_tax * cs_quantity - ELSE 0 END) AS feb_net, - sum(CASE WHEN d_moy = 3 - THEN cs_net_paid_inc_tax * cs_quantity - ELSE 0 END) AS mar_net, - sum(CASE WHEN d_moy = 4 - THEN cs_net_paid_inc_tax * cs_quantity - ELSE 0 END) AS apr_net, - sum(CASE WHEN d_moy = 5 - THEN cs_net_paid_inc_tax * cs_quantity - ELSE 0 END) AS may_net, - sum(CASE WHEN d_moy = 6 - THEN cs_net_paid_inc_tax * cs_quantity - ELSE 0 END) AS jun_net, - sum(CASE WHEN d_moy = 7 - THEN cs_net_paid_inc_tax * cs_quantity - ELSE 0 END) AS jul_net, - sum(CASE WHEN d_moy = 8 - THEN cs_net_paid_inc_tax * cs_quantity - ELSE 0 END) AS aug_net, - sum(CASE WHEN d_moy = 9 - THEN cs_net_paid_inc_tax * cs_quantity - ELSE 0 END) AS sep_net, - sum(CASE WHEN d_moy = 10 - THEN cs_net_paid_inc_tax * cs_quantity - ELSE 0 END) AS oct_net, - sum(CASE WHEN d_moy = 11 - THEN cs_net_paid_inc_tax * cs_quantity - ELSE 0 END) AS nov_net, - sum(CASE WHEN d_moy = 12 - THEN cs_net_paid_inc_tax * cs_quantity - ELSE 0 END) AS dec_net - FROM - catalog_sales, warehouse, date_dim, time_dim, ship_mode - WHERE - cs_warehouse_sk = w_warehouse_sk - AND cs_sold_date_sk = d_date_sk - AND cs_sold_time_sk = t_time_sk - AND cs_ship_mode_sk = sm_ship_mode_sk - AND d_year = 2001 - AND t_time BETWEEN 30838 AND 30838 + 28800 - AND sm_carrier IN ('DHL', 'BARIAN') - GROUP BY - w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, d_year - ) - ) x -GROUP BY - w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, - ship_carriers, year -ORDER BY w_warehouse_name -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q67.sql b/src/test/resources/tpcds/queries/q67.sql deleted file mode 100644 index f66e2252b..000000000 --- a/src/test/resources/tpcds/queries/q67.sql +++ /dev/null @@ -1,38 +0,0 @@ -SELECT * -FROM - (SELECT - i_category, - i_class, - i_brand, - i_product_name, - d_year, - d_qoy, - d_moy, - s_store_id, - sumsales, - rank() - OVER (PARTITION BY i_category - ORDER BY sumsales DESC) rk - FROM - (SELECT - i_category, - i_class, - i_brand, - i_product_name, - d_year, - d_qoy, - d_moy, - s_store_id, - sum(coalesce(ss_sales_price * ss_quantity, 0)) sumsales - FROM store_sales, date_dim, store, item - WHERE ss_sold_date_sk = d_date_sk - AND ss_item_sk = i_item_sk - AND ss_store_sk = s_store_sk - AND d_month_seq BETWEEN 1200 AND 1200 + 11 - GROUP BY ROLLUP (i_category, i_class, i_brand, i_product_name, d_year, d_qoy, - d_moy, s_store_id)) dw1) dw2 -WHERE rk <= 100 -ORDER BY - i_category, i_class, i_brand, i_product_name, d_year, - d_qoy, d_moy, s_store_id, sumsales, rk -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q68.sql b/src/test/resources/tpcds/queries/q68.sql deleted file mode 100644 index adb8a7189..000000000 --- a/src/test/resources/tpcds/queries/q68.sql +++ /dev/null @@ -1,34 +0,0 @@ -SELECT - c_last_name, - c_first_name, - ca_city, - bought_city, - ss_ticket_number, - extended_price, - extended_tax, - list_price -FROM (SELECT - ss_ticket_number, - ss_customer_sk, - ca_city bought_city, - sum(ss_ext_sales_price) extended_price, - sum(ss_ext_list_price) list_price, - sum(ss_ext_tax) extended_tax -FROM store_sales, date_dim, store, household_demographics, customer_address -WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk - AND store_sales.ss_store_sk = store.s_store_sk - AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk - AND store_sales.ss_addr_sk = customer_address.ca_address_sk - AND date_dim.d_dom BETWEEN 1 AND 2 - AND (household_demographics.hd_dep_count = 4 OR - household_demographics.hd_vehicle_count = 3) - AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) - AND store.s_city IN ('Midway', 'Fairview') -GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn, - customer, - customer_address current_addr -WHERE ss_customer_sk = c_customer_sk - AND customer.c_current_addr_sk = current_addr.ca_address_sk - AND current_addr.ca_city <> bought_city -ORDER BY c_last_name, ss_ticket_number -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q69.sql b/src/test/resources/tpcds/queries/q69.sql deleted file mode 100644 index 1f0ee64f5..000000000 --- a/src/test/resources/tpcds/queries/q69.sql +++ /dev/null @@ -1,38 +0,0 @@ -SELECT - cd_gender, - cd_marital_status, - cd_education_status, - count(*) cnt1, - cd_purchase_estimate, - count(*) cnt2, - cd_credit_rating, - count(*) cnt3 -FROM - customer c, customer_address ca, customer_demographics -WHERE - c.c_current_addr_sk = ca.ca_address_sk AND - ca_state IN ('KY', 'GA', 'NM') AND - cd_demo_sk = c.c_current_cdemo_sk AND - exists(SELECT * - FROM store_sales, date_dim - WHERE c.c_customer_sk = ss_customer_sk AND - ss_sold_date_sk = d_date_sk AND - d_year = 2001 AND - d_moy BETWEEN 4 AND 4 + 2) AND - (NOT exists(SELECT * - FROM web_sales, date_dim - WHERE c.c_customer_sk = ws_bill_customer_sk AND - ws_sold_date_sk = d_date_sk AND - d_year = 2001 AND - d_moy BETWEEN 4 AND 4 + 2) AND - NOT exists(SELECT * - FROM catalog_sales, date_dim - WHERE c.c_customer_sk = cs_ship_customer_sk AND - cs_sold_date_sk = d_date_sk AND - d_year = 2001 AND - d_moy BETWEEN 4 AND 4 + 2)) -GROUP BY cd_gender, cd_marital_status, cd_education_status, - cd_purchase_estimate, cd_credit_rating -ORDER BY cd_gender, cd_marital_status, cd_education_status, - cd_purchase_estimate, cd_credit_rating -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q7.sql b/src/test/resources/tpcds/queries/q7.sql deleted file mode 100644 index 6630a0054..000000000 --- a/src/test/resources/tpcds/queries/q7.sql +++ /dev/null @@ -1,19 +0,0 @@ -SELECT - i_item_id, - avg(ss_quantity) agg1, - avg(ss_list_price) agg2, - avg(ss_coupon_amt) agg3, - avg(ss_sales_price) agg4 -FROM store_sales, customer_demographics, date_dim, item, promotion -WHERE ss_sold_date_sk = d_date_sk AND - ss_item_sk = i_item_sk AND - ss_cdemo_sk = cd_demo_sk AND - ss_promo_sk = p_promo_sk AND - cd_gender = 'M' AND - cd_marital_status = 'S' AND - cd_education_status = 'College' AND - (p_channel_email = 'N' OR p_channel_event = 'N') AND - d_year = 2000 -GROUP BY i_item_id -ORDER BY i_item_id -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q70.sql b/src/test/resources/tpcds/queries/q70.sql deleted file mode 100644 index 625011b21..000000000 --- a/src/test/resources/tpcds/queries/q70.sql +++ /dev/null @@ -1,38 +0,0 @@ -SELECT - sum(ss_net_profit) AS total_sum, - s_state, - s_county, - grouping(s_state) + grouping(s_county) AS lochierarchy, - rank() - OVER ( - PARTITION BY grouping(s_state) + grouping(s_county), - CASE WHEN grouping(s_county) = 0 - THEN s_state END - ORDER BY sum(ss_net_profit) DESC) AS rank_within_parent -FROM - store_sales, date_dim d1, store -WHERE - d1.d_month_seq BETWEEN 1200 AND 1200 + 11 - AND d1.d_date_sk = ss_sold_date_sk - AND s_store_sk = ss_store_sk - AND s_state IN - (SELECT s_state - FROM - (SELECT - s_state AS s_state, - rank() - OVER (PARTITION BY s_state - ORDER BY sum(ss_net_profit) DESC) AS ranking - FROM store_sales, store, date_dim - WHERE d_month_seq BETWEEN 1200 AND 1200 + 11 - AND d_date_sk = ss_sold_date_sk - AND s_store_sk = ss_store_sk - GROUP BY s_state) tmp1 - WHERE ranking <= 5) -GROUP BY ROLLUP (s_state, s_county) -ORDER BY - lochierarchy DESC - , CASE WHEN lochierarchy = 0 - THEN s_state END - , rank_within_parent -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q71.sql b/src/test/resources/tpcds/queries/q71.sql deleted file mode 100644 index 8d724b924..000000000 --- a/src/test/resources/tpcds/queries/q71.sql +++ /dev/null @@ -1,44 +0,0 @@ -SELECT - i_brand_id brand_id, - i_brand brand, - t_hour, - t_minute, - sum(ext_price) ext_price -FROM item, - (SELECT - ws_ext_sales_price AS ext_price, - ws_sold_date_sk AS sold_date_sk, - ws_item_sk AS sold_item_sk, - ws_sold_time_sk AS time_sk - FROM web_sales, date_dim - WHERE d_date_sk = ws_sold_date_sk - AND d_moy = 11 - AND d_year = 1999 - UNION ALL - SELECT - cs_ext_sales_price AS ext_price, - cs_sold_date_sk AS sold_date_sk, - cs_item_sk AS sold_item_sk, - cs_sold_time_sk AS time_sk - FROM catalog_sales, date_dim - WHERE d_date_sk = cs_sold_date_sk - AND d_moy = 11 - AND d_year = 1999 - UNION ALL - SELECT - ss_ext_sales_price AS ext_price, - ss_sold_date_sk AS sold_date_sk, - ss_item_sk AS sold_item_sk, - ss_sold_time_sk AS time_sk - FROM store_sales, date_dim - WHERE d_date_sk = ss_sold_date_sk - AND d_moy = 11 - AND d_year = 1999 - ) AS tmp, time_dim -WHERE - sold_item_sk = i_item_sk - AND i_manager_id = 1 - AND time_sk = t_time_sk - AND (t_meal_time = 'breakfast' OR t_meal_time = 'dinner') -GROUP BY i_brand, i_brand_id, t_hour, t_minute -ORDER BY ext_price DESC, brand_id diff --git a/src/test/resources/tpcds/queries/q72.sql b/src/test/resources/tpcds/queries/q72.sql deleted file mode 100644 index 99b3eee54..000000000 --- a/src/test/resources/tpcds/queries/q72.sql +++ /dev/null @@ -1,33 +0,0 @@ -SELECT - i_item_desc, - w_warehouse_name, - d1.d_week_seq, - count(CASE WHEN p_promo_sk IS NULL - THEN 1 - ELSE 0 END) no_promo, - count(CASE WHEN p_promo_sk IS NOT NULL - THEN 1 - ELSE 0 END) promo, - count(*) total_cnt -FROM catalog_sales - JOIN inventory ON (cs_item_sk = inv_item_sk) - JOIN warehouse ON (w_warehouse_sk = inv_warehouse_sk) - JOIN item ON (i_item_sk = cs_item_sk) - JOIN customer_demographics ON (cs_bill_cdemo_sk = cd_demo_sk) - JOIN household_demographics ON (cs_bill_hdemo_sk = hd_demo_sk) - JOIN date_dim d1 ON (cs_sold_date_sk = d1.d_date_sk) - JOIN date_dim d2 ON (inv_date_sk = d2.d_date_sk) - JOIN date_dim d3 ON (cs_ship_date_sk = d3.d_date_sk) - LEFT OUTER JOIN promotion ON (cs_promo_sk = p_promo_sk) - LEFT OUTER JOIN catalog_returns ON (cr_item_sk = cs_item_sk AND cr_order_number = cs_order_number) -WHERE d1.d_week_seq = d2.d_week_seq - AND inv_quantity_on_hand < cs_quantity - AND d3.d_date > (cast(d1.d_date AS DATE) + interval 5 days) - AND hd_buy_potential = '>10000' - AND d1.d_year = 1999 - AND hd_buy_potential = '>10000' - AND cd_marital_status = 'D' - AND d1.d_year = 1999 -GROUP BY i_item_desc, w_warehouse_name, d1.d_week_seq -ORDER BY total_cnt DESC, i_item_desc, w_warehouse_name, d_week_seq -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q73.sql b/src/test/resources/tpcds/queries/q73.sql deleted file mode 100644 index 881be2e90..000000000 --- a/src/test/resources/tpcds/queries/q73.sql +++ /dev/null @@ -1,30 +0,0 @@ -SELECT - c_last_name, - c_first_name, - c_salutation, - c_preferred_cust_flag, - ss_ticket_number, - cnt -FROM - (SELECT - ss_ticket_number, - ss_customer_sk, - count(*) cnt - FROM store_sales, date_dim, store, household_demographics - WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk - AND store_sales.ss_store_sk = store.s_store_sk - AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk - AND date_dim.d_dom BETWEEN 1 AND 2 - AND (household_demographics.hd_buy_potential = '>10000' OR - household_demographics.hd_buy_potential = 'unknown') - AND household_demographics.hd_vehicle_count > 0 - AND CASE WHEN household_demographics.hd_vehicle_count > 0 - THEN - household_demographics.hd_dep_count / household_demographics.hd_vehicle_count - ELSE NULL END > 1 - AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) - AND store.s_county IN ('Williamson County', 'Franklin Parish', 'Bronx County', 'Orange County') - GROUP BY ss_ticket_number, ss_customer_sk) dj, customer -WHERE ss_customer_sk = c_customer_sk - AND cnt BETWEEN 1 AND 5 -ORDER BY cnt DESC diff --git a/src/test/resources/tpcds/queries/q74.sql b/src/test/resources/tpcds/queries/q74.sql deleted file mode 100644 index 154b26d68..000000000 --- a/src/test/resources/tpcds/queries/q74.sql +++ /dev/null @@ -1,58 +0,0 @@ -WITH year_total AS ( - SELECT - c_customer_id customer_id, - c_first_name customer_first_name, - c_last_name customer_last_name, - d_year AS year, - sum(ss_net_paid) year_total, - 's' sale_type - FROM - customer, store_sales, date_dim - WHERE c_customer_sk = ss_customer_sk - AND ss_sold_date_sk = d_date_sk - AND d_year IN (2001, 2001 + 1) - GROUP BY - c_customer_id, c_first_name, c_last_name, d_year - UNION ALL - SELECT - c_customer_id customer_id, - c_first_name customer_first_name, - c_last_name customer_last_name, - d_year AS year, - sum(ws_net_paid) year_total, - 'w' sale_type - FROM - customer, web_sales, date_dim - WHERE c_customer_sk = ws_bill_customer_sk - AND ws_sold_date_sk = d_date_sk - AND d_year IN (2001, 2001 + 1) - GROUP BY - c_customer_id, c_first_name, c_last_name, d_year) -SELECT - t_s_secyear.customer_id, - t_s_secyear.customer_first_name, - t_s_secyear.customer_last_name -FROM - year_total t_s_firstyear, year_total t_s_secyear, - year_total t_w_firstyear, year_total t_w_secyear -WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id - AND t_s_firstyear.customer_id = t_w_secyear.customer_id - AND t_s_firstyear.customer_id = t_w_firstyear.customer_id - AND t_s_firstyear.sale_type = 's' - AND t_w_firstyear.sale_type = 'w' - AND t_s_secyear.sale_type = 's' - AND t_w_secyear.sale_type = 'w' - AND t_s_firstyear.year = 2001 - AND t_s_secyear.year = 2001 + 1 - AND t_w_firstyear.year = 2001 - AND t_w_secyear.year = 2001 + 1 - AND t_s_firstyear.year_total > 0 - AND t_w_firstyear.year_total > 0 - AND CASE WHEN t_w_firstyear.year_total > 0 - THEN t_w_secyear.year_total / t_w_firstyear.year_total - ELSE NULL END - > CASE WHEN t_s_firstyear.year_total > 0 - THEN t_s_secyear.year_total / t_s_firstyear.year_total - ELSE NULL END -ORDER BY 1, 1, 1 -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q75.sql b/src/test/resources/tpcds/queries/q75.sql deleted file mode 100644 index 2a143232b..000000000 --- a/src/test/resources/tpcds/queries/q75.sql +++ /dev/null @@ -1,76 +0,0 @@ -WITH all_sales AS ( - SELECT - d_year, - i_brand_id, - i_class_id, - i_category_id, - i_manufact_id, - SUM(sales_cnt) AS sales_cnt, - SUM(sales_amt) AS sales_amt - FROM ( - SELECT - d_year, - i_brand_id, - i_class_id, - i_category_id, - i_manufact_id, - cs_quantity - COALESCE(cr_return_quantity, 0) AS sales_cnt, - cs_ext_sales_price - COALESCE(cr_return_amount, 0.0) AS sales_amt - FROM catalog_sales - JOIN item ON i_item_sk = cs_item_sk - JOIN date_dim ON d_date_sk = cs_sold_date_sk - LEFT JOIN catalog_returns ON (cs_order_number = cr_order_number - AND cs_item_sk = cr_item_sk) - WHERE i_category = 'Books' - UNION - SELECT - d_year, - i_brand_id, - i_class_id, - i_category_id, - i_manufact_id, - ss_quantity - COALESCE(sr_return_quantity, 0) AS sales_cnt, - ss_ext_sales_price - COALESCE(sr_return_amt, 0.0) AS sales_amt - FROM store_sales - JOIN item ON i_item_sk = ss_item_sk - JOIN date_dim ON d_date_sk = ss_sold_date_sk - LEFT JOIN store_returns ON (ss_ticket_number = sr_ticket_number - AND ss_item_sk = sr_item_sk) - WHERE i_category = 'Books' - UNION - SELECT - d_year, - i_brand_id, - i_class_id, - i_category_id, - i_manufact_id, - ws_quantity - COALESCE(wr_return_quantity, 0) AS sales_cnt, - ws_ext_sales_price - COALESCE(wr_return_amt, 0.0) AS sales_amt - FROM web_sales - JOIN item ON i_item_sk = ws_item_sk - JOIN date_dim ON d_date_sk = ws_sold_date_sk - LEFT JOIN web_returns ON (ws_order_number = wr_order_number - AND ws_item_sk = wr_item_sk) - WHERE i_category = 'Books') sales_detail - GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) -SELECT - prev_yr.d_year AS prev_year, - curr_yr.d_year AS year, - curr_yr.i_brand_id, - curr_yr.i_class_id, - curr_yr.i_category_id, - curr_yr.i_manufact_id, - prev_yr.sales_cnt AS prev_yr_cnt, - curr_yr.sales_cnt AS curr_yr_cnt, - curr_yr.sales_cnt - prev_yr.sales_cnt AS sales_cnt_diff, - curr_yr.sales_amt - prev_yr.sales_amt AS sales_amt_diff -FROM all_sales curr_yr, all_sales prev_yr -WHERE curr_yr.i_brand_id = prev_yr.i_brand_id - AND curr_yr.i_class_id = prev_yr.i_class_id - AND curr_yr.i_category_id = prev_yr.i_category_id - AND curr_yr.i_manufact_id = prev_yr.i_manufact_id - AND curr_yr.d_year = 2002 - AND prev_yr.d_year = 2002 - 1 - AND CAST(curr_yr.sales_cnt AS DECIMAL(17, 2)) / CAST(prev_yr.sales_cnt AS DECIMAL(17, 2)) < 0.9 -ORDER BY sales_cnt_diff -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q76.sql b/src/test/resources/tpcds/queries/q76.sql deleted file mode 100644 index 815fa922b..000000000 --- a/src/test/resources/tpcds/queries/q76.sql +++ /dev/null @@ -1,47 +0,0 @@ -SELECT - channel, - col_name, - d_year, - d_qoy, - i_category, - COUNT(*) sales_cnt, - SUM(ext_sales_price) sales_amt -FROM ( - SELECT - 'store' AS channel, - ss_store_sk col_name, - d_year, - d_qoy, - i_category, - ss_ext_sales_price ext_sales_price - FROM store_sales, item, date_dim - WHERE ss_store_sk IS NULL - AND ss_sold_date_sk = d_date_sk - AND ss_item_sk = i_item_sk - UNION ALL - SELECT - 'web' AS channel, - ws_ship_customer_sk col_name, - d_year, - d_qoy, - i_category, - ws_ext_sales_price ext_sales_price - FROM web_sales, item, date_dim - WHERE ws_ship_customer_sk IS NULL - AND ws_sold_date_sk = d_date_sk - AND ws_item_sk = i_item_sk - UNION ALL - SELECT - 'catalog' AS channel, - cs_ship_addr_sk col_name, - d_year, - d_qoy, - i_category, - cs_ext_sales_price ext_sales_price - FROM catalog_sales, item, date_dim - WHERE cs_ship_addr_sk IS NULL - AND cs_sold_date_sk = d_date_sk - AND cs_item_sk = i_item_sk) foo -GROUP BY channel, col_name, d_year, d_qoy, i_category -ORDER BY channel, col_name, d_year, d_qoy, i_category -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q77.sql b/src/test/resources/tpcds/queries/q77.sql deleted file mode 100644 index a69df9fbc..000000000 --- a/src/test/resources/tpcds/queries/q77.sql +++ /dev/null @@ -1,100 +0,0 @@ -WITH ss AS -(SELECT - s_store_sk, - sum(ss_ext_sales_price) AS sales, - sum(ss_net_profit) AS profit - FROM store_sales, date_dim, store - WHERE ss_sold_date_sk = d_date_sk - AND d_date BETWEEN cast('2000-08-03' AS DATE) AND - (cast('2000-08-03' AS DATE) + INTERVAL 30 days) - AND ss_store_sk = s_store_sk - GROUP BY s_store_sk), - sr AS - (SELECT - s_store_sk, - sum(sr_return_amt) AS returns, - sum(sr_net_loss) AS profit_loss - FROM store_returns, date_dim, store - WHERE sr_returned_date_sk = d_date_sk - AND d_date BETWEEN cast('2000-08-03' AS DATE) AND - (cast('2000-08-03' AS DATE) + INTERVAL 30 days) - AND sr_store_sk = s_store_sk - GROUP BY s_store_sk), - cs AS - (SELECT - cs_call_center_sk, - sum(cs_ext_sales_price) AS sales, - sum(cs_net_profit) AS profit - FROM catalog_sales, date_dim - WHERE cs_sold_date_sk = d_date_sk - AND d_date BETWEEN cast('2000-08-03' AS DATE) AND - (cast('2000-08-03' AS DATE) + INTERVAL 30 days) - GROUP BY cs_call_center_sk), - cr AS - (SELECT - sum(cr_return_amount) AS returns, - sum(cr_net_loss) AS profit_loss - FROM catalog_returns, date_dim - WHERE cr_returned_date_sk = d_date_sk - AND d_date BETWEEN cast('2000-08-03' AS DATE) AND - (cast('2000-08-03' AS DATE) + INTERVAL 30 days)), - ws AS - (SELECT - wp_web_page_sk, - sum(ws_ext_sales_price) AS sales, - sum(ws_net_profit) AS profit - FROM web_sales, date_dim, web_page - WHERE ws_sold_date_sk = d_date_sk - AND d_date BETWEEN cast('2000-08-03' AS DATE) AND - (cast('2000-08-03' AS DATE) + INTERVAL 30 days) - AND ws_web_page_sk = wp_web_page_sk - GROUP BY wp_web_page_sk), - wr AS - (SELECT - wp_web_page_sk, - sum(wr_return_amt) AS returns, - sum(wr_net_loss) AS profit_loss - FROM web_returns, date_dim, web_page - WHERE wr_returned_date_sk = d_date_sk - AND d_date BETWEEN cast('2000-08-03' AS DATE) AND - (cast('2000-08-03' AS DATE) + INTERVAL 30 days) - AND wr_web_page_sk = wp_web_page_sk - GROUP BY wp_web_page_sk) -SELECT - channel, - id, - sum(sales) AS sales, - sum(returns) AS returns, - sum(profit) AS profit -FROM - (SELECT - 'store channel' AS channel, - ss.s_store_sk AS id, - sales, - coalesce(returns, 0) AS returns, - (profit - coalesce(profit_loss, 0)) AS profit - FROM ss - LEFT JOIN sr - ON ss.s_store_sk = sr.s_store_sk - UNION ALL - SELECT - 'catalog channel' AS channel, - cs_call_center_sk AS id, - sales, - returns, - (profit - profit_loss) AS profit - FROM cs, cr - UNION ALL - SELECT - 'web channel' AS channel, - ws.wp_web_page_sk AS id, - sales, - coalesce(returns, 0) returns, - (profit - coalesce(profit_loss, 0)) AS profit - FROM ws - LEFT JOIN wr - ON ws.wp_web_page_sk = wr.wp_web_page_sk - ) x -GROUP BY ROLLUP (channel, id) -ORDER BY channel, id -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q78.sql b/src/test/resources/tpcds/queries/q78.sql deleted file mode 100644 index 07b0940e2..000000000 --- a/src/test/resources/tpcds/queries/q78.sql +++ /dev/null @@ -1,64 +0,0 @@ -WITH ws AS -(SELECT - d_year AS ws_sold_year, - ws_item_sk, - ws_bill_customer_sk ws_customer_sk, - sum(ws_quantity) ws_qty, - sum(ws_wholesale_cost) ws_wc, - sum(ws_sales_price) ws_sp - FROM web_sales - LEFT JOIN web_returns ON wr_order_number = ws_order_number AND ws_item_sk = wr_item_sk - JOIN date_dim ON ws_sold_date_sk = d_date_sk - WHERE wr_order_number IS NULL - GROUP BY d_year, ws_item_sk, ws_bill_customer_sk -), - cs AS - (SELECT - d_year AS cs_sold_year, - cs_item_sk, - cs_bill_customer_sk cs_customer_sk, - sum(cs_quantity) cs_qty, - sum(cs_wholesale_cost) cs_wc, - sum(cs_sales_price) cs_sp - FROM catalog_sales - LEFT JOIN catalog_returns ON cr_order_number = cs_order_number AND cs_item_sk = cr_item_sk - JOIN date_dim ON cs_sold_date_sk = d_date_sk - WHERE cr_order_number IS NULL - GROUP BY d_year, cs_item_sk, cs_bill_customer_sk - ), - ss AS - (SELECT - d_year AS ss_sold_year, - ss_item_sk, - ss_customer_sk, - sum(ss_quantity) ss_qty, - sum(ss_wholesale_cost) ss_wc, - sum(ss_sales_price) ss_sp - FROM store_sales - LEFT JOIN store_returns ON sr_ticket_number = ss_ticket_number AND ss_item_sk = sr_item_sk - JOIN date_dim ON ss_sold_date_sk = d_date_sk - WHERE sr_ticket_number IS NULL - GROUP BY d_year, ss_item_sk, ss_customer_sk - ) -SELECT - round(ss_qty / (coalesce(ws_qty + cs_qty, 1)), 2) ratio, - ss_qty store_qty, - ss_wc store_wholesale_cost, - ss_sp store_sales_price, - coalesce(ws_qty, 0) + coalesce(cs_qty, 0) other_chan_qty, - coalesce(ws_wc, 0) + coalesce(cs_wc, 0) other_chan_wholesale_cost, - coalesce(ws_sp, 0) + coalesce(cs_sp, 0) other_chan_sales_price -FROM ss - LEFT JOIN ws - ON (ws_sold_year = ss_sold_year AND ws_item_sk = ss_item_sk AND ws_customer_sk = ss_customer_sk) - LEFT JOIN cs - ON (cs_sold_year = ss_sold_year AND cs_item_sk = ss_item_sk AND cs_customer_sk = ss_customer_sk) -WHERE coalesce(ws_qty, 0) > 0 AND coalesce(cs_qty, 0) > 0 AND ss_sold_year = 2000 -ORDER BY - ratio, - ss_qty DESC, ss_wc DESC, ss_sp DESC, - other_chan_qty, - other_chan_wholesale_cost, - other_chan_sales_price, - round(ss_qty / (coalesce(ws_qty + cs_qty, 1)), 2) -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q79.sql b/src/test/resources/tpcds/queries/q79.sql deleted file mode 100644 index 08f86dc20..000000000 --- a/src/test/resources/tpcds/queries/q79.sql +++ /dev/null @@ -1,27 +0,0 @@ -SELECT - c_last_name, - c_first_name, - substr(s_city, 1, 30), - ss_ticket_number, - amt, - profit -FROM - (SELECT - ss_ticket_number, - ss_customer_sk, - store.s_city, - sum(ss_coupon_amt) amt, - sum(ss_net_profit) profit - FROM store_sales, date_dim, store, household_demographics - WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk - AND store_sales.ss_store_sk = store.s_store_sk - AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk - AND (household_demographics.hd_dep_count = 6 OR - household_demographics.hd_vehicle_count > 2) - AND date_dim.d_dow = 1 - AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) - AND store.s_number_employees BETWEEN 200 AND 295 - GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, store.s_city) ms, customer -WHERE ss_customer_sk = c_customer_sk -ORDER BY c_last_name, c_first_name, substr(s_city, 1, 30), profit -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q8.sql b/src/test/resources/tpcds/queries/q8.sql deleted file mode 100644 index 497725111..000000000 --- a/src/test/resources/tpcds/queries/q8.sql +++ /dev/null @@ -1,87 +0,0 @@ -SELECT - s_store_name, - sum(ss_net_profit) -FROM store_sales, date_dim, store, - (SELECT ca_zip - FROM ( - (SELECT substr(ca_zip, 1, 5) ca_zip - FROM customer_address - WHERE substr(ca_zip, 1, 5) IN ( - '24128','76232','65084','87816','83926','77556','20548', - '26231','43848','15126','91137','61265','98294','25782', - '17920','18426','98235','40081','84093','28577','55565', - '17183','54601','67897','22752','86284','18376','38607', - '45200','21756','29741','96765','23932','89360','29839', - '25989','28898','91068','72550','10390','18845','47770', - '82636','41367','76638','86198','81312','37126','39192', - '88424','72175','81426','53672','10445','42666','66864', - '66708','41248','48583','82276','18842','78890','49448', - '14089','38122','34425','79077','19849','43285','39861', - '66162','77610','13695','99543','83444','83041','12305', - '57665','68341','25003','57834','62878','49130','81096', - '18840','27700','23470','50412','21195','16021','76107', - '71954','68309','18119','98359','64544','10336','86379', - '27068','39736','98569','28915','24206','56529','57647', - '54917','42961','91110','63981','14922','36420','23006', - '67467','32754','30903','20260','31671','51798','72325', - '85816','68621','13955','36446','41766','68806','16725', - '15146','22744','35850','88086','51649','18270','52867', - '39972','96976','63792','11376','94898','13595','10516', - '90225','58943','39371','94945','28587','96576','57855', - '28488','26105','83933','25858','34322','44438','73171', - '30122','34102','22685','71256','78451','54364','13354', - '45375','40558','56458','28286','45266','47305','69399', - '83921','26233','11101','15371','69913','35942','15882', - '25631','24610','44165','99076','33786','70738','26653', - '14328','72305','62496','22152','10144','64147','48425', - '14663','21076','18799','30450','63089','81019','68893', - '24996','51200','51211','45692','92712','70466','79994', - '22437','25280','38935','71791','73134','56571','14060', - '19505','72425','56575','74351','68786','51650','20004', - '18383','76614','11634','18906','15765','41368','73241', - '76698','78567','97189','28545','76231','75691','22246', - '51061','90578','56691','68014','51103','94167','57047', - '14867','73520','15734','63435','25733','35474','24676', - '94627','53535','17879','15559','53268','59166','11928', - '59402','33282','45721','43933','68101','33515','36634', - '71286','19736','58058','55253','67473','41918','19515', - '36495','19430','22351','77191','91393','49156','50298', - '87501','18652','53179','18767','63193','23968','65164', - '68880','21286','72823','58470','67301','13394','31016', - '70372','67030','40604','24317','45748','39127','26065', - '77721','31029','31880','60576','24671','45549','13376', - '50016','33123','19769','22927','97789','46081','72151', - '15723','46136','51949','68100','96888','64528','14171', - '79777','28709','11489','25103','32213','78668','22245', - '15798','27156','37930','62971','21337','51622','67853', - '10567','38415','15455','58263','42029','60279','37125', - '56240','88190','50308','26859','64457','89091','82136', - '62377','36233','63837','58078','17043','30010','60099', - '28810','98025','29178','87343','73273','30469','64034', - '39516','86057','21309','90257','67875','40162','11356', - '73650','61810','72013','30431','22461','19512','13375', - '55307','30625','83849','68908','26689','96451','38193', - '46820','88885','84935','69035','83144','47537','56616', - '94983','48033','69952','25486','61547','27385','61860', - '58048','56910','16807','17871','35258','31387','35458', - '35576')) - INTERSECT - (SELECT ca_zip - FROM - (SELECT - substr(ca_zip, 1, 5) ca_zip, - count(*) cnt - FROM customer_address, customer - WHERE ca_address_sk = c_current_addr_sk AND - c_preferred_cust_flag = 'Y' - GROUP BY ca_zip - HAVING count(*) > 10) A1) - ) A2 - ) V1 -WHERE ss_store_sk = s_store_sk - AND ss_sold_date_sk = d_date_sk - AND d_qoy = 2 AND d_year = 1998 - AND (substr(s_zip, 1, 2) = substr(V1.ca_zip, 1, 2)) -GROUP BY s_store_name -ORDER BY s_store_name -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q80.sql b/src/test/resources/tpcds/queries/q80.sql deleted file mode 100644 index 433db87d2..000000000 --- a/src/test/resources/tpcds/queries/q80.sql +++ /dev/null @@ -1,94 +0,0 @@ -WITH ssr AS -(SELECT - s_store_id AS store_id, - sum(ss_ext_sales_price) AS sales, - sum(coalesce(sr_return_amt, 0)) AS returns, - sum(ss_net_profit - coalesce(sr_net_loss, 0)) AS profit - FROM store_sales - LEFT OUTER JOIN store_returns ON - (ss_item_sk = sr_item_sk AND - ss_ticket_number = sr_ticket_number) - , - date_dim, store, item, promotion - WHERE ss_sold_date_sk = d_date_sk - AND d_date BETWEEN cast('2000-08-23' AS DATE) - AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) - AND ss_store_sk = s_store_sk - AND ss_item_sk = i_item_sk - AND i_current_price > 50 - AND ss_promo_sk = p_promo_sk - AND p_channel_tv = 'N' - GROUP BY s_store_id), - csr AS - (SELECT - cp_catalog_page_id AS catalog_page_id, - sum(cs_ext_sales_price) AS sales, - sum(coalesce(cr_return_amount, 0)) AS returns, - sum(cs_net_profit - coalesce(cr_net_loss, 0)) AS profit - FROM catalog_sales - LEFT OUTER JOIN catalog_returns ON - (cs_item_sk = cr_item_sk AND - cs_order_number = cr_order_number) - , - date_dim, catalog_page, item, promotion - WHERE cs_sold_date_sk = d_date_sk - AND d_date BETWEEN cast('2000-08-23' AS DATE) - AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) - AND cs_catalog_page_sk = cp_catalog_page_sk - AND cs_item_sk = i_item_sk - AND i_current_price > 50 - AND cs_promo_sk = p_promo_sk - AND p_channel_tv = 'N' - GROUP BY cp_catalog_page_id), - wsr AS - (SELECT - web_site_id, - sum(ws_ext_sales_price) AS sales, - sum(coalesce(wr_return_amt, 0)) AS returns, - sum(ws_net_profit - coalesce(wr_net_loss, 0)) AS profit - FROM web_sales - LEFT OUTER JOIN web_returns ON - (ws_item_sk = wr_item_sk AND ws_order_number = wr_order_number) - , - date_dim, web_site, item, promotion - WHERE ws_sold_date_sk = d_date_sk - AND d_date BETWEEN cast('2000-08-23' AS DATE) - AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) - AND ws_web_site_sk = web_site_sk - AND ws_item_sk = i_item_sk - AND i_current_price > 50 - AND ws_promo_sk = p_promo_sk - AND p_channel_tv = 'N' - GROUP BY web_site_id) -SELECT - channel, - id, - sum(sales) AS sales, - sum(returns) AS returns, - sum(profit) AS profit -FROM (SELECT - 'store channel' AS channel, - concat('store', store_id) AS id, - sales, - returns, - profit - FROM ssr - UNION ALL - SELECT - 'catalog channel' AS channel, - concat('catalog_page', catalog_page_id) AS id, - sales, - returns, - profit - FROM csr - UNION ALL - SELECT - 'web channel' AS channel, - concat('web_site', web_site_id) AS id, - sales, - returns, - profit - FROM wsr) x -GROUP BY ROLLUP (channel, id) -ORDER BY channel, id -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q81.sql b/src/test/resources/tpcds/queries/q81.sql deleted file mode 100644 index 18f0ffa7e..000000000 --- a/src/test/resources/tpcds/queries/q81.sql +++ /dev/null @@ -1,38 +0,0 @@ -WITH customer_total_return AS -(SELECT - cr_returning_customer_sk AS ctr_customer_sk, - ca_state AS ctr_state, - sum(cr_return_amt_inc_tax) AS ctr_total_return - FROM catalog_returns, date_dim, customer_address - WHERE cr_returned_date_sk = d_date_sk - AND d_year = 2000 - AND cr_returning_addr_sk = ca_address_sk - GROUP BY cr_returning_customer_sk, ca_state ) -SELECT - c_customer_id, - c_salutation, - c_first_name, - c_last_name, - ca_street_number, - ca_street_name, - ca_street_type, - ca_suite_number, - ca_city, - ca_county, - ca_state, - ca_zip, - ca_country, - ca_gmt_offset, - ca_location_type, - ctr_total_return -FROM customer_total_return ctr1, customer_address, customer -WHERE ctr1.ctr_total_return > (SELECT avg(ctr_total_return) * 1.2 -FROM customer_total_return ctr2 -WHERE ctr1.ctr_state = ctr2.ctr_state) - AND ca_address_sk = c_current_addr_sk - AND ca_state = 'GA' - AND ctr1.ctr_customer_sk = c_customer_sk -ORDER BY c_customer_id, c_salutation, c_first_name, c_last_name, ca_street_number, ca_street_name - , ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset - , ca_location_type, ctr_total_return -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q82.sql b/src/test/resources/tpcds/queries/q82.sql deleted file mode 100644 index 20942cfeb..000000000 --- a/src/test/resources/tpcds/queries/q82.sql +++ /dev/null @@ -1,15 +0,0 @@ -SELECT - i_item_id, - i_item_desc, - i_current_price -FROM item, inventory, date_dim, store_sales -WHERE i_current_price BETWEEN 62 AND 62 + 30 - AND inv_item_sk = i_item_sk - AND d_date_sk = inv_date_sk - AND d_date BETWEEN cast('2000-05-25' AS DATE) AND (cast('2000-05-25' AS DATE) + INTERVAL 60 days) - AND i_manufact_id IN (129, 270, 821, 423) - AND inv_quantity_on_hand BETWEEN 100 AND 500 - AND ss_item_sk = i_item_sk -GROUP BY i_item_id, i_item_desc, i_current_price -ORDER BY i_item_id -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q83.sql b/src/test/resources/tpcds/queries/q83.sql deleted file mode 100644 index 53c10c7de..000000000 --- a/src/test/resources/tpcds/queries/q83.sql +++ /dev/null @@ -1,56 +0,0 @@ -WITH sr_items AS -(SELECT - i_item_id item_id, - sum(sr_return_quantity) sr_item_qty - FROM store_returns, item, date_dim - WHERE sr_item_sk = i_item_sk - AND d_date IN (SELECT d_date - FROM date_dim - WHERE d_week_seq IN - (SELECT d_week_seq - FROM date_dim - WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) - AND sr_returned_date_sk = d_date_sk - GROUP BY i_item_id), - cr_items AS - (SELECT - i_item_id item_id, - sum(cr_return_quantity) cr_item_qty - FROM catalog_returns, item, date_dim - WHERE cr_item_sk = i_item_sk - AND d_date IN (SELECT d_date - FROM date_dim - WHERE d_week_seq IN - (SELECT d_week_seq - FROM date_dim - WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) - AND cr_returned_date_sk = d_date_sk - GROUP BY i_item_id), - wr_items AS - (SELECT - i_item_id item_id, - sum(wr_return_quantity) wr_item_qty - FROM web_returns, item, date_dim - WHERE wr_item_sk = i_item_sk AND d_date IN - (SELECT d_date - FROM date_dim - WHERE d_week_seq IN - (SELECT d_week_seq - FROM date_dim - WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) - AND wr_returned_date_sk = d_date_sk - GROUP BY i_item_id) -SELECT - sr_items.item_id, - sr_item_qty, - sr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 sr_dev, - cr_item_qty, - cr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 cr_dev, - wr_item_qty, - wr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 wr_dev, - (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 average -FROM sr_items, cr_items, wr_items -WHERE sr_items.item_id = cr_items.item_id - AND sr_items.item_id = wr_items.item_id -ORDER BY sr_items.item_id, sr_item_qty -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q84.sql b/src/test/resources/tpcds/queries/q84.sql deleted file mode 100644 index a1076b57c..000000000 --- a/src/test/resources/tpcds/queries/q84.sql +++ /dev/null @@ -1,19 +0,0 @@ -SELECT - c_customer_id AS customer_id, - concat(c_last_name, ', ', c_first_name) AS customername -FROM customer - , customer_address - , customer_demographics - , household_demographics - , income_band - , store_returns -WHERE ca_city = 'Edgewood' - AND c_current_addr_sk = ca_address_sk - AND ib_lower_bound >= 38128 - AND ib_upper_bound <= 38128 + 50000 - AND ib_income_band_sk = hd_income_band_sk - AND cd_demo_sk = c_current_cdemo_sk - AND hd_demo_sk = c_current_hdemo_sk - AND sr_cdemo_sk = cd_demo_sk -ORDER BY c_customer_id -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q85.sql b/src/test/resources/tpcds/queries/q85.sql deleted file mode 100644 index cf718b0f8..000000000 --- a/src/test/resources/tpcds/queries/q85.sql +++ /dev/null @@ -1,82 +0,0 @@ -SELECT - substr(r_reason_desc, 1, 20), - avg(ws_quantity), - avg(wr_refunded_cash), - avg(wr_fee) -FROM web_sales, web_returns, web_page, customer_demographics cd1, - customer_demographics cd2, customer_address, date_dim, reason -WHERE ws_web_page_sk = wp_web_page_sk - AND ws_item_sk = wr_item_sk - AND ws_order_number = wr_order_number - AND ws_sold_date_sk = d_date_sk AND d_year = 2000 - AND cd1.cd_demo_sk = wr_refunded_cdemo_sk - AND cd2.cd_demo_sk = wr_returning_cdemo_sk - AND ca_address_sk = wr_refunded_addr_sk - AND r_reason_sk = wr_reason_sk - AND - ( - ( - cd1.cd_marital_status = 'M' - AND - cd1.cd_marital_status = cd2.cd_marital_status - AND - cd1.cd_education_status = 'Advanced Degree' - AND - cd1.cd_education_status = cd2.cd_education_status - AND - ws_sales_price BETWEEN 100.00 AND 150.00 - ) - OR - ( - cd1.cd_marital_status = 'S' - AND - cd1.cd_marital_status = cd2.cd_marital_status - AND - cd1.cd_education_status = 'College' - AND - cd1.cd_education_status = cd2.cd_education_status - AND - ws_sales_price BETWEEN 50.00 AND 100.00 - ) - OR - ( - cd1.cd_marital_status = 'W' - AND - cd1.cd_marital_status = cd2.cd_marital_status - AND - cd1.cd_education_status = '2 yr Degree' - AND - cd1.cd_education_status = cd2.cd_education_status - AND - ws_sales_price BETWEEN 150.00 AND 200.00 - ) - ) - AND - ( - ( - ca_country = 'United States' - AND - ca_state IN ('IN', 'OH', 'NJ') - AND ws_net_profit BETWEEN 100 AND 200 - ) - OR - ( - ca_country = 'United States' - AND - ca_state IN ('WI', 'CT', 'KY') - AND ws_net_profit BETWEEN 150 AND 300 - ) - OR - ( - ca_country = 'United States' - AND - ca_state IN ('LA', 'IA', 'AR') - AND ws_net_profit BETWEEN 50 AND 250 - ) - ) -GROUP BY r_reason_desc -ORDER BY substr(r_reason_desc, 1, 20) - , avg(ws_quantity) - , avg(wr_refunded_cash) - , avg(wr_fee) -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q86.sql b/src/test/resources/tpcds/queries/q86.sql deleted file mode 100644 index 789a4abf7..000000000 --- a/src/test/resources/tpcds/queries/q86.sql +++ /dev/null @@ -1,24 +0,0 @@ -SELECT - sum(ws_net_paid) AS total_sum, - i_category, - i_class, - grouping(i_category) + grouping(i_class) AS lochierarchy, - rank() - OVER ( - PARTITION BY grouping(i_category) + grouping(i_class), - CASE WHEN grouping(i_class) = 0 - THEN i_category END - ORDER BY sum(ws_net_paid) DESC) AS rank_within_parent -FROM - web_sales, date_dim d1, item -WHERE - d1.d_month_seq BETWEEN 1200 AND 1200 + 11 - AND d1.d_date_sk = ws_sold_date_sk - AND i_item_sk = ws_item_sk -GROUP BY ROLLUP (i_category, i_class) -ORDER BY - lochierarchy DESC, - CASE WHEN lochierarchy = 0 - THEN i_category END, - rank_within_parent -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q87.sql b/src/test/resources/tpcds/queries/q87.sql deleted file mode 100644 index 4aaa9f39d..000000000 --- a/src/test/resources/tpcds/queries/q87.sql +++ /dev/null @@ -1,28 +0,0 @@ -SELECT count(*) -FROM ((SELECT DISTINCT - c_last_name, - c_first_name, - d_date -FROM store_sales, date_dim, customer -WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk - AND store_sales.ss_customer_sk = customer.c_customer_sk - AND d_month_seq BETWEEN 1200 AND 1200 + 11) - EXCEPT - (SELECT DISTINCT - c_last_name, - c_first_name, - d_date - FROM catalog_sales, date_dim, customer - WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk - AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk - AND d_month_seq BETWEEN 1200 AND 1200 + 11) - EXCEPT - (SELECT DISTINCT - c_last_name, - c_first_name, - d_date - FROM web_sales, date_dim, customer - WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk - AND web_sales.ws_bill_customer_sk = customer.c_customer_sk - AND d_month_seq BETWEEN 1200 AND 1200 + 11) - ) cool_cust diff --git a/src/test/resources/tpcds/queries/q88.sql b/src/test/resources/tpcds/queries/q88.sql deleted file mode 100644 index 25bcd90f4..000000000 --- a/src/test/resources/tpcds/queries/q88.sql +++ /dev/null @@ -1,122 +0,0 @@ -SELECT * -FROM - (SELECT count(*) h8_30_to_9 - FROM store_sales, household_demographics, time_dim, store - WHERE ss_sold_time_sk = time_dim.t_time_sk - AND ss_hdemo_sk = household_demographics.hd_demo_sk - AND ss_store_sk = s_store_sk - AND time_dim.t_hour = 8 - AND time_dim.t_minute >= 30 - AND ( - (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) - OR - (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) - OR - (household_demographics.hd_dep_count = 0 AND - household_demographics.hd_vehicle_count <= 0 + 2)) - AND store.s_store_name = 'ese') s1, - (SELECT count(*) h9_to_9_30 - FROM store_sales, household_demographics, time_dim, store - WHERE ss_sold_time_sk = time_dim.t_time_sk - AND ss_hdemo_sk = household_demographics.hd_demo_sk - AND ss_store_sk = s_store_sk - AND time_dim.t_hour = 9 - AND time_dim.t_minute < 30 - AND ( - (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) - OR - (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) - OR - (household_demographics.hd_dep_count = 0 AND - household_demographics.hd_vehicle_count <= 0 + 2)) - AND store.s_store_name = 'ese') s2, - (SELECT count(*) h9_30_to_10 - FROM store_sales, household_demographics, time_dim, store - WHERE ss_sold_time_sk = time_dim.t_time_sk - AND ss_hdemo_sk = household_demographics.hd_demo_sk - AND ss_store_sk = s_store_sk - AND time_dim.t_hour = 9 - AND time_dim.t_minute >= 30 - AND ( - (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) - OR - (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) - OR - (household_demographics.hd_dep_count = 0 AND - household_demographics.hd_vehicle_count <= 0 + 2)) - AND store.s_store_name = 'ese') s3, - (SELECT count(*) h10_to_10_30 - FROM store_sales, household_demographics, time_dim, store - WHERE ss_sold_time_sk = time_dim.t_time_sk - AND ss_hdemo_sk = household_demographics.hd_demo_sk - AND ss_store_sk = s_store_sk - AND time_dim.t_hour = 10 - AND time_dim.t_minute < 30 - AND ( - (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) - OR - (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) - OR - (household_demographics.hd_dep_count = 0 AND - household_demographics.hd_vehicle_count <= 0 + 2)) - AND store.s_store_name = 'ese') s4, - (SELECT count(*) h10_30_to_11 - FROM store_sales, household_demographics, time_dim, store - WHERE ss_sold_time_sk = time_dim.t_time_sk - AND ss_hdemo_sk = household_demographics.hd_demo_sk - AND ss_store_sk = s_store_sk - AND time_dim.t_hour = 10 - AND time_dim.t_minute >= 30 - AND ( - (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) - OR - (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) - OR - (household_demographics.hd_dep_count = 0 AND - household_demographics.hd_vehicle_count <= 0 + 2)) - AND store.s_store_name = 'ese') s5, - (SELECT count(*) h11_to_11_30 - FROM store_sales, household_demographics, time_dim, store - WHERE ss_sold_time_sk = time_dim.t_time_sk - AND ss_hdemo_sk = household_demographics.hd_demo_sk - AND ss_store_sk = s_store_sk - AND time_dim.t_hour = 11 - AND time_dim.t_minute < 30 - AND ( - (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) - OR - (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) - OR - (household_demographics.hd_dep_count = 0 AND - household_demographics.hd_vehicle_count <= 0 + 2)) - AND store.s_store_name = 'ese') s6, - (SELECT count(*) h11_30_to_12 - FROM store_sales, household_demographics, time_dim, store - WHERE ss_sold_time_sk = time_dim.t_time_sk - AND ss_hdemo_sk = household_demographics.hd_demo_sk - AND ss_store_sk = s_store_sk - AND time_dim.t_hour = 11 - AND time_dim.t_minute >= 30 - AND ( - (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) - OR - (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) - OR - (household_demographics.hd_dep_count = 0 AND - household_demographics.hd_vehicle_count <= 0 + 2)) - AND store.s_store_name = 'ese') s7, - (SELECT count(*) h12_to_12_30 - FROM store_sales, household_demographics, time_dim, store - WHERE ss_sold_time_sk = time_dim.t_time_sk - AND ss_hdemo_sk = household_demographics.hd_demo_sk - AND ss_store_sk = s_store_sk - AND time_dim.t_hour = 12 - AND time_dim.t_minute < 30 - AND ( - (household_demographics.hd_dep_count = 4 AND household_demographics.hd_vehicle_count <= 4 + 2) - OR - (household_demographics.hd_dep_count = 2 AND household_demographics.hd_vehicle_count <= 2 + 2) - OR - (household_demographics.hd_dep_count = 0 AND - household_demographics.hd_vehicle_count <= 0 + 2)) - AND store.s_store_name = 'ese') s8 diff --git a/src/test/resources/tpcds/queries/q89.sql b/src/test/resources/tpcds/queries/q89.sql deleted file mode 100644 index 75408cb03..000000000 --- a/src/test/resources/tpcds/queries/q89.sql +++ /dev/null @@ -1,30 +0,0 @@ -SELECT * -FROM ( - SELECT - i_category, - i_class, - i_brand, - s_store_name, - s_company_name, - d_moy, - sum(ss_sales_price) sum_sales, - avg(sum(ss_sales_price)) - OVER - (PARTITION BY i_category, i_brand, s_store_name, s_company_name) - avg_monthly_sales - FROM item, store_sales, date_dim, store - WHERE ss_item_sk = i_item_sk AND - ss_sold_date_sk = d_date_sk AND - ss_store_sk = s_store_sk AND - d_year IN (1999) AND - ((i_category IN ('Books', 'Electronics', 'Sports') AND - i_class IN ('computers', 'stereo', 'football')) - OR (i_category IN ('Men', 'Jewelry', 'Women') AND - i_class IN ('shirts', 'birdal', 'dresses'))) - GROUP BY i_category, i_class, i_brand, - s_store_name, s_company_name, d_moy) tmp1 -WHERE CASE WHEN (avg_monthly_sales <> 0) - THEN (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) - ELSE NULL END > 0.1 -ORDER BY sum_sales - avg_monthly_sales, s_store_name -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q9.sql b/src/test/resources/tpcds/queries/q9.sql deleted file mode 100644 index de3db9d98..000000000 --- a/src/test/resources/tpcds/queries/q9.sql +++ /dev/null @@ -1,48 +0,0 @@ -SELECT - CASE WHEN (SELECT count(*) - FROM store_sales - WHERE ss_quantity BETWEEN 1 AND 20) > 62316685 - THEN (SELECT avg(ss_ext_discount_amt) - FROM store_sales - WHERE ss_quantity BETWEEN 1 AND 20) - ELSE (SELECT avg(ss_net_paid) - FROM store_sales - WHERE ss_quantity BETWEEN 1 AND 20) END bucket1, - CASE WHEN (SELECT count(*) - FROM store_sales - WHERE ss_quantity BETWEEN 21 AND 40) > 19045798 - THEN (SELECT avg(ss_ext_discount_amt) - FROM store_sales - WHERE ss_quantity BETWEEN 21 AND 40) - ELSE (SELECT avg(ss_net_paid) - FROM store_sales - WHERE ss_quantity BETWEEN 21 AND 40) END bucket2, - CASE WHEN (SELECT count(*) - FROM store_sales - WHERE ss_quantity BETWEEN 41 AND 60) > 365541424 - THEN (SELECT avg(ss_ext_discount_amt) - FROM store_sales - WHERE ss_quantity BETWEEN 41 AND 60) - ELSE (SELECT avg(ss_net_paid) - FROM store_sales - WHERE ss_quantity BETWEEN 41 AND 60) END bucket3, - CASE WHEN (SELECT count(*) - FROM store_sales - WHERE ss_quantity BETWEEN 61 AND 80) > 216357808 - THEN (SELECT avg(ss_ext_discount_amt) - FROM store_sales - WHERE ss_quantity BETWEEN 61 AND 80) - ELSE (SELECT avg(ss_net_paid) - FROM store_sales - WHERE ss_quantity BETWEEN 61 AND 80) END bucket4, - CASE WHEN (SELECT count(*) - FROM store_sales - WHERE ss_quantity BETWEEN 81 AND 100) > 184483884 - THEN (SELECT avg(ss_ext_discount_amt) - FROM store_sales - WHERE ss_quantity BETWEEN 81 AND 100) - ELSE (SELECT avg(ss_net_paid) - FROM store_sales - WHERE ss_quantity BETWEEN 81 AND 100) END bucket5 -FROM reason -WHERE r_reason_sk = 1 diff --git a/src/test/resources/tpcds/queries/q90.sql b/src/test/resources/tpcds/queries/q90.sql deleted file mode 100644 index 85e35bf8b..000000000 --- a/src/test/resources/tpcds/queries/q90.sql +++ /dev/null @@ -1,19 +0,0 @@ -SELECT cast(amc AS DECIMAL(15, 4)) / cast(pmc AS DECIMAL(15, 4)) am_pm_ratio -FROM (SELECT count(*) amc -FROM web_sales, household_demographics, time_dim, web_page -WHERE ws_sold_time_sk = time_dim.t_time_sk - AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk - AND ws_web_page_sk = web_page.wp_web_page_sk - AND time_dim.t_hour BETWEEN 8 AND 8 + 1 - AND household_demographics.hd_dep_count = 6 - AND web_page.wp_char_count BETWEEN 5000 AND 5200) at, - (SELECT count(*) pmc - FROM web_sales, household_demographics, time_dim, web_page - WHERE ws_sold_time_sk = time_dim.t_time_sk - AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk - AND ws_web_page_sk = web_page.wp_web_page_sk - AND time_dim.t_hour BETWEEN 19 AND 19 + 1 - AND household_demographics.hd_dep_count = 6 - AND web_page.wp_char_count BETWEEN 5000 AND 5200) pt -ORDER BY am_pm_ratio -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q91.sql b/src/test/resources/tpcds/queries/q91.sql deleted file mode 100644 index 9ca7ce00a..000000000 --- a/src/test/resources/tpcds/queries/q91.sql +++ /dev/null @@ -1,23 +0,0 @@ -SELECT - cc_call_center_id Call_Center, - cc_name Call_Center_Name, - cc_manager Manager, - sum(cr_net_loss) Returns_Loss -FROM - call_center, catalog_returns, date_dim, customer, customer_address, - customer_demographics, household_demographics -WHERE - cr_call_center_sk = cc_call_center_sk - AND cr_returned_date_sk = d_date_sk - AND cr_returning_customer_sk = c_customer_sk - AND cd_demo_sk = c_current_cdemo_sk - AND hd_demo_sk = c_current_hdemo_sk - AND ca_address_sk = c_current_addr_sk - AND d_year = 1998 - AND d_moy = 11 - AND ((cd_marital_status = 'M' AND cd_education_status = 'Unknown') - OR (cd_marital_status = 'W' AND cd_education_status = 'Advanced Degree')) - AND hd_buy_potential LIKE 'Unknown%' - AND ca_gmt_offset = -7 -GROUP BY cc_call_center_id, cc_name, cc_manager, cd_marital_status, cd_education_status -ORDER BY sum(cr_net_loss) DESC diff --git a/src/test/resources/tpcds/queries/q92.sql b/src/test/resources/tpcds/queries/q92.sql deleted file mode 100644 index 99129c3bd..000000000 --- a/src/test/resources/tpcds/queries/q92.sql +++ /dev/null @@ -1,16 +0,0 @@ -SELECT sum(ws_ext_discount_amt) AS `Excess Discount Amount ` -FROM web_sales, item, date_dim -WHERE i_manufact_id = 350 - AND i_item_sk = ws_item_sk - AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days) - AND d_date_sk = ws_sold_date_sk - AND ws_ext_discount_amt > - ( - SELECT 1.3 * avg(ws_ext_discount_amt) - FROM web_sales, date_dim - WHERE ws_item_sk = i_item_sk - AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days) - AND d_date_sk = ws_sold_date_sk - ) -ORDER BY sum(ws_ext_discount_amt) -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q93.sql b/src/test/resources/tpcds/queries/q93.sql deleted file mode 100644 index 222dc31c1..000000000 --- a/src/test/resources/tpcds/queries/q93.sql +++ /dev/null @@ -1,19 +0,0 @@ -SELECT - ss_customer_sk, - sum(act_sales) sumsales -FROM (SELECT - ss_item_sk, - ss_ticket_number, - ss_customer_sk, - CASE WHEN sr_return_quantity IS NOT NULL - THEN (ss_quantity - sr_return_quantity) * ss_sales_price - ELSE (ss_quantity * ss_sales_price) END act_sales -FROM store_sales - LEFT OUTER JOIN store_returns - ON (sr_item_sk = ss_item_sk AND sr_ticket_number = ss_ticket_number) - , - reason -WHERE sr_reason_sk = r_reason_sk AND r_reason_desc = 'reason 28') t -GROUP BY ss_customer_sk -ORDER BY sumsales, ss_customer_sk -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q94.sql b/src/test/resources/tpcds/queries/q94.sql deleted file mode 100644 index d6de3d75b..000000000 --- a/src/test/resources/tpcds/queries/q94.sql +++ /dev/null @@ -1,23 +0,0 @@ -SELECT - count(DISTINCT ws_order_number) AS `order count `, - sum(ws_ext_ship_cost) AS `total shipping cost `, - sum(ws_net_profit) AS `total net profit ` -FROM - web_sales ws1, date_dim, customer_address, web_site -WHERE - d_date BETWEEN '1999-02-01' AND - (CAST('1999-02-01' AS DATE) + INTERVAL 60 days) - AND ws1.ws_ship_date_sk = d_date_sk - AND ws1.ws_ship_addr_sk = ca_address_sk - AND ca_state = 'IL' - AND ws1.ws_web_site_sk = web_site_sk - AND web_company_name = 'pri' - AND EXISTS(SELECT * - FROM web_sales ws2 - WHERE ws1.ws_order_number = ws2.ws_order_number - AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) - AND NOT EXISTS(SELECT * - FROM web_returns wr1 - WHERE ws1.ws_order_number = wr1.wr_order_number) -ORDER BY count(DISTINCT ws_order_number) -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q95.sql b/src/test/resources/tpcds/queries/q95.sql deleted file mode 100644 index df71f00bd..000000000 --- a/src/test/resources/tpcds/queries/q95.sql +++ /dev/null @@ -1,29 +0,0 @@ -WITH ws_wh AS -(SELECT - ws1.ws_order_number, - ws1.ws_warehouse_sk wh1, - ws2.ws_warehouse_sk wh2 - FROM web_sales ws1, web_sales ws2 - WHERE ws1.ws_order_number = ws2.ws_order_number - AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) -SELECT - count(DISTINCT ws_order_number) AS `order count `, - sum(ws_ext_ship_cost) AS `total shipping cost `, - sum(ws_net_profit) AS `total net profit ` -FROM - web_sales ws1, date_dim, customer_address, web_site -WHERE - d_date BETWEEN '1999-02-01' AND - (CAST('1999-02-01' AS DATE) + INTERVAL 60 DAY) - AND ws1.ws_ship_date_sk = d_date_sk - AND ws1.ws_ship_addr_sk = ca_address_sk - AND ca_state = 'IL' - AND ws1.ws_web_site_sk = web_site_sk - AND web_company_name = 'pri' - AND ws1.ws_order_number IN (SELECT ws_order_number - FROM ws_wh) - AND ws1.ws_order_number IN (SELECT wr_order_number - FROM web_returns, ws_wh - WHERE wr_order_number = ws_wh.ws_order_number) -ORDER BY count(DISTINCT ws_order_number) -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q96.sql b/src/test/resources/tpcds/queries/q96.sql deleted file mode 100644 index 7ab17e7bc..000000000 --- a/src/test/resources/tpcds/queries/q96.sql +++ /dev/null @@ -1,11 +0,0 @@ -SELECT count(*) -FROM store_sales, household_demographics, time_dim, store -WHERE ss_sold_time_sk = time_dim.t_time_sk - AND ss_hdemo_sk = household_demographics.hd_demo_sk - AND ss_store_sk = s_store_sk - AND time_dim.t_hour = 20 - AND time_dim.t_minute >= 30 - AND household_demographics.hd_dep_count = 7 - AND store.s_store_name = 'ese' -ORDER BY count(*) -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q97.sql b/src/test/resources/tpcds/queries/q97.sql deleted file mode 100644 index e7e0b1a05..000000000 --- a/src/test/resources/tpcds/queries/q97.sql +++ /dev/null @@ -1,30 +0,0 @@ -WITH ssci AS ( - SELECT - ss_customer_sk customer_sk, - ss_item_sk item_sk - FROM store_sales, date_dim - WHERE ss_sold_date_sk = d_date_sk - AND d_month_seq BETWEEN 1200 AND 1200 + 11 - GROUP BY ss_customer_sk, ss_item_sk), - csci AS ( - SELECT - cs_bill_customer_sk customer_sk, - cs_item_sk item_sk - FROM catalog_sales, date_dim - WHERE cs_sold_date_sk = d_date_sk - AND d_month_seq BETWEEN 1200 AND 1200 + 11 - GROUP BY cs_bill_customer_sk, cs_item_sk) -SELECT - sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NULL - THEN 1 - ELSE 0 END) store_only, - sum(CASE WHEN ssci.customer_sk IS NULL AND csci.customer_sk IS NOT NULL - THEN 1 - ELSE 0 END) catalog_only, - sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NOT NULL - THEN 1 - ELSE 0 END) store_and_catalog -FROM ssci - FULL OUTER JOIN csci ON (ssci.customer_sk = csci.customer_sk - AND ssci.item_sk = csci.item_sk) -LIMIT 100 diff --git a/src/test/resources/tpcds/queries/q98.sql b/src/test/resources/tpcds/queries/q98.sql deleted file mode 100644 index bb10d4bf8..000000000 --- a/src/test/resources/tpcds/queries/q98.sql +++ /dev/null @@ -1,21 +0,0 @@ -SELECT - i_item_desc, - i_category, - i_class, - i_current_price, - sum(ss_ext_sales_price) AS itemrevenue, - sum(ss_ext_sales_price) * 100 / sum(sum(ss_ext_sales_price)) - OVER - (PARTITION BY i_class) AS revenueratio -FROM - store_sales, item, date_dim -WHERE - ss_item_sk = i_item_sk - AND i_category IN ('Sports', 'Books', 'Home') - AND ss_sold_date_sk = d_date_sk - AND d_date BETWEEN cast('1999-02-22' AS DATE) - AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) -GROUP BY - i_item_id, i_item_desc, i_category, i_class, i_current_price -ORDER BY - i_category, i_class, i_item_id, i_item_desc, revenueratio diff --git a/src/test/resources/tpcds/queries/q99.sql b/src/test/resources/tpcds/queries/q99.sql deleted file mode 100644 index f1a3d4d2b..000000000 --- a/src/test/resources/tpcds/queries/q99.sql +++ /dev/null @@ -1,34 +0,0 @@ -SELECT - substr(w_warehouse_name, 1, 20), - sm_type, - cc_name, - sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk <= 30) - THEN 1 - ELSE 0 END) AS `30 days `, - sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 30) AND - (cs_ship_date_sk - cs_sold_date_sk <= 60) - THEN 1 - ELSE 0 END) AS `31 - 60 days `, - sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 60) AND - (cs_ship_date_sk - cs_sold_date_sk <= 90) - THEN 1 - ELSE 0 END) AS `61 - 90 days `, - sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 90) AND - (cs_ship_date_sk - cs_sold_date_sk <= 120) - THEN 1 - ELSE 0 END) AS `91 - 120 days `, - sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 120) - THEN 1 - ELSE 0 END) AS `>120 days ` -FROM - catalog_sales, warehouse, ship_mode, call_center, date_dim -WHERE - d_month_seq BETWEEN 1200 AND 1200 + 11 - AND cs_ship_date_sk = d_date_sk - AND cs_warehouse_sk = w_warehouse_sk - AND cs_ship_mode_sk = sm_ship_mode_sk - AND cs_call_center_sk = cc_call_center_sk -GROUP BY - substr(w_warehouse_name, 1, 20), sm_type, cc_name -ORDER BY substr(w_warehouse_name, 1, 20), sm_type, cc_name -LIMIT 100 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/explain.txt deleted file mode 100644 index 1e2caffbe..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/explain.txt +++ /dev/null @@ -1,43 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST], output=[c_customer_id#1]) -+- *(9) Project [c_customer_id#1] - +- *(9) BroadcastHashJoin [ctr_customer_sk#2], [cast(c_customer_sk#3 as bigint)], Inner, BuildRight - :- *(9) Project [ctr_customer_sk#2] - : +- *(9) BroadcastHashJoin [ctr_store_sk#4], [cast(s_store_sk#5 as bigint)], Inner, BuildRight - : :- *(9) Project [ctr_customer_sk#2, ctr_store_sk#4] - : : +- *(9) BroadcastHashJoin [ctr_store_sk#4], [ctr_store_sk#4#6], Inner, BuildRight, (cast(ctr_total_return#7 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#8) - : : :- *(9) Filter isnotnull(ctr_total_return#7) - : : : +- *(9) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[sum(UnscaledValue(sr_return_amt#11))]) - : : : +- Exchange hashpartitioning(sr_customer_sk#9, sr_store_sk#10, 5) - : : : +- *(2) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[partial_sum(UnscaledValue(sr_return_amt#11))]) - : : : +- *(2) Project [sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] - : : : +- *(2) BroadcastHashJoin [sr_returned_date_sk#12], [cast(d_date_sk#13 as bigint)], Inner, BuildRight - : : : :- *(2) Project [sr_returned_date_sk#12, sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] - : : : : +- *(2) Filter ((isnotnull(sr_returned_date_sk#12) && isnotnull(sr_store_sk#10)) && isnotnull(sr_customer_sk#9)) - : : : : +- *(2) FileScan parquet default.store_returns[sr_returned_date_sk#12,sr_customer_sk#9,sr_store_sk#10,sr_return_amt#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk), IsNotNull(sr_customer_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true])) - : : +- *(6) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#8) - : : +- *(6) HashAggregate(keys=[ctr_store_sk#4], functions=[avg(ctr_total_return#7)]) - : : +- Exchange hashpartitioning(ctr_store_sk#4, 5) - : : +- *(5) HashAggregate(keys=[ctr_store_sk#4], functions=[partial_avg(ctr_total_return#7)]) - : : +- *(5) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[sum(UnscaledValue(sr_return_amt#11))]) - : : +- Exchange hashpartitioning(sr_customer_sk#9, sr_store_sk#10, 5) - : : +- *(4) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[partial_sum(UnscaledValue(sr_return_amt#11))]) - : : +- *(4) Project [sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] - : : +- *(4) BroadcastHashJoin [sr_returned_date_sk#12], [cast(d_date_sk#13 as bigint)], Inner, BuildRight - : : :- *(4) Project [sr_returned_date_sk#12, sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] - : : : +- *(4) Filter (isnotnull(sr_returned_date_sk#12) && isnotnull(sr_store_sk#10)) - : : : +- *(4) FileScan parquet default.store_returns[sr_returned_date_sk#12,sr_customer_sk#9,sr_store_sk#10,sr_return_amt#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(8) Project [c_customer_sk#3, c_customer_id#1] - +- *(8) Filter isnotnull(c_customer_sk#3) - +- *(8) FileScan parquet default.customer[c_customer_sk#3,c_customer_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/simplified.txt deleted file mode 100644 index 4f838850c..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/simplified.txt +++ /dev/null @@ -1,58 +0,0 @@ -TakeOrderedAndProject [c_customer_id] - WholeStageCodegen - Project [c_customer_id] - BroadcastHashJoin [c_customer_sk,ctr_customer_sk] - Project [ctr_customer_sk] - BroadcastHashJoin [ctr_store_sk,s_store_sk] - Project [ctr_customer_sk,ctr_store_sk] - BroadcastHashJoin [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_store_sk,ctr_store_skL,ctr_total_return] - Filter [ctr_total_return] - HashAggregate [sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] [ctr_customer_sk,ctr_store_sk,ctr_total_return,sum,sum(UnscaledValue(sr_return_amt))] - InputAdapter - Exchange [sr_customer_sk,sr_store_sk] #1 - WholeStageCodegen - HashAggregate [sr_customer_sk,sr_return_amt,sr_store_sk,sum,sum] [sum,sum] - Project [sr_customer_sk,sr_return_amt,sr_store_sk] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] - Project [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] - Filter [sr_customer_sk,sr_returned_date_sk,sr_store_sk] - Scan parquet default.store_returns [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] - HashAggregate [avg(ctr_total_return),count,ctr_store_sk,sum] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),avg(ctr_total_return),count,ctr_store_skL,sum] - InputAdapter - Exchange [ctr_store_sk] #4 - WholeStageCodegen - HashAggregate [count,count,ctr_store_sk,ctr_total_return,sum,sum] [count,count,sum,sum] - HashAggregate [sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] [ctr_store_sk,ctr_total_return,sum,sum(UnscaledValue(sr_return_amt))] - InputAdapter - Exchange [sr_customer_sk,sr_store_sk] #5 - WholeStageCodegen - HashAggregate [sr_customer_sk,sr_return_amt,sr_store_sk,sum,sum] [sum,sum] - Project [sr_customer_sk,sr_return_amt,sr_store_sk] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] - Project [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] - Filter [sr_returned_date_sk,sr_store_sk] - Scan parquet default.store_returns [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #2 - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [s_store_sk] - Filter [s_state,s_store_sk] - Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [c_customer_id,c_customer_sk] - Filter [c_customer_sk] - Scan parquet default.customer [c_customer_id,c_customer_sk] [c_customer_id,c_customer_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/explain.txt deleted file mode 100644 index 678ed7254..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/explain.txt +++ /dev/null @@ -1,49 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[cd_gender#1 ASC NULLS FIRST,cd_marital_status#2 ASC NULLS FIRST,cd_education_status#3 ASC NULLS FIRST,cd_purchase_estimate#4 ASC NULLS FIRST,cd_credit_rating#5 ASC NULLS FIRST,cd_dep_count#6 ASC NULLS FIRST,cd_dep_employed_count#7 ASC NULLS FIRST,cd_dep_college_count#8 ASC NULLS FIRST], output=[cd_gender#1,cd_marital_status#2,cd_education_status#3,cnt1#9,cd_purchase_estimate#4,cnt2#10,cd_credit_rating#5,cnt3#11,cd_dep_count#6,cnt4#12,cd_dep_employed_count#7,cnt5#13,cd_dep_college_count#8,cnt6#14]) -+- *(10) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8], functions=[count(1)]) - +- Exchange hashpartitioning(cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8, 5) - +- *(9) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8], functions=[partial_count(1)]) - +- *(9) Project [cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8] - +- *(9) BroadcastHashJoin [c_current_cdemo_sk#15], [cd_demo_sk#16], Inner, BuildRight - :- *(9) Project [c_current_cdemo_sk#15] - : +- *(9) BroadcastHashJoin [c_current_addr_sk#17], [ca_address_sk#18], Inner, BuildRight - : :- *(9) Project [c_current_cdemo_sk#15, c_current_addr_sk#17] - : : +- *(9) Filter (exists#19 || exists#20) - : : +- *(9) BroadcastHashJoin [c_customer_sk#21], [cs_ship_customer_sk#22], ExistenceJoin(exists#20), BuildRight - : : :- *(9) BroadcastHashJoin [c_customer_sk#21], [ws_bill_customer_sk#23], ExistenceJoin(exists#19), BuildRight - : : : :- *(9) BroadcastHashJoin [c_customer_sk#21], [ss_customer_sk#24], LeftSemi, BuildRight - : : : : :- *(9) Project [c_customer_sk#21, c_current_cdemo_sk#15, c_current_addr_sk#17] - : : : : : +- *(9) Filter (isnotnull(c_current_addr_sk#17) && isnotnull(c_current_cdemo_sk#15)) - : : : : : +- *(9) FileScan parquet default.customer[c_customer_sk#21,c_current_cdemo_sk#15,c_current_addr_sk#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [ss_customer_sk#24] - : : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#25], [d_date_sk#26], Inner, BuildRight - : : : : :- *(2) Project [ss_sold_date_sk#25, ss_customer_sk#24] - : : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#25) - : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#25,ss_customer_sk#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(1) Project [d_date_sk#26] - : : : : +- *(1) Filter (((((isnotnull(d_year#27) && isnotnull(d_moy#28)) && (d_year#27 = 2002)) && (d_moy#28 >= 1)) && (d_moy#28 <= 4)) && isnotnull(d_date_sk#26)) - : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#26,d_year#27,d_moy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,1), LessThan..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(4) Project [ws_bill_customer_sk#23] - : : : +- *(4) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#26], Inner, BuildRight - : : : :- *(4) Project [ws_sold_date_sk#29, ws_bill_customer_sk#23] - : : : : +- *(4) Filter isnotnull(ws_sold_date_sk#29) - : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#29,ws_bill_customer_sk#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(6) Project [cs_ship_customer_sk#22] - : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#30], [d_date_sk#26], Inner, BuildRight - : : :- *(6) Project [cs_sold_date_sk#30, cs_ship_customer_sk#22] - : : : +- *(6) Filter isnotnull(cs_sold_date_sk#30) - : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#30,cs_ship_customer_sk#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(7) Project [ca_address_sk#18] - : +- *(7) Filter (ca_county#31 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) && isnotnull(ca_address_sk#18)) - : +- *(7) FileScan parquet default.customer_address[ca_address_sk#18,ca_county#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [In(ca_county, [Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County]), IsNo..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(8) Project [cd_demo_sk#16, cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8] - +- *(8) Filter isnotnull(cd_demo_sk#16) - +- *(8) FileScan parquet default.customer_demographics[cd_demo_sk#16,cd_gender#1,cd_marital_status#2,cd_education_status#3,cd_purchase_estimate#4,cd_credit_rating#5,cd_dep_count#6,cd_dep_employed_count#7,cd_dep_college_count#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct 0.00) THEN CheckOverflow((promote_precision(year_total#5) / promote_precision(year_total#4)), DecimalType(38,20)) ELSE null END > CASE WHEN (year_total#6 > 0.00) THEN CheckOverflow((promote_precision(year_total#7) / promote_precision(year_total#6)), DecimalType(38,20)) ELSE null END) - :- *(17) Project [customer_id#2, year_total#6, customer_preferred_cust_flag#1, year_total#7, year_total#4] - : +- *(17) BroadcastHashJoin [customer_id#2], [customer_id#8], Inner, BuildRight - : :- *(17) Project [customer_id#2, year_total#6, customer_preferred_cust_flag#1, year_total#7] - : : +- *(17) BroadcastHashJoin [customer_id#2], [customer_id#9], Inner, BuildRight - : : :- Union - : : : :- *(4) Filter (isnotnull(year_total#6) && (year_total#6 > 0.00)) - : : : : +- *(4) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) - : : : : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, 5) - : : : : +- *(3) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) - : : : : +- *(3) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_ext_discount_amt#19, ss_ext_list_price#18, d_year#13] - : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight - : : : : :- *(3) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_sold_date_sk#20, ss_ext_discount_amt#19, ss_ext_list_price#18] - : : : : : +- *(3) BroadcastHashJoin [c_customer_sk#22], [ss_customer_sk#23], Inner, BuildRight - : : : : : :- *(3) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] - : : : : : : +- *(3) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) - : : : : : : +- *(3) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - : : : +- LocalTableScan , [customer_id#24, year_total#25] - : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : : +- Union - : : :- *(8) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) - : : : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, 5) - : : : +- *(7) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) - : : : +- *(7) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_ext_discount_amt#19, ss_ext_list_price#18, d_year#13] - : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight - : : : :- *(7) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_sold_date_sk#20, ss_ext_discount_amt#19, ss_ext_list_price#18] - : : : : +- *(7) BroadcastHashJoin [c_customer_sk#22], [ss_customer_sk#23], Inner, BuildRight - : : : : :- *(7) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] - : : : : : +- *(7) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) - : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - : : +- LocalTableScan , [customer_id#24, customer_preferred_cust_flag#26, year_total#25] - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : +- Union - : :- LocalTableScan , [customer_id#8, year_total#4] - : +- *(12) Filter (isnotnull(year_total#25) && (year_total#25 > 0.00)) - : +- *(12) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) - : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13, 5) - : +- *(11) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) - : +- *(11) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_ext_discount_amt#28, ws_ext_list_price#27, d_year#13] - : +- *(11) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#21], Inner, BuildRight - : :- *(11) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_sold_date_sk#29, ws_ext_discount_amt#28, ws_ext_list_price#27] - : : +- *(11) BroadcastHashJoin [c_customer_sk#22], [ws_bill_customer_sk#30], Inner, BuildRight - : : :- *(11) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] - : : : +- *(11) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) - : : : +- *(11) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#3, year_total#5] - +- *(16) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) - +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13, 5) - +- *(15) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) - +- *(15) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_ext_discount_amt#28, ws_ext_list_price#27, d_year#13] - +- *(15) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#21], Inner, BuildRight - :- *(15) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_sold_date_sk#29, ws_ext_discount_amt#28, ws_ext_list_price#27] - : +- *(15) BroadcastHashJoin [c_customer_sk#22], [ws_bill_customer_sk#30], Inner, BuildRight - : :- *(15) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] - : : +- *(15) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) - : : +- *(15) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [i_item_sk#15, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] - : +- *(1) Filter (i_category#1 IN (Sports,Books,Home) && isnotnull(i_item_sk#15)) - : +- *(1) FileScan parquet default.item[i_item_sk#15,i_item_id#3,i_item_desc#4,i_current_price#6,i_class#2,i_category#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)], ReadSchema: struct= 10644)) && (d_date#16 <= 10674)) && isnotnull(d_date_sk#13)) - +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/simplified.txt deleted file mode 100644 index b815e3d91..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/simplified.txt +++ /dev/null @@ -1,34 +0,0 @@ -TakeOrderedAndProject [i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue,revenueratio] - WholeStageCodegen - Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] - InputAdapter - Window [_w1,i_class] - WholeStageCodegen - Sort [i_class] - InputAdapter - Exchange [i_class] #1 - WholeStageCodegen - HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(ws_ext_sales_price))] - InputAdapter - Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #2 - WholeStageCodegen - HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum,ws_ext_sales_price] [sum,sum] - Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ws_ext_sales_price] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ws_ext_sales_price,ws_sold_date_sk] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] - Filter [i_category,i_item_sk] - Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/explain.txt deleted file mode 100644 index 1e61c9ee5..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/explain.txt +++ /dev/null @@ -1,37 +0,0 @@ -== Physical Plan == -*(7) HashAggregate(keys=[], functions=[avg(cast(ss_quantity#1 as bigint)), avg(UnscaledValue(ss_ext_sales_price#2)), avg(UnscaledValue(ss_ext_wholesale_cost#3)), sum(UnscaledValue(ss_ext_wholesale_cost#3))]) -+- Exchange SinglePartition - +- *(6) HashAggregate(keys=[], functions=[partial_avg(cast(ss_quantity#1 as bigint)), partial_avg(UnscaledValue(ss_ext_sales_price#2)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#3)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#3))]) - +- *(6) Project [ss_quantity#1, ss_ext_sales_price#2, ss_ext_wholesale_cost#3] - +- *(6) BroadcastHashJoin [ss_hdemo_sk#4], [hd_demo_sk#5], Inner, BuildRight, (((((((cd_marital_status#6 = M) && (cd_education_status#7 = Advanced Degree)) && (ss_sales_price#8 >= 100.00)) && (ss_sales_price#8 <= 150.00)) && (hd_dep_count#9 = 3)) || (((((cd_marital_status#6 = S) && (cd_education_status#7 = College)) && (ss_sales_price#8 >= 50.00)) && (ss_sales_price#8 <= 100.00)) && (hd_dep_count#9 = 1))) || (((((cd_marital_status#6 = W) && (cd_education_status#7 = 2 yr Degree)) && (ss_sales_price#8 >= 150.00)) && (ss_sales_price#8 <= 200.00)) && (hd_dep_count#9 = 1))) - :- *(6) Project [ss_hdemo_sk#4, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3, cd_marital_status#6, cd_education_status#7] - : +- *(6) BroadcastHashJoin [ss_cdemo_sk#10], [cd_demo_sk#11], Inner, BuildRight - : :- *(6) Project [ss_cdemo_sk#10, ss_hdemo_sk#4, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3] - : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight - : : :- *(6) Project [ss_sold_date_sk#12, ss_cdemo_sk#10, ss_hdemo_sk#4, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3] - : : : +- *(6) BroadcastHashJoin [ss_addr_sk#14], [ca_address_sk#15], Inner, BuildRight, ((((ca_state#16 IN (TX,OH) && (ss_net_profit#17 >= 100.00)) && (ss_net_profit#17 <= 200.00)) || ((ca_state#16 IN (OR,NM,KY) && (ss_net_profit#17 >= 150.00)) && (ss_net_profit#17 <= 300.00))) || ((ca_state#16 IN (VA,TX,MS) && (ss_net_profit#17 >= 50.00)) && (ss_net_profit#17 <= 250.00))) - : : : :- *(6) Project [ss_sold_date_sk#12, ss_cdemo_sk#10, ss_hdemo_sk#4, ss_addr_sk#14, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3, ss_net_profit#17] - : : : : +- *(6) BroadcastHashJoin [ss_store_sk#18], [s_store_sk#19], Inner, BuildRight - : : : : :- *(6) Project [ss_sold_date_sk#12, ss_cdemo_sk#10, ss_hdemo_sk#4, ss_addr_sk#14, ss_store_sk#18, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3, ss_net_profit#17] - : : : : : +- *(6) Filter ((((isnotnull(ss_store_sk#18) && isnotnull(ss_addr_sk#14)) && isnotnull(ss_sold_date_sk#12)) && isnotnull(ss_cdemo_sk#10)) && isnotnull(ss_hdemo_sk#4)) - : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_cdemo_sk#10,ss_hdemo_sk#4,ss_addr_sk#14,ss_store_sk#18,ss_quantity#1,ss_sales_price#8,ss_ext_sales_price#2,ss_ext_wholesale_cost#3,ss_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_cdemo_sk..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [ca_address_sk#15, ca_state#16] - : : : +- *(2) Filter ((isnotnull(ca_country#20) && (ca_country#20 = United States)) && isnotnull(ca_address_sk#15)) - : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#16,ca_country#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [d_date_sk#13] - : : +- *(3) Filter ((isnotnull(d_year#21) && (d_year#21 = 2001)) && isnotnull(d_date_sk#13)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#13,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [cd_demo_sk#11, cd_marital_status#6, cd_education_status#7] - : +- *(4) Filter isnotnull(cd_demo_sk#11) - : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#11,cd_marital_status#6,cd_education_status#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(5) Project [hd_demo_sk#5, hd_dep_count#9] - +- *(5) Filter isnotnull(hd_demo_sk#5) - +- *(5) FileScan parquet default.household_demographics[hd_demo_sk#5,hd_dep_count#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/simplified.txt deleted file mode 100644 index cd146d06c..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/simplified.txt +++ /dev/null @@ -1,49 +0,0 @@ -WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_ext_sales_price)),avg(UnscaledValue(ss_ext_wholesale_cost)),avg(cast(ss_quantity as bigint)),count,count,count,sum,sum,sum,sum,sum(UnscaledValue(ss_ext_wholesale_cost))] [avg(UnscaledValue(ss_ext_sales_price)),avg(UnscaledValue(ss_ext_wholesale_cost)),avg(cast(ss_quantity as bigint)),avg(ss_ext_sales_price),avg(ss_ext_wholesale_cost),avg(ss_quantity),count,count,count,sum,sum,sum,sum,sum(UnscaledValue(ss_ext_wholesale_cost)),sum(ss_ext_wholesale_cost)] - InputAdapter - Exchange #1 - WholeStageCodegen - HashAggregate [count,count,count,count,count,count,ss_ext_sales_price,ss_ext_wholesale_cost,ss_quantity,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] - Project [ss_ext_sales_price,ss_ext_wholesale_cost,ss_quantity] - BroadcastHashJoin [cd_education_status,cd_marital_status,hd_demo_sk,hd_dep_count,ss_hdemo_sk,ss_sales_price] - Project [cd_education_status,cd_marital_status,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] - Project [ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] - BroadcastHashJoin [ca_address_sk,ca_state,ss_addr_sk,ss_net_profit] - Project [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_addr_sk,ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_sk] [s_store_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [ca_address_sk,ca_state] - Filter [ca_address_sk,ca_country] - Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] [ca_address_sk,ca_country,ca_state] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [cd_demo_sk,cd_education_status,cd_marital_status] - Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [hd_demo_sk,hd_dep_count] - Filter [hd_demo_sk] - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] [hd_demo_sk,hd_dep_count] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/explain.txt deleted file mode 100644 index 0acde64df..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/explain.txt +++ /dev/null @@ -1,192 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 ASC NULLS FIRST,i_class_id#3 ASC NULLS FIRST,i_category_id#4 ASC NULLS FIRST], output=[channel#1,i_brand_id#2,i_class_id#3,i_category_id#4,sum(sales)#5,sum(number_sales)#6]) -+- *(80) HashAggregate(keys=[channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7], functions=[sum(sales#8), sum(number_sales#9)]) - +- Exchange hashpartitioning(channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7, 5) - +- *(79) HashAggregate(keys=[channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7], functions=[partial_sum(sales#8), partial_sum(number_sales#9)]) - +- *(79) Expand [List(sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, i_category_id#13, 0), List(sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, null, 1), List(sales#8, number_sales#9, channel#10, i_brand_id#11, null, null, 3), List(sales#8, number_sales#9, channel#10, null, null, null, 7), List(sales#8, number_sales#9, null, null, null, null, 15)], [sales#8, number_sales#9, channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7] - +- Union - :- *(26) Project [sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, i_category_id#13] - : +- *(26) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2)))#16) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2)))#16 as decimal(32,6)) > cast(Subquery subquery1662 as decimal(32,6)))) - : : +- Subquery subquery1662 - : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) - : : +- Exchange SinglePartition - : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) - : : +- Union - : : :- *(2) Project [ss_quantity#14 AS quantity#17, ss_list_price#15 AS list_price#18] - : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight - : : : :- *(2) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15] - : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#19) - : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#20] - : : : +- *(1) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct - : : :- *(4) Project [cs_quantity#22 AS quantity#23, cs_list_price#24 AS list_price#25] - : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight - : : : :- *(4) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24] - : : : : +- *(4) Filter isnotnull(cs_sold_date_sk#26) - : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(6) Project [ws_quantity#27 AS quantity#28, ws_list_price#29 AS list_price#30] - : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight - : : :- *(6) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29] - : : : +- *(6) Filter isnotnull(ws_sold_date_sk#31) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(26) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2))), count(1)]) - : +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 5) - : +- *(25) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) - : +- *(25) Project [ss_quantity#14, ss_list_price#15, i_brand_id#11, i_class_id#12, i_category_id#13] - : +- *(25) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight - : :- *(25) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15, i_brand_id#11, i_class_id#12, i_category_id#13] - : : +- *(25) BroadcastHashJoin [ss_item_sk#32], [i_item_sk#33], Inner, BuildRight - : : :- *(25) BroadcastHashJoin [ss_item_sk#32], [ss_item_sk#34], LeftSemi, BuildRight - : : : :- *(25) Project [ss_sold_date_sk#19, ss_item_sk#32, ss_quantity#14, ss_list_price#15] - : : : : +- *(25) Filter (isnotnull(ss_item_sk#32) && isnotnull(ss_sold_date_sk#19)) - : : : : +- *(25) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#32,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(11) Project [i_item_sk#33 AS ss_item_sk#34] - : : : +- *(11) BroadcastHashJoin [i_brand_id#11, i_class_id#12, i_category_id#13], [brand_id#35, class_id#36, category_id#37], Inner, BuildRight - : : : :- *(11) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] - : : : : +- *(11) Filter ((isnotnull(i_class_id#12) && isnotnull(i_brand_id#11)) && isnotnull(i_category_id#13)) - : : : : +- *(11) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) - : : : +- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) - : : : +- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) - : : : +- *(10) BroadcastHashJoin [coalesce(brand_id#35, 0), coalesce(class_id#36, 0), coalesce(category_id#37, 0)], [coalesce(i_brand_id#11, 0), coalesce(i_class_id#12, 0), coalesce(i_category_id#13, 0)], LeftSemi, BuildRight, (((brand_id#35 <=> i_brand_id#11) && (class_id#36 <=> i_class_id#12)) && (category_id#37 <=> i_category_id#13)) - : : : :- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) - : : : : +- Exchange hashpartitioning(brand_id#35, class_id#36, category_id#37, 5) - : : : : +- *(6) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) - : : : : +- *(6) BroadcastHashJoin [coalesce(brand_id#35, 0), coalesce(class_id#36, 0), coalesce(category_id#37, 0)], [coalesce(i_brand_id#11, 0), coalesce(i_class_id#12, 0), coalesce(i_category_id#13, 0)], LeftSemi, BuildRight, (((brand_id#35 <=> i_brand_id#11) && (class_id#36 <=> i_class_id#12)) && (category_id#37 <=> i_category_id#13)) - : : : : :- *(6) Project [i_brand_id#11 AS brand_id#35, i_class_id#12 AS class_id#36, i_category_id#13 AS category_id#37] - : : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight - : : : : : :- *(6) Project [ss_sold_date_sk#19, i_brand_id#11, i_class_id#12, i_category_id#13] - : : : : : : +- *(6) BroadcastHashJoin [ss_item_sk#32], [i_item_sk#33], Inner, BuildRight - : : : : : : :- *(6) Project [ss_sold_date_sk#19, ss_item_sk#32] - : : : : : : : +- *(6) Filter (isnotnull(ss_item_sk#32) && isnotnull(ss_sold_date_sk#19)) - : : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : +- *(1) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] - : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#33) && isnotnull(i_class_id#12)) && isnotnull(i_brand_id#11)) && isnotnull(i_category_id#13)) - : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(2) Project [d_date_sk#20] - : : : : : +- *(2) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) - : : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) - : : : : +- *(5) Project [i_brand_id#11, i_class_id#12, i_category_id#13] - : : : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight - : : : : :- *(5) Project [cs_sold_date_sk#26, i_brand_id#11, i_class_id#12, i_category_id#13] - : : : : : +- *(5) BroadcastHashJoin [cs_item_sk#38], [i_item_sk#33], Inner, BuildRight - : : : : : :- *(5) Project [cs_sold_date_sk#26, cs_item_sk#38] - : : : : : : +- *(5) Filter (isnotnull(cs_item_sk#38) && isnotnull(cs_sold_date_sk#26)) - : : : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_item_sk#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(3) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] - : : : : : +- *(3) Filter isnotnull(i_item_sk#33) - : : : : : +- *(3) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) - : : : +- *(9) Project [i_brand_id#11, i_class_id#12, i_category_id#13] - : : : +- *(9) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight - : : : :- *(9) Project [ws_sold_date_sk#31, i_brand_id#11, i_class_id#12, i_category_id#13] - : : : : +- *(9) BroadcastHashJoin [ws_item_sk#39], [i_item_sk#33], Inner, BuildRight - : : : : :- *(9) Project [ws_sold_date_sk#31, ws_item_sk#39] - : : : : : +- *(9) Filter (isnotnull(ws_item_sk#39) && isnotnull(ws_sold_date_sk#31)) - : : : : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_item_sk#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : : : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(23) BroadcastHashJoin [i_item_sk#33], [ss_item_sk#34], LeftSemi, BuildRight - : : :- *(23) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] - : : : +- *(23) Filter isnotnull(i_item_sk#33) - : : : +- *(23) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : : +- ReusedExchange [ss_item_sk#34], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(24) Project [d_date_sk#20] - : +- *(24) Filter ((((isnotnull(d_year#21) && isnotnull(d_moy#40)) && (d_year#21 = 2001)) && (d_moy#40 = 11)) && isnotnull(d_date_sk#20)) - : +- *(24) FileScan parquet default.date_dim[d_date_sk#20,d_year#21,d_moy#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct - :- *(52) Project [sales#41, number_sales#42, channel#43, i_brand_id#11, i_class_id#12, i_category_id#13] - : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44) && (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44 as decimal(32,6)) > cast(Subquery subquery1667 as decimal(32,6)))) - : : +- Subquery subquery1667 - : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) - : : +- Exchange SinglePartition - : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) - : : +- Union - : : :- *(2) Project [ss_quantity#14 AS quantity#17, ss_list_price#15 AS list_price#18] - : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight - : : : :- *(2) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15] - : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#19) - : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#20] - : : : +- *(1) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct - : : :- *(4) Project [cs_quantity#22 AS quantity#23, cs_list_price#24 AS list_price#25] - : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight - : : : :- *(4) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24] - : : : : +- *(4) Filter isnotnull(cs_sold_date_sk#26) - : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(6) Project [ws_quantity#27 AS quantity#28, ws_list_price#29 AS list_price#30] - : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight - : : :- *(6) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29] - : : : +- *(6) Filter isnotnull(ws_sold_date_sk#31) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(52) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2))), count(1)]) - : +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 5) - : +- *(51) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) - : +- *(51) Project [cs_quantity#22, cs_list_price#24, i_brand_id#11, i_class_id#12, i_category_id#13] - : +- *(51) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight - : :- *(51) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24, i_brand_id#11, i_class_id#12, i_category_id#13] - : : +- *(51) BroadcastHashJoin [cs_item_sk#38], [i_item_sk#33], Inner, BuildRight - : : :- *(51) BroadcastHashJoin [cs_item_sk#38], [ss_item_sk#34], LeftSemi, BuildRight - : : : :- *(51) Project [cs_sold_date_sk#26, cs_item_sk#38, cs_quantity#22, cs_list_price#24] - : : : : +- *(51) Filter (isnotnull(cs_item_sk#38) && isnotnull(cs_sold_date_sk#26)) - : : : : +- *(51) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_item_sk#38,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : : +- ReusedExchange [ss_item_sk#34], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(78) Project [sales#45, number_sales#46, channel#47, i_brand_id#11, i_class_id#12, i_category_id#13] - +- *(78) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2)))#48) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2)))#48 as decimal(32,6)) > cast(Subquery subquery1672 as decimal(32,6)))) - : +- Subquery subquery1672 - : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) - : +- Exchange SinglePartition - : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) - : +- Union - : :- *(2) Project [ss_quantity#14 AS quantity#17, ss_list_price#15 AS list_price#18] - : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight - : : :- *(2) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15] - : : : +- *(2) Filter isnotnull(ss_sold_date_sk#19) - : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [d_date_sk#20] - : : +- *(1) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) - : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct - : :- *(4) Project [cs_quantity#22 AS quantity#23, cs_list_price#24 AS list_price#25] - : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight - : : :- *(4) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24] - : : : +- *(4) Filter isnotnull(cs_sold_date_sk#26) - : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(6) Project [ws_quantity#27 AS quantity#28, ws_list_price#29 AS list_price#30] - : +- *(6) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight - : :- *(6) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29] - : : +- *(6) Filter isnotnull(ws_sold_date_sk#31) - : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(78) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2))), count(1)]) - +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 5) - +- *(77) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) - +- *(77) Project [ws_quantity#27, ws_list_price#29, i_brand_id#11, i_class_id#12, i_category_id#13] - +- *(77) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight - :- *(77) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29, i_brand_id#11, i_class_id#12, i_category_id#13] - : +- *(77) BroadcastHashJoin [ws_item_sk#39], [i_item_sk#33], Inner, BuildRight - : :- *(77) BroadcastHashJoin [ws_item_sk#39], [ss_item_sk#34], LeftSemi, BuildRight - : : :- *(77) Project [ws_sold_date_sk#31, ws_item_sk#39, ws_quantity#27, ws_list_price#29] - : : : +- *(77) Filter (isnotnull(ws_item_sk#39) && isnotnull(ws_sold_date_sk#31)) - : : : +- *(77) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_item_sk#39,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [ss_item_sk#34], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/simplified.txt deleted file mode 100644 index 837757bfe..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/simplified.txt +++ /dev/null @@ -1,267 +0,0 @@ -TakeOrderedAndProject [channel,i_brand_id,i_category_id,i_class_id,sum(number_sales),sum(sales)] - WholeStageCodegen - HashAggregate [channel,i_brand_id,i_category_id,i_class_id,spark_grouping_id,sum,sum,sum(number_salesL),sum(sales)] [sum,sum,sum(number_salesL),sum(number_sales),sum(sales),sum(sales)] - InputAdapter - Exchange [channel,i_brand_id,i_category_id,i_class_id,spark_grouping_id] #1 - WholeStageCodegen - HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales,spark_grouping_id,sum,sum,sum,sum] [sum,sum,sum,sum] - Expand [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] - InputAdapter - Union - WholeStageCodegen - Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] - Subquery #1 - WholeStageCodegen - HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] - InputAdapter - Exchange #13 - WholeStageCodegen - HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] - InputAdapter - Union - WholeStageCodegen - Project [ss_list_price,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_list_price,ss_quantity,ss_sold_date_sk] - Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] - InputAdapter - BroadcastExchange #14 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - WholeStageCodegen - Project [cs_list_price,cs_quantity] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_list_price,cs_quantity,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #14 - WholeStageCodegen - Project [ws_list_price,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_list_price,ws_quantity,ws_sold_date_sk] - Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #14 - HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] - InputAdapter - Exchange [i_brand_id,i_category_id,i_class_id] #2 - WholeStageCodegen - HashAggregate [count,count,i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,sum,sum] [count,count,sum,sum] - Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,ss_sold_date_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - BroadcastHashJoin [ss_item_sk,ss_item_sk] - Project [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [i_item_sk] - BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] - Project [i_brand_id,i_category_id,i_class_id,i_item_sk] - Filter [i_brand_id,i_category_id,i_class_id] - Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - HashAggregate [brand_id,category_id,class_id] - HashAggregate [brand_id,category_id,class_id] - BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] - HashAggregate [brand_id,category_id,class_id] - InputAdapter - Exchange [brand_id,category_id,class_id] #5 - WholeStageCodegen - HashAggregate [brand_id,category_id,class_id] - BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] - Project [i_brand_id,i_category_id,i_class_id] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,ss_sold_date_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_item_sk,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] [ss_item_sk,ss_sold_date_sk] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [i_brand_id,i_category_id,i_class_id,i_item_sk] - Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] - Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [i_brand_id,i_category_id,i_class_id] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,i_brand_id,i_category_id,i_class_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk,cs_sold_date_sk] - Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] [cs_item_sk,cs_sold_date_sk] - InputAdapter - BroadcastExchange #9 - WholeStageCodegen - Project [i_brand_id,i_category_id,i_class_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #7 - InputAdapter - BroadcastExchange #10 - WholeStageCodegen - Project [i_brand_id,i_category_id,i_class_id] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,ws_sold_date_sk] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_item_sk,ws_sold_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] [ws_item_sk,ws_sold_date_sk] - InputAdapter - ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #9 - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #7 - InputAdapter - BroadcastExchange #11 - WholeStageCodegen - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_brand_id,i_category_id,i_class_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] - InputAdapter - ReusedExchange [ss_item_sk] [ss_item_sk] #3 - InputAdapter - BroadcastExchange #12 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - WholeStageCodegen - Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] - Subquery #2 - WholeStageCodegen - HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] - InputAdapter - Exchange #13 - WholeStageCodegen - HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] - InputAdapter - Union - WholeStageCodegen - Project [ss_list_price,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_list_price,ss_quantity,ss_sold_date_sk] - Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] - InputAdapter - BroadcastExchange #14 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - WholeStageCodegen - Project [cs_list_price,cs_quantity] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_list_price,cs_quantity,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #14 - WholeStageCodegen - Project [ws_list_price,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_list_price,ws_quantity,ws_sold_date_sk] - Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #14 - HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] - InputAdapter - Exchange [i_brand_id,i_category_id,i_class_id] #15 - WholeStageCodegen - HashAggregate [count,count,cs_list_price,cs_quantity,i_brand_id,i_category_id,i_class_id,sum,sum] [count,count,sum,sum] - Project [cs_list_price,cs_quantity,i_brand_id,i_category_id,i_class_id] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_list_price,cs_quantity,cs_sold_date_sk,i_brand_id,i_category_id,i_class_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - BroadcastHashJoin [cs_item_sk,ss_item_sk] - Project [cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] - Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] [cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] - InputAdapter - ReusedExchange [ss_item_sk] [ss_item_sk] #3 - InputAdapter - ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #11 - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #12 - WholeStageCodegen - Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] - Subquery #3 - WholeStageCodegen - HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] - InputAdapter - Exchange #13 - WholeStageCodegen - HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] - InputAdapter - Union - WholeStageCodegen - Project [ss_list_price,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_list_price,ss_quantity,ss_sold_date_sk] - Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] - InputAdapter - BroadcastExchange #14 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - WholeStageCodegen - Project [cs_list_price,cs_quantity] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_list_price,cs_quantity,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #14 - WholeStageCodegen - Project [ws_list_price,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_list_price,ws_quantity,ws_sold_date_sk] - Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #14 - HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] - InputAdapter - Exchange [i_brand_id,i_category_id,i_class_id] #16 - WholeStageCodegen - HashAggregate [count,count,i_brand_id,i_category_id,i_class_id,sum,sum,ws_list_price,ws_quantity] [count,count,sum,sum] - Project [i_brand_id,i_category_id,i_class_id,ws_list_price,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,ws_list_price,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [i_item_sk,ws_item_sk] - BroadcastHashJoin [ss_item_sk,ws_item_sk] - Project [ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] [ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] - InputAdapter - ReusedExchange [ss_item_sk] [ss_item_sk] #3 - InputAdapter - ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #11 - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #12 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/explain.txt deleted file mode 100644 index 858eda8d6..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/explain.txt +++ /dev/null @@ -1,166 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 ASC NULLS FIRST,i_class_id#3 ASC NULLS FIRST,i_category_id#4 ASC NULLS FIRST], output=[channel#1,i_brand_id#2,i_class_id#3,i_category_id#4,sales#5,number_sales#6,channel#7,i_brand_id#8,i_class_id#9,i_category_id#10,sales#11,number_sales#12]) -+- *(52) BroadcastHashJoin [i_brand_id#2, i_class_id#3, i_category_id#4], [i_brand_id#8, i_class_id#9, i_category_id#10], Inner, BuildRight - :- *(52) Project [channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, sales#5, number_sales#6] - : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#15) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#15 as decimal(32,6)) > cast(Subquery subquery1884 as decimal(32,6)))) - : : +- Subquery subquery1884 - : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) - : : +- Exchange SinglePartition - : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) - : : +- Union - : : :- *(2) Project [ss_quantity#13 AS quantity#16, ss_list_price#14 AS list_price#17] - : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight - : : : :- *(2) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14] - : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#18) - : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#19] - : : : +- *(1) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct - : : :- *(4) Project [cs_quantity#21 AS quantity#22, cs_list_price#23 AS list_price#24] - : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#19], Inner, BuildRight - : : : :- *(4) Project [cs_sold_date_sk#25, cs_quantity#21, cs_list_price#23] - : : : : +- *(4) Filter isnotnull(cs_sold_date_sk#25) - : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_quantity#21,cs_list_price#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(6) Project [ws_quantity#26 AS quantity#27, ws_list_price#28 AS list_price#29] - : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#30], [d_date_sk#19], Inner, BuildRight - : : :- *(6) Project [ws_sold_date_sk#30, ws_quantity#26, ws_list_price#28] - : : : +- *(6) Filter isnotnull(ws_sold_date_sk#30) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_quantity#26,ws_list_price#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(52) HashAggregate(keys=[i_brand_id#2, i_class_id#3, i_category_id#4], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), count(1)]) - : +- Exchange hashpartitioning(i_brand_id#2, i_class_id#3, i_category_id#4, 5) - : +- *(25) HashAggregate(keys=[i_brand_id#2, i_class_id#3, i_category_id#4], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) - : +- *(25) Project [ss_quantity#13, ss_list_price#14, i_brand_id#2, i_class_id#3, i_category_id#4] - : +- *(25) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight - : :- *(25) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14, i_brand_id#2, i_class_id#3, i_category_id#4] - : : +- *(25) BroadcastHashJoin [ss_item_sk#31], [i_item_sk#32], Inner, BuildRight - : : :- *(25) BroadcastHashJoin [ss_item_sk#31], [ss_item_sk#33], LeftSemi, BuildRight - : : : :- *(25) Project [ss_sold_date_sk#18, ss_item_sk#31, ss_quantity#13, ss_list_price#14] - : : : : +- *(25) Filter (isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#18)) - : : : : +- *(25) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(11) Project [i_item_sk#32 AS ss_item_sk#33] - : : : +- *(11) BroadcastHashJoin [i_brand_id#2, i_class_id#3, i_category_id#4], [brand_id#34, class_id#35, category_id#36], Inner, BuildRight - : : : :- *(11) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : : +- *(11) Filter ((isnotnull(i_class_id#3) && isnotnull(i_brand_id#2)) && isnotnull(i_category_id#4)) - : : : : +- *(11) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) - : : : +- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) - : : : +- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) - : : : +- *(10) BroadcastHashJoin [coalesce(brand_id#34, 0), coalesce(class_id#35, 0), coalesce(category_id#36, 0)], [coalesce(i_brand_id#2, 0), coalesce(i_class_id#3, 0), coalesce(i_category_id#4, 0)], LeftSemi, BuildRight, (((brand_id#34 <=> i_brand_id#2) && (class_id#35 <=> i_class_id#3)) && (category_id#36 <=> i_category_id#4)) - : : : :- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) - : : : : +- Exchange hashpartitioning(brand_id#34, class_id#35, category_id#36, 5) - : : : : +- *(6) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) - : : : : +- *(6) BroadcastHashJoin [coalesce(brand_id#34, 0), coalesce(class_id#35, 0), coalesce(category_id#36, 0)], [coalesce(i_brand_id#2, 0), coalesce(i_class_id#3, 0), coalesce(i_category_id#4, 0)], LeftSemi, BuildRight, (((brand_id#34 <=> i_brand_id#2) && (class_id#35 <=> i_class_id#3)) && (category_id#36 <=> i_category_id#4)) - : : : : :- *(6) Project [i_brand_id#2 AS brand_id#34, i_class_id#3 AS class_id#35, i_category_id#4 AS category_id#36] - : : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight - : : : : : :- *(6) Project [ss_sold_date_sk#18, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : : : : +- *(6) BroadcastHashJoin [ss_item_sk#31], [i_item_sk#32], Inner, BuildRight - : : : : : : :- *(6) Project [ss_sold_date_sk#18, ss_item_sk#31] - : : : : : : : +- *(6) Filter (isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#18)) - : : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : +- *(1) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_class_id#3)) && isnotnull(i_brand_id#2)) && isnotnull(i_category_id#4)) - : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(2) Project [d_date_sk#19] - : : : : : +- *(2) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) - : : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) - : : : : +- *(5) Project [i_brand_id#2, i_class_id#3, i_category_id#4] - : : : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#19], Inner, BuildRight - : : : : :- *(5) Project [cs_sold_date_sk#25, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : : : +- *(5) BroadcastHashJoin [cs_item_sk#37], [i_item_sk#32], Inner, BuildRight - : : : : : :- *(5) Project [cs_sold_date_sk#25, cs_item_sk#37] - : : : : : : +- *(5) Filter (isnotnull(cs_item_sk#37) && isnotnull(cs_sold_date_sk#25)) - : : : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_item_sk#37] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(3) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : : : +- *(3) Filter isnotnull(i_item_sk#32) - : : : : : +- *(3) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : : : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) - : : : +- *(9) Project [i_brand_id#2, i_class_id#3, i_category_id#4] - : : : +- *(9) BroadcastHashJoin [ws_sold_date_sk#30], [d_date_sk#19], Inner, BuildRight - : : : :- *(9) Project [ws_sold_date_sk#30, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : : +- *(9) BroadcastHashJoin [ws_item_sk#38], [i_item_sk#32], Inner, BuildRight - : : : : :- *(9) Project [ws_sold_date_sk#30, ws_item_sk#38] - : : : : : +- *(9) Filter (isnotnull(ws_item_sk#38) && isnotnull(ws_sold_date_sk#30)) - : : : : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_item_sk#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : : : +- ReusedExchange [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(23) BroadcastHashJoin [i_item_sk#32], [ss_item_sk#33], LeftSemi, BuildRight - : : :- *(23) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : +- *(23) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_class_id#3)) && isnotnull(i_category_id#4)) && isnotnull(i_brand_id#2)) - : : : +- *(23) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_brand_id)], ReadSchema: struct - : : +- ReusedExchange [ss_item_sk#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(24) Project [d_date_sk#19] - : +- *(24) Filter ((isnotnull(d_week_seq#39) && (d_week_seq#39 = Subquery subquery1883)) && isnotnull(d_date_sk#19)) - : : +- Subquery subquery1883 - : : +- *(1) Project [d_week_seq#39] - : : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - : : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct - : +- *(24) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct - : +- Subquery subquery1883 - : +- *(1) Project [d_week_seq#39] - : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true])) - +- *(51) Project [channel#7, i_brand_id#8, i_class_id#9, i_category_id#10, sales#11, number_sales#12] - +- *(51) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42 as decimal(32,6)) > cast(Subquery subquery1890 as decimal(32,6)))) - : +- Subquery subquery1890 - : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) - : +- Exchange SinglePartition - : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) - : +- Union - : :- *(2) Project [ss_quantity#13 AS quantity#16, ss_list_price#14 AS list_price#17] - : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight - : : :- *(2) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14] - : : : +- *(2) Filter isnotnull(ss_sold_date_sk#18) - : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [d_date_sk#19] - : : +- *(1) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) - : : +- *(1) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct - : :- *(4) Project [cs_quantity#21 AS quantity#22, cs_list_price#23 AS list_price#24] - : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#19], Inner, BuildRight - : : :- *(4) Project [cs_sold_date_sk#25, cs_quantity#21, cs_list_price#23] - : : : +- *(4) Filter isnotnull(cs_sold_date_sk#25) - : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_quantity#21,cs_list_price#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(6) Project [ws_quantity#26 AS quantity#27, ws_list_price#28 AS list_price#29] - : +- *(6) BroadcastHashJoin [ws_sold_date_sk#30], [d_date_sk#19], Inner, BuildRight - : :- *(6) Project [ws_sold_date_sk#30, ws_quantity#26, ws_list_price#28] - : : +- *(6) Filter isnotnull(ws_sold_date_sk#30) - : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_quantity#26,ws_list_price#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(51) HashAggregate(keys=[i_brand_id#8, i_class_id#9, i_category_id#10], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), count(1)]) - +- Exchange hashpartitioning(i_brand_id#8, i_class_id#9, i_category_id#10, 5) - +- *(50) HashAggregate(keys=[i_brand_id#8, i_class_id#9, i_category_id#10], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) - +- *(50) Project [ss_quantity#13, ss_list_price#14, i_brand_id#8, i_class_id#9, i_category_id#10] - +- *(50) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight - :- *(50) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14, i_brand_id#8, i_class_id#9, i_category_id#10] - : +- *(50) BroadcastHashJoin [ss_item_sk#31], [i_item_sk#43], Inner, BuildRight - : :- *(50) BroadcastHashJoin [ss_item_sk#31], [ss_item_sk#33], LeftSemi, BuildRight - : : :- *(50) Project [ss_sold_date_sk#18, ss_item_sk#31, ss_quantity#13, ss_list_price#14] - : : : +- *(50) Filter (isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#18)) - : : : +- *(50) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [ss_item_sk#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [i_item_sk#43, i_brand_id#8, i_class_id#9, i_category_id#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(49) Project [d_date_sk#19] - +- *(49) Filter ((isnotnull(d_week_seq#39) && (d_week_seq#39 = Subquery subquery1889)) && isnotnull(d_date_sk#19)) - : +- Subquery subquery1889 - : +- *(1) Project [d_week_seq#39] - : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct - +- *(49) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct - +- Subquery subquery1889 - +- *(1) Project [d_week_seq#39] - +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/simplified.txt deleted file mode 100644 index ee18cd267..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/simplified.txt +++ /dev/null @@ -1,226 +0,0 @@ -TakeOrderedAndProject [channel,channel,i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id,number_sales,number_sales,sales,sales] - WholeStageCodegen - BroadcastHashJoin [i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id] - Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] - Subquery #2 - WholeStageCodegen - HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] - InputAdapter - Exchange #12 - WholeStageCodegen - HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] - InputAdapter - Union - WholeStageCodegen - Project [ss_list_price,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_list_price,ss_quantity,ss_sold_date_sk] - Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] - InputAdapter - BroadcastExchange #13 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - WholeStageCodegen - Project [cs_list_price,cs_quantity] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_list_price,cs_quantity,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #13 - WholeStageCodegen - Project [ws_list_price,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_list_price,ws_quantity,ws_sold_date_sk] - Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #13 - HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] - InputAdapter - Exchange [i_brand_id,i_category_id,i_class_id] #1 - WholeStageCodegen - HashAggregate [count,count,i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,sum,sum] [count,count,sum,sum] - Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,ss_sold_date_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - BroadcastHashJoin [ss_item_sk,ss_item_sk] - Project [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [i_item_sk] - BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] - Project [i_brand_id,i_category_id,i_class_id,i_item_sk] - Filter [i_brand_id,i_category_id,i_class_id] - Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - HashAggregate [brand_id,category_id,class_id] - HashAggregate [brand_id,category_id,class_id] - BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] - HashAggregate [brand_id,category_id,class_id] - InputAdapter - Exchange [brand_id,category_id,class_id] #4 - WholeStageCodegen - HashAggregate [brand_id,category_id,class_id] - BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] - Project [i_brand_id,i_category_id,i_class_id] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,ss_sold_date_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_item_sk,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] [ss_item_sk,ss_sold_date_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [i_brand_id,i_category_id,i_class_id,i_item_sk] - Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] - Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [i_brand_id,i_category_id,i_class_id] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,i_brand_id,i_category_id,i_class_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk,cs_sold_date_sk] - Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] [cs_item_sk,cs_sold_date_sk] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [i_brand_id,i_category_id,i_class_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #6 - InputAdapter - BroadcastExchange #9 - WholeStageCodegen - Project [i_brand_id,i_category_id,i_class_id] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,ws_sold_date_sk] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_item_sk,ws_sold_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] [ws_item_sk,ws_sold_date_sk] - InputAdapter - ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #8 - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #6 - InputAdapter - BroadcastExchange #10 - WholeStageCodegen - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_brand_id,i_category_id,i_class_id,i_item_sk] - Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] - Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] - InputAdapter - ReusedExchange [ss_item_sk] [ss_item_sk] #2 - InputAdapter - BroadcastExchange #11 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_week_seq] - Subquery #1 - WholeStageCodegen - Project [d_week_seq] - Filter [d_dom,d_moy,d_year] - Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] [d_dom,d_moy,d_week_seq,d_year] - Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] - Subquery #1 - WholeStageCodegen - Project [d_week_seq] - Filter [d_dom,d_moy,d_year] - Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] [d_dom,d_moy,d_week_seq,d_year] - InputAdapter - BroadcastExchange #14 - WholeStageCodegen - Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] - Subquery #4 - WholeStageCodegen - HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] - InputAdapter - Exchange #12 - WholeStageCodegen - HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] - InputAdapter - Union - WholeStageCodegen - Project [ss_list_price,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_list_price,ss_quantity,ss_sold_date_sk] - Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] - InputAdapter - BroadcastExchange #13 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - WholeStageCodegen - Project [cs_list_price,cs_quantity] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_list_price,cs_quantity,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #13 - WholeStageCodegen - Project [ws_list_price,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_list_price,ws_quantity,ws_sold_date_sk] - Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #13 - HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] - InputAdapter - Exchange [i_brand_id,i_category_id,i_class_id] #15 - WholeStageCodegen - HashAggregate [count,count,i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,sum,sum] [count,count,sum,sum] - Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,ss_sold_date_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - BroadcastHashJoin [ss_item_sk,ss_item_sk] - Project [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] - InputAdapter - ReusedExchange [ss_item_sk] [ss_item_sk] #2 - InputAdapter - ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #10 - InputAdapter - BroadcastExchange #16 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_week_seq] - Subquery #3 - WholeStageCodegen - Project [d_week_seq] - Filter [d_dom,d_moy,d_year] - Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] [d_dom,d_moy,d_week_seq,d_year] - Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] - Subquery #3 - WholeStageCodegen - Project [d_week_seq] - Filter [d_dom,d_moy,d_year] - Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] [d_dom,d_moy,d_week_seq,d_year] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/explain.txt deleted file mode 100644 index d70d7905c..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/explain.txt +++ /dev/null @@ -1,26 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[ca_zip#1 ASC NULLS FIRST], output=[ca_zip#1,sum(cs_sales_price)#2]) -+- *(5) HashAggregate(keys=[ca_zip#1], functions=[sum(UnscaledValue(cs_sales_price#3))]) - +- Exchange hashpartitioning(ca_zip#1, 5) - +- *(4) HashAggregate(keys=[ca_zip#1], functions=[partial_sum(UnscaledValue(cs_sales_price#3))]) - +- *(4) Project [cs_sales_price#3, ca_zip#1] - +- *(4) BroadcastHashJoin [cs_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight - :- *(4) Project [cs_sold_date_sk#4, cs_sales_price#3, ca_zip#1] - : +- *(4) BroadcastHashJoin [c_current_addr_sk#6], [ca_address_sk#7], Inner, BuildRight, ((substring(ca_zip#1, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) || ca_state#8 IN (CA,WA,GA)) || (cs_sales_price#3 > 500.00)) - : :- *(4) Project [cs_sold_date_sk#4, cs_sales_price#3, c_current_addr_sk#6] - : : +- *(4) BroadcastHashJoin [cs_bill_customer_sk#9], [c_customer_sk#10], Inner, BuildRight - : : :- *(4) Project [cs_sold_date_sk#4, cs_bill_customer_sk#9, cs_sales_price#3] - : : : +- *(4) Filter (isnotnull(cs_bill_customer_sk#9) && isnotnull(cs_sold_date_sk#4)) - : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#4,cs_bill_customer_sk#9,cs_sales_price#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [c_customer_sk#10, c_current_addr_sk#6] - : : +- *(1) Filter (isnotnull(c_customer_sk#10) && isnotnull(c_current_addr_sk#6)) - : : +- *(1) FileScan parquet default.customer[c_customer_sk#10,c_current_addr_sk#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [ca_address_sk#7, ca_state#8, ca_zip#1] - : +- *(2) Filter isnotnull(ca_address_sk#7) - : +- *(2) FileScan parquet default.customer_address[ca_address_sk#7,ca_state#8,ca_zip#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [d_date_sk#5] - +- *(3) Filter ((((isnotnull(d_qoy#11) && isnotnull(d_year#12)) && (d_qoy#11 = 2)) && (d_year#12 = 2001)) && isnotnull(d_date_sk#5)) - +- *(3) FileScan parquet default.date_dim[d_date_sk#5,d_year#12,d_qoy#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/simplified.txt deleted file mode 100644 index aa1157759..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/simplified.txt +++ /dev/null @@ -1,34 +0,0 @@ -TakeOrderedAndProject [ca_zip,sum(cs_sales_price)] - WholeStageCodegen - HashAggregate [ca_zip,sum,sum(UnscaledValue(cs_sales_price))] [sum,sum(UnscaledValue(cs_sales_price)),sum(cs_sales_price)] - InputAdapter - Exchange [ca_zip] #1 - WholeStageCodegen - HashAggregate [ca_zip,cs_sales_price,sum,sum] [sum,sum] - Project [ca_zip,cs_sales_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [ca_zip,cs_sales_price,cs_sold_date_sk] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_state,ca_zip,cs_sales_price] - Project [c_current_addr_sk,cs_sales_price,cs_sold_date_sk] - BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - Project [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] - Filter [cs_bill_customer_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [c_current_addr_sk,c_customer_sk] - Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [ca_address_sk,ca_state,ca_zip] - Filter [ca_address_sk] - Scan parquet default.customer_address [ca_address_sk,ca_state,ca_zip] [ca_address_sk,ca_state,ca_zip] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_qoy,d_year] - Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/explain.txt deleted file mode 100644 index 589464c44..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/explain.txt +++ /dev/null @@ -1,37 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], output=[order count #1,total shipping cost #2,total net profit #3]) -+- *(8) HashAggregate(keys=[], functions=[sum(UnscaledValue(cs_ext_ship_cost#4)), sum(UnscaledValue(cs_net_profit#5)), count(distinct cs_order_number#6)]) - +- Exchange SinglePartition - +- *(7) HashAggregate(keys=[], functions=[merge_sum(UnscaledValue(cs_ext_ship_cost#4)), merge_sum(UnscaledValue(cs_net_profit#5)), partial_count(distinct cs_order_number#6)]) - +- *(7) HashAggregate(keys=[cs_order_number#6], functions=[merge_sum(UnscaledValue(cs_ext_ship_cost#4)), merge_sum(UnscaledValue(cs_net_profit#5))]) - +- Exchange hashpartitioning(cs_order_number#6, 5) - +- *(6) HashAggregate(keys=[cs_order_number#6], functions=[partial_sum(UnscaledValue(cs_ext_ship_cost#4)), partial_sum(UnscaledValue(cs_net_profit#5))]) - +- *(6) Project [cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] - +- *(6) BroadcastHashJoin [cs_call_center_sk#7], [cc_call_center_sk#8], Inner, BuildRight - :- *(6) Project [cs_call_center_sk#7, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] - : +- *(6) BroadcastHashJoin [cs_ship_addr_sk#9], [ca_address_sk#10], Inner, BuildRight - : :- *(6) Project [cs_ship_addr_sk#9, cs_call_center_sk#7, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] - : : +- *(6) BroadcastHashJoin [cs_ship_date_sk#11], [d_date_sk#12], Inner, BuildRight - : : :- *(6) BroadcastHashJoin [cs_order_number#6], [cr_order_number#13], LeftAnti, BuildRight - : : : :- *(6) Project [cs_ship_date_sk#11, cs_ship_addr_sk#9, cs_call_center_sk#7, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] - : : : : +- *(6) BroadcastHashJoin [cs_order_number#6], [cs_order_number#6#14], LeftSemi, BuildRight, NOT (cs_warehouse_sk#15 = cs_warehouse_sk#15#16) - : : : : :- *(6) Project [cs_ship_date_sk#11, cs_ship_addr_sk#9, cs_call_center_sk#7, cs_warehouse_sk#15, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] - : : : : : +- *(6) Filter ((isnotnull(cs_ship_date_sk#11) && isnotnull(cs_ship_addr_sk#9)) && isnotnull(cs_call_center_sk#7)) - : : : : : +- *(6) FileScan parquet default.catalog_sales[cs_ship_date_sk#11,cs_ship_addr_sk#9,cs_call_center_sk#7,cs_warehouse_sk#15,cs_order_number#6,cs_ext_ship_cost#4,cs_net_profit#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_ship_date_sk), IsNotNull(cs_ship_addr_sk), IsNotNull(cs_call_center_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) FileScan parquet default.catalog_returns[cr_order_number#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [d_date_sk#12] - : : +- *(3) Filter (((isnotnull(d_date#17) && (cast(d_date#17 as string) >= 2002-02-01)) && (d_date#17 <= 11779)) && isnotnull(d_date_sk#12)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2002-04-02), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [ca_address_sk#10] - : +- *(4) Filter ((isnotnull(ca_state#18) && (ca_state#18 = GA)) && isnotnull(ca_address_sk#10)) - : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(5) Project [cc_call_center_sk#8] - +- *(5) Filter ((isnotnull(cc_county#19) && (cc_county#19 = Williamson County)) && isnotnull(cc_call_center_sk#8)) - +- *(5) FileScan parquet default.call_center[cc_call_center_sk#8,cc_county#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_county), EqualTo(cc_county,Williamson County), IsNotNull(cc_call_center_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/simplified.txt deleted file mode 100644 index fe14e6333..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/simplified.txt +++ /dev/null @@ -1,51 +0,0 @@ -TakeOrderedAndProject [order count ,total net profit ,total shipping cost ] - WholeStageCodegen - HashAggregate [count,count(cs_order_number),sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] [count,count(cs_order_number),order count ,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),total net profit ,total shipping cost ] - InputAdapter - Exchange #1 - WholeStageCodegen - HashAggregate [count,count(cs_order_number),cs_order_number,sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] [count,count,count(cs_order_number),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] - HashAggregate [cs_order_number,sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] [sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] - InputAdapter - Exchange [cs_order_number] #2 - WholeStageCodegen - HashAggregate [cs_ext_ship_cost,cs_net_profit,cs_order_number,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] [sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] - Project [cs_ext_ship_cost,cs_net_profit,cs_order_number] - BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] - Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number] - BroadcastHashJoin [ca_address_sk,cs_ship_addr_sk] - Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk] - BroadcastHashJoin [cs_ship_date_sk,d_date_sk] - BroadcastHashJoin [cr_order_number,cs_order_number] - Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk] - BroadcastHashJoin [cs_order_number,cs_order_number,cs_warehouse_sk,cs_warehouse_sk] - Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk] - Filter [cs_call_center_sk,cs_ship_addr_sk,cs_ship_date_sk] - Scan parquet default.catalog_sales [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk] [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [cs_order_number,cs_warehouse_sk] - Scan parquet default.catalog_sales [cs_order_number,cs_warehouse_sk] [cs_order_number,cs_warehouse_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Scan parquet default.catalog_returns [cr_order_number] [cr_order_number] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [ca_address_sk] - Filter [ca_address_sk,ca_state] - Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [cc_call_center_sk] - Filter [cc_call_center_sk,cc_county] - Scan parquet default.call_center [cc_call_center_sk,cc_county] [cc_call_center_sk,cc_county] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/explain.txt deleted file mode 100644 index 5d0ff39f9..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/explain.txt +++ /dev/null @@ -1,50 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST,s_state#3 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,s_state#3,store_sales_quantitycount#4,store_sales_quantityave#5,store_sales_quantitystdev#6,store_sales_quantitycov#7,as_store_returns_quantitycount#8,as_store_returns_quantityave#9,as_store_returns_quantitystdev#10,store_returns_quantitycov#11,catalog_sales_quantitycount#12,catalog_sales_quantityave#13,catalog_sales_quantitystdev#14,catalog_sales_quantitycov#15]) -+- *(9) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_state#3], functions=[count(ss_quantity#16), avg(cast(ss_quantity#16 as bigint)), stddev_samp(cast(ss_quantity#16 as double)), count(sr_return_quantity#17), avg(cast(sr_return_quantity#17 as bigint)), stddev_samp(cast(sr_return_quantity#17 as double)), count(cs_quantity#18), avg(cast(cs_quantity#18 as bigint)), stddev_samp(cast(cs_quantity#18 as double))]) - +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, s_state#3, 5) - +- *(8) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_state#3], functions=[partial_count(ss_quantity#16), partial_avg(cast(ss_quantity#16 as bigint)), partial_stddev_samp(cast(ss_quantity#16 as double)), partial_count(sr_return_quantity#17), partial_avg(cast(sr_return_quantity#17 as bigint)), partial_stddev_samp(cast(sr_return_quantity#17 as double)), partial_count(cs_quantity#18), partial_avg(cast(cs_quantity#18 as bigint)), partial_stddev_samp(cast(cs_quantity#18 as double))]) - +- *(8) Project [ss_quantity#16, sr_return_quantity#17, cs_quantity#18, s_state#3, i_item_id#1, i_item_desc#2] - +- *(8) BroadcastHashJoin [ss_item_sk#19], [i_item_sk#20], Inner, BuildRight - :- *(8) Project [ss_item_sk#19, ss_quantity#16, sr_return_quantity#17, cs_quantity#18, s_state#3] - : +- *(8) BroadcastHashJoin [ss_store_sk#21], [s_store_sk#22], Inner, BuildRight - : :- *(8) Project [ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_return_quantity#17, cs_quantity#18] - : : +- *(8) BroadcastHashJoin [cs_sold_date_sk#23], [d_date_sk#24], Inner, BuildRight - : : :- *(8) Project [ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_return_quantity#17, cs_sold_date_sk#23, cs_quantity#18] - : : : +- *(8) BroadcastHashJoin [sr_returned_date_sk#25], [cast(d_date_sk#26 as bigint)], Inner, BuildRight - : : : :- *(8) Project [ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_returned_date_sk#25, sr_return_quantity#17, cs_sold_date_sk#23, cs_quantity#18] - : : : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#27], [d_date_sk#28], Inner, BuildRight - : : : : :- *(8) Project [ss_sold_date_sk#27, ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_returned_date_sk#25, sr_return_quantity#17, cs_sold_date_sk#23, cs_quantity#18] - : : : : : +- *(8) BroadcastHashJoin [sr_customer_sk#29, sr_item_sk#30], [cast(cs_bill_customer_sk#31 as bigint), cast(cs_item_sk#32 as bigint)], Inner, BuildRight - : : : : : :- *(8) Project [ss_sold_date_sk#27, ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_returned_date_sk#25, sr_item_sk#30, sr_customer_sk#29, sr_return_quantity#17] - : : : : : : +- *(8) BroadcastHashJoin [cast(ss_customer_sk#33 as bigint), cast(ss_item_sk#19 as bigint), cast(ss_ticket_number#34 as bigint)], [sr_customer_sk#29, sr_item_sk#30, sr_ticket_number#35], Inner, BuildRight - : : : : : : :- *(8) Project [ss_sold_date_sk#27, ss_item_sk#19, ss_customer_sk#33, ss_store_sk#21, ss_ticket_number#34, ss_quantity#16] - : : : : : : : +- *(8) Filter ((((isnotnull(ss_ticket_number#34) && isnotnull(ss_item_sk#19)) && isnotnull(ss_customer_sk#33)) && isnotnull(ss_sold_date_sk#27)) && isnotnull(ss_store_sk#21)) - : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#27,ss_item_sk#19,ss_customer_sk#33,ss_store_sk#21,ss_ticket_number#34,ss_quantity#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(3) Project [d_date_sk#28] - : : : : +- *(3) Filter ((isnotnull(d_quarter_name#36) && (d_quarter_name#36 = 2001Q1)) && isnotnull(d_date_sk#28)) - : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#28,d_quarter_name#36] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_quarter_name), EqualTo(d_quarter_name,2001Q1), IsNotNull(d_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(4) Project [d_date_sk#26] - : : : +- *(4) Filter (d_quarter_name#37 IN (2001Q1,2001Q2,2001Q3) && isnotnull(d_date_sk#26)) - : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#26,d_quarter_name#37] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(5) Project [d_date_sk#24] - : : +- *(5) Filter (d_quarter_name#38 IN (2001Q1,2001Q2,2001Q3) && isnotnull(d_date_sk#24)) - : : +- *(5) FileScan parquet default.date_dim[d_date_sk#24,d_quarter_name#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(6) Project [s_store_sk#22, s_state#3] - : +- *(6) Filter isnotnull(s_store_sk#22) - : +- *(6) FileScan parquet default.store[s_store_sk#22,s_state#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(7) Project [i_item_sk#20, i_item_id#1, i_item_desc#2] - +- *(7) Filter isnotnull(i_item_sk#20) - +- *(7) FileScan parquet default.item[i_item_sk#20,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/simplified.txt deleted file mode 100644 index 780e633e2..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/simplified.txt +++ /dev/null @@ -1,66 +0,0 @@ -TakeOrderedAndProject [as_store_returns_quantityave,as_store_returns_quantitycount,as_store_returns_quantitystdev,catalog_sales_quantityave,catalog_sales_quantitycount,catalog_sales_quantitycov,catalog_sales_quantitystdev,i_item_desc,i_item_id,s_state,store_returns_quantitycov,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitycov,store_sales_quantitystdev] - WholeStageCodegen - HashAggregate [avg,avg,avg,avg(cast(cs_quantity as bigint)),avg(cast(sr_return_quantity as bigint)),avg(cast(ss_quantity as bigint)),count,count,count,count,count,count,count(cs_quantity),count(sr_return_quantity),count(ss_quantity),i_item_desc,i_item_id,m2,m2,m2,n,n,n,s_state,stddev_samp(cast(cs_quantity as double)),stddev_samp(cast(sr_return_quantity as double)),stddev_samp(cast(ss_quantity as double)),sum,sum,sum] [as_store_returns_quantityave,as_store_returns_quantitycount,as_store_returns_quantitystdev,avg,avg,avg,avg(cast(cs_quantity as bigint)),avg(cast(sr_return_quantity as bigint)),avg(cast(ss_quantity as bigint)),catalog_sales_quantityave,catalog_sales_quantitycount,catalog_sales_quantitycov,catalog_sales_quantitystdev,count,count,count,count,count,count,count(cs_quantity),count(sr_return_quantity),count(ss_quantity),m2,m2,m2,n,n,n,stddev_samp(cast(cs_quantity as double)),stddev_samp(cast(sr_return_quantity as double)),stddev_samp(cast(ss_quantity as double)),store_returns_quantitycov,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitycov,store_sales_quantitystdev,sum,sum,sum] - InputAdapter - Exchange [i_item_desc,i_item_id,s_state] #1 - WholeStageCodegen - HashAggregate [avg,avg,avg,avg,avg,avg,count,count,count,count,count,count,count,count,count,count,count,count,cs_quantity,i_item_desc,i_item_id,m2,m2,m2,m2,m2,m2,n,n,n,n,n,n,s_state,sr_return_quantity,ss_quantity,sum,sum,sum,sum,sum,sum] [avg,avg,avg,avg,avg,avg,count,count,count,count,count,count,count,count,count,count,count,count,m2,m2,m2,m2,m2,m2,n,n,n,n,n,n,sum,sum,sum,sum,sum,sum] - Project [cs_quantity,i_item_desc,i_item_id,s_state,sr_return_quantity,ss_quantity] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [cs_quantity,s_state,sr_return_quantity,ss_item_sk,ss_quantity] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [cs_quantity,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] - Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,sr_customer_sk,sr_item_sk] - Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] - Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] - Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_quarter_name] - Scan parquet default.date_dim [d_date_sk,d_quarter_name] [d_date_sk,d_quarter_name] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_quarter_name] - Scan parquet default.date_dim [d_date_sk,d_quarter_name] [d_date_sk,d_quarter_name] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_quarter_name] - Scan parquet default.date_dim [d_date_sk,d_quarter_name] [d_date_sk,d_quarter_name] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [s_state,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [i_item_desc,i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_desc,i_item_id,i_item_sk] [i_item_desc,i_item_id,i_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/explain.txt deleted file mode 100644 index fd9786c06..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/explain.txt +++ /dev/null @@ -1,45 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[ca_country#1 ASC NULLS FIRST,ca_state#2 ASC NULLS FIRST,ca_county#3 ASC NULLS FIRST,i_item_id#4 ASC NULLS FIRST], output=[i_item_id#4,ca_country#1,ca_state#2,ca_county#3,agg1#5,agg2#6,agg3#7,agg4#8,agg5#9,agg6#10,agg7#11]) -+- *(8) HashAggregate(keys=[i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12], functions=[avg(cast(cs_quantity#13 as decimal(12,2))), avg(cast(cs_list_price#14 as decimal(12,2))), avg(cast(cs_coupon_amt#15 as decimal(12,2))), avg(cast(cs_sales_price#16 as decimal(12,2))), avg(cast(cs_net_profit#17 as decimal(12,2))), avg(cast(c_birth_year#18 as decimal(12,2))), avg(cast(cd_dep_count#19 as decimal(12,2)))]) - +- Exchange hashpartitioning(i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12, 5) - +- *(7) HashAggregate(keys=[i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12], functions=[partial_avg(cast(cs_quantity#13 as decimal(12,2))), partial_avg(cast(cs_list_price#14 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#15 as decimal(12,2))), partial_avg(cast(cs_sales_price#16 as decimal(12,2))), partial_avg(cast(cs_net_profit#17 as decimal(12,2))), partial_avg(cast(c_birth_year#18 as decimal(12,2))), partial_avg(cast(cd_dep_count#19 as decimal(12,2)))]) - +- *(7) Expand [List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, ca_country#21, ca_state#22, ca_county#23, 0), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, ca_country#21, ca_state#22, null, 1), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, ca_country#21, null, null, 3), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, null, null, null, 7), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, null, null, null, null, 15)], [cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12] - +- *(7) Project [cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#24 AS i_item_id#20, ca_country#25 AS ca_country#21, ca_state#26 AS ca_state#22, ca_county#27 AS ca_county#23] - +- *(7) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#29], Inner, BuildRight - :- *(7) Project [cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, ca_county#27, ca_state#26, ca_country#25] - : +- *(7) BroadcastHashJoin [cs_sold_date_sk#30], [d_date_sk#31], Inner, BuildRight - : :- *(7) Project [cs_sold_date_sk#30, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, ca_county#27, ca_state#26, ca_country#25] - : : +- *(7) BroadcastHashJoin [c_current_addr_sk#32], [ca_address_sk#33], Inner, BuildRight - : : :- *(7) Project [cs_sold_date_sk#30, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_current_addr_sk#32, c_birth_year#18] - : : : +- *(7) BroadcastHashJoin [c_current_cdemo_sk#34], [cd_demo_sk#35], Inner, BuildRight - : : : :- *(7) Project [cs_sold_date_sk#30, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_current_cdemo_sk#34, c_current_addr_sk#32, c_birth_year#18] - : : : : +- *(7) BroadcastHashJoin [cs_bill_customer_sk#36], [c_customer_sk#37], Inner, BuildRight - : : : : :- *(7) Project [cs_sold_date_sk#30, cs_bill_customer_sk#36, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19] - : : : : : +- *(7) BroadcastHashJoin [cs_bill_cdemo_sk#38], [cd_demo_sk#39], Inner, BuildRight - : : : : : :- *(7) Project [cs_sold_date_sk#30, cs_bill_customer_sk#36, cs_bill_cdemo_sk#38, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17] - : : : : : : +- *(7) Filter (((isnotnull(cs_bill_cdemo_sk#38) && isnotnull(cs_bill_customer_sk#36)) && isnotnull(cs_sold_date_sk#30)) && isnotnull(cs_item_sk#28)) - : : : : : : +- *(7) FileScan parquet default.catalog_sales[cs_sold_date_sk#30,cs_bill_customer_sk#36,cs_bill_cdemo_sk#38,cs_item_sk#28,cs_quantity#13,cs_list_price#14,cs_sales_price#16,cs_coupon_amt#15,cs_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk), IsNotNu..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [c_customer_sk#37, c_current_cdemo_sk#34, c_current_addr_sk#32, c_birth_year#18] - : : : : +- *(2) Filter (((c_birth_month#42 IN (1,6,8,9,12,2) && isnotnull(c_customer_sk#37)) && isnotnull(c_current_cdemo_sk#34)) && isnotnull(c_current_addr_sk#32)) - : : : : +- *(2) FileScan parquet default.customer[c_customer_sk#37,c_current_cdemo_sk#34,c_current_addr_sk#32,c_birth_month#42,c_birth_year#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [In(c_birth_month, [1,6,8,9,12,2]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNo..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(4) Project [ca_address_sk#33, ca_county#27, ca_state#26, ca_country#25] - : : +- *(4) Filter (ca_state#26 IN (MS,IN,ND,OK,NM,VA) && isnotnull(ca_address_sk#33)) - : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#33,ca_county#27,ca_state#26,ca_country#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [In(ca_state, [MS,IN,ND,OK,NM,VA]), IsNotNull(ca_address_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(5) Project [d_date_sk#31] - : +- *(5) Filter ((isnotnull(d_year#43) && (d_year#43 = 1998)) && isnotnull(d_date_sk#31)) - : +- *(5) FileScan parquet default.date_dim[d_date_sk#31,d_year#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(6) Project [i_item_sk#29, i_item_id#24] - +- *(6) Filter isnotnull(i_item_sk#29) - +- *(6) FileScan parquet default.item[i_item_sk#29,i_item_id#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/simplified.txt deleted file mode 100644 index 5a4e9d503..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/simplified.txt +++ /dev/null @@ -1,59 +0,0 @@ -TakeOrderedAndProject [agg1,agg2,agg3,agg4,agg5,agg6,agg7,ca_country,ca_county,ca_state,i_item_id] - WholeStageCodegen - HashAggregate [avg(cast(c_birth_year as decimal(12,2))),avg(cast(cd_dep_count as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),avg(cast(cs_list_price as decimal(12,2))),avg(cast(cs_net_profit as decimal(12,2))),avg(cast(cs_quantity as decimal(12,2))),avg(cast(cs_sales_price as decimal(12,2))),ca_country,ca_county,ca_state,count,count,count,count,count,count,count,i_item_id,spark_grouping_id,sum,sum,sum,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,agg5,agg6,agg7,avg(cast(c_birth_year as decimal(12,2))),avg(cast(cd_dep_count as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),avg(cast(cs_list_price as decimal(12,2))),avg(cast(cs_net_profit as decimal(12,2))),avg(cast(cs_quantity as decimal(12,2))),avg(cast(cs_sales_price as decimal(12,2))),count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum] - InputAdapter - Exchange [ca_country,ca_county,ca_state,i_item_id,spark_grouping_id] #1 - WholeStageCodegen - HashAggregate [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id,spark_grouping_id,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Expand [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] - Project [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_birth_year,c_current_addr_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] - BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] - Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] - BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] - BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] - Project [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] - Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [cd_demo_sk,cd_dep_count] - Filter [cd_demo_sk,cd_education_status,cd_gender] - Scan parquet default.customer_demographics [cd_demo_sk,cd_dep_count,cd_education_status,cd_gender] [cd_demo_sk,cd_dep_count,cd_education_status,cd_gender] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] - Filter [c_birth_month,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] - Scan parquet default.customer [c_birth_month,c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] [c_birth_month,c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [cd_demo_sk] - Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_demo_sk] [cd_demo_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [ca_address_sk,ca_country,ca_county,ca_state] - Filter [ca_address_sk,ca_state] - Scan parquet default.customer_address [ca_address_sk,ca_country,ca_county,ca_state] [ca_address_sk,ca_country,ca_county,ca_state] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/explain.txt deleted file mode 100644 index a3cb20be7..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/explain.txt +++ /dev/null @@ -1,38 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[ext_price#1 DESC NULLS LAST,brand#2 ASC NULLS FIRST,brand_id#3 ASC NULLS FIRST,i_manufact_id#4 ASC NULLS FIRST,i_manufact#5 ASC NULLS FIRST], output=[brand_id#3,brand#2,i_manufact_id#4,i_manufact#5,ext_price#1]) -+- *(7) HashAggregate(keys=[i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5], functions=[sum(UnscaledValue(ss_ext_sales_price#8))]) - +- Exchange hashpartitioning(i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5, 5) - +- *(6) HashAggregate(keys=[i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#8))]) - +- *(6) Project [ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5] - +- *(6) BroadcastHashJoin [ss_store_sk#9], [s_store_sk#10], Inner, BuildRight, NOT (substring(ca_zip#11, 1, 5) = substring(s_zip#12, 1, 5)) - :- *(6) Project [ss_store_sk#9, ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5, ca_zip#11] - : +- *(6) BroadcastHashJoin [c_current_addr_sk#13], [ca_address_sk#14], Inner, BuildRight - : :- *(6) Project [ss_store_sk#9, ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5, c_current_addr_sk#13] - : : +- *(6) BroadcastHashJoin [ss_customer_sk#15], [c_customer_sk#16], Inner, BuildRight - : : :- *(6) Project [ss_customer_sk#15, ss_store_sk#9, ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5] - : : : +- *(6) BroadcastHashJoin [ss_item_sk#17], [i_item_sk#18], Inner, BuildRight - : : : :- *(6) Project [ss_item_sk#17, ss_customer_sk#15, ss_store_sk#9, ss_ext_sales_price#8] - : : : : +- *(6) BroadcastHashJoin [d_date_sk#19], [ss_sold_date_sk#20], Inner, BuildRight - : : : : :- *(6) Project [d_date_sk#19] - : : : : : +- *(6) Filter ((((isnotnull(d_moy#21) && isnotnull(d_year#22)) && (d_moy#21 = 11)) && (d_year#22 = 1998)) && isnotnull(d_date_sk#19)) - : : : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#19,d_year#22,d_moy#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(1) Project [ss_sold_date_sk#20, ss_item_sk#17, ss_customer_sk#15, ss_store_sk#9, ss_ext_sales_price#8] - : : : : +- *(1) Filter (((isnotnull(ss_sold_date_sk#20) && isnotnull(ss_item_sk#17)) && isnotnull(ss_customer_sk#15)) && isnotnull(ss_store_sk#9)) - : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#20,ss_item_sk#17,ss_customer_sk#15,ss_store_sk#9,ss_ext_sales_price#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [ca_address_sk#14, ca_zip#11] - : +- *(4) Filter (isnotnull(ca_address_sk#14) && isnotnull(ca_zip#11)) - : +- *(4) FileScan parquet default.customer_address[ca_address_sk#14,ca_zip#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(5) Project [s_store_sk#10, s_zip#12] - +- *(5) Filter (isnotnull(s_zip#12) && isnotnull(s_store_sk#10)) - +- *(5) FileScan parquet default.store[s_store_sk#10,s_zip#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/simplified.txt deleted file mode 100644 index fb69c76b7..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/simplified.txt +++ /dev/null @@ -1,50 +0,0 @@ -TakeOrderedAndProject [brand,brand_id,ext_price,i_manufact,i_manufact_id] - WholeStageCodegen - HashAggregate [i_brand,i_brand_id,i_manufact,i_manufact_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] - InputAdapter - Exchange [i_brand,i_brand_id,i_manufact,i_manufact_id] #1 - WholeStageCodegen - HashAggregate [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price] - BroadcastHashJoin [ca_zip,s_store_sk,s_zip,ss_store_sk] - Project [ca_zip,i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price,ss_store_sk] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price,ss_store_sk] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_customer_sk,ss_ext_sales_price,ss_store_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] - Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [i_brand,i_brand_id,i_item_sk,i_manufact,i_manufact_id] - Filter [i_item_sk,i_manager_id] - Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id,i_manufact,i_manufact_id] [i_brand,i_brand_id,i_item_sk,i_manager_id,i_manufact,i_manufact_id] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [c_current_addr_sk,c_customer_sk] - Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [ca_address_sk,ca_zip] - Filter [ca_address_sk,ca_zip] - Scan parquet default.customer_address [ca_address_sk,ca_zip] [ca_address_sk,ca_zip] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [s_store_sk,s_zip] - Filter [s_store_sk,s_zip] - Scan parquet default.store [s_store_sk,s_zip] [s_store_sk,s_zip] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/explain.txt deleted file mode 100644 index 27134fbb9..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/explain.txt +++ /dev/null @@ -1,36 +0,0 @@ -== Physical Plan == -*(13) Sort [d_week_seq1#1 ASC NULLS FIRST], true, 0 -+- Exchange rangepartitioning(d_week_seq1#1 ASC NULLS FIRST, 5) - +- *(12) Project [d_week_seq1#1, round(CheckOverflow((promote_precision(sun_sales1#2) / promote_precision(sun_sales2#3)), DecimalType(37,20)), 2) AS round((sun_sales1 / sun_sales2), 2)#4, round(CheckOverflow((promote_precision(mon_sales1#5) / promote_precision(mon_sales2#6)), DecimalType(37,20)), 2) AS round((mon_sales1 / mon_sales2), 2)#7, round(CheckOverflow((promote_precision(tue_sales1#8) / promote_precision(tue_sales2#9)), DecimalType(37,20)), 2) AS round((tue_sales1 / tue_sales2), 2)#10, round(CheckOverflow((promote_precision(wed_sales1#11) / promote_precision(wed_sales2#12)), DecimalType(37,20)), 2) AS round((wed_sales1 / wed_sales2), 2)#13, round(CheckOverflow((promote_precision(thu_sales1#14) / promote_precision(thu_sales2#15)), DecimalType(37,20)), 2) AS round((thu_sales1 / thu_sales2), 2)#16, round(CheckOverflow((promote_precision(fri_sales1#17) / promote_precision(fri_sales2#18)), DecimalType(37,20)), 2) AS round((fri_sales1 / fri_sales2), 2)#19, round(CheckOverflow((promote_precision(sat_sales1#20) / promote_precision(sat_sales2#21)), DecimalType(37,20)), 2) AS round((sat_sales1 / sat_sales2), 2)#22] - +- *(12) BroadcastHashJoin [d_week_seq1#1], [(d_week_seq2#23 - 53)], Inner, BuildRight - :- *(12) Project [d_week_seq#24 AS d_week_seq1#1, sun_sales#25 AS sun_sales1#2, mon_sales#26 AS mon_sales1#5, tue_sales#27 AS tue_sales1#8, wed_sales#28 AS wed_sales1#11, thu_sales#29 AS thu_sales1#14, fri_sales#30 AS fri_sales1#17, sat_sales#31 AS sat_sales1#20] - : +- *(12) BroadcastHashJoin [d_week_seq#24], [d_week_seq#32], Inner, BuildRight - : :- *(12) HashAggregate(keys=[d_week_seq#24], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#33 = Sunday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Monday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Tuesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Wednesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Thursday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Friday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Saturday) THEN sales_price#34 ELSE null END))]) - : : +- Exchange hashpartitioning(d_week_seq#24, 5) - : : +- *(4) HashAggregate(keys=[d_week_seq#24], functions=[partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Sunday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Monday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Tuesday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Wednesday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Thursday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Friday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Saturday) THEN sales_price#34 ELSE null END))]) - : : +- *(4) Project [sales_price#34, d_week_seq#24, d_day_name#33] - : : +- *(4) BroadcastHashJoin [sold_date_sk#35], [d_date_sk#36], Inner, BuildRight - : : :- Union - : : : :- *(1) Project [ws_sold_date_sk#37 AS sold_date_sk#35, ws_ext_sales_price#38 AS sales_price#34] - : : : : +- *(1) Filter isnotnull(ws_sold_date_sk#37) - : : : : +- *(1) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_ext_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : : +- *(2) Project [cs_sold_date_sk#39 AS sold_date_sk#40, cs_ext_sales_price#41 AS sales_price#42] - : : : +- *(2) Filter isnotnull(cs_sold_date_sk#39) - : : : +- *(2) FileScan parquet default.catalog_sales[cs_sold_date_sk#39,cs_ext_sales_price#41] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [d_date_sk#36, d_week_seq#24, d_day_name#33] - : : +- *(3) Filter (isnotnull(d_date_sk#36) && isnotnull(d_week_seq#24)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#36,d_week_seq#24,d_day_name#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(5) Project [d_week_seq#32] - : +- *(5) Filter ((isnotnull(d_year#43) && (d_year#43 = 2001)) && isnotnull(d_week_seq#32)) - : +- *(5) FileScan parquet default.date_dim[d_week_seq#32,d_year#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_week_seq)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint))) - +- *(11) Project [d_week_seq#24 AS d_week_seq2#23, sun_sales#25 AS sun_sales2#3, mon_sales#26 AS mon_sales2#6, tue_sales#27 AS tue_sales2#9, wed_sales#28 AS wed_sales2#12, thu_sales#29 AS thu_sales2#15, fri_sales#30 AS fri_sales2#18, sat_sales#31 AS sat_sales2#21] - +- *(11) BroadcastHashJoin [d_week_seq#24], [d_week_seq#44], Inner, BuildRight - :- *(11) HashAggregate(keys=[d_week_seq#24], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#33 = Sunday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Monday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Tuesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Wednesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Thursday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Friday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Saturday) THEN sales_price#34 ELSE null END))]) - : +- ReusedExchange [d_week_seq#24, sum#45, sum#46, sum#47, sum#48, sum#49, sum#50, sum#51], Exchange hashpartitioning(d_week_seq#24, 5) - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(10) Project [d_week_seq#44] - +- *(10) Filter ((isnotnull(d_year#52) && (d_year#52 = 2002)) && isnotnull(d_week_seq#44)) - +- *(10) FileScan parquet default.date_dim[d_week_seq#44,d_year#52] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/simplified.txt deleted file mode 100644 index 7e604b292..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/simplified.txt +++ /dev/null @@ -1,52 +0,0 @@ -WholeStageCodegen - Sort [d_week_seq1] - InputAdapter - Exchange [d_week_seq1] #1 - WholeStageCodegen - Project [d_week_seq1,fri_sales1,fri_sales2,mon_sales1,mon_sales2,sat_sales1,sat_sales2,sun_sales1,sun_sales2,thu_sales1,thu_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2] - BroadcastHashJoin [d_week_seq1,d_week_seq2] - Project [d_week_seq,fri_sales,mon_sales,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] - BroadcastHashJoin [d_week_seq,d_week_seq] - HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] - InputAdapter - Exchange [d_week_seq] #2 - WholeStageCodegen - HashAggregate [d_day_name,d_week_seq,sales_price,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [d_day_name,d_week_seq,sales_price] - BroadcastHashJoin [d_date_sk,sold_date_sk] - InputAdapter - Union - WholeStageCodegen - Project [ws_ext_sales_price,ws_sold_date_sk] - Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_sold_date_sk] [ws_ext_sales_price,ws_sold_date_sk] - WholeStageCodegen - Project [cs_ext_sales_price,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ext_sales_price,cs_sold_date_sk] [cs_ext_sales_price,cs_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk,d_day_name,d_week_seq] - Filter [d_date_sk,d_week_seq] - Scan parquet default.date_dim [d_date_sk,d_day_name,d_week_seq] [d_date_sk,d_day_name,d_week_seq] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_week_seq] - Filter [d_week_seq,d_year] - Scan parquet default.date_dim [d_week_seq,d_year] [d_week_seq,d_year] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_week_seq,fri_sales,mon_sales,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] - BroadcastHashJoin [d_week_seq,d_week_seq] - HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] - InputAdapter - ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [d_week_seq] - Filter [d_week_seq,d_year] - Scan parquet default.date_dim [d_week_seq,d_year] [d_week_seq,d_year] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/explain.txt deleted file mode 100644 index f0c1bc89c..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/explain.txt +++ /dev/null @@ -1,24 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_category#1 ASC NULLS FIRST,i_class#2 ASC NULLS FIRST,i_item_id#3 ASC NULLS FIRST,i_item_desc#4 ASC NULLS FIRST,revenueratio#5 ASC NULLS FIRST], output=[i_item_desc#4,i_category#1,i_class#2,i_current_price#6,itemrevenue#7,revenueratio#5]) -+- *(6) Project [i_item_desc#4, i_category#1, i_class#2, i_current_price#6, itemrevenue#7, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#8) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#9)), DecimalType(38,17)) AS revenueratio#5, i_item_id#3] - +- Window [sum(_w1#10) windowspecdefinition(i_class#2, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#9], [i_class#2] - +- *(5) Sort [i_class#2 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(i_class#2, 5) - +- *(4) HashAggregate(keys=[i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6], functions=[sum(UnscaledValue(cs_ext_sales_price#11))]) - +- Exchange hashpartitioning(i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6, 5) - +- *(3) HashAggregate(keys=[i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#11))]) - +- *(3) Project [cs_ext_sales_price#11, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] - +- *(3) BroadcastHashJoin [cs_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight - :- *(3) Project [cs_sold_date_sk#12, cs_ext_sales_price#11, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] - : +- *(3) BroadcastHashJoin [cs_item_sk#14], [i_item_sk#15], Inner, BuildRight - : :- *(3) Project [cs_sold_date_sk#12, cs_item_sk#14, cs_ext_sales_price#11] - : : +- *(3) Filter (isnotnull(cs_item_sk#14) && isnotnull(cs_sold_date_sk#12)) - : : +- *(3) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_item_sk#14,cs_ext_sales_price#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [i_item_sk#15, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] - : +- *(1) Filter (i_category#1 IN (Sports,Books,Home) && isnotnull(i_item_sk#15)) - : +- *(1) FileScan parquet default.item[i_item_sk#15,i_item_id#3,i_item_desc#4,i_current_price#6,i_class#2,i_category#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)], ReadSchema: struct= 10644)) && (d_date#16 <= 10674)) && isnotnull(d_date_sk#13)) - +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/simplified.txt deleted file mode 100644 index a6a8b7457..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/simplified.txt +++ /dev/null @@ -1,34 +0,0 @@ -TakeOrderedAndProject [i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue,revenueratio] - WholeStageCodegen - Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] - InputAdapter - Window [_w1,i_class] - WholeStageCodegen - Sort [i_class] - InputAdapter - Exchange [i_class] #1 - WholeStageCodegen - HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(cs_ext_sales_price))] - InputAdapter - Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #2 - WholeStageCodegen - HashAggregate [cs_ext_sales_price,i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum] [sum,sum] - Project [cs_ext_sales_price,i_category,i_class,i_current_price,i_item_desc,i_item_id] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_ext_sales_price,cs_sold_date_sk,i_category,i_class,i_current_price,i_item_desc,i_item_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] - Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] - Filter [i_category,i_item_sk] - Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/explain.txt deleted file mode 100644 index 7c1b02f29..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/explain.txt +++ /dev/null @@ -1,27 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[w_warehouse_name#1 ASC NULLS FIRST,i_item_id#2 ASC NULLS FIRST], output=[w_warehouse_name#1,i_item_id#2,inv_before#3,inv_after#4]) -+- *(5) Filter ((CASE WHEN (inv_before#3 > 0) THEN (cast(inv_after#4 as double) / cast(inv_before#3 as double)) ELSE null END >= 0.666667) && (CASE WHEN (inv_before#3 > 0) THEN (cast(inv_after#4 as double) / cast(inv_before#3 as double)) ELSE null END <= 1.5)) - +- *(5) HashAggregate(keys=[w_warehouse_name#1, i_item_id#2], functions=[sum(cast(CASE WHEN (d_date#5 < 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint)), sum(cast(CASE WHEN (d_date#5 >= 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint))]) - +- Exchange hashpartitioning(w_warehouse_name#1, i_item_id#2, 5) - +- *(4) HashAggregate(keys=[w_warehouse_name#1, i_item_id#2], functions=[partial_sum(cast(CASE WHEN (d_date#5 < 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (d_date#5 >= 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint))]) - +- *(4) Project [inv_quantity_on_hand#6, w_warehouse_name#1, i_item_id#2, d_date#5] - +- *(4) BroadcastHashJoin [inv_date_sk#7], [d_date_sk#8], Inner, BuildRight - :- *(4) Project [inv_date_sk#7, inv_quantity_on_hand#6, w_warehouse_name#1, i_item_id#2] - : +- *(4) BroadcastHashJoin [inv_item_sk#9], [i_item_sk#10], Inner, BuildRight - : :- *(4) Project [inv_date_sk#7, inv_item_sk#9, inv_quantity_on_hand#6, w_warehouse_name#1] - : : +- *(4) BroadcastHashJoin [inv_warehouse_sk#11], [w_warehouse_sk#12], Inner, BuildRight - : : :- *(4) Project [inv_date_sk#7, inv_item_sk#9, inv_warehouse_sk#11, inv_quantity_on_hand#6] - : : : +- *(4) Filter ((isnotnull(inv_warehouse_sk#11) && isnotnull(inv_item_sk#9)) && isnotnull(inv_date_sk#7)) - : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#7,inv_item_sk#9,inv_warehouse_sk#11,inv_quantity_on_hand#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_warehouse_sk), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [w_warehouse_sk#12, w_warehouse_name#1] - : : +- *(1) Filter isnotnull(w_warehouse_sk#12) - : : +- *(1) FileScan parquet default.warehouse[w_warehouse_sk#12,w_warehouse_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [i_item_sk#10, i_item_id#2] - : +- *(2) Filter (((isnotnull(i_current_price#13) && (i_current_price#13 >= 0.99)) && (i_current_price#13 <= 1.49)) && isnotnull(i_item_sk#10)) - : +- *(2) FileScan parquet default.item[i_item_sk#10,i_item_id#2,i_current_price#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [d_date_sk#8, d_date#5] - +- *(3) Filter (((isnotnull(d_date#5) && (d_date#5 >= 10997)) && (d_date#5 <= 11057)) && isnotnull(d_date_sk#8)) - +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_date#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/simplified.txt deleted file mode 100644 index 472388330..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/simplified.txt +++ /dev/null @@ -1,35 +0,0 @@ -TakeOrderedAndProject [i_item_id,inv_after,inv_before,w_warehouse_name] - WholeStageCodegen - Filter [inv_after,inv_before] - HashAggregate [i_item_id,sum,sum,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),w_warehouse_name] [inv_after,inv_before,sum,sum,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint))] - InputAdapter - Exchange [i_item_id,w_warehouse_name] #1 - WholeStageCodegen - HashAggregate [d_date,i_item_id,inv_quantity_on_hand,sum,sum,sum,sum,w_warehouse_name] [sum,sum,sum,sum] - Project [d_date,i_item_id,inv_quantity_on_hand,w_warehouse_name] - BroadcastHashJoin [d_date_sk,inv_date_sk] - Project [i_item_id,inv_date_sk,inv_quantity_on_hand,w_warehouse_name] - BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] - BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [w_warehouse_name,w_warehouse_sk] - Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [i_item_id,i_item_sk] - Filter [i_current_price,i_item_sk] - Scan parquet default.item [i_current_price,i_item_id,i_item_sk] [i_current_price,i_item_id,i_item_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date,d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/explain.txt deleted file mode 100644 index 171688e3a..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/explain.txt +++ /dev/null @@ -1,27 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[qoh#1 ASC NULLS FIRST,i_product_name#2 ASC NULLS FIRST,i_brand#3 ASC NULLS FIRST,i_class#4 ASC NULLS FIRST,i_category#5 ASC NULLS FIRST], output=[i_product_name#2,i_brand#3,i_class#4,i_category#5,qoh#1]) -+- *(5) HashAggregate(keys=[i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6], functions=[avg(cast(inv_quantity_on_hand#7 as bigint))]) - +- Exchange hashpartitioning(i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6, 5) - +- *(4) HashAggregate(keys=[i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6], functions=[partial_avg(cast(inv_quantity_on_hand#7 as bigint))]) - +- *(4) Expand [List(inv_quantity_on_hand#7, i_product_name#8, i_brand#9, i_class#10, i_category#11, 0), List(inv_quantity_on_hand#7, i_product_name#8, i_brand#9, i_class#10, null, 1), List(inv_quantity_on_hand#7, i_product_name#8, i_brand#9, null, null, 3), List(inv_quantity_on_hand#7, i_product_name#8, null, null, null, 7), List(inv_quantity_on_hand#7, null, null, null, null, 15)], [inv_quantity_on_hand#7, i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6] - +- *(4) Project [inv_quantity_on_hand#7, i_product_name#12 AS i_product_name#8, i_brand#13 AS i_brand#9, i_class#14 AS i_class#10, i_category#15 AS i_category#11] - +- *(4) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#17], Inner, BuildRight - :- *(4) Project [inv_warehouse_sk#16, inv_quantity_on_hand#7, i_brand#13, i_class#14, i_category#15, i_product_name#12] - : +- *(4) BroadcastHashJoin [inv_item_sk#18], [i_item_sk#19], Inner, BuildRight - : :- *(4) Project [inv_item_sk#18, inv_warehouse_sk#16, inv_quantity_on_hand#7] - : : +- *(4) BroadcastHashJoin [inv_date_sk#20], [d_date_sk#21], Inner, BuildRight - : : :- *(4) Project [inv_date_sk#20, inv_item_sk#18, inv_warehouse_sk#16, inv_quantity_on_hand#7] - : : : +- *(4) Filter ((isnotnull(inv_date_sk#20) && isnotnull(inv_item_sk#18)) && isnotnull(inv_warehouse_sk#16)) - : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#20,inv_item_sk#18,inv_warehouse_sk#16,inv_quantity_on_hand#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_date_sk), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [d_date_sk#21] - : : +- *(1) Filter (((isnotnull(d_month_seq#22) && (d_month_seq#22 >= 1200)) && (d_month_seq#22 <= 1211)) && isnotnull(d_date_sk#21)) - : : +- *(1) FileScan parquet default.date_dim[d_date_sk#21,d_month_seq#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [i_item_sk#19, i_brand#13, i_class#14, i_category#15, i_product_name#12] - : +- *(2) Filter isnotnull(i_item_sk#19) - : +- *(2) FileScan parquet default.item[i_item_sk#19,i_brand#13,i_class#14,i_category#15,i_product_name#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [w_warehouse_sk#17] - +- *(3) Filter isnotnull(w_warehouse_sk#17) - +- *(3) FileScan parquet default.warehouse[w_warehouse_sk#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/simplified.txt deleted file mode 100644 index d10f9d4ef..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/simplified.txt +++ /dev/null @@ -1,35 +0,0 @@ -TakeOrderedAndProject [i_brand,i_category,i_class,i_product_name,qoh] - WholeStageCodegen - HashAggregate [avg(cast(inv_quantity_on_hand as bigint)),count,i_brand,i_category,i_class,i_product_name,spark_grouping_id,sum] [avg(cast(inv_quantity_on_hand as bigint)),count,qoh,sum] - InputAdapter - Exchange [i_brand,i_category,i_class,i_product_name,spark_grouping_id] #1 - WholeStageCodegen - HashAggregate [count,count,i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand,spark_grouping_id,sum,sum] [count,count,sum,sum] - Expand [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] - Project [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] - BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand,inv_warehouse_sk] - BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - BroadcastHashJoin [d_date_sk,inv_date_sk] - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [i_brand,i_category,i_class,i_item_sk,i_product_name] - Filter [i_item_sk] - Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_product_name] [i_brand,i_category,i_class,i_item_sk,i_product_name] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [w_warehouse_sk] - Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_sk] [w_warehouse_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/explain.txt deleted file mode 100644 index cd1cae198..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/explain.txt +++ /dev/null @@ -1,89 +0,0 @@ -== Physical Plan == -CollectLimit 100 -+- *(20) HashAggregate(keys=[], functions=[sum(sales#1)]) - +- Exchange SinglePartition - +- *(19) HashAggregate(keys=[], functions=[partial_sum(sales#1)]) - +- Union - :- *(9) Project [CheckOverflow((promote_precision(cast(cast(cs_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#3 as decimal(12,2)))), DecimalType(18,2)) AS sales#1] - : +- *(9) BroadcastHashJoin [cs_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight - : :- *(9) Project [cs_sold_date_sk#4, cs_quantity#2, cs_list_price#3] - : : +- *(9) BroadcastHashJoin [cs_bill_customer_sk#6], [c_customer_sk#7], LeftSemi, BuildRight - : : :- *(9) Project [cs_sold_date_sk#4, cs_bill_customer_sk#6, cs_quantity#2, cs_list_price#3] - : : : +- *(9) BroadcastHashJoin [cs_item_sk#8], [item_sk#9], LeftSemi, BuildRight - : : : :- *(9) Project [cs_sold_date_sk#4, cs_bill_customer_sk#6, cs_item_sk#8, cs_quantity#2, cs_list_price#3] - : : : : +- *(9) Filter isnotnull(cs_sold_date_sk#4) - : : : : +- *(9) FileScan parquet default.catalog_sales[cs_sold_date_sk#4,cs_bill_customer_sk#6,cs_item_sk#8,cs_quantity#2,cs_list_price#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct 4) - : : : +- *(4) HashAggregate(keys=[substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14], functions=[count(1)]) - : : : +- Exchange hashpartitioning(substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14, 5) - : : : +- *(3) HashAggregate(keys=[substring(i_item_desc#11, 1, 30) AS substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14], functions=[partial_count(1)]) - : : : +- *(3) Project [d_date#14, i_item_sk#13, i_item_desc#11] - : : : +- *(3) BroadcastHashJoin [ss_item_sk#15], [i_item_sk#13], Inner, BuildRight - : : : :- *(3) Project [ss_item_sk#15, d_date#14] - : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#5], Inner, BuildRight - : : : : :- *(3) Project [ss_sold_date_sk#16, ss_item_sk#15] - : : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#16) && isnotnull(ss_item_sk#15)) - : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_item_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(1) Project [d_date_sk#5, d_date#14] - : : : : +- *(1) Filter (d_year#17 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#5)) - : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#5,d_date#14,d_year#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [i_item_sk#13, i_item_desc#11] - : : : +- *(2) Filter isnotnull(i_item_sk#13) - : : : +- *(2) FileScan parquet default.item[i_item_sk#13,i_item_desc#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(7) Project [c_customer_sk#7] - : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery3052 as decimal(32,6)))), DecimalType(38,8)))) - : : : +- Subquery subquery3052 - : : : +- *(5) HashAggregate(keys=[], functions=[max(csales#21)]) - : : : +- Exchange SinglePartition - : : : +- *(4) HashAggregate(keys=[], functions=[partial_max(csales#21)]) - : : : +- *(4) HashAggregate(keys=[c_customer_sk#7], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) - : : : +- Exchange hashpartitioning(c_customer_sk#7, 5) - : : : +- *(3) HashAggregate(keys=[c_customer_sk#7], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) - : : : +- *(3) Project [ss_quantity#18, ss_sales_price#19, c_customer_sk#7] - : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#5], Inner, BuildRight - : : : :- *(3) Project [ss_sold_date_sk#16, ss_quantity#18, ss_sales_price#19, c_customer_sk#7] - : : : : +- *(3) BroadcastHashJoin [ss_customer_sk#22], [c_customer_sk#7], Inner, BuildRight - : : : : :- *(3) Project [ss_sold_date_sk#16, ss_customer_sk#22, ss_quantity#18, ss_sales_price#19] - : : : : : +- *(3) Filter (isnotnull(ss_customer_sk#22) && isnotnull(ss_sold_date_sk#16)) - : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_customer_sk#22,ss_quantity#18,ss_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(1) Project [c_customer_sk#7] - : : : : +- *(1) Filter isnotnull(c_customer_sk#7) - : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [d_date_sk#5] - : : : +- *(2) Filter (d_year#17 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#5)) - : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#5,d_year#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct - : : +- *(7) HashAggregate(keys=[c_customer_sk#7], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) - : : +- Exchange hashpartitioning(c_customer_sk#7, 5) - : : +- *(6) HashAggregate(keys=[c_customer_sk#7], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) - : : +- *(6) Project [ss_quantity#18, ss_sales_price#19, c_customer_sk#7] - : : +- *(6) BroadcastHashJoin [ss_customer_sk#22], [c_customer_sk#7], Inner, BuildRight - : : :- *(6) Project [ss_customer_sk#22, ss_quantity#18, ss_sales_price#19] - : : : +- *(6) Filter isnotnull(ss_customer_sk#22) - : : : +- *(6) FileScan parquet default.store_sales[ss_customer_sk#22,ss_quantity#18,ss_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(5) Project [c_customer_sk#7] - : : +- *(5) Filter isnotnull(c_customer_sk#7) - : : +- *(5) FileScan parquet default.customer[c_customer_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(8) Project [d_date_sk#5] - : +- *(8) Filter ((((isnotnull(d_year#17) && isnotnull(d_moy#23)) && (d_year#17 = 2000)) && (d_moy#23 = 2)) && isnotnull(d_date_sk#5)) - : +- *(8) FileScan parquet default.date_dim[d_date_sk#5,d_year#17,d_moy#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct - +- *(18) Project [CheckOverflow((promote_precision(cast(cast(ws_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#25 as decimal(12,2)))), DecimalType(18,2)) AS sales#26] - +- *(18) BroadcastHashJoin [ws_sold_date_sk#27], [d_date_sk#5], Inner, BuildRight - :- *(18) Project [ws_sold_date_sk#27, ws_quantity#24, ws_list_price#25] - : +- *(18) BroadcastHashJoin [ws_bill_customer_sk#28], [c_customer_sk#7], LeftSemi, BuildRight - : :- *(18) Project [ws_sold_date_sk#27, ws_bill_customer_sk#28, ws_quantity#24, ws_list_price#25] - : : +- *(18) BroadcastHashJoin [ws_item_sk#29], [item_sk#9], LeftSemi, BuildRight - : : :- *(18) Project [ws_sold_date_sk#27, ws_item_sk#29, ws_bill_customer_sk#28, ws_quantity#24, ws_list_price#25] - : : : +- *(18) Filter isnotnull(ws_sold_date_sk#27) - : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#27,ws_item_sk#29,ws_bill_customer_sk#28,ws_quantity#24,ws_list_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct 4) - : : : : +- *(4) HashAggregate(keys=[substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17], functions=[count(1)]) - : : : : +- Exchange hashpartitioning(substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17, 5) - : : : : +- *(3) HashAggregate(keys=[substring(i_item_desc#14, 1, 30) AS substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17], functions=[partial_count(1)]) - : : : : +- *(3) Project [d_date#17, i_item_sk#16, i_item_desc#14] - : : : : +- *(3) BroadcastHashJoin [ss_item_sk#18], [i_item_sk#16], Inner, BuildRight - : : : : :- *(3) Project [ss_item_sk#18, d_date#17] - : : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#7], Inner, BuildRight - : : : : : :- *(3) Project [ss_sold_date_sk#19, ss_item_sk#18] - : : : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#19) && isnotnull(ss_item_sk#18)) - : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(1) Project [d_date_sk#7, d_date#17] - : : : : : +- *(1) Filter (d_year#20 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#7)) - : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#7,d_date#17,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [i_item_sk#16, i_item_desc#14] - : : : : +- *(2) Filter isnotnull(i_item_sk#16) - : : : : +- *(2) FileScan parquet default.item[i_item_sk#16,i_item_desc#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(7) Project [c_customer_sk#9 AS c_customer_sk#9#10] - : : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery3144 as decimal(32,6)))), DecimalType(38,8)))) - : : : : +- Subquery subquery3144 - : : : : +- *(5) HashAggregate(keys=[], functions=[max(csales#24)]) - : : : : +- Exchange SinglePartition - : : : : +- *(4) HashAggregate(keys=[], functions=[partial_max(csales#24)]) - : : : : +- *(4) HashAggregate(keys=[c_customer_sk#9], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) - : : : : +- Exchange hashpartitioning(c_customer_sk#9, 5) - : : : : +- *(3) HashAggregate(keys=[c_customer_sk#9], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) - : : : : +- *(3) Project [ss_quantity#21, ss_sales_price#22, c_customer_sk#9] - : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#7], Inner, BuildRight - : : : : :- *(3) Project [ss_sold_date_sk#19, ss_quantity#21, ss_sales_price#22, c_customer_sk#9] - : : : : : +- *(3) BroadcastHashJoin [ss_customer_sk#25], [c_customer_sk#9], Inner, BuildRight - : : : : : :- *(3) Project [ss_sold_date_sk#19, ss_customer_sk#25, ss_quantity#21, ss_sales_price#22] - : : : : : : +- *(3) Filter (isnotnull(ss_customer_sk#25) && isnotnull(ss_sold_date_sk#19)) - : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_customer_sk#25,ss_quantity#21,ss_sales_price#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(1) Project [c_customer_sk#9] - : : : : : +- *(1) Filter isnotnull(c_customer_sk#9) - : : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [d_date_sk#7] - : : : : +- *(2) Filter (d_year#20 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#7)) - : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct - : : : +- *(7) HashAggregate(keys=[c_customer_sk#9], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) - : : : +- Exchange hashpartitioning(c_customer_sk#9, 5) - : : : +- *(6) HashAggregate(keys=[c_customer_sk#9], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) - : : : +- *(6) Project [ss_quantity#21, ss_sales_price#22, c_customer_sk#9] - : : : +- *(6) BroadcastHashJoin [ss_customer_sk#25], [c_customer_sk#9], Inner, BuildRight - : : : :- *(6) Project [ss_customer_sk#25, ss_quantity#21, ss_sales_price#22] - : : : : +- *(6) Filter isnotnull(ss_customer_sk#25) - : : : : +- *(6) FileScan parquet default.store_sales[ss_customer_sk#25,ss_quantity#21,ss_sales_price#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(5) Project [c_customer_sk#9] - : : : +- *(5) Filter isnotnull(c_customer_sk#9) - : : : +- *(5) FileScan parquet default.customer[c_customer_sk#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(11) BroadcastHashJoin [c_customer_sk#9], [c_customer_sk#9#10], LeftSemi, BuildRight - : : :- *(11) Project [c_customer_sk#9, c_first_name#2, c_last_name#1] - : : : +- *(11) Filter isnotnull(c_customer_sk#9) - : : : +- *(11) FileScan parquet default.customer[c_customer_sk#9,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct - : : +- ReusedExchange [c_customer_sk#9#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(12) Project [d_date_sk#7] - : +- *(12) Filter ((((isnotnull(d_year#20) && isnotnull(d_moy#26)) && (d_year#20 = 2000)) && (d_moy#26 = 2)) && isnotnull(d_date_sk#7)) - : +- *(12) FileScan parquet default.date_dim[d_date_sk#7,d_year#20,d_moy#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct - +- *(28) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#28 as decimal(12,2)))), DecimalType(18,2)))]) - +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, 5) - +- *(27) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#28 as decimal(12,2)))), DecimalType(18,2)))]) - +- *(27) Project [ws_quantity#27, ws_list_price#28, c_first_name#2, c_last_name#1] - +- *(27) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#7], Inner, BuildRight - :- *(27) Project [ws_sold_date_sk#29, ws_quantity#27, ws_list_price#28, c_first_name#2, c_last_name#1] - : +- *(27) BroadcastHashJoin [ws_bill_customer_sk#30], [c_customer_sk#9], Inner, BuildRight - : :- *(27) BroadcastHashJoin [ws_bill_customer_sk#30], [c_customer_sk#9#31], LeftSemi, BuildRight - : : :- *(27) Project [ws_sold_date_sk#29, ws_bill_customer_sk#30, ws_quantity#27, ws_list_price#28] - : : : +- *(27) BroadcastHashJoin [ws_item_sk#32], [item_sk#12], LeftSemi, BuildRight - : : : :- *(27) Project [ws_sold_date_sk#29, ws_item_sk#32, ws_bill_customer_sk#30, ws_quantity#27, ws_list_price#28] - : : : : +- *(27) Filter (isnotnull(ws_bill_customer_sk#30) && isnotnull(ws_sold_date_sk#29)) - : : : : +- *(27) FileScan parquet default.web_sales[ws_sold_date_sk#29,ws_item_sk#32,ws_bill_customer_sk#30,ws_quantity#27,ws_list_price#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct cast(Subquery subquery3246 as decimal(33,8)))) - : +- Subquery subquery3246 - : +- *(8) HashAggregate(keys=[], functions=[avg(netpaid#5)]) - : +- Exchange SinglePartition - : +- *(7) HashAggregate(keys=[], functions=[partial_avg(netpaid#5)]) - : +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) - : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 5) - : +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) - : +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] - : +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight - : :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] - : : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight - : : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] - : : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight - : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] - : : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight - : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] - : : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight - : : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] - : : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) - : : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] - : : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) - : : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] - : : : +- *(3) Filter isnotnull(i_item_sk#22) - : : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) - : +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] - : +- *(5) Filter isnotnull(ca_zip#18) - : +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct - +- *(8) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[sum(netpaid#5)]) - +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, 5) - +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[partial_sum(netpaid#5)]) - +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) - +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 5) - +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) - +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] - +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight - :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] - : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight - : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] - : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight - : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] - : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight - : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] - : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight - : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] - : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) - : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] - : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) - : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] - : : +- *(3) Filter ((isnotnull(i_color#9) && (i_color#9 = pale)) && isnotnull(i_item_sk#22)) - : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale), IsNotNull(i_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) - +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] - +- *(5) Filter isnotnull(ca_zip#18) - +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/simplified.txt deleted file mode 100644 index d8bda2e69..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/simplified.txt +++ /dev/null @@ -1,111 +0,0 @@ -WholeStageCodegen - Project [c_first_name,c_last_name,paid,s_store_name] - Filter [sum(netpaid)] - Subquery #1 - WholeStageCodegen - HashAggregate [avg(netpaid),count,sum] [(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),avg(netpaid),count,sum] - InputAdapter - Exchange #8 - WholeStageCodegen - HashAggregate [count,count,netpaid,sum,sum] [count,count,sum,sum] - HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum,sum(UnscaledValue(ss_net_paid))] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] - InputAdapter - Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #9 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid,sum,sum] [sum,sum] - Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] - BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] - Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [sr_item_sk,sr_ticket_number] - Filter [sr_item_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [s_state,s_store_name,s_store_sk,s_zip] - Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] [s_market_id,s_state,s_store_name,s_store_sk,s_zip] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen - Project [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] - Filter [i_item_sk] - Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [c_birth_country,c_customer_sk,c_first_name,c_last_name] - Filter [c_birth_country,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] [c_birth_country,c_customer_sk,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [ca_country,ca_state,ca_zip] - Filter [ca_zip] - Scan parquet default.customer_address [ca_country,ca_state,ca_zip] [ca_country,ca_state,ca_zip] - HashAggregate [c_first_name,c_last_name,s_store_name,sum,sum(netpaid)] [paid,sum,sum(netpaid),sum(netpaid)] - InputAdapter - Exchange [c_first_name,c_last_name,s_store_name] #1 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,netpaid,s_store_name,sum,sum] [sum,sum] - HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum,sum(UnscaledValue(ss_net_paid))] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] - InputAdapter - Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #2 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid,sum,sum] [sum,sum] - Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] - BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] - Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [sr_item_sk,sr_ticket_number] - Filter [sr_item_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [s_state,s_store_name,s_store_sk,s_zip] - Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] [s_market_id,s_state,s_store_name,s_store_sk,s_zip] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] - Filter [i_color,i_item_sk] - Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [c_birth_country,c_customer_sk,c_first_name,c_last_name] - Filter [c_birth_country,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] [c_birth_country,c_customer_sk,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [ca_country,ca_state,ca_zip] - Filter [ca_zip] - Scan parquet default.customer_address [ca_country,ca_state,ca_zip] [ca_country,ca_state,ca_zip] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/explain.txt deleted file mode 100644 index 35e443a34..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/explain.txt +++ /dev/null @@ -1,82 +0,0 @@ -== Physical Plan == -*(8) Project [c_last_name#1, c_first_name#2, s_store_name#3, paid#4] -+- *(8) Filter (isnotnull(sum(netpaid#5)#6) && (cast(sum(netpaid#5)#6 as decimal(33,8)) > cast(Subquery subquery3290 as decimal(33,8)))) - : +- Subquery subquery3290 - : +- *(8) HashAggregate(keys=[], functions=[avg(netpaid#5)]) - : +- Exchange SinglePartition - : +- *(7) HashAggregate(keys=[], functions=[partial_avg(netpaid#5)]) - : +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) - : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 5) - : +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) - : +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] - : +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight - : :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] - : : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight - : : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] - : : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight - : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] - : : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight - : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] - : : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight - : : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] - : : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) - : : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] - : : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) - : : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] - : : : +- *(3) Filter isnotnull(i_item_sk#22) - : : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) - : +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] - : +- *(5) Filter isnotnull(ca_zip#18) - : +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct - +- *(8) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[sum(netpaid#5)]) - +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, 5) - +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[partial_sum(netpaid#5)]) - +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) - +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 5) - +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) - +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] - +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight - :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] - : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight - : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] - : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight - : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] - : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight - : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] - : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight - : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] - : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) - : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] - : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) - : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] - : : +- *(3) Filter ((isnotnull(i_color#9) && (i_color#9 = chiffon)) && isnotnull(i_item_sk#22)) - : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_color), EqualTo(i_color,chiffon), IsNotNull(i_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) - +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] - +- *(5) Filter isnotnull(ca_zip#18) - +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/simplified.txt deleted file mode 100644 index d8bda2e69..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/simplified.txt +++ /dev/null @@ -1,111 +0,0 @@ -WholeStageCodegen - Project [c_first_name,c_last_name,paid,s_store_name] - Filter [sum(netpaid)] - Subquery #1 - WholeStageCodegen - HashAggregate [avg(netpaid),count,sum] [(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),avg(netpaid),count,sum] - InputAdapter - Exchange #8 - WholeStageCodegen - HashAggregate [count,count,netpaid,sum,sum] [count,count,sum,sum] - HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum,sum(UnscaledValue(ss_net_paid))] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] - InputAdapter - Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #9 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid,sum,sum] [sum,sum] - Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] - BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] - Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [sr_item_sk,sr_ticket_number] - Filter [sr_item_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [s_state,s_store_name,s_store_sk,s_zip] - Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] [s_market_id,s_state,s_store_name,s_store_sk,s_zip] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen - Project [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] - Filter [i_item_sk] - Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [c_birth_country,c_customer_sk,c_first_name,c_last_name] - Filter [c_birth_country,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] [c_birth_country,c_customer_sk,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [ca_country,ca_state,ca_zip] - Filter [ca_zip] - Scan parquet default.customer_address [ca_country,ca_state,ca_zip] [ca_country,ca_state,ca_zip] - HashAggregate [c_first_name,c_last_name,s_store_name,sum,sum(netpaid)] [paid,sum,sum(netpaid),sum(netpaid)] - InputAdapter - Exchange [c_first_name,c_last_name,s_store_name] #1 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,netpaid,s_store_name,sum,sum] [sum,sum] - HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum,sum(UnscaledValue(ss_net_paid))] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] - InputAdapter - Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #2 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid,sum,sum] [sum,sum] - Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] - BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] - Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [sr_item_sk,sr_ticket_number] - Filter [sr_item_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [s_state,s_store_name,s_store_sk,s_zip] - Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] [s_market_id,s_state,s_store_name,s_store_sk,s_zip] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] - Filter [i_color,i_item_sk] - Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [c_birth_country,c_customer_sk,c_first_name,c_last_name] - Filter [c_birth_country,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] [c_birth_country,c_customer_sk,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [ca_country,ca_state,ca_zip] - Filter [ca_zip] - Scan parquet default.customer_address [ca_country,ca_state,ca_zip] [ca_country,ca_state,ca_zip] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/explain.txt deleted file mode 100644 index c1f44f753..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/explain.txt +++ /dev/null @@ -1,50 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST,s_store_id#3 ASC NULLS FIRST,s_store_name#4 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,s_store_id#3,s_store_name#4,store_sales_profit#5,store_returns_loss#6,catalog_sales_profit#7]) -+- *(9) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4], functions=[sum(UnscaledValue(ss_net_profit#8)), sum(UnscaledValue(sr_net_loss#9)), sum(UnscaledValue(cs_net_profit#10))]) - +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4, 5) - +- *(8) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4], functions=[partial_sum(UnscaledValue(ss_net_profit#8)), partial_sum(UnscaledValue(sr_net_loss#9)), partial_sum(UnscaledValue(cs_net_profit#10))]) - +- *(8) Project [ss_net_profit#8, sr_net_loss#9, cs_net_profit#10, s_store_id#3, s_store_name#4, i_item_id#1, i_item_desc#2] - +- *(8) BroadcastHashJoin [ss_item_sk#11], [i_item_sk#12], Inner, BuildRight - :- *(8) Project [ss_item_sk#11, ss_net_profit#8, sr_net_loss#9, cs_net_profit#10, s_store_id#3, s_store_name#4] - : +- *(8) BroadcastHashJoin [ss_store_sk#13], [s_store_sk#14], Inner, BuildRight - : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_net_loss#9, cs_net_profit#10] - : : +- *(8) BroadcastHashJoin [cs_sold_date_sk#15], [d_date_sk#16], Inner, BuildRight - : : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_net_loss#9, cs_sold_date_sk#15, cs_net_profit#10] - : : : +- *(8) BroadcastHashJoin [sr_returned_date_sk#17], [cast(d_date_sk#18 as bigint)], Inner, BuildRight - : : : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_returned_date_sk#17, sr_net_loss#9, cs_sold_date_sk#15, cs_net_profit#10] - : : : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight - : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_returned_date_sk#17, sr_net_loss#9, cs_sold_date_sk#15, cs_net_profit#10] - : : : : : +- *(8) BroadcastHashJoin [sr_customer_sk#21, sr_item_sk#22], [cast(cs_bill_customer_sk#23 as bigint), cast(cs_item_sk#24 as bigint)], Inner, BuildRight - : : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_returned_date_sk#17, sr_item_sk#22, sr_customer_sk#21, sr_net_loss#9] - : : : : : : +- *(8) BroadcastHashJoin [cast(ss_customer_sk#25 as bigint), cast(ss_item_sk#11 as bigint), cast(ss_ticket_number#26 as bigint)], [sr_customer_sk#21, sr_item_sk#22, sr_ticket_number#27], Inner, BuildRight - : : : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_customer_sk#25, ss_store_sk#13, ss_ticket_number#26, ss_net_profit#8] - : : : : : : : +- *(8) Filter ((((isnotnull(ss_ticket_number#26) && isnotnull(ss_item_sk#11)) && isnotnull(ss_customer_sk#25)) && isnotnull(ss_sold_date_sk#19)) && isnotnull(ss_store_sk#13)) - : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#11,ss_customer_sk#25,ss_store_sk#13,ss_ticket_number#26,ss_net_profit#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(3) Project [d_date_sk#20] - : : : : +- *(3) Filter ((((isnotnull(d_moy#28) && isnotnull(d_year#29)) && (d_moy#28 = 4)) && (d_year#29 = 2001)) && isnotnull(d_date_sk#20)) - : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_year#29,d_moy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,4), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(4) Project [d_date_sk#18] - : : : +- *(4) Filter (((((isnotnull(d_year#30) && isnotnull(d_moy#31)) && (d_moy#31 >= 4)) && (d_moy#31 <= 10)) && (d_year#30 = 2001)) && isnotnull(d_date_sk#18)) - : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#30,d_moy#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(5) Project [d_date_sk#16] - : : +- *(5) Filter (((((isnotnull(d_year#32) && isnotnull(d_moy#33)) && (d_moy#33 >= 4)) && (d_moy#33 <= 10)) && (d_year#32 = 2001)) && isnotnull(d_date_sk#16)) - : : +- *(5) FileScan parquet default.date_dim[d_date_sk#16,d_year#32,d_moy#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(6) Project [s_store_sk#14, s_store_id#3, s_store_name#4] - : +- *(6) Filter isnotnull(s_store_sk#14) - : +- *(6) FileScan parquet default.store[s_store_sk#14,s_store_id#3,s_store_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(7) Project [i_item_sk#12, i_item_id#1, i_item_desc#2] - +- *(7) Filter isnotnull(i_item_sk#12) - +- *(7) FileScan parquet default.item[i_item_sk#12,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/simplified.txt deleted file mode 100644 index df34b61ae..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/simplified.txt +++ /dev/null @@ -1,66 +0,0 @@ -TakeOrderedAndProject [catalog_sales_profit,i_item_desc,i_item_id,s_store_id,s_store_name,store_returns_loss,store_sales_profit] - WholeStageCodegen - HashAggregate [i_item_desc,i_item_id,s_store_id,s_store_name,sum,sum,sum,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(ss_net_profit))] [catalog_sales_profit,store_returns_loss,store_sales_profit,sum,sum,sum,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(ss_net_profit))] - InputAdapter - Exchange [i_item_desc,i_item_id,s_store_id,s_store_name] #1 - WholeStageCodegen - HashAggregate [cs_net_profit,i_item_desc,i_item_id,s_store_id,s_store_name,sr_net_loss,ss_net_profit,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Project [cs_net_profit,i_item_desc,i_item_id,s_store_id,s_store_name,sr_net_loss,ss_net_profit] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [cs_net_profit,s_store_id,s_store_name,sr_net_loss,ss_item_sk,ss_net_profit] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [cs_net_profit,sr_net_loss,ss_item_sk,ss_net_profit,ss_store_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_net_profit,cs_sold_date_sk,sr_net_loss,ss_item_sk,ss_net_profit,ss_store_sk] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] - Project [cs_net_profit,cs_sold_date_sk,sr_net_loss,sr_returned_date_sk,ss_item_sk,ss_net_profit,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [cs_net_profit,cs_sold_date_sk,sr_net_loss,sr_returned_date_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,sr_customer_sk,sr_item_sk] - Project [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] - Project [ss_customer_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,sr_ticket_number] - Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,sr_ticket_number] [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,sr_ticket_number] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [s_store_id,s_store_name,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_id,s_store_name,s_store_sk] [s_store_id,s_store_name,s_store_sk] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [i_item_desc,i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_desc,i_item_id,i_item_sk] [i_item_desc,i_item_id,i_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/explain.txt deleted file mode 100644 index c5c7da628..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/explain.txt +++ /dev/null @@ -1,32 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[i_item_id#1,agg1#2,agg2#3,agg3#4,agg4#5]) -+- *(6) HashAggregate(keys=[i_item_id#1], functions=[avg(cast(cs_quantity#6 as bigint)), avg(UnscaledValue(cs_list_price#7)), avg(UnscaledValue(cs_coupon_amt#8)), avg(UnscaledValue(cs_sales_price#9))]) - +- Exchange hashpartitioning(i_item_id#1, 5) - +- *(5) HashAggregate(keys=[i_item_id#1], functions=[partial_avg(cast(cs_quantity#6 as bigint)), partial_avg(UnscaledValue(cs_list_price#7)), partial_avg(UnscaledValue(cs_coupon_amt#8)), partial_avg(UnscaledValue(cs_sales_price#9))]) - +- *(5) Project [cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8, i_item_id#1] - +- *(5) BroadcastHashJoin [cs_promo_sk#10], [p_promo_sk#11], Inner, BuildRight - :- *(5) Project [cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8, i_item_id#1] - : +- *(5) BroadcastHashJoin [cs_item_sk#12], [i_item_sk#13], Inner, BuildRight - : :- *(5) Project [cs_item_sk#12, cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8] - : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight - : : :- *(5) Project [cs_sold_date_sk#14, cs_item_sk#12, cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8] - : : : +- *(5) BroadcastHashJoin [cs_bill_cdemo_sk#16], [cd_demo_sk#17], Inner, BuildRight - : : : :- *(5) Project [cs_sold_date_sk#14, cs_bill_cdemo_sk#16, cs_item_sk#12, cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8] - : : : : +- *(5) Filter (((isnotnull(cs_bill_cdemo_sk#16) && isnotnull(cs_sold_date_sk#14)) && isnotnull(cs_item_sk#12)) && isnotnull(cs_promo_sk#10)) - : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#14,cs_bill_cdemo_sk#16,cs_item_sk#12,cs_promo_sk#10,cs_quantity#6,cs_list_price#7,cs_sales_price#9,cs_coupon_amt#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_pro..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [d_date_sk#15] - : : +- *(2) Filter ((isnotnull(d_year#21) && (d_year#21 = 2000)) && isnotnull(d_date_sk#15)) - : : +- *(2) FileScan parquet default.date_dim[d_date_sk#15,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [i_item_sk#13, i_item_id#1] - : +- *(3) Filter isnotnull(i_item_sk#13) - : +- *(3) FileScan parquet default.item[i_item_sk#13,i_item_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(4) Project [p_promo_sk#11] - +- *(4) Filter (((p_channel_email#22 = N) || (p_channel_event#23 = N)) && isnotnull(p_promo_sk#11)) - +- *(4) FileScan parquet default.promotion[p_promo_sk#11,p_channel_email#22,p_channel_event#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/simplified.txt deleted file mode 100644 index bfb96b05c..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/simplified.txt +++ /dev/null @@ -1,42 +0,0 @@ -TakeOrderedAndProject [agg1,agg2,agg3,agg4,i_item_id] - WholeStageCodegen - HashAggregate [avg(UnscaledValue(cs_coupon_amt)),avg(UnscaledValue(cs_list_price)),avg(UnscaledValue(cs_sales_price)),avg(cast(cs_quantity as bigint)),count,count,count,count,i_item_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(cs_coupon_amt)),avg(UnscaledValue(cs_list_price)),avg(UnscaledValue(cs_sales_price)),avg(cast(cs_quantity as bigint)),count,count,count,count,sum,sum,sum,sum] - InputAdapter - Exchange [i_item_id] #1 - WholeStageCodegen - HashAggregate [count,count,count,count,count,count,count,count,cs_coupon_amt,cs_list_price,cs_quantity,cs_sales_price,i_item_id,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] - Project [cs_coupon_amt,cs_list_price,cs_quantity,cs_sales_price,i_item_id] - BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_coupon_amt,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,i_item_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] - BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] - Project [cs_bill_cdemo_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] - Filter [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] [cs_bill_cdemo_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [cd_demo_sk] - Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] - Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [p_promo_sk] - Filter [p_channel_email,p_channel_event,p_promo_sk] - Scan parquet default.promotion [p_channel_email,p_channel_event,p_promo_sk] [p_channel_email,p_channel_event,p_promo_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/explain.txt deleted file mode 100644 index c166032cd..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/explain.txt +++ /dev/null @@ -1,33 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,s_state#2 ASC NULLS FIRST], output=[i_item_id#1,s_state#2,g_state#3,agg1#4,agg2#5,agg3#6,agg4#7]) -+- *(6) HashAggregate(keys=[i_item_id#1, s_state#2, spark_grouping_id#8], functions=[avg(cast(ss_quantity#9 as bigint)), avg(UnscaledValue(ss_list_price#10)), avg(UnscaledValue(ss_coupon_amt#11)), avg(UnscaledValue(ss_sales_price#12))]) - +- Exchange hashpartitioning(i_item_id#1, s_state#2, spark_grouping_id#8, 5) - +- *(5) HashAggregate(keys=[i_item_id#1, s_state#2, spark_grouping_id#8], functions=[partial_avg(cast(ss_quantity#9 as bigint)), partial_avg(UnscaledValue(ss_list_price#10)), partial_avg(UnscaledValue(ss_coupon_amt#11)), partial_avg(UnscaledValue(ss_sales_price#12))]) - +- *(5) Expand [List(ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#13, s_state#14, 0), List(ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#13, null, 1), List(ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, null, null, 3)], [ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#1, s_state#2, spark_grouping_id#8] - +- *(5) Project [ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#15 AS i_item_id#13, s_state#16 AS s_state#14] - +- *(5) BroadcastHashJoin [ss_item_sk#17], [i_item_sk#18], Inner, BuildRight - :- *(5) Project [ss_item_sk#17, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, s_state#16] - : +- *(5) BroadcastHashJoin [ss_store_sk#19], [s_store_sk#20], Inner, BuildRight - : :- *(5) Project [ss_item_sk#17, ss_store_sk#19, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11] - : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight - : : :- *(5) Project [ss_sold_date_sk#21, ss_item_sk#17, ss_store_sk#19, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11] - : : : +- *(5) BroadcastHashJoin [ss_cdemo_sk#23], [cd_demo_sk#24], Inner, BuildRight - : : : :- *(5) Project [ss_sold_date_sk#21, ss_item_sk#17, ss_cdemo_sk#23, ss_store_sk#19, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11] - : : : : +- *(5) Filter (((isnotnull(ss_cdemo_sk#23) && isnotnull(ss_sold_date_sk#21)) && isnotnull(ss_store_sk#19)) && isnotnull(ss_item_sk#17)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#21,ss_item_sk#17,ss_cdemo_sk#23,ss_store_sk#19,ss_quantity#9,ss_list_price#10,ss_sales_price#12,ss_coupon_amt#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [d_date_sk#22] - : : +- *(2) Filter ((isnotnull(d_year#28) && (d_year#28 = 2002)) && isnotnull(d_date_sk#22)) - : : +- *(2) FileScan parquet default.date_dim[d_date_sk#22,d_year#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [s_store_sk#20, s_state#16] - : +- *(3) Filter ((isnotnull(s_state#16) && (s_state#16 = TN)) && isnotnull(s_store_sk#20)) - : +- *(3) FileScan parquet default.store[s_store_sk#20,s_state#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(4) Project [i_item_sk#18, i_item_id#15] - +- *(4) Filter isnotnull(i_item_sk#18) - +- *(4) FileScan parquet default.item[i_item_sk#18,i_item_id#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/simplified.txt deleted file mode 100644 index e3a2d7ca1..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/simplified.txt +++ /dev/null @@ -1,43 +0,0 @@ -TakeOrderedAndProject [agg1,agg2,agg3,agg4,g_state,i_item_id,s_state] - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,i_item_id,s_state,spark_grouping_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,g_state,sum,sum,sum,sum] - InputAdapter - Exchange [i_item_id,s_state,spark_grouping_id] #1 - WholeStageCodegen - HashAggregate [count,count,count,count,count,count,count,count,i_item_id,s_state,spark_grouping_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] - Expand [i_item_id,s_state,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] - Project [i_item_id,s_state,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [s_state,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] - Project [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_cdemo_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [cd_demo_sk] - Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] - Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [s_state,s_store_sk] - Filter [s_state,s_store_sk] - Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/explain.txt deleted file mode 100644 index b76609a84..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/explain.txt +++ /dev/null @@ -1,66 +0,0 @@ -== Physical Plan == -CollectLimit 100 -+- BroadcastNestedLoopJoin BuildRight, Inner - :- BroadcastNestedLoopJoin BuildRight, Inner - : :- BroadcastNestedLoopJoin BuildRight, Inner - : : :- BroadcastNestedLoopJoin BuildRight, Inner - : : : :- BroadcastNestedLoopJoin BuildRight, Inner - : : : : :- *(3) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_list_price#1)), count(ss_list_price#1), count(distinct ss_list_price#1)]) - : : : : : +- Exchange SinglePartition - : : : : : +- *(2) HashAggregate(keys=[], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1), partial_count(distinct ss_list_price#1)]) - : : : : : +- *(2) HashAggregate(keys=[ss_list_price#1], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1)]) - : : : : : +- Exchange hashpartitioning(ss_list_price#1, 5) - : : : : : +- *(1) HashAggregate(keys=[ss_list_price#1], functions=[partial_avg(UnscaledValue(ss_list_price#1)), partial_count(ss_list_price#1)]) - : : : : : +- *(1) Project [ss_list_price#1] - : : : : : +- *(1) Filter (((isnotnull(ss_quantity#2) && (ss_quantity#2 >= 0)) && (ss_quantity#2 <= 5)) && ((((ss_list_price#1 >= 8.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 18.00)) || ((ss_coupon_amt#3 >= 459.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 1459.00))) || ((ss_wholesale_cost#4 >= 57.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 77.00)))) - : : : : : +- *(1) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5)], ReadSchema: struct= 6)) && (ss_quantity#2 <= 10)) && ((((ss_list_price#1 >= 90.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 100.00)) || ((ss_coupon_amt#3 >= 2323.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 3323.00))) || ((ss_wholesale_cost#4 >= 31.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 51.00)))) - : : : : +- *(4) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10)], ReadSchema: struct= 11)) && (ss_quantity#2 <= 15)) && ((((ss_list_price#1 >= 142.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 152.00)) || ((ss_coupon_amt#3 >= 12214.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 13214.00))) || ((ss_wholesale_cost#4 >= 79.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 99.00)))) - : : : +- *(7) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15)], ReadSchema: struct= 16)) && (ss_quantity#2 <= 20)) && ((((ss_list_price#1 >= 135.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 145.00)) || ((ss_coupon_amt#3 >= 6071.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 7071.00))) || ((ss_wholesale_cost#4 >= 38.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 58.00)))) - : : +- *(10) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct= 21)) && (ss_quantity#2 <= 25)) && ((((ss_list_price#1 >= 122.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 132.00)) || ((ss_coupon_amt#3 >= 836.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 1836.00))) || ((ss_wholesale_cost#4 >= 17.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 37.00)))) - : +- *(13) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25)], ReadSchema: struct= 26)) && (ss_quantity#2 <= 30)) && ((((ss_list_price#1 >= 154.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 164.00)) || ((ss_coupon_amt#3 >= 7326.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 8326.00))) || ((ss_wholesale_cost#4 >= 7.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 27.00)))) - +- *(16) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(3) Project [d_date_sk#20] - : : : : +- *(3) Filter ((((isnotnull(d_moy#28) && isnotnull(d_year#29)) && (d_moy#28 = 9)) && (d_year#29 = 1999)) && isnotnull(d_date_sk#20)) - : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_year#29,d_moy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,9), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(4) Project [d_date_sk#18] - : : : +- *(4) Filter (((((isnotnull(d_year#30) && isnotnull(d_moy#31)) && (d_moy#31 >= 9)) && (d_moy#31 <= 12)) && (d_year#30 = 1999)) && isnotnull(d_date_sk#18)) - : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#30,d_moy#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), Equ..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(5) Project [d_date_sk#16] - : : +- *(5) Filter (d_year#32 IN (1999,2000,2001) && isnotnull(d_date_sk#16)) - : : +- *(5) FileScan parquet default.date_dim[d_date_sk#16,d_year#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(6) Project [s_store_sk#14, s_store_id#3, s_store_name#4] - : +- *(6) Filter isnotnull(s_store_sk#14) - : +- *(6) FileScan parquet default.store[s_store_sk#14,s_store_id#3,s_store_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(7) Project [i_item_sk#12, i_item_id#1, i_item_desc#2] - +- *(7) Filter isnotnull(i_item_sk#12) - +- *(7) FileScan parquet default.item[i_item_sk#12,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/simplified.txt deleted file mode 100644 index f8c72710f..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/simplified.txt +++ /dev/null @@ -1,66 +0,0 @@ -TakeOrderedAndProject [catalog_sales_quantity,i_item_desc,i_item_id,s_store_id,s_store_name,store_returns_quantity,store_sales_quantity] - WholeStageCodegen - HashAggregate [i_item_desc,i_item_id,s_store_id,s_store_name,sum,sum,sum,sum(cast(cs_quantity as bigint)),sum(cast(sr_return_quantity as bigint)),sum(cast(ss_quantity as bigint))] [catalog_sales_quantity,store_returns_quantity,store_sales_quantity,sum,sum,sum,sum(cast(cs_quantity as bigint)),sum(cast(sr_return_quantity as bigint)),sum(cast(ss_quantity as bigint))] - InputAdapter - Exchange [i_item_desc,i_item_id,s_store_id,s_store_name] #1 - WholeStageCodegen - HashAggregate [cs_quantity,i_item_desc,i_item_id,s_store_id,s_store_name,sr_return_quantity,ss_quantity,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Project [cs_quantity,i_item_desc,i_item_id,s_store_id,s_store_name,sr_return_quantity,ss_quantity] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [cs_quantity,s_store_id,s_store_name,sr_return_quantity,ss_item_sk,ss_quantity] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [cs_quantity,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] - Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,sr_customer_sk,sr_item_sk] - Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] - Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] - Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [s_store_id,s_store_name,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_id,s_store_name,s_store_sk] [s_store_id,s_store_name,s_store_sk] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [i_item_desc,i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_desc,i_item_id,i_item_sk] [i_item_desc,i_item_id,i_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/explain.txt deleted file mode 100644 index 55173cb25..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/explain.txt +++ /dev/null @@ -1,20 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[d_year#1 ASC NULLS FIRST,sum_agg#2 DESC NULLS LAST,brand_id#3 ASC NULLS FIRST], output=[d_year#1,brand_id#3,brand#4,sum_agg#2]) -+- *(4) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[sum(UnscaledValue(ss_ext_sales_price#7))]) - +- Exchange hashpartitioning(d_year#1, i_brand#5, i_brand_id#6, 5) - +- *(3) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#7))]) - +- *(3) Project [d_year#1, ss_ext_sales_price#7, i_brand_id#6, i_brand#5] - +- *(3) BroadcastHashJoin [ss_item_sk#8], [i_item_sk#9], Inner, BuildRight - :- *(3) Project [d_year#1, ss_item_sk#8, ss_ext_sales_price#7] - : +- *(3) BroadcastHashJoin [d_date_sk#10], [ss_sold_date_sk#11], Inner, BuildRight - : :- *(3) Project [d_date_sk#10, d_year#1] - : : +- *(3) Filter ((isnotnull(d_moy#12) && (d_moy#12 = 11)) && isnotnull(d_date_sk#10)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#10,d_year#1,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [ss_sold_date_sk#11, ss_item_sk#8, ss_ext_sales_price#7] - : +- *(1) Filter (isnotnull(ss_sold_date_sk#11) && isnotnull(ss_item_sk#8)) - : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#8,ss_ext_sales_price#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(2) Project [i_item_sk#9, i_brand_id#6, i_brand#5] - +- *(2) Filter ((isnotnull(i_manufact_id#13) && (i_manufact_id#13 = 128)) && isnotnull(i_item_sk#9)) - +- *(2) FileScan parquet default.item[i_item_sk#9,i_brand_id#6,i_brand#5,i_manufact_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,128), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/simplified.txt deleted file mode 100644 index 23253bbd2..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/simplified.txt +++ /dev/null @@ -1,26 +0,0 @@ -TakeOrderedAndProject [brand,brand_id,d_year,sum_agg] - WholeStageCodegen - HashAggregate [d_year,i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,brand_id,sum,sum(UnscaledValue(ss_ext_sales_price)),sum_agg] - InputAdapter - Exchange [d_year,i_brand,i_brand_id] #1 - WholeStageCodegen - HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [d_year,i_brand,i_brand_id,ss_ext_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [d_year,ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [d_date_sk,d_year] - Filter [d_date_sk,d_moy] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [i_brand,i_brand_id,i_item_sk] - Filter [i_item_sk,i_manufact_id] - Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manufact_id] [i_brand,i_brand_id,i_item_sk,i_manufact_id] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/explain.txt deleted file mode 100644 index d96b8f5b0..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/explain.txt +++ /dev/null @@ -1,52 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST,c_salutation#2 ASC NULLS FIRST,c_first_name#3 ASC NULLS FIRST,c_last_name#4 ASC NULLS FIRST,c_preferred_cust_flag#5 ASC NULLS FIRST,c_birth_day#6 ASC NULLS FIRST,c_birth_month#7 ASC NULLS FIRST,c_birth_year#8 ASC NULLS FIRST,c_birth_country#9 ASC NULLS FIRST,c_login#10 ASC NULLS FIRST,c_email_address#11 ASC NULLS FIRST,c_last_review_date#12 ASC NULLS FIRST,ctr_total_return#13 ASC NULLS FIRST], output=[c_customer_id#1,c_salutation#2,c_first_name#3,c_last_name#4,c_preferred_cust_flag#5,c_birth_day#6,c_birth_month#7,c_birth_year#8,c_birth_country#9,c_login#10,c_email_address#11,c_last_review_date#12,ctr_total_return#13]) -+- *(11) Project [c_customer_id#1, c_salutation#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_day#6, c_birth_month#7, c_birth_year#8, c_birth_country#9, c_login#10, c_email_address#11, c_last_review_date#12, ctr_total_return#13] - +- *(11) BroadcastHashJoin [c_current_addr_sk#14], [ca_address_sk#15], Inner, BuildRight - :- *(11) Project [ctr_total_return#13, c_customer_id#1, c_current_addr_sk#14, c_salutation#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_day#6, c_birth_month#7, c_birth_year#8, c_birth_country#9, c_login#10, c_email_address#11, c_last_review_date#12] - : +- *(11) BroadcastHashJoin [ctr_customer_sk#16], [cast(c_customer_sk#17 as bigint)], Inner, BuildRight - : :- *(11) Project [ctr_customer_sk#16, ctr_total_return#13] - : : +- *(11) BroadcastHashJoin [ctr_state#18], [ctr_state#18#19], Inner, BuildRight, (cast(ctr_total_return#13 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#20) - : : :- *(11) Filter isnotnull(ctr_total_return#13) - : : : +- *(11) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[sum(UnscaledValue(wr_return_amt#23))]) - : : : +- Exchange hashpartitioning(wr_returning_customer_sk#21, ca_state#22, 5) - : : : +- *(3) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[partial_sum(UnscaledValue(wr_return_amt#23))]) - : : : +- *(3) Project [wr_returning_customer_sk#21, wr_return_amt#23, ca_state#22] - : : : +- *(3) BroadcastHashJoin [wr_returning_addr_sk#24], [cast(ca_address_sk#15 as bigint)], Inner, BuildRight - : : : :- *(3) Project [wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] - : : : : +- *(3) BroadcastHashJoin [wr_returned_date_sk#25], [cast(d_date_sk#26 as bigint)], Inner, BuildRight - : : : : :- *(3) Project [wr_returned_date_sk#25, wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] - : : : : : +- *(3) Filter ((isnotnull(wr_returned_date_sk#25) && isnotnull(wr_returning_addr_sk#24)) && isnotnull(wr_returning_customer_sk#21)) - : : : : : +- *(3) FileScan parquet default.web_returns[wr_returned_date_sk#25,wr_returning_customer_sk#21,wr_returning_addr_sk#24,wr_return_amt#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk), IsNotNull(wr_returning_customer..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [ca_address_sk#15, ca_state#22] - : : : +- *(2) Filter (isnotnull(ca_address_sk#15) && isnotnull(ca_state#22)) - : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) - : : +- *(8) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#20) - : : +- *(8) HashAggregate(keys=[ctr_state#18], functions=[avg(ctr_total_return#13)]) - : : +- Exchange hashpartitioning(ctr_state#18, 5) - : : +- *(7) HashAggregate(keys=[ctr_state#18], functions=[partial_avg(ctr_total_return#13)]) - : : +- *(7) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[sum(UnscaledValue(wr_return_amt#23))]) - : : +- Exchange hashpartitioning(wr_returning_customer_sk#21, ca_state#22, 5) - : : +- *(6) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[partial_sum(UnscaledValue(wr_return_amt#23))]) - : : +- *(6) Project [wr_returning_customer_sk#21, wr_return_amt#23, ca_state#22] - : : +- *(6) BroadcastHashJoin [wr_returning_addr_sk#24], [cast(ca_address_sk#15 as bigint)], Inner, BuildRight - : : :- *(6) Project [wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] - : : : +- *(6) BroadcastHashJoin [wr_returned_date_sk#25], [cast(d_date_sk#26 as bigint)], Inner, BuildRight - : : : :- *(6) Project [wr_returned_date_sk#25, wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] - : : : : +- *(6) Filter (isnotnull(wr_returned_date_sk#25) && isnotnull(wr_returning_addr_sk#24)) - : : : : +- *(6) FileScan parquet default.web_returns[wr_returned_date_sk#25,wr_returning_customer_sk#21,wr_returning_addr_sk#24,wr_return_amt#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/simplified.txt deleted file mode 100644 index a17fc2aea..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/simplified.txt +++ /dev/null @@ -1,70 +0,0 @@ -TakeOrderedAndProject [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_customer_id,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation,ctr_total_return] - WholeStageCodegen - Project [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_customer_id,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation,ctr_total_return] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation,ctr_total_return] - BroadcastHashJoin [c_customer_sk,ctr_customer_sk] - Project [ctr_customer_sk,ctr_total_return] - BroadcastHashJoin [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_state,ctr_state,ctr_total_return] - Filter [ctr_total_return] - HashAggregate [ca_state,sum,sum(UnscaledValue(wr_return_amt)),wr_returning_customer_sk] [ctr_customer_sk,ctr_state,ctr_total_return,sum,sum(UnscaledValue(wr_return_amt))] - InputAdapter - Exchange [ca_state,wr_returning_customer_sk] #1 - WholeStageCodegen - HashAggregate [ca_state,sum,sum,wr_return_amt,wr_returning_customer_sk] [sum,sum] - Project [ca_state,wr_return_amt,wr_returning_customer_sk] - BroadcastHashJoin [ca_address_sk,wr_returning_addr_sk] - Project [wr_return_amt,wr_returning_addr_sk,wr_returning_customer_sk] - BroadcastHashJoin [d_date_sk,wr_returned_date_sk] - Project [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] - Filter [wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] - Scan parquet default.web_returns [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [ca_address_sk,ca_state] - Filter [ca_address_sk,ca_state] - Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] - HashAggregate [avg(ctr_total_return),count,ctr_state,sum] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),avg(ctr_total_return),count,ctr_state,sum] - InputAdapter - Exchange [ctr_state] #5 - WholeStageCodegen - HashAggregate [count,count,ctr_state,ctr_total_return,sum,sum] [count,count,sum,sum] - HashAggregate [ca_state,sum,sum(UnscaledValue(wr_return_amt)),wr_returning_customer_sk] [ctr_state,ctr_total_return,sum,sum(UnscaledValue(wr_return_amt))] - InputAdapter - Exchange [ca_state,wr_returning_customer_sk] #6 - WholeStageCodegen - HashAggregate [ca_state,sum,sum,wr_return_amt,wr_returning_customer_sk] [sum,sum] - Project [ca_state,wr_return_amt,wr_returning_customer_sk] - BroadcastHashJoin [ca_address_sk,wr_returning_addr_sk] - Project [wr_return_amt,wr_returning_addr_sk,wr_returning_customer_sk] - BroadcastHashJoin [d_date_sk,wr_returned_date_sk] - Project [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] - Filter [wr_returned_date_sk,wr_returning_addr_sk] - Scan parquet default.web_returns [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #2 - InputAdapter - ReusedExchange [ca_address_sk,ca_state] [ca_address_sk,ca_state] #3 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation] - Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation] [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [ca_address_sk] - Filter [ca_address_sk,ca_state] - Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/explain.txt deleted file mode 100644 index 991349307..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/explain.txt +++ /dev/null @@ -1,100 +0,0 @@ -== Physical Plan == -*(25) Sort [ca_county#1 ASC NULLS FIRST], true, 0 -+- Exchange rangepartitioning(ca_county#1 ASC NULLS FIRST, 5) - +- *(24) Project [ca_county#1, d_year#2, CheckOverflow((promote_precision(web_sales#3) / promote_precision(web_sales#4)), DecimalType(37,20)) AS web_q1_q2_increase#5, CheckOverflow((promote_precision(store_sales#6) / promote_precision(store_sales#7)), DecimalType(37,20)) AS store_q1_q2_increase#8, CheckOverflow((promote_precision(web_sales#9) / promote_precision(web_sales#3)), DecimalType(37,20)) AS web_q2_q3_increase#10, CheckOverflow((promote_precision(store_sales#11) / promote_precision(store_sales#6)), DecimalType(37,20)) AS store_q2_q3_increase#12] - +- *(24) BroadcastHashJoin [ca_county#13], [ca_county#14], Inner, BuildRight, (CASE WHEN (web_sales#3 > 0.00) THEN CheckOverflow((promote_precision(web_sales#9) / promote_precision(web_sales#3)), DecimalType(37,20)) ELSE null END > CASE WHEN (store_sales#6 > 0.00) THEN CheckOverflow((promote_precision(store_sales#11) / promote_precision(store_sales#6)), DecimalType(37,20)) ELSE null END) - :- *(24) Project [ca_county#1, d_year#2, store_sales#7, store_sales#6, store_sales#11, ca_county#13, web_sales#4, web_sales#3] - : +- *(24) BroadcastHashJoin [ca_county#13], [ca_county#15], Inner, BuildRight, (CASE WHEN (web_sales#4 > 0.00) THEN CheckOverflow((promote_precision(web_sales#3) / promote_precision(web_sales#4)), DecimalType(37,20)) ELSE null END > CASE WHEN (store_sales#7 > 0.00) THEN CheckOverflow((promote_precision(store_sales#6) / promote_precision(store_sales#7)), DecimalType(37,20)) ELSE null END) - : :- *(24) BroadcastHashJoin [ca_county#1], [ca_county#13], Inner, BuildRight - : : :- *(24) Project [ca_county#1, d_year#2, store_sales#7, store_sales#6, store_sales#11] - : : : +- *(24) BroadcastHashJoin [ca_county#16], [ca_county#17], Inner, BuildRight - : : : :- *(24) BroadcastHashJoin [ca_county#1], [ca_county#16], Inner, BuildRight - : : : : :- *(24) HashAggregate(keys=[ca_county#1, d_qoy#18, d_year#2], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) - : : : : : +- Exchange hashpartitioning(ca_county#1, d_qoy#18, d_year#2, 5) - : : : : : +- *(3) HashAggregate(keys=[ca_county#1, d_qoy#18, d_year#2], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#19))]) - : : : : : +- *(3) Project [ss_ext_sales_price#19, d_year#2, d_qoy#18, ca_county#1] - : : : : : +- *(3) BroadcastHashJoin [ss_addr_sk#20], [ca_address_sk#21], Inner, BuildRight - : : : : : :- *(3) Project [ss_addr_sk#20, ss_ext_sales_price#19, d_year#2, d_qoy#18] - : : : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#23], Inner, BuildRight - : : : : : : :- *(3) Project [ss_sold_date_sk#22, ss_addr_sk#20, ss_ext_sales_price#19] - : : : : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#22) && isnotnull(ss_addr_sk#20)) - : : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct - : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : +- *(1) Project [d_date_sk#23, d_year#2, d_qoy#18] - : : : : : : +- *(1) Filter ((((isnotnull(d_qoy#18) && isnotnull(d_year#2)) && (d_qoy#18 = 1)) && (d_year#2 = 2000)) && isnotnull(d_date_sk#23)) - : : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#23,d_year#2,d_qoy#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,1), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(2) Project [ca_address_sk#21, ca_county#1] - : : : : : +- *(2) Filter (isnotnull(ca_address_sk#21) && isnotnull(ca_county#1)) - : : : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#21,ca_county#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : : : : +- *(7) HashAggregate(keys=[ca_county#16, d_qoy#24, d_year#25], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) - : : : : +- Exchange hashpartitioning(ca_county#16, d_qoy#24, d_year#25, 5) - : : : : +- *(6) HashAggregate(keys=[ca_county#16, d_qoy#24, d_year#25], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#19))]) - : : : : +- *(6) Project [ss_ext_sales_price#19, d_year#25, d_qoy#24, ca_county#16] - : : : : +- *(6) BroadcastHashJoin [ss_addr_sk#20], [ca_address_sk#26], Inner, BuildRight - : : : : :- *(6) Project [ss_addr_sk#20, ss_ext_sales_price#19, d_year#25, d_qoy#24] - : : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#27], Inner, BuildRight - : : : : : :- *(6) Project [ss_sold_date_sk#22, ss_addr_sk#20, ss_ext_sales_price#19] - : : : : : : +- *(6) Filter (isnotnull(ss_sold_date_sk#22) && isnotnull(ss_addr_sk#20)) - : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(4) Project [d_date_sk#27, d_year#25, d_qoy#24] - : : : : : +- *(4) Filter ((((isnotnull(d_qoy#24) && isnotnull(d_year#25)) && (d_qoy#24 = 2)) && (d_year#25 = 2000)) && isnotnull(d_date_sk#27)) - : : : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#27,d_year#25,d_qoy#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - : : : : +- ReusedExchange [ca_address_sk#26, ca_county#16], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : : : +- *(11) HashAggregate(keys=[ca_county#17, d_qoy#28, d_year#29], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) - : : : +- Exchange hashpartitioning(ca_county#17, d_qoy#28, d_year#29, 5) - : : : +- *(10) HashAggregate(keys=[ca_county#17, d_qoy#28, d_year#29], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#19))]) - : : : +- *(10) Project [ss_ext_sales_price#19, d_year#29, d_qoy#28, ca_county#17] - : : : +- *(10) BroadcastHashJoin [ss_addr_sk#20], [ca_address_sk#30], Inner, BuildRight - : : : :- *(10) Project [ss_addr_sk#20, ss_ext_sales_price#19, d_year#29, d_qoy#28] - : : : : +- *(10) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#31], Inner, BuildRight - : : : : :- *(10) Project [ss_sold_date_sk#22, ss_addr_sk#20, ss_ext_sales_price#19] - : : : : : +- *(10) Filter (isnotnull(ss_sold_date_sk#22) && isnotnull(ss_addr_sk#20)) - : : : : : +- *(10) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(8) Project [d_date_sk#31, d_year#29, d_qoy#28] - : : : : +- *(8) Filter ((((isnotnull(d_qoy#28) && isnotnull(d_year#29)) && (d_qoy#28 = 3)) && (d_year#29 = 2000)) && isnotnull(d_date_sk#31)) - : : : : +- *(8) FileScan parquet default.date_dim[d_date_sk#31,d_year#29,d_qoy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - : : : +- ReusedExchange [ca_address_sk#30, ca_county#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : : +- *(15) HashAggregate(keys=[ca_county#13, d_qoy#32, d_year#33], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) - : : +- Exchange hashpartitioning(ca_county#13, d_qoy#32, d_year#33, 5) - : : +- *(14) HashAggregate(keys=[ca_county#13, d_qoy#32, d_year#33], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#34))]) - : : +- *(14) Project [ws_ext_sales_price#34, d_year#33, d_qoy#32, ca_county#13] - : : +- *(14) BroadcastHashJoin [ws_bill_addr_sk#35], [ca_address_sk#36], Inner, BuildRight - : : :- *(14) Project [ws_bill_addr_sk#35, ws_ext_sales_price#34, d_year#33, d_qoy#32] - : : : +- *(14) BroadcastHashJoin [ws_sold_date_sk#37], [d_date_sk#38], Inner, BuildRight - : : : :- *(14) Project [ws_sold_date_sk#37, ws_bill_addr_sk#35, ws_ext_sales_price#34] - : : : : +- *(14) Filter (isnotnull(ws_sold_date_sk#37) && isnotnull(ws_bill_addr_sk#35)) - : : : : +- *(14) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#38, d_year#33, d_qoy#32], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- ReusedExchange [ca_address_sk#36, ca_county#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : +- *(19) HashAggregate(keys=[ca_county#15, d_qoy#39, d_year#40], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) - : +- Exchange hashpartitioning(ca_county#15, d_qoy#39, d_year#40, 5) - : +- *(18) HashAggregate(keys=[ca_county#15, d_qoy#39, d_year#40], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#34))]) - : +- *(18) Project [ws_ext_sales_price#34, d_year#40, d_qoy#39, ca_county#15] - : +- *(18) BroadcastHashJoin [ws_bill_addr_sk#35], [ca_address_sk#41], Inner, BuildRight - : :- *(18) Project [ws_bill_addr_sk#35, ws_ext_sales_price#34, d_year#40, d_qoy#39] - : : +- *(18) BroadcastHashJoin [ws_sold_date_sk#37], [d_date_sk#42], Inner, BuildRight - : : :- *(18) Project [ws_sold_date_sk#37, ws_bill_addr_sk#35, ws_ext_sales_price#34] - : : : +- *(18) Filter (isnotnull(ws_sold_date_sk#37) && isnotnull(ws_bill_addr_sk#35)) - : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#42, d_year#40, d_qoy#39], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [ca_address_sk#41, ca_county#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - +- *(23) HashAggregate(keys=[ca_county#14, d_qoy#43, d_year#44], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) - +- Exchange hashpartitioning(ca_county#14, d_qoy#43, d_year#44, 5) - +- *(22) HashAggregate(keys=[ca_county#14, d_qoy#43, d_year#44], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#34))]) - +- *(22) Project [ws_ext_sales_price#34, d_year#44, d_qoy#43, ca_county#14] - +- *(22) BroadcastHashJoin [ws_bill_addr_sk#35], [ca_address_sk#45], Inner, BuildRight - :- *(22) Project [ws_bill_addr_sk#35, ws_ext_sales_price#34, d_year#44, d_qoy#43] - : +- *(22) BroadcastHashJoin [ws_sold_date_sk#37], [d_date_sk#46], Inner, BuildRight - : :- *(22) Project [ws_sold_date_sk#37, ws_bill_addr_sk#35, ws_ext_sales_price#34] - : : +- *(22) Filter (isnotnull(ws_sold_date_sk#37) && isnotnull(ws_bill_addr_sk#35)) - : : +- *(22) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#46, d_year#44, d_qoy#43], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [ca_address_sk#45, ca_county#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/simplified.txt deleted file mode 100644 index bf41c1709..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/simplified.txt +++ /dev/null @@ -1,140 +0,0 @@ -WholeStageCodegen - Sort [ca_county] - InputAdapter - Exchange [ca_county] #1 - WholeStageCodegen - Project [ca_county,d_year,store_sales,store_sales,store_sales,web_sales,web_sales,web_sales] - BroadcastHashJoin [ca_county,ca_county,store_sales,store_sales,web_sales,web_sales] - Project [ca_county,ca_county,d_year,store_sales,store_sales,store_sales,web_sales,web_sales] - BroadcastHashJoin [ca_county,ca_county,store_sales,store_sales,web_sales,web_sales] - BroadcastHashJoin [ca_county,ca_county] - Project [ca_county,d_year,store_sales,store_sales,store_sales] - BroadcastHashJoin [ca_county,ca_county] - BroadcastHashJoin [ca_county,ca_county] - HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ss_ext_sales_price))] [store_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] - InputAdapter - Exchange [ca_county,d_qoy,d_year] #2 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price,sum,sum] [sum,sum] - Project [ca_county,d_qoy,d_year,ss_ext_sales_price] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [d_qoy,d_year,ss_addr_sk,ss_ext_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] - Filter [ss_addr_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk,d_qoy,d_year] - Filter [d_date_sk,d_qoy,d_year] - Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [ca_address_sk,ca_county] - Filter [ca_address_sk,ca_county] - Scan parquet default.customer_address [ca_address_sk,ca_county] [ca_address_sk,ca_county] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ss_ext_sales_price))] [store_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] - InputAdapter - Exchange [ca_county,d_qoy,d_year] #6 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price,sum,sum] [sum,sum] - Project [ca_county,d_qoy,d_year,ss_ext_sales_price] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [d_qoy,d_year,ss_addr_sk,ss_ext_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] - Filter [ss_addr_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [d_date_sk,d_qoy,d_year] - Filter [d_date_sk,d_qoy,d_year] - Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] - InputAdapter - ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ss_ext_sales_price))] [store_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] - InputAdapter - Exchange [ca_county,d_qoy,d_year] #9 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price,sum,sum] [sum,sum] - Project [ca_county,d_qoy,d_year,ss_ext_sales_price] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [d_qoy,d_year,ss_addr_sk,ss_ext_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] - Filter [ss_addr_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen - Project [d_date_sk,d_qoy,d_year] - Filter [d_date_sk,d_qoy,d_year] - Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] - InputAdapter - ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 - InputAdapter - BroadcastExchange #11 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),web_sales] - InputAdapter - Exchange [ca_county,d_qoy,d_year] #12 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,sum,sum,ws_ext_sales_price] [sum,sum] - Project [ca_county,d_qoy,d_year,ws_ext_sales_price] - BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] - Project [d_qoy,d_year,ws_bill_addr_sk,ws_ext_sales_price] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] - Filter [ws_bill_addr_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #3 - InputAdapter - ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 - InputAdapter - BroadcastExchange #13 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),web_sales] - InputAdapter - Exchange [ca_county,d_qoy,d_year] #14 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,sum,sum,ws_ext_sales_price] [sum,sum] - Project [ca_county,d_qoy,d_year,ws_ext_sales_price] - BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] - Project [d_qoy,d_year,ws_bill_addr_sk,ws_ext_sales_price] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] - Filter [ws_bill_addr_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #7 - InputAdapter - ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 - InputAdapter - BroadcastExchange #15 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),web_sales] - InputAdapter - Exchange [ca_county,d_qoy,d_year] #16 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,sum,sum,ws_ext_sales_price] [sum,sum] - Project [ca_county,d_qoy,d_year,ws_ext_sales_price] - BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] - Project [d_qoy,d_year,ws_bill_addr_sk,ws_ext_sales_price] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] - Filter [ws_bill_addr_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #10 - InputAdapter - ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/explain.txt deleted file mode 100644 index f677286fe..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/explain.txt +++ /dev/null @@ -1,30 +0,0 @@ -== Physical Plan == -CollectLimit 100 -+- *(6) Project [1 AS excess discount amount #1] - +- *(6) BroadcastHashJoin [cs_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight - :- *(6) Project [cs_sold_date_sk#2] - : +- *(6) BroadcastHashJoin [i_item_sk#4], [cs_item_sk#5#6], Inner, BuildRight, (cast(cs_ext_discount_amt#7 as decimal(14,7)) > (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#8) - : :- *(6) Project [cs_sold_date_sk#2, cs_ext_discount_amt#7, i_item_sk#4] - : : +- *(6) BroadcastHashJoin [cs_item_sk#5], [i_item_sk#4], Inner, BuildRight - : : :- *(6) Project [cs_sold_date_sk#2, cs_item_sk#5, cs_ext_discount_amt#7] - : : : +- *(6) Filter ((isnotnull(cs_item_sk#5) && isnotnull(cs_ext_discount_amt#7)) && isnotnull(cs_sold_date_sk#2)) - : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#2,cs_item_sk#5,cs_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_ext_discount_amt), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [i_item_sk#4] - : : +- *(1) Filter ((isnotnull(i_manufact_id#9) && (i_manufact_id#9 = 977)) && isnotnull(i_item_sk#4)) - : : +- *(1) FileScan parquet default.item[i_item_sk#4,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,977), IsNotNull(i_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : +- *(4) Filter isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#8) - : +- *(4) HashAggregate(keys=[cs_item_sk#5], functions=[avg(UnscaledValue(cs_ext_discount_amt#7))]) - : +- Exchange hashpartitioning(cs_item_sk#5, 5) - : +- *(3) HashAggregate(keys=[cs_item_sk#5], functions=[partial_avg(UnscaledValue(cs_ext_discount_amt#7))]) - : +- *(3) Project [cs_item_sk#5, cs_ext_discount_amt#7] - : +- *(3) BroadcastHashJoin [cs_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight - : :- *(3) Project [cs_sold_date_sk#2, cs_item_sk#5, cs_ext_discount_amt#7] - : : +- *(3) Filter (isnotnull(cs_sold_date_sk#2) && isnotnull(cs_item_sk#5)) - : : +- *(3) FileScan parquet default.catalog_sales[cs_sold_date_sk#2,cs_item_sk#5,cs_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#3] - : +- *(2) Filter (((isnotnull(d_date#10) && (cast(d_date#10 as string) >= 2000-01-27)) && (d_date#10 <= 11073)) && isnotnull(d_date_sk#3)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#3,d_date#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)], ReadSchema: struct - +- ReusedExchange [d_date_sk#3], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/simplified.txt deleted file mode 100644 index aeba4db83..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/simplified.txt +++ /dev/null @@ -1,39 +0,0 @@ -CollectLimit - WholeStageCodegen - Project - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk] - BroadcastHashJoin [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),cs_ext_discount_amt,cs_item_sk,i_item_sk] - Project [cs_ext_discount_amt,cs_sold_date_sk,i_item_sk] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] - Filter [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] - InputAdapter - BroadcastExchange #1 - WholeStageCodegen - Project [i_item_sk] - Filter [i_item_sk,i_manufact_id] - Scan parquet default.item [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))] - HashAggregate [avg(UnscaledValue(cs_ext_discount_amt)),count,cs_item_sk,sum] [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),avg(UnscaledValue(cs_ext_discount_amt)),count,cs_item_sk,sum] - InputAdapter - Exchange [cs_item_sk] #3 - WholeStageCodegen - HashAggregate [count,count,cs_ext_discount_amt,cs_item_sk,sum,sum] [count,count,sum,sum] - Project [cs_ext_discount_amt,cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] - Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/explain.txt deleted file mode 100644 index 2989a8b23..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/explain.txt +++ /dev/null @@ -1,65 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output=[i_manufact_id#2,total_sales#1]) -+- *(20) HashAggregate(keys=[i_manufact_id#2], functions=[sum(total_sales#3)]) - +- Exchange hashpartitioning(i_manufact_id#2, 5) - +- *(19) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(total_sales#3)]) - +- Union - :- *(6) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- Exchange hashpartitioning(i_manufact_id#2, 5) - : +- *(5) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- *(5) Project [ss_ext_sales_price#4, i_manufact_id#2] - : +- *(5) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight - : :- *(5) Project [ss_item_sk#5, ss_ext_sales_price#4] - : : +- *(5) BroadcastHashJoin [ss_addr_sk#7], [ca_address_sk#8], Inner, BuildRight - : : :- *(5) Project [ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] - : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight - : : : :- *(5) Project [ss_sold_date_sk#9, ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] - : : : : +- *(5) Filter ((isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#7)) && isnotnull(ss_item_sk#5)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#10] - : : : +- *(1) Filter ((((isnotnull(d_year#11) && isnotnull(d_moy#12)) && (d_year#11 = 1998)) && (d_moy#12 = 5)) && isnotnull(d_date_sk#10)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,5), IsNotNull(d_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [ca_address_sk#8] - : : +- *(2) Filter ((isnotnull(ca_gmt_offset#13) && (ca_gmt_offset#13 = -5.00)) && isnotnull(ca_address_sk#8)) - : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) BroadcastHashJoin [i_manufact_id#2], [i_manufact_id#2#14], LeftSemi, BuildRight - : :- *(4) Project [i_item_sk#6, i_manufact_id#2] - : : +- *(4) Filter isnotnull(i_item_sk#6) - : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_manufact_id#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [i_manufact_id#2 AS i_manufact_id#2#14] - : +- *(3) Filter (isnotnull(i_category#15) && (i_category#15 = Electronics)) - : +- *(3) FileScan parquet default.item[i_category#15,i_manufact_id#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Electronics)], ReadSchema: struct - :- *(12) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) - : +- Exchange hashpartitioning(i_manufact_id#2, 5) - : +- *(11) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) - : +- *(11) Project [cs_ext_sales_price#16, i_manufact_id#2] - : +- *(11) BroadcastHashJoin [cs_item_sk#17], [i_item_sk#6], Inner, BuildRight - : :- *(11) Project [cs_item_sk#17, cs_ext_sales_price#16] - : : +- *(11) BroadcastHashJoin [cs_bill_addr_sk#18], [ca_address_sk#8], Inner, BuildRight - : : :- *(11) Project [cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] - : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#19], [d_date_sk#10], Inner, BuildRight - : : : :- *(11) Project [cs_sold_date_sk#19, cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] - : : : : +- *(11) Filter ((isnotnull(cs_sold_date_sk#19) && isnotnull(cs_bill_addr_sk#18)) && isnotnull(cs_item_sk#17)) - : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [i_item_sk#6, i_manufact_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(18) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(ws_ext_sales_price#20))]) - +- Exchange hashpartitioning(i_manufact_id#2, 5) - +- *(17) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#20))]) - +- *(17) Project [ws_ext_sales_price#20, i_manufact_id#2] - +- *(17) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#6], Inner, BuildRight - :- *(17) Project [ws_item_sk#21, ws_ext_sales_price#20] - : +- *(17) BroadcastHashJoin [ws_bill_addr_sk#22], [ca_address_sk#8], Inner, BuildRight - : :- *(17) Project [ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] - : : +- *(17) BroadcastHashJoin [ws_sold_date_sk#23], [d_date_sk#10], Inner, BuildRight - : : :- *(17) Project [ws_sold_date_sk#23, ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] - : : : +- *(17) Filter ((isnotnull(ws_sold_date_sk#23) && isnotnull(ws_bill_addr_sk#22)) && isnotnull(ws_item_sk#21)) - : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [i_item_sk#6, i_manufact_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/simplified.txt deleted file mode 100644 index 28a14a1ea..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/simplified.txt +++ /dev/null @@ -1,91 +0,0 @@ -TakeOrderedAndProject [i_manufact_id,total_sales] - WholeStageCodegen - HashAggregate [i_manufact_id,sum,sum(total_sales)] [sum,sum(total_sales),total_sales] - InputAdapter - Exchange [i_manufact_id] #1 - WholeStageCodegen - HashAggregate [i_manufact_id,sum,sum,total_sales] [sum,sum] - InputAdapter - Union - WholeStageCodegen - HashAggregate [i_manufact_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_ext_sales_price)),total_sales] - InputAdapter - Exchange [i_manufact_id] #2 - WholeStageCodegen - HashAggregate [i_manufact_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [i_manufact_id,ss_ext_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - Filter [ss_addr_sk,ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [ca_address_sk] - Filter [ca_address_sk,ca_gmt_offset] - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - BroadcastHashJoin [i_manufact_id,i_manufact_id] - Project [i_item_sk,i_manufact_id] - Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [i_manufact_id] - Filter [i_category] - Scan parquet default.item [i_category,i_manufact_id] [i_category,i_manufact_id] - WholeStageCodegen - HashAggregate [i_manufact_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum,sum(UnscaledValue(cs_ext_sales_price)),total_sales] - InputAdapter - Exchange [i_manufact_id] #7 - WholeStageCodegen - HashAggregate [cs_ext_sales_price,i_manufact_id,sum,sum] [sum,sum] - Project [cs_ext_sales_price,i_manufact_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_ext_sales_price,cs_item_sk] - BroadcastHashJoin [ca_address_sk,cs_bill_addr_sk] - Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] - Filter [cs_bill_addr_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 - InputAdapter - ReusedExchange [ca_address_sk] [ca_address_sk] #4 - InputAdapter - ReusedExchange [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] #5 - WholeStageCodegen - HashAggregate [i_manufact_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),total_sales] - InputAdapter - Exchange [i_manufact_id] #8 - WholeStageCodegen - HashAggregate [i_manufact_id,sum,sum,ws_ext_sales_price] [sum,sum] - Project [i_manufact_id,ws_ext_sales_price] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_ext_sales_price,ws_item_sk] - BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] - Filter [ws_bill_addr_sk,ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 - InputAdapter - ReusedExchange [ca_address_sk] [ca_address_sk] #4 - InputAdapter - ReusedExchange [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/explain.txt deleted file mode 100644 index 30008ef45..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/explain.txt +++ /dev/null @@ -1,34 +0,0 @@ -== Physical Plan == -*(7) Sort [c_last_name#1 ASC NULLS FIRST, c_first_name#2 ASC NULLS FIRST, c_salutation#3 ASC NULLS FIRST, c_preferred_cust_flag#4 DESC NULLS LAST], true, 0 -+- Exchange rangepartitioning(c_last_name#1 ASC NULLS FIRST, c_first_name#2 ASC NULLS FIRST, c_salutation#3 ASC NULLS FIRST, c_preferred_cust_flag#4 DESC NULLS LAST, 5) - +- *(6) Project [c_last_name#1, c_first_name#2, c_salutation#3, c_preferred_cust_flag#4, ss_ticket_number#5, cnt#6] - +- *(6) BroadcastHashJoin [ss_customer_sk#7], [c_customer_sk#8], Inner, BuildRight - :- *(6) Filter ((cnt#6 >= 15) && (cnt#6 <= 20)) - : +- *(6) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#7], functions=[count(1)]) - : +- Exchange hashpartitioning(ss_ticket_number#5, ss_customer_sk#7, 5) - : +- *(4) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#7], functions=[partial_count(1)]) - : +- *(4) Project [ss_customer_sk#7, ss_ticket_number#5] - : +- *(4) BroadcastHashJoin [ss_hdemo_sk#9], [hd_demo_sk#10], Inner, BuildRight - : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_ticket_number#5] - : : +- *(4) BroadcastHashJoin [ss_store_sk#11], [s_store_sk#12], Inner, BuildRight - : : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#5] - : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight - : : : :- *(4) Project [ss_sold_date_sk#13, ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#5] - : : : : +- *(4) Filter (((isnotnull(ss_sold_date_sk#13) && isnotnull(ss_store_sk#11)) && isnotnull(ss_hdemo_sk#9)) && isnotnull(ss_customer_sk#7)) - : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_customer_sk#7,ss_hdemo_sk#9,ss_store_sk#11,ss_ticket_number#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#14] - : : : +- *(1) Filter (((((d_dom#15 >= 1) && (d_dom#15 <= 3)) || ((d_dom#15 >= 25) && (d_dom#15 <= 28))) && d_year#16 IN (1999,2000,2001)) && isnotnull(d_date_sk#14)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_dom#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),Le..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [s_store_sk#12] - : : +- *(2) Filter ((isnotnull(s_county#17) && (s_county#17 = Williamson County)) && isnotnull(s_store_sk#12)) - : : +- *(2) FileScan parquet default.store[s_store_sk#12,s_county#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [hd_demo_sk#10] - : +- *(3) Filter ((((isnotnull(hd_vehicle_count#18) && ((hd_buy_potential#19 = >10000) || (hd_buy_potential#19 = unknown))) && (hd_vehicle_count#18 > 0)) && (CASE WHEN (hd_vehicle_count#18 > 0) THEN (cast(hd_dep_count#20 as double) / cast(hd_vehicle_count#18 as double)) ELSE null END > 1.2)) && isnotnull(hd_demo_sk#10)) - : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#10,hd_buy_potential#19,hd_dep_count#20,hd_vehicle_count#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknow..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(5) Project [c_customer_sk#8, c_salutation#3, c_first_name#2, c_last_name#1, c_preferred_cust_flag#4] - +- *(5) Filter isnotnull(c_customer_sk#8) - +- *(5) FileScan parquet default.customer[c_customer_sk#8,c_salutation#3,c_first_name#2,c_last_name#1,c_preferred_cust_flag#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [ss_customer_sk#29] - : : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#30], [d_date_sk#31], Inner, BuildRight - : : : : :- *(2) Project [ss_sold_date_sk#30, ss_customer_sk#29] - : : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#30) - : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#30,ss_customer_sk#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(1) Project [d_date_sk#31] - : : : : +- *(1) Filter ((((isnotnull(d_year#32) && isnotnull(d_qoy#33)) && (d_year#32 = 2002)) && (d_qoy#33 < 4)) && isnotnull(d_date_sk#31)) - : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#31,d_year#32,d_qoy#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(4) Project [ws_bill_customer_sk#28] - : : : +- *(4) BroadcastHashJoin [ws_sold_date_sk#34], [d_date_sk#31], Inner, BuildRight - : : : :- *(4) Project [ws_sold_date_sk#34, ws_bill_customer_sk#28] - : : : : +- *(4) Filter isnotnull(ws_sold_date_sk#34) - : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#34,ws_bill_customer_sk#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#31], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(6) Project [cs_ship_customer_sk#27] - : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#35], [d_date_sk#31], Inner, BuildRight - : : :- *(6) Project [cs_sold_date_sk#35, cs_ship_customer_sk#27] - : : : +- *(6) Filter isnotnull(cs_sold_date_sk#35) - : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#35,cs_ship_customer_sk#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#31], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(7) Project [ca_address_sk#23, ca_state#1] - : +- *(7) Filter isnotnull(ca_address_sk#23) - : +- *(7) FileScan parquet default.customer_address[ca_address_sk#23,ca_state#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(8) Project [cd_demo_sk#21, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6] - +- *(8) Filter isnotnull(cd_demo_sk#21) - +- *(8) FileScan parquet default.customer_demographics[cd_demo_sk#21,cd_gender#2,cd_marital_status#3,cd_dep_count#19,cd_dep_employed_count#5,cd_dep_college_count#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [i_item_sk#19, i_class#15, i_category#14] - : +- *(2) Filter isnotnull(i_item_sk#19) - : +- *(2) FileScan parquet default.item[i_item_sk#19,i_class#15,i_category#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [s_store_sk#17] - +- *(3) Filter ((isnotnull(s_state#23) && (s_state#23 = TN)) && isnotnull(s_store_sk#17)) - +- *(3) FileScan parquet default.store[s_store_sk#17,s_state#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/simplified.txt deleted file mode 100644 index 8d87de2ff..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/simplified.txt +++ /dev/null @@ -1,43 +0,0 @@ -TakeOrderedAndProject [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] - WholeStageCodegen - Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] - InputAdapter - Window [_w1,_w2,_w3] - WholeStageCodegen - Sort [_w1,_w2,_w3] - InputAdapter - Exchange [_w1,_w2] #1 - WholeStageCodegen - HashAggregate [i_category,i_class,spark_grouping_id,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] [_w1,_w2,_w3,gross_margin,lochierarchy,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] - InputAdapter - Exchange [i_category,i_class,spark_grouping_id] #2 - WholeStageCodegen - HashAggregate [i_category,i_class,spark_grouping_id,ss_ext_sales_price,ss_net_profit,sum,sum,sum,sum] [sum,sum,sum,sum] - Expand [i_category,i_class,ss_ext_sales_price,ss_net_profit] - Project [i_category,i_class,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [i_category,i_class,ss_ext_sales_price,ss_net_profit,ss_store_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [i_category,i_class,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_category,i_class,i_item_sk] [i_category,i_class,i_item_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [s_store_sk] - Filter [s_state,s_store_sk] - Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/explain.txt deleted file mode 100644 index 9ffa2c5ee..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/explain.txt +++ /dev/null @@ -1,26 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,i_current_price#3]) -+- *(5) HashAggregate(keys=[i_item_id#1, i_item_desc#2, i_current_price#3], functions=[]) - +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, i_current_price#3, 5) - +- *(4) HashAggregate(keys=[i_item_id#1, i_item_desc#2, i_current_price#3], functions=[]) - +- *(4) Project [i_item_id#1, i_item_desc#2, i_current_price#3] - +- *(4) BroadcastHashJoin [i_item_sk#4], [cs_item_sk#5], Inner, BuildRight - :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3] - : +- *(4) BroadcastHashJoin [inv_date_sk#6], [d_date_sk#7], Inner, BuildRight - : :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3, inv_date_sk#6] - : : +- *(4) BroadcastHashJoin [i_item_sk#4], [inv_item_sk#8], Inner, BuildRight - : : :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3] - : : : +- *(4) Filter ((((isnotnull(i_current_price#3) && (i_current_price#3 >= 68.00)) && (cast(i_current_price#3 as decimal(12,2)) <= 98.00)) && i_manufact_id#9 IN (677,940,694,808)) && isnotnull(i_item_sk#4)) - : : : +- *(4) FileScan parquet default.item[i_item_sk#4,i_item_id#1,i_item_desc#2,i_current_price#3,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,68.00), In(i_manufact_id, [677,94..., ReadSchema: struct= 100)) && (inv_quantity_on_hand#10 <= 500)) && isnotnull(inv_item_sk#8)) && isnotnull(inv_date_sk#6)) - : : +- *(1) FileScan parquet default.inventory[inv_date_sk#6,inv_item_sk#8,inv_quantity_on_hand#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(i..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#7] - : +- *(2) Filter (((isnotnull(d_date#11) && (d_date#11 >= 10988)) && (d_date#11 <= 11048)) && isnotnull(d_date_sk#7)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_date#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-01), LessThanOrEqual(d_date,2000-04-01), Is..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [cs_item_sk#5] - +- *(3) Filter isnotnull(cs_item_sk#5) - +- *(3) FileScan parquet default.catalog_sales[cs_item_sk#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/simplified.txt deleted file mode 100644 index 23745be9e..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/simplified.txt +++ /dev/null @@ -1,34 +0,0 @@ -TakeOrderedAndProject [i_current_price,i_item_desc,i_item_id] - WholeStageCodegen - HashAggregate [i_current_price,i_item_desc,i_item_id] - InputAdapter - Exchange [i_current_price,i_item_desc,i_item_id] #1 - WholeStageCodegen - HashAggregate [i_current_price,i_item_desc,i_item_id] - Project [i_current_price,i_item_desc,i_item_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [i_current_price,i_item_desc,i_item_id,i_item_sk] - BroadcastHashJoin [d_date_sk,inv_date_sk] - Project [i_current_price,i_item_desc,i_item_id,i_item_sk,inv_date_sk] - BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [i_current_price,i_item_desc,i_item_id,i_item_sk] - Filter [i_current_price,i_item_sk,i_manufact_id] - Scan parquet default.item [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [inv_date_sk,inv_item_sk] - Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_quantity_on_hand] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [cs_item_sk] - Filter [cs_item_sk] - Scan parquet default.catalog_sales [cs_item_sk] [cs_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/explain.txt deleted file mode 100644 index 8072eda0e..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/explain.txt +++ /dev/null @@ -1,55 +0,0 @@ -== Physical Plan == -CollectLimit 100 -+- *(13) HashAggregate(keys=[], functions=[count(1)]) - +- Exchange SinglePartition - +- *(12) HashAggregate(keys=[], functions=[partial_count(1)]) - +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#4, ), coalesce(c_first_name#5, ), coalesce(d_date#6, 0)], LeftSemi, BuildRight, (((c_last_name#1 <=> c_last_name#4) && (c_first_name#2 <=> c_first_name#5)) && (d_date#3 <=> d_date#6)) - :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - : +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - : +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#7, ), coalesce(c_first_name#8, ), coalesce(d_date#9, 0)], LeftSemi, BuildRight, (((c_last_name#1 <=> c_last_name#7) && (c_first_name#2 <=> c_first_name#8)) && (d_date#3 <=> d_date#9)) - : :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - : : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, d_date#3, 5) - : : +- *(3) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - : : +- *(3) Project [c_last_name#1, c_first_name#2, d_date#3] - : : +- *(3) BroadcastHashJoin [ss_customer_sk#10], [c_customer_sk#11], Inner, BuildRight - : : :- *(3) Project [ss_customer_sk#10, d_date#3] - : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight - : : : :- *(3) Project [ss_sold_date_sk#12, ss_customer_sk#10] - : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#12) && isnotnull(ss_customer_sk#10)) - : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_customer_sk#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#13, d_date#3] - : : : +- *(1) Filter (((isnotnull(d_month_seq#14) && (d_month_seq#14 >= 1200)) && (d_month_seq#14 <= 1211)) && isnotnull(d_date_sk#13)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#13,d_date#3,d_month_seq#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [c_customer_sk#11, c_first_name#2, c_last_name#1] - : : +- *(2) Filter isnotnull(c_customer_sk#11) - : : +- *(2) FileScan parquet default.customer[c_customer_sk#11,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) - : +- *(7) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) - : +- Exchange hashpartitioning(c_last_name#7, c_first_name#8, d_date#9, 5) - : +- *(6) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) - : +- *(6) Project [c_last_name#7, c_first_name#8, d_date#9] - : +- *(6) BroadcastHashJoin [cs_bill_customer_sk#15], [c_customer_sk#16], Inner, BuildRight - : :- *(6) Project [cs_bill_customer_sk#15, d_date#9] - : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight - : : :- *(6) Project [cs_sold_date_sk#17, cs_bill_customer_sk#15] - : : : +- *(6) Filter (isnotnull(cs_sold_date_sk#17) && isnotnull(cs_bill_customer_sk#15)) - : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#17,cs_bill_customer_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#18, d_date#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [c_customer_sk#16, c_first_name#8, c_last_name#7], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) - +- *(11) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) - +- Exchange hashpartitioning(c_last_name#4, c_first_name#5, d_date#6, 5) - +- *(10) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) - +- *(10) Project [c_last_name#4, c_first_name#5, d_date#6] - +- *(10) BroadcastHashJoin [ws_bill_customer_sk#19], [c_customer_sk#20], Inner, BuildRight - :- *(10) Project [ws_bill_customer_sk#19, d_date#6] - : +- *(10) BroadcastHashJoin [ws_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight - : :- *(10) Project [ws_sold_date_sk#21, ws_bill_customer_sk#19] - : : +- *(10) Filter (isnotnull(ws_sold_date_sk#21) && isnotnull(ws_bill_customer_sk#19)) - : : +- *(10) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#22, d_date#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [c_customer_sk#20, c_first_name#5, c_last_name#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/simplified.txt deleted file mode 100644 index c7e30522f..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/simplified.txt +++ /dev/null @@ -1,75 +0,0 @@ -CollectLimit - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),count(1)] - InputAdapter - Exchange #1 - WholeStageCodegen - HashAggregate [count,count] [count,count] - HashAggregate [c_first_name,c_last_name,d_date] - HashAggregate [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] - HashAggregate [c_first_name,c_last_name,d_date] - HashAggregate [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] - HashAggregate [c_first_name,c_last_name,d_date] - InputAdapter - Exchange [c_first_name,c_last_name,d_date] #2 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - Project [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [d_date,ss_customer_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_customer_sk,ss_sold_date_sk] - Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date,d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] [d_date,d_date_sk,d_month_seq] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [c_customer_sk,c_first_name,c_last_name] - Filter [c_customer_sk] - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - InputAdapter - Exchange [c_first_name,c_last_name,d_date] #6 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - Project [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_bill_customer_sk,cs_sold_date_sk] - Filter [cs_bill_customer_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] [cs_bill_customer_sk,cs_sold_date_sk] - InputAdapter - ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #3 - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - InputAdapter - Exchange [c_first_name,c_last_name,d_date] #8 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - Project [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [d_date,ws_bill_customer_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_customer_sk,ws_sold_date_sk] - Filter [ws_bill_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_sold_date_sk] - InputAdapter - ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #3 - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/explain.txt deleted file mode 100644 index 5455e1849..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/explain.txt +++ /dev/null @@ -1,51 +0,0 @@ -== Physical Plan == -*(11) Sort [w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST], true, 0 -+- Exchange rangepartitioning(w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST, 5) - +- *(10) BroadcastHashJoin [i_item_sk#2, w_warehouse_sk#1], [i_item_sk#9, w_warehouse_sk#10], Inner, BuildRight - :- *(10) Project [w_warehouse_sk#1, i_item_sk#2, d_moy#3, mean#4, CASE WHEN (mean#4 = 0.0) THEN null ELSE (stdev#11 / mean#4) END AS cov#5] - : +- *(10) Filter (CASE WHEN (mean#4 = 0.0) THEN 0.0 ELSE (stdev#11 / mean#4) END > 1.0) - : +- *(10) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) - : +- Exchange hashpartitioning(w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3, 5) - : +- *(4) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) - : +- *(4) Project [inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12, d_moy#3] - : +- *(4) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#15], Inner, BuildRight - : :- *(4) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12] - : : +- *(4) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#1], Inner, BuildRight - : : :- *(4) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#2] - : : : +- *(4) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#2], Inner, BuildRight - : : : :- *(4) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] - : : : : +- *(4) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) - : : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [i_item_sk#2] - : : : +- *(1) Filter isnotnull(i_item_sk#2) - : : : +- *(1) FileScan parquet default.item[i_item_sk#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [w_warehouse_sk#1, w_warehouse_name#12] - : : +- *(2) Filter isnotnull(w_warehouse_sk#1) - : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#1,w_warehouse_name#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [d_date_sk#15, d_moy#3] - : +- *(3) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#3)) && (d_year#18 = 2001)) && (d_moy#3 = 1)) && isnotnull(d_date_sk#15)) - : +- *(3) FileScan parquet default.date_dim[d_date_sk#15,d_year#18,d_moy#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) - +- *(9) Project [w_warehouse_sk#10, i_item_sk#9, d_moy#6, mean#7, CASE WHEN (mean#7 = 0.0) THEN null ELSE (stdev#19 / mean#7) END AS cov#8] - +- *(9) Filter (CASE WHEN (mean#7 = 0.0) THEN 0.0 ELSE (stdev#19 / mean#7) END > 1.0) - +- *(9) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) - +- Exchange hashpartitioning(w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6, 5) - +- *(8) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) - +- *(8) Project [inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20, d_moy#6] - +- *(8) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#21], Inner, BuildRight - :- *(8) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20] - : +- *(8) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#10], Inner, BuildRight - : :- *(8) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#9] - : : +- *(8) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#9], Inner, BuildRight - : : :- *(8) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] - : : : +- *(8) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) - : : : +- *(8) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct - : : +- ReusedExchange [i_item_sk#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [w_warehouse_sk#10, w_warehouse_name#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(7) Project [d_date_sk#21, d_moy#6] - +- *(7) Filter ((((isnotnull(d_year#22) && isnotnull(d_moy#6)) && (d_year#22 = 2001)) && (d_moy#6 = 2)) && isnotnull(d_date_sk#21)) - +- *(7) FileScan parquet default.date_dim[d_date_sk#21,d_year#22,d_moy#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/simplified.txt deleted file mode 100644 index d80f4a4ee..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/simplified.txt +++ /dev/null @@ -1,69 +0,0 @@ -WholeStageCodegen - Sort [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] - InputAdapter - Exchange [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] #1 - WholeStageCodegen - BroadcastHashJoin [i_item_sk,i_item_sk,w_warehouse_sk,w_warehouse_sk] - Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] - Filter [mean,stdev] - HashAggregate [avg,avg(cast(inv_quantity_on_hand as bigint)),count,d_moy,i_item_sk,m2,n,stddev_samp(cast(inv_quantity_on_hand as double)),sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] - InputAdapter - Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #2 - WholeStageCodegen - HashAggregate [avg,avg,count,count,d_moy,i_item_sk,inv_quantity_on_hand,m2,m2,n,n,sum,sum,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] - Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] - BroadcastHashJoin [d_date_sk,inv_date_sk] - Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] - BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] - BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_sk] [i_item_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [w_warehouse_name,w_warehouse_sk] - Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk,d_moy] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] - Filter [mean,stdev] - HashAggregate [avg,avg(cast(inv_quantity_on_hand as bigint)),count,d_moy,i_item_sk,m2,n,stddev_samp(cast(inv_quantity_on_hand as double)),sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] - InputAdapter - Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #7 - WholeStageCodegen - HashAggregate [avg,avg,count,count,d_moy,i_item_sk,inv_quantity_on_hand,m2,m2,n,n,sum,sum,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] - Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] - BroadcastHashJoin [d_date_sk,inv_date_sk] - Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] - BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] - BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - InputAdapter - ReusedExchange [i_item_sk] [i_item_sk] #3 - InputAdapter - ReusedExchange [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] #4 - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [d_date_sk,d_moy] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/explain.txt deleted file mode 100644 index 3c845e77b..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/explain.txt +++ /dev/null @@ -1,51 +0,0 @@ -== Physical Plan == -*(11) Sort [w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST], true, 0 -+- Exchange rangepartitioning(w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST, 5) - +- *(10) BroadcastHashJoin [i_item_sk#2, w_warehouse_sk#1], [i_item_sk#9, w_warehouse_sk#10], Inner, BuildRight - :- *(10) Project [w_warehouse_sk#1, i_item_sk#2, d_moy#3, mean#4, CASE WHEN (mean#4 = 0.0) THEN null ELSE (stdev#11 / mean#4) END AS cov#5] - : +- *(10) Filter ((CASE WHEN (mean#4 = 0.0) THEN 0.0 ELSE (stdev#11 / mean#4) END > 1.0) && (CASE WHEN (mean#4 = 0.0) THEN null ELSE (stdev#11 / mean#4) END > 1.5)) - : +- *(10) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) - : +- Exchange hashpartitioning(w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3, 5) - : +- *(4) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) - : +- *(4) Project [inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12, d_moy#3] - : +- *(4) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#15], Inner, BuildRight - : :- *(4) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12] - : : +- *(4) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#1], Inner, BuildRight - : : :- *(4) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#2] - : : : +- *(4) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#2], Inner, BuildRight - : : : :- *(4) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] - : : : : +- *(4) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) - : : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [i_item_sk#2] - : : : +- *(1) Filter isnotnull(i_item_sk#2) - : : : +- *(1) FileScan parquet default.item[i_item_sk#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [w_warehouse_sk#1, w_warehouse_name#12] - : : +- *(2) Filter isnotnull(w_warehouse_sk#1) - : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#1,w_warehouse_name#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [d_date_sk#15, d_moy#3] - : +- *(3) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#3)) && (d_year#18 = 2001)) && (d_moy#3 = 1)) && isnotnull(d_date_sk#15)) - : +- *(3) FileScan parquet default.date_dim[d_date_sk#15,d_year#18,d_moy#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) - +- *(9) Project [w_warehouse_sk#10, i_item_sk#9, d_moy#6, mean#7, CASE WHEN (mean#7 = 0.0) THEN null ELSE (stdev#19 / mean#7) END AS cov#8] - +- *(9) Filter (CASE WHEN (mean#7 = 0.0) THEN 0.0 ELSE (stdev#19 / mean#7) END > 1.0) - +- *(9) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) - +- Exchange hashpartitioning(w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6, 5) - +- *(8) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) - +- *(8) Project [inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20, d_moy#6] - +- *(8) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#21], Inner, BuildRight - :- *(8) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20] - : +- *(8) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#10], Inner, BuildRight - : :- *(8) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#9] - : : +- *(8) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#9], Inner, BuildRight - : : :- *(8) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] - : : : +- *(8) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) - : : : +- *(8) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct - : : +- ReusedExchange [i_item_sk#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [w_warehouse_sk#10, w_warehouse_name#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(7) Project [d_date_sk#21, d_moy#6] - +- *(7) Filter ((((isnotnull(d_year#22) && isnotnull(d_moy#6)) && (d_year#22 = 2001)) && (d_moy#6 = 2)) && isnotnull(d_date_sk#21)) - +- *(7) FileScan parquet default.date_dim[d_date_sk#21,d_year#22,d_moy#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/simplified.txt deleted file mode 100644 index d80f4a4ee..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/simplified.txt +++ /dev/null @@ -1,69 +0,0 @@ -WholeStageCodegen - Sort [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] - InputAdapter - Exchange [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] #1 - WholeStageCodegen - BroadcastHashJoin [i_item_sk,i_item_sk,w_warehouse_sk,w_warehouse_sk] - Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] - Filter [mean,stdev] - HashAggregate [avg,avg(cast(inv_quantity_on_hand as bigint)),count,d_moy,i_item_sk,m2,n,stddev_samp(cast(inv_quantity_on_hand as double)),sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] - InputAdapter - Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #2 - WholeStageCodegen - HashAggregate [avg,avg,count,count,d_moy,i_item_sk,inv_quantity_on_hand,m2,m2,n,n,sum,sum,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] - Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] - BroadcastHashJoin [d_date_sk,inv_date_sk] - Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] - BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] - BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_sk] [i_item_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [w_warehouse_name,w_warehouse_sk] - Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk,d_moy] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] - Filter [mean,stdev] - HashAggregate [avg,avg(cast(inv_quantity_on_hand as bigint)),count,d_moy,i_item_sk,m2,n,stddev_samp(cast(inv_quantity_on_hand as double)),sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] - InputAdapter - Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #7 - WholeStageCodegen - HashAggregate [avg,avg,count,count,d_moy,i_item_sk,inv_quantity_on_hand,m2,m2,n,n,sum,sum,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] - Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] - BroadcastHashJoin [d_date_sk,inv_date_sk] - Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] - BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] - BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - InputAdapter - ReusedExchange [i_item_sk] [i_item_sk] #3 - InputAdapter - ReusedExchange [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] #4 - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [d_date_sk,d_moy] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/explain.txt deleted file mode 100644 index 16fa8a95c..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/explain.txt +++ /dev/null @@ -1,124 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer_first_name#2 ASC NULLS FIRST,customer_last_name#3 ASC NULLS FIRST,customer_preferred_cust_flag#4 ASC NULLS FIRST,customer_birth_country#5 ASC NULLS FIRST,customer_login#6 ASC NULLS FIRST,customer_email_address#7 ASC NULLS FIRST], output=[customer_id#1,customer_first_name#2,customer_last_name#3,customer_preferred_cust_flag#4,customer_birth_country#5,customer_login#6,customer_email_address#7]) -+- *(25) Project [customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7] - +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#9], Inner, BuildRight, (CASE WHEN (year_total#10 > 0.000000) THEN CheckOverflow((promote_precision(year_total#11) / promote_precision(year_total#10)), DecimalType(38,14)) ELSE null END > CASE WHEN (year_total#12 > 0.000000) THEN CheckOverflow((promote_precision(year_total#13) / promote_precision(year_total#12)), DecimalType(38,14)) ELSE null END) - :- *(25) Project [customer_id#8, customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7, year_total#10, year_total#11, year_total#12] - : +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#14], Inner, BuildRight - : :- *(25) Project [customer_id#8, customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7, year_total#10, year_total#11] - : : +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#15], Inner, BuildRight, (CASE WHEN (year_total#10 > 0.000000) THEN CheckOverflow((promote_precision(year_total#11) / promote_precision(year_total#10)), DecimalType(38,14)) ELSE null END > CASE WHEN (year_total#16 > 0.000000) THEN CheckOverflow((promote_precision(year_total#17) / promote_precision(year_total#16)), DecimalType(38,14)) ELSE null END) - : : :- *(25) Project [customer_id#8, year_total#16, customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7, year_total#17, year_total#10] - : : : +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#18], Inner, BuildRight - : : : :- *(25) BroadcastHashJoin [customer_id#8], [customer_id#1], Inner, BuildRight - : : : : :- Union - : : : : : :- *(4) Filter (isnotnull(year_total#16) && (year_total#16 > 0.000000)) - : : : : : : +- *(4) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) - : : : : : : +- *(3) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : : : : +- *(3) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27, d_year#26] - : : : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight - : : : : : : :- *(3) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_sold_date_sk#31, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27] - : : : : : : : +- *(3) BroadcastHashJoin [c_customer_sk#33], [ss_customer_sk#34], Inner, BuildRight - : : : : : : : :- *(3) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] - : : : : : : : : +- *(3) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) - : : : : : : : : +- *(3) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - : : : : : :- LocalTableScan , [customer_id#35, year_total#36] - : : : : : +- LocalTableScan , [customer_id#37, year_total#38] - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : : : : +- Union - : : : : :- *(8) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) - : : : : : +- *(7) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : : : +- *(7) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27, d_year#26] - : : : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight - : : : : : :- *(7) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_sold_date_sk#31, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27] - : : : : : : +- *(7) BroadcastHashJoin [c_customer_sk#33], [ss_customer_sk#34], Inner, BuildRight - : : : : : : :- *(7) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] - : : : : : : : +- *(7) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) - : : : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - : : : : :- LocalTableScan , [customer_id#35, customer_first_name#39, customer_last_name#40, customer_preferred_cust_flag#41, customer_birth_country#42, customer_login#43, customer_email_address#44, year_total#36] - : : : : +- LocalTableScan , [customer_id#37, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#38] - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : : : +- Union - : : : :- LocalTableScan , [customer_id#18, year_total#10] - : : : :- *(12) Filter (isnotnull(year_total#36) && (year_total#36 > 0.000000)) - : : : : +- *(12) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) - : : : : +- *(11) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : : +- *(11) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51, d_year#26] - : : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#55], [d_date_sk#32], Inner, BuildRight - : : : : :- *(11) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_sold_date_sk#55, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51] - : : : : : +- *(11) BroadcastHashJoin [c_customer_sk#33], [cs_bill_customer_sk#56], Inner, BuildRight - : : : : : :- *(11) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] - : : : : : : +- *(11) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) - : : : : : : +- *(11) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#37, year_total#38] - : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : : +- Union - : : :- LocalTableScan , [customer_id#15, year_total#11] - : : :- *(16) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) - : : : +- *(15) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : +- *(15) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51, d_year#26] - : : : +- *(15) BroadcastHashJoin [cs_sold_date_sk#55], [d_date_sk#32], Inner, BuildRight - : : : :- *(15) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_sold_date_sk#55, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51] - : : : : +- *(15) BroadcastHashJoin [c_customer_sk#33], [cs_bill_customer_sk#56], Inner, BuildRight - : : : : :- *(15) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] - : : : : : +- *(15) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) - : : : : : +- *(15) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#37, year_total#38] - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : +- Union - : :- LocalTableScan , [customer_id#14, year_total#12] - : :- LocalTableScan , [customer_id#35, year_total#36] - : +- *(20) Filter (isnotnull(year_total#38) && (year_total#38 > 0.000000)) - : +- *(20) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) - : +- *(19) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : +- *(19) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57, d_year#26] - : +- *(19) BroadcastHashJoin [ws_sold_date_sk#61], [d_date_sk#32], Inner, BuildRight - : :- *(19) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_sold_date_sk#61, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57] - : : +- *(19) BroadcastHashJoin [c_customer_sk#33], [ws_bill_customer_sk#62], Inner, BuildRight - : : :- *(19) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] - : : : +- *(19) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) - : : : +- *(19) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#9, year_total#13] - :- LocalTableScan , [customer_id#35, year_total#36] - +- *(24) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) - +- *(23) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - +- *(23) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57, d_year#26] - +- *(23) BroadcastHashJoin [ws_sold_date_sk#61], [d_date_sk#32], Inner, BuildRight - :- *(23) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_sold_date_sk#61, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57] - : +- *(23) BroadcastHashJoin [c_customer_sk#33], [ws_bill_customer_sk#62], Inner, BuildRight - : :- *(23) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] - : : +- *(23) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) - : : +- *(23) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)]) - +- Exchange hashpartitioning(w_state#1, i_item_id#2, 5) - +- *(5) HashAggregate(keys=[w_state#1, i_item_id#2], functions=[partial_sum(CASE WHEN (d_date#5 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#5 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)]) - +- *(5) Project [cs_sales_price#6, cr_refunded_cash#7, w_state#1, i_item_id#2, d_date#5] - +- *(5) BroadcastHashJoin [cs_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight - :- *(5) Project [cs_sold_date_sk#8, cs_sales_price#6, cr_refunded_cash#7, w_state#1, i_item_id#2] - : +- *(5) BroadcastHashJoin [cs_item_sk#10], [i_item_sk#11], Inner, BuildRight - : :- *(5) Project [cs_sold_date_sk#8, cs_item_sk#10, cs_sales_price#6, cr_refunded_cash#7, w_state#1] - : : +- *(5) BroadcastHashJoin [cs_warehouse_sk#12], [w_warehouse_sk#13], Inner, BuildRight - : : :- *(5) Project [cs_sold_date_sk#8, cs_warehouse_sk#12, cs_item_sk#10, cs_sales_price#6, cr_refunded_cash#7] - : : : +- *(5) BroadcastHashJoin [cs_order_number#14, cs_item_sk#10], [cr_order_number#15, cr_item_sk#16], LeftOuter, BuildRight - : : : :- *(5) Project [cs_sold_date_sk#8, cs_warehouse_sk#12, cs_item_sk#10, cs_order_number#14, cs_sales_price#6] - : : : : +- *(5) Filter ((isnotnull(cs_warehouse_sk#12) && isnotnull(cs_item_sk#10)) && isnotnull(cs_sold_date_sk#8)) - : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#8,cs_warehouse_sk#12,cs_item_sk#10,cs_order_number#14,cs_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [w_warehouse_sk#13, w_state#1] - : : +- *(2) Filter isnotnull(w_warehouse_sk#13) - : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#13,w_state#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [i_item_sk#11, i_item_id#2] - : +- *(3) Filter (((isnotnull(i_current_price#17) && (i_current_price#17 >= 0.99)) && (i_current_price#17 <= 1.49)) && isnotnull(i_item_sk#11)) - : +- *(3) FileScan parquet default.item[i_item_sk#11,i_item_id#2,i_current_price#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(4) Project [d_date_sk#9, d_date#5] - +- *(4) Filter (((isnotnull(d_date#5) && (d_date#5 >= 10997)) && (d_date#5 <= 11057)) && isnotnull(d_date_sk#9)) - +- *(4) FileScan parquet default.date_dim[d_date_sk#9,d_date#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/simplified.txt deleted file mode 100644 index fcc311d0d..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/simplified.txt +++ /dev/null @@ -1,42 +0,0 @@ -TakeOrderedAndProject [i_item_id,sales_after,sales_before,w_state] - WholeStageCodegen - HashAggregate [i_item_id,sum,sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),w_state] [sales_after,sales_before,sum,sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] - InputAdapter - Exchange [i_item_id,w_state] #1 - WholeStageCodegen - HashAggregate [cr_refunded_cash,cs_sales_price,d_date,i_item_id,sum,sum,sum,sum,w_state] [sum,sum,sum,sum] - Project [cr_refunded_cash,cs_sales_price,d_date,i_item_id,w_state] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cr_refunded_cash,cs_sales_price,cs_sold_date_sk,i_item_id,w_state] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cr_refunded_cash,cs_item_sk,cs_sales_price,cs_sold_date_sk,w_state] - BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] - Project [cr_refunded_cash,cs_item_sk,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] - BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] - Project [cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] - Filter [cs_item_sk,cs_sold_date_sk,cs_warehouse_sk] - Scan parquet default.catalog_sales [cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] [cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [cr_item_sk,cr_order_number,cr_refunded_cash] - Filter [cr_item_sk,cr_order_number] - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash] [cr_item_sk,cr_order_number,cr_refunded_cash] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [w_state,w_warehouse_sk] - Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_state,w_warehouse_sk] [w_state,w_warehouse_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [i_item_id,i_item_sk] - Filter [i_current_price,i_item_sk] - Scan parquet default.item [i_current_price,i_item_id,i_item_sk] [i_current_price,i_item_id,i_item_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date,d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/explain.txt deleted file mode 100644 index 31bc2e514..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/explain.txt +++ /dev/null @@ -1,19 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_product_name#1 ASC NULLS FIRST], output=[i_product_name#1]) -+- *(4) HashAggregate(keys=[i_product_name#1], functions=[]) - +- Exchange hashpartitioning(i_product_name#1, 5) - +- *(3) HashAggregate(keys=[i_product_name#1], functions=[]) - +- *(3) Project [i_product_name#1] - +- *(3) BroadcastHashJoin [i_manufact#2], [i_manufact#2#3], Inner, BuildRight - :- *(3) Project [i_manufact#2, i_product_name#1] - : +- *(3) Filter (((isnotnull(i_manufact_id#4) && (i_manufact_id#4 >= 738)) && (i_manufact_id#4 <= 778)) && isnotnull(i_manufact#2)) - : +- *(3) FileScan parquet default.item[i_manufact_id#4,i_manufact#2,i_product_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), GreaterThanOrEqual(i_manufact_id,738), LessThanOrEqual(i_manufact_id,7..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - +- *(2) Project [i_manufact#2#3] - +- *(2) Filter (if (isnull(alwaysTrue#5)) 0 else item_cnt#6 > 0) - +- *(2) HashAggregate(keys=[i_manufact#2], functions=[count(1)]) - +- Exchange hashpartitioning(i_manufact#2, 5) - +- *(1) HashAggregate(keys=[i_manufact#2], functions=[partial_count(1)]) - +- *(1) Project [i_manufact#2] - +- *(1) Filter (((((i_category#7 = Women) && (((((i_color#8 = powder) || (i_color#8 = khaki)) && ((i_units#9 = Ounce) || (i_units#9 = Oz))) && ((i_size#10 = medium) || (i_size#10 = extra large))) || ((((i_color#8 = brown) || (i_color#8 = honeydew)) && ((i_units#9 = Bunch) || (i_units#9 = Ton))) && ((i_size#10 = N/A) || (i_size#10 = small))))) || ((i_category#7 = Men) && (((((i_color#8 = floral) || (i_color#8 = deep)) && ((i_units#9 = N/A) || (i_units#9 = Dozen))) && ((i_size#10 = petite) || (i_size#10 = large))) || ((((i_color#8 = light) || (i_color#8 = cornflower)) && ((i_units#9 = Box) || (i_units#9 = Pound))) && ((i_size#10 = medium) || (i_size#10 = extra large)))))) || (((i_category#7 = Women) && (((((i_color#8 = midnight) || (i_color#8 = snow)) && ((i_units#9 = Pallet) || (i_units#9 = Gross))) && ((i_size#10 = medium) || (i_size#10 = extra large))) || ((((i_color#8 = cyan) || (i_color#8 = papaya)) && ((i_units#9 = Cup) || (i_units#9 = Dram))) && ((i_size#10 = N/A) || (i_size#10 = small))))) || ((i_category#7 = Men) && (((((i_color#8 = orange) || (i_color#8 = frosted)) && ((i_units#9 = Each) || (i_units#9 = Tbl))) && ((i_size#10 = petite) || (i_size#10 = large))) || ((((i_color#8 = forest) || (i_color#8 = ghost)) && ((i_units#9 = Lb) || (i_units#9 = Bundle))) && ((i_size#10 = medium) || (i_size#10 = extra large))))))) && isnotnull(i_manufact#2)) - +- *(1) FileScan parquet default.item[i_category#7,i_manufact#2,i_size#10,i_color#8,i_units#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(Or(And(EqualTo(i_category,Women),Or(And(And(Or(EqualTo(i_color,powder),EqualTo(i_color,khaki)..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/simplified.txt deleted file mode 100644 index 7196f719a..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/simplified.txt +++ /dev/null @@ -1,25 +0,0 @@ -TakeOrderedAndProject [i_product_name] - WholeStageCodegen - HashAggregate [i_product_name] - InputAdapter - Exchange [i_product_name] #1 - WholeStageCodegen - HashAggregate [i_product_name] - Project [i_product_name] - BroadcastHashJoin [i_manufact,i_manufact] - Project [i_manufact,i_product_name] - Filter [i_manufact,i_manufact_id] - Scan parquet default.item [i_manufact,i_manufact_id,i_product_name] [i_manufact,i_manufact_id,i_product_name] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [i_manufact] - Filter [alwaysTrue,item_cnt] - HashAggregate [count,count(1),i_manufact] [alwaysTrue,count,count(1),i_manufact,item_cnt] - InputAdapter - Exchange [i_manufact] #3 - WholeStageCodegen - HashAggregate [count,count,i_manufact] [count,count] - Project [i_manufact] - Filter [i_category,i_color,i_manufact,i_size,i_units] - Scan parquet default.item [i_category,i_color,i_manufact,i_size,i_units] [i_category,i_color,i_manufact,i_size,i_units] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/explain.txt deleted file mode 100644 index 1e560817c..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/explain.txt +++ /dev/null @@ -1,20 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[sum(ss_ext_sales_price)#1 DESC NULLS LAST,d_year#2 ASC NULLS FIRST,i_category_id#3 ASC NULLS FIRST,i_category#4 ASC NULLS FIRST], output=[d_year#2,i_category_id#3,i_category#4,sum(ss_ext_sales_price)#1]) -+- *(4) HashAggregate(keys=[d_year#2, i_category_id#3, i_category#4], functions=[sum(UnscaledValue(ss_ext_sales_price#5))]) - +- Exchange hashpartitioning(d_year#2, i_category_id#3, i_category#4, 5) - +- *(3) HashAggregate(keys=[d_year#2, i_category_id#3, i_category#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#5))]) - +- *(3) Project [d_year#2, ss_ext_sales_price#5, i_category_id#3, i_category#4] - +- *(3) BroadcastHashJoin [ss_item_sk#6], [i_item_sk#7], Inner, BuildRight - :- *(3) Project [d_year#2, ss_item_sk#6, ss_ext_sales_price#5] - : +- *(3) BroadcastHashJoin [d_date_sk#8], [ss_sold_date_sk#9], Inner, BuildRight - : :- *(3) Project [d_date_sk#8, d_year#2] - : : +- *(3) Filter ((((isnotnull(d_moy#10) && isnotnull(d_year#2)) && (d_moy#10 = 11)) && (d_year#2 = 2000)) && isnotnull(d_date_sk#8)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_year#2,d_moy#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [ss_sold_date_sk#9, ss_item_sk#6, ss_ext_sales_price#5] - : +- *(1) Filter (isnotnull(ss_sold_date_sk#9) && isnotnull(ss_item_sk#6)) - : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#6,ss_ext_sales_price#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(2) Project [i_item_sk#7, i_category_id#3, i_category#4] - +- *(2) Filter ((isnotnull(i_manager_id#11) && (i_manager_id#11 = 1)) && isnotnull(i_item_sk#7)) - +- *(2) FileScan parquet default.item[i_item_sk#7,i_category_id#3,i_category#4,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/simplified.txt deleted file mode 100644 index 67de156ac..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/simplified.txt +++ /dev/null @@ -1,26 +0,0 @@ -TakeOrderedAndProject [d_year,i_category,i_category_id,sum(ss_ext_sales_price)] - WholeStageCodegen - HashAggregate [d_year,i_category,i_category_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price)] - InputAdapter - Exchange [d_year,i_category,i_category_id] #1 - WholeStageCodegen - HashAggregate [d_year,i_category,i_category_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [d_year,i_category,i_category_id,ss_ext_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [d_year,ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [d_date_sk,d_year] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [i_category,i_category_id,i_item_sk] - Filter [i_item_sk,i_manager_id] - Scan parquet default.item [i_category,i_category_id,i_item_sk,i_manager_id] [i_category,i_category_id,i_item_sk,i_manager_id] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/explain.txt deleted file mode 100644 index b68348e44..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/explain.txt +++ /dev/null @@ -1,20 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,s_store_id#2 ASC NULLS FIRST,sun_sales#3 ASC NULLS FIRST,mon_sales#4 ASC NULLS FIRST,tue_sales#5 ASC NULLS FIRST,wed_sales#6 ASC NULLS FIRST,thu_sales#7 ASC NULLS FIRST,fri_sales#8 ASC NULLS FIRST,sat_sales#9 ASC NULLS FIRST], output=[s_store_name#1,s_store_id#2,sun_sales#3,mon_sales#4,tue_sales#5,wed_sales#6,thu_sales#7,fri_sales#8,sat_sales#9]) -+- *(4) HashAggregate(keys=[s_store_name#1, s_store_id#2], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#10 = Sunday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Monday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Tuesday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Wednesday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Thursday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Friday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Saturday) THEN ss_sales_price#11 ELSE null END))]) - +- Exchange hashpartitioning(s_store_name#1, s_store_id#2, 5) - +- *(3) HashAggregate(keys=[s_store_name#1, s_store_id#2], functions=[partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Sunday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Monday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Tuesday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Wednesday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Thursday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Friday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Saturday) THEN ss_sales_price#11 ELSE null END))]) - +- *(3) Project [d_day_name#10, ss_sales_price#11, s_store_id#2, s_store_name#1] - +- *(3) BroadcastHashJoin [ss_store_sk#12], [s_store_sk#13], Inner, BuildRight - :- *(3) Project [d_day_name#10, ss_store_sk#12, ss_sales_price#11] - : +- *(3) BroadcastHashJoin [d_date_sk#14], [ss_sold_date_sk#15], Inner, BuildRight - : :- *(3) Project [d_date_sk#14, d_day_name#10] - : : +- *(3) Filter ((isnotnull(d_year#16) && (d_year#16 = 2000)) && isnotnull(d_date_sk#14)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_day_name#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [ss_sold_date_sk#15, ss_store_sk#12, ss_sales_price#11] - : +- *(1) Filter (isnotnull(ss_sold_date_sk#15) && isnotnull(ss_store_sk#12)) - : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#15,ss_store_sk#12,ss_sales_price#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(2) Project [s_store_sk#13, s_store_id#2, s_store_name#1] - +- *(2) Filter ((isnotnull(s_gmt_offset#17) && (s_gmt_offset#17 = -5.00)) && isnotnull(s_store_sk#13)) - +- *(2) FileScan parquet default.store[s_store_sk#13,s_store_id#2,s_store_name#1,s_gmt_offset#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/simplified.txt deleted file mode 100644 index 136a277e1..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/simplified.txt +++ /dev/null @@ -1,26 +0,0 @@ -TakeOrderedAndProject [fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] - WholeStageCodegen - HashAggregate [s_store_id,s_store_name,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] - InputAdapter - Exchange [s_store_id,s_store_name] #1 - WholeStageCodegen - HashAggregate [d_day_name,s_store_id,s_store_name,ss_sales_price,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [d_day_name,s_store_id,s_store_name,ss_sales_price] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [d_day_name,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [d_date_sk,d_day_name] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_day_name,d_year] [d_date_sk,d_day_name,d_year] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_sales_price,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [s_store_id,s_store_name,s_store_sk] - Filter [s_gmt_offset,s_store_sk] - Scan parquet default.store [s_gmt_offset,s_store_id,s_store_name,s_store_sk] [s_gmt_offset,s_store_id,s_store_name,s_store_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt deleted file mode 100644 index 99c27e16a..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt +++ /dev/null @@ -1,50 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[rnk#1 ASC NULLS FIRST], output=[rnk#1,best_performing#2,worst_performing#3]) -+- *(10) Project [rnk#1, i_product_name#4 AS best_performing#2, i_product_name#5 AS worst_performing#3] - +- *(10) BroadcastHashJoin [item_sk#6], [i_item_sk#7], Inner, BuildRight - :- *(10) Project [rnk#1, item_sk#6, i_product_name#4] - : +- *(10) BroadcastHashJoin [item_sk#8], [i_item_sk#9], Inner, BuildRight - : :- *(10) Project [item_sk#8, rnk#1, item_sk#6] - : : +- *(10) BroadcastHashJoin [rnk#1], [rnk#10], Inner, BuildRight - : : :- *(10) Project [item_sk#8, rnk#1] - : : : +- *(10) Filter ((isnotnull(rnk#1) && (rnk#1 < 11)) && isnotnull(item_sk#8)) - : : : +- Window [rank(rank_col#11) windowspecdefinition(rank_col#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#1], [rank_col#11 ASC NULLS FIRST] - : : : +- *(3) Sort [rank_col#11 ASC NULLS FIRST], false, 0 - : : : +- Exchange SinglePartition - : : : +- *(2) Project [item_sk#8, rank_col#11] - : : : +- *(2) Filter (isnotnull(avg(ss_net_profit#12)#13) && (cast(avg(ss_net_profit#12)#13 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery7089)), DecimalType(13,7)))) - : : : : +- Subquery subquery7089 - : : : : +- *(2) HashAggregate(keys=[ss_store_sk#14], functions=[avg(UnscaledValue(ss_net_profit#12))]) - : : : : +- Exchange hashpartitioning(ss_store_sk#14, 5) - : : : : +- *(1) HashAggregate(keys=[ss_store_sk#14], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) - : : : : +- *(1) Project [ss_store_sk#14, ss_net_profit#12] - : : : : +- *(1) Filter ((isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) && isnull(ss_addr_sk#15)) - : : : : +- *(1) FileScan parquet default.store_sales[ss_addr_sk#15,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)], ReadSchema: struct - : : : +- *(2) HashAggregate(keys=[ss_item_sk#16], functions=[avg(UnscaledValue(ss_net_profit#12))]) - : : : +- Exchange hashpartitioning(ss_item_sk#16, 5) - : : : +- *(1) HashAggregate(keys=[ss_item_sk#16], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) - : : : +- *(1) Project [ss_item_sk#16, ss_net_profit#12] - : : : +- *(1) Filter (isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) - : : : +- *(1) FileScan parquet default.store_sales[ss_item_sk#16,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : +- *(7) Project [item_sk#6, rnk#10] - : : +- *(7) Filter ((isnotnull(rnk#10) && (rnk#10 < 11)) && isnotnull(item_sk#6)) - : : +- Window [rank(rank_col#17) windowspecdefinition(rank_col#17 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#10], [rank_col#17 DESC NULLS LAST] - : : +- *(6) Sort [rank_col#17 DESC NULLS LAST], false, 0 - : : +- Exchange SinglePartition - : : +- *(5) Project [item_sk#6, rank_col#17] - : : +- *(5) Filter (isnotnull(avg(ss_net_profit#12)#18) && (cast(avg(ss_net_profit#12)#18 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery7094)), DecimalType(13,7)))) - : : : +- Subquery subquery7094 - : : : +- *(2) HashAggregate(keys=[ss_store_sk#14], functions=[avg(UnscaledValue(ss_net_profit#12))]) - : : : +- Exchange hashpartitioning(ss_store_sk#14, 5) - : : : +- *(1) HashAggregate(keys=[ss_store_sk#14], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) - : : : +- *(1) Project [ss_store_sk#14, ss_net_profit#12] - : : : +- *(1) Filter ((isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) && isnull(ss_addr_sk#15)) - : : : +- *(1) FileScan parquet default.store_sales[ss_addr_sk#15,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)], ReadSchema: struct - : : +- *(5) HashAggregate(keys=[ss_item_sk#16], functions=[avg(UnscaledValue(ss_net_profit#12))]) - : : +- ReusedExchange [ss_item_sk#16, sum#19, count#20], Exchange hashpartitioning(ss_item_sk#16, 5) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(8) Project [i_item_sk#9, i_product_name#4] - : +- *(8) Filter isnotnull(i_item_sk#9) - : +- *(8) FileScan parquet default.item[i_item_sk#9,i_product_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - +- ReusedExchange [i_item_sk#7, i_product_name#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/simplified.txt deleted file mode 100644 index 139fd1f7b..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/simplified.txt +++ /dev/null @@ -1,72 +0,0 @@ -TakeOrderedAndProject [best_performing,rnk,worst_performing] - WholeStageCodegen - Project [i_product_name,i_product_name,rnk] - BroadcastHashJoin [i_item_sk,item_sk] - Project [i_product_name,item_sk,rnk] - BroadcastHashJoin [i_item_sk,item_sk] - Project [item_sk,item_sk,rnk] - BroadcastHashJoin [rnk,rnk] - Project [item_sk,rnk] - Filter [item_sk,rnk] - InputAdapter - Window [rank_col] - WholeStageCodegen - Sort [rank_col] - InputAdapter - Exchange #1 - WholeStageCodegen - Project [item_sk,rank_col] - Filter [avg(ss_net_profit)] - Subquery #1 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_net_profit)),count,ss_store_sk,sum] [avg(UnscaledValue(ss_net_profit)),count,rank_col,sum] - InputAdapter - Exchange [ss_store_sk] #3 - WholeStageCodegen - HashAggregate [count,count,ss_net_profit,ss_store_sk,sum,sum] [count,count,sum,sum] - Project [ss_net_profit,ss_store_sk] - Filter [ss_addr_sk,ss_store_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_net_profit,ss_store_sk] [ss_addr_sk,ss_net_profit,ss_store_sk] - HashAggregate [avg(UnscaledValue(ss_net_profit)),count,ss_item_sk,sum] [avg(UnscaledValue(ss_net_profit)),avg(ss_net_profit),count,item_sk,rank_col,sum] - InputAdapter - Exchange [ss_item_sk] #2 - WholeStageCodegen - HashAggregate [count,count,ss_item_sk,ss_net_profit,sum,sum] [count,count,sum,sum] - Project [ss_item_sk,ss_net_profit] - Filter [ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_net_profit,ss_store_sk] [ss_item_sk,ss_net_profit,ss_store_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [item_sk,rnk] - Filter [item_sk,rnk] - InputAdapter - Window [rank_col] - WholeStageCodegen - Sort [rank_col] - InputAdapter - Exchange #5 - WholeStageCodegen - Project [item_sk,rank_col] - Filter [avg(ss_net_profit)] - Subquery #2 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_net_profit)),count,ss_store_sk,sum] [avg(UnscaledValue(ss_net_profit)),count,rank_col,sum] - InputAdapter - Exchange [ss_store_sk] #6 - WholeStageCodegen - HashAggregate [count,count,ss_net_profit,ss_store_sk,sum,sum] [count,count,sum,sum] - Project [ss_net_profit,ss_store_sk] - Filter [ss_addr_sk,ss_store_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_net_profit,ss_store_sk] [ss_addr_sk,ss_net_profit,ss_store_sk] - HashAggregate [avg(UnscaledValue(ss_net_profit)),count,ss_item_sk,sum] [avg(UnscaledValue(ss_net_profit)),avg(ss_net_profit),count,item_sk,rank_col,sum] - InputAdapter - ReusedExchange [count,ss_item_sk,sum] [count,ss_item_sk,sum] #2 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [i_item_sk,i_product_name] - Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_product_name] [i_item_sk,i_product_name] - InputAdapter - ReusedExchange [i_item_sk,i_product_name] [i_item_sk,i_product_name] #7 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/explain.txt deleted file mode 100644 index ad97e7d1a..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/explain.txt +++ /dev/null @@ -1,39 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[ca_zip#1 ASC NULLS FIRST,ca_city#2 ASC NULLS FIRST], output=[ca_zip#1,ca_city#2,sum(ws_sales_price)#3]) -+- *(7) HashAggregate(keys=[ca_zip#1, ca_city#2], functions=[sum(UnscaledValue(ws_sales_price#4))]) - +- Exchange hashpartitioning(ca_zip#1, ca_city#2, 5) - +- *(6) HashAggregate(keys=[ca_zip#1, ca_city#2], functions=[partial_sum(UnscaledValue(ws_sales_price#4))]) - +- *(6) Project [ws_sales_price#4, ca_city#2, ca_zip#1] - +- *(6) Filter (substring(ca_zip#1, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) || exists#5) - +- *(6) BroadcastHashJoin [i_item_id#6], [i_item_id#6#7], ExistenceJoin(exists#5), BuildRight - :- *(6) Project [ws_sales_price#4, ca_city#2, ca_zip#1, i_item_id#6] - : +- *(6) BroadcastHashJoin [ws_item_sk#8], [i_item_sk#9], Inner, BuildRight - : :- *(6) Project [ws_item_sk#8, ws_sales_price#4, ca_city#2, ca_zip#1] - : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#10], [d_date_sk#11], Inner, BuildRight - : : :- *(6) Project [ws_sold_date_sk#10, ws_item_sk#8, ws_sales_price#4, ca_city#2, ca_zip#1] - : : : +- *(6) BroadcastHashJoin [c_current_addr_sk#12], [ca_address_sk#13], Inner, BuildRight - : : : :- *(6) Project [ws_sold_date_sk#10, ws_item_sk#8, ws_sales_price#4, c_current_addr_sk#12] - : : : : +- *(6) BroadcastHashJoin [ws_bill_customer_sk#14], [c_customer_sk#15], Inner, BuildRight - : : : : :- *(6) Project [ws_sold_date_sk#10, ws_item_sk#8, ws_bill_customer_sk#14, ws_sales_price#4] - : : : : : +- *(6) Filter ((isnotnull(ws_bill_customer_sk#14) && isnotnull(ws_sold_date_sk#10)) && isnotnull(ws_item_sk#8)) - : : : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#10,ws_item_sk#8,ws_bill_customer_sk#14,ws_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(1) Project [c_customer_sk#15, c_current_addr_sk#12] - : : : : +- *(1) Filter (isnotnull(c_customer_sk#15) && isnotnull(c_current_addr_sk#12)) - : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#15,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [ca_address_sk#13, ca_city#2, ca_zip#1] - : : : +- *(2) Filter isnotnull(ca_address_sk#13) - : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#13,ca_city#2,ca_zip#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [d_date_sk#11] - : : +- *(3) Filter ((((isnotnull(d_qoy#16) && isnotnull(d_year#17)) && (d_qoy#16 = 2)) && (d_year#17 = 2001)) && isnotnull(d_date_sk#11)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#11,d_year#17,d_qoy#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [i_item_sk#9, i_item_id#6] - : +- *(4) Filter isnotnull(i_item_sk#9) - : +- *(4) FileScan parquet default.item[i_item_sk#9,i_item_id#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - +- *(5) Project [i_item_id#6 AS i_item_id#6#7] - +- *(5) Filter i_item_sk#9 IN (2,3,5,7,11,13,17,19,23,29) - +- *(5) FileScan parquet default.item[i_item_sk#9,i_item_id#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_item_sk, [2,3,5,7,11,13,17,19,23,29])], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/simplified.txt deleted file mode 100644 index 72b6b03ea..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/simplified.txt +++ /dev/null @@ -1,51 +0,0 @@ -TakeOrderedAndProject [ca_city,ca_zip,sum(ws_sales_price)] - WholeStageCodegen - HashAggregate [ca_city,ca_zip,sum,sum(UnscaledValue(ws_sales_price))] [sum,sum(UnscaledValue(ws_sales_price)),sum(ws_sales_price)] - InputAdapter - Exchange [ca_city,ca_zip] #1 - WholeStageCodegen - HashAggregate [ca_city,ca_zip,sum,sum,ws_sales_price] [sum,sum] - Project [ca_city,ca_zip,ws_sales_price] - Filter [ca_zip,exists] - BroadcastHashJoin [i_item_id,i_item_id] - Project [ca_city,ca_zip,i_item_id,ws_sales_price] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ca_city,ca_zip,ws_item_sk,ws_sales_price] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ca_city,ca_zip,ws_item_sk,ws_sales_price,ws_sold_date_sk] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [ws_bill_customer_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] - Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] [ws_bill_customer_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [c_current_addr_sk,c_customer_sk] - Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [ca_address_sk,ca_city,ca_zip] - Filter [ca_address_sk] - Scan parquet default.customer_address [ca_address_sk,ca_city,ca_zip] [ca_address_sk,ca_city,ca_zip] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_qoy,d_year] - Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [i_item_id] - Filter [i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/explain.txt deleted file mode 100644 index a5d4817de..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/explain.txt +++ /dev/null @@ -1,41 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_name#2 ASC NULLS FIRST,ca_city#3 ASC NULLS FIRST,bought_city#4 ASC NULLS FIRST,ss_ticket_number#5 ASC NULLS FIRST], output=[c_last_name#1,c_first_name#2,ca_city#3,bought_city#4,ss_ticket_number#5,amt#6,profit#7]) -+- *(8) Project [c_last_name#1, c_first_name#2, ca_city#3, bought_city#4, ss_ticket_number#5, amt#6, profit#7] - +- *(8) BroadcastHashJoin [c_current_addr_sk#8], [ca_address_sk#9], Inner, BuildRight, NOT (ca_city#3 = bought_city#4) - :- *(8) Project [ss_ticket_number#5, bought_city#4, amt#6, profit#7, c_current_addr_sk#8, c_first_name#2, c_last_name#1] - : +- *(8) BroadcastHashJoin [ss_customer_sk#10], [c_customer_sk#11], Inner, BuildRight - : :- *(8) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#10, ss_addr_sk#12, ca_city#3], functions=[sum(UnscaledValue(ss_coupon_amt#13)), sum(UnscaledValue(ss_net_profit#14))]) - : : +- Exchange hashpartitioning(ss_ticket_number#5, ss_customer_sk#10, ss_addr_sk#12, ca_city#3, 5) - : : +- *(5) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#10, ss_addr_sk#12, ca_city#3], functions=[partial_sum(UnscaledValue(ss_coupon_amt#13)), partial_sum(UnscaledValue(ss_net_profit#14))]) - : : +- *(5) Project [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14, ca_city#3] - : : +- *(5) BroadcastHashJoin [ss_addr_sk#12], [ca_address_sk#9], Inner, BuildRight - : : :- *(5) Project [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] - : : : +- *(5) BroadcastHashJoin [ss_hdemo_sk#15], [hd_demo_sk#16], Inner, BuildRight - : : : :- *(5) Project [ss_customer_sk#10, ss_hdemo_sk#15, ss_addr_sk#12, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] - : : : : +- *(5) BroadcastHashJoin [ss_store_sk#17], [s_store_sk#18], Inner, BuildRight - : : : : :- *(5) Project [ss_customer_sk#10, ss_hdemo_sk#15, ss_addr_sk#12, ss_store_sk#17, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] - : : : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight - : : : : : :- *(5) Project [ss_sold_date_sk#19, ss_customer_sk#10, ss_hdemo_sk#15, ss_addr_sk#12, ss_store_sk#17, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] - : : : : : : +- *(5) Filter ((((isnotnull(ss_sold_date_sk#19) && isnotnull(ss_store_sk#17)) && isnotnull(ss_hdemo_sk#15)) && isnotnull(ss_addr_sk#12)) && isnotnull(ss_customer_sk#10)) - : : : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_customer_sk#10,ss_hdemo_sk#15,ss_addr_sk#12,ss_store_sk#17,ss_ticket_number#5,ss_coupon_amt#13,ss_net_profit#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [s_store_sk#18] - : : : : +- *(2) Filter (s_city#23 IN (Fairview,Midway) && isnotnull(s_store_sk#18)) - : : : : +- *(2) FileScan parquet default.store[s_store_sk#18,s_city#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [In(s_city, [Fairview,Midway]), IsNotNull(s_store_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(3) Project [hd_demo_sk#16] - : : : +- *(3) Filter (((hd_dep_count#24 = 4) || (hd_vehicle_count#25 = 3)) && isnotnull(hd_demo_sk#16)) - : : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#16,hd_dep_count#24,hd_vehicle_count#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(4) Project [ca_address_sk#9, ca_city#3] - : : +- *(4) Filter (isnotnull(ca_address_sk#9) && isnotnull(ca_city#3)) - : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#9,ca_city#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(6) Project [c_customer_sk#11, c_current_addr_sk#8, c_first_name#2, c_last_name#1] - : +- *(6) Filter (isnotnull(c_customer_sk#11) && isnotnull(c_current_addr_sk#8)) - : +- *(6) FileScan parquet default.customer[c_customer_sk#11,c_current_addr_sk#8,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct - +- ReusedExchange [ca_address_sk#9, ca_city#3], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/simplified.txt deleted file mode 100644 index 213915231..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/simplified.txt +++ /dev/null @@ -1,54 +0,0 @@ -TakeOrderedAndProject [amt,bought_city,c_first_name,c_last_name,ca_city,profit,ss_ticket_number] - WholeStageCodegen - Project [amt,bought_city,c_first_name,c_last_name,ca_city,profit,ss_ticket_number] - BroadcastHashJoin [bought_city,c_current_addr_sk,ca_address_sk,ca_city] - Project [amt,bought_city,c_current_addr_sk,c_first_name,c_last_name,profit,ss_ticket_number] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] [amt,bought_city,profit,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] - InputAdapter - Exchange [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #1 - WholeStageCodegen - HashAggregate [ca_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number,sum,sum,sum,sum] [sum,sum,sum,sum] - Project [ca_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_ticket_number] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_store_sk,ss_ticket_number] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Filter [ss_addr_sk,ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_dow,d_year] - Scan parquet default.date_dim [d_date_sk,d_dow,d_year] [d_date_sk,d_dow,d_year] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [s_store_sk] - Filter [s_city,s_store_sk] - Scan parquet default.store [s_city,s_store_sk] [s_city,s_store_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [hd_demo_sk] - Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [ca_address_sk,ca_city] - Filter [ca_address_sk,ca_city] - Scan parquet default.customer_address [ca_address_sk,ca_city] [ca_address_sk,ca_city] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] - Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] - InputAdapter - ReusedExchange [ca_address_sk,ca_city] [ca_address_sk,ca_city] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/explain.txt deleted file mode 100644 index e7dcf37e4..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/explain.txt +++ /dev/null @@ -1,51 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,s_store_name#3 ASC NULLS FIRST], output=[i_category#4,i_brand#5,s_store_name#3,s_company_name#6,d_year#7,d_moy#8,avg_monthly_sales#2,sum_sales#1,psum#9,nsum#10]) -+- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, avg_monthly_sales#2, sum_sales#1, sum_sales#11 AS psum#9, sum_sales#12 AS nsum#10] - +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, (rn#18 - 1)], Inner, BuildRight - :- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13, sum_sales#11] - : +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, (rn#23 + 1)], Inner, BuildRight - : :- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13] - : : +- *(22) Filter (((isnotnull(avg_monthly_sales#2) && (avg_monthly_sales#2 > 0.000000)) && (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000)) && isnotnull(rn#13)) - : : +- Window [avg(_w0#24) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7] - : : +- *(7) Filter (isnotnull(d_year#7) && (d_year#7 = 1999)) - : : +- Window [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#13], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] - : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#6 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, 5) - : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#25))]) - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 5) - : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#25))]) - : : +- *(4) Project [i_brand#5, i_category#4, ss_sales_price#25, d_year#7, d_moy#8, s_store_name#3, s_company_name#6] - : : +- *(4) BroadcastHashJoin [ss_store_sk#26], [s_store_sk#27], Inner, BuildRight - : : :- *(4) Project [i_brand#5, i_category#4, ss_store_sk#26, ss_sales_price#25, d_year#7, d_moy#8] - : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#28], [d_date_sk#29], Inner, BuildRight - : : : :- *(4) Project [i_brand#5, i_category#4, ss_sold_date_sk#28, ss_store_sk#26, ss_sales_price#25] - : : : : +- *(4) BroadcastHashJoin [i_item_sk#30], [ss_item_sk#31], Inner, BuildRight - : : : : :- *(4) Project [i_item_sk#30, i_brand#5, i_category#4] - : : : : : +- *(4) Filter ((isnotnull(i_item_sk#30) && isnotnull(i_brand#5)) && isnotnull(i_category#4)) - : : : : : +- *(4) FileScan parquet default.item[i_item_sk#30,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : : : +- *(1) Project [ss_sold_date_sk#28, ss_item_sk#31, ss_store_sk#26, ss_sales_price#25] - : : : : +- *(1) Filter ((isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#28)) && isnotnull(ss_store_sk#26)) - : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#28,ss_item_sk#31,ss_store_sk#26,ss_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [d_date_sk#29, d_year#7, d_moy#8] - : : : +- *(2) Filter ((((d_year#7 = 1999) || ((d_year#7 = 1998) && (d_moy#8 = 12))) || ((d_year#7 = 2000) && (d_moy#8 = 1))) && isnotnull(d_date_sk#29)) - : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#29,d_year#7,d_moy#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [s_store_sk#27, s_store_name#3, s_company_name#6] - : : +- *(3) Filter ((isnotnull(s_store_sk#27) && isnotnull(s_store_name#3)) && isnotnull(s_company_name#6)) - : : +- *(3) FileScan parquet default.store[s_store_sk#27,s_store_name#3,s_company_name#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] + 1))) - : +- *(14) Project [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#11, rn#23] - : +- *(14) Filter isnotnull(rn#23) - : +- Window [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#23], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] - : +- *(13) Sort [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, s_store_name#21 ASC NULLS FIRST, s_company_name#22 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 - : +- Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 5) - : +- *(12) HashAggregate(keys=[i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33], functions=[sum(UnscaledValue(ss_sales_price#25))]) - : +- ReusedExchange [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33, sum#34], Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 5) - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] - 1))) - +- *(21) Project [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, sum_sales#12, rn#18] - +- *(21) Filter isnotnull(rn#18) - +- Window [rank(d_year#35, d_moy#36) windowspecdefinition(i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#18], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17], [d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST] - +- *(20) Sort [i_category#14 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#16 ASC NULLS FIRST, s_company_name#17 ASC NULLS FIRST, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST], false, 0 - +- ReusedExchange [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35, d_moy#36, sum_sales#12], Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 5) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/simplified.txt deleted file mode 100644 index f0ed21cce..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/simplified.txt +++ /dev/null @@ -1,77 +0,0 @@ -TakeOrderedAndProject [avg_monthly_sales,d_moy,d_year,i_brand,i_category,nsum,psum,s_company_name,s_store_name,sum_sales] - WholeStageCodegen - Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales,sum_sales,sum_sales] - BroadcastHashJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] - Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,rn,s_company_name,s_store_name,sum_sales,sum_sales] - BroadcastHashJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] - Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] - Filter [avg_monthly_sales,rn,sum_sales] - InputAdapter - Window [_w0,d_year,i_brand,i_category,s_company_name,s_store_name] - WholeStageCodegen - Filter [d_year] - InputAdapter - Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] - WholeStageCodegen - Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] - InputAdapter - Exchange [i_brand,i_category,s_company_name,s_store_name] #1 - WholeStageCodegen - HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] - InputAdapter - Exchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] #2 - WholeStageCodegen - HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price,sum,sum] [sum,sum] - Project [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [d_moy,d_year,i_brand,i_category,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_brand,i_category,ss_sales_price,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_brand,i_category,i_item_sk] - Filter [i_brand,i_category,i_item_sk] - Scan parquet default.item [i_brand,i_category,i_item_sk] [i_brand,i_category,i_item_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk,d_moy,d_year] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [s_company_name,s_store_name,s_store_sk] - Filter [s_company_name,s_store_name,s_store_sk] - Scan parquet default.store [s_company_name,s_store_name,s_store_sk] [s_company_name,s_store_name,s_store_sk] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] - Filter [rn] - InputAdapter - Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] - WholeStageCodegen - Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] - InputAdapter - Exchange [i_brand,i_category,s_company_name,s_store_name] #7 - WholeStageCodegen - HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum,sum(UnscaledValue(ss_sales_price))] [sum,sum(UnscaledValue(ss_sales_price)),sum_sales] - InputAdapter - ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] #2 - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] - Filter [rn] - InputAdapter - Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] - WholeStageCodegen - Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] - InputAdapter - ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] #7 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/explain.txt deleted file mode 100644 index 735a739a8..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/explain.txt +++ /dev/null @@ -1,31 +0,0 @@ -== Physical Plan == -*(6) HashAggregate(keys=[], functions=[sum(cast(ss_quantity#1 as bigint))]) -+- Exchange SinglePartition - +- *(5) HashAggregate(keys=[], functions=[partial_sum(cast(ss_quantity#1 as bigint))]) - +- *(5) Project [ss_quantity#1] - +- *(5) BroadcastHashJoin [ss_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight - :- *(5) Project [ss_sold_date_sk#2, ss_quantity#1] - : +- *(5) BroadcastHashJoin [ss_addr_sk#4], [ca_address_sk#5], Inner, BuildRight, ((((ca_state#6 IN (CO,OH,TX) && (ss_net_profit#7 >= 0.00)) && (ss_net_profit#7 <= 2000.00)) || ((ca_state#6 IN (OR,MN,KY) && (ss_net_profit#7 >= 150.00)) && (ss_net_profit#7 <= 3000.00))) || ((ca_state#6 IN (VA,CA,MS) && (ss_net_profit#7 >= 50.00)) && (ss_net_profit#7 <= 25000.00))) - : :- *(5) Project [ss_sold_date_sk#2, ss_addr_sk#4, ss_quantity#1, ss_net_profit#7] - : : +- *(5) BroadcastHashJoin [ss_cdemo_sk#8], [cd_demo_sk#9], Inner, BuildRight, ((((((cd_marital_status#10 = M) && (cd_education_status#11 = 4 yr Degree)) && (ss_sales_price#12 >= 100.00)) && (ss_sales_price#12 <= 150.00)) || ((((cd_marital_status#10 = D) && (cd_education_status#11 = 2 yr Degree)) && (ss_sales_price#12 >= 50.00)) && (ss_sales_price#12 <= 100.00))) || ((((cd_marital_status#10 = S) && (cd_education_status#11 = College)) && (ss_sales_price#12 >= 150.00)) && (ss_sales_price#12 <= 200.00))) - : : :- *(5) Project [ss_sold_date_sk#2, ss_cdemo_sk#8, ss_addr_sk#4, ss_quantity#1, ss_sales_price#12, ss_net_profit#7] - : : : +- *(5) BroadcastHashJoin [ss_store_sk#13], [s_store_sk#14], Inner, BuildRight - : : : :- *(5) Project [ss_sold_date_sk#2, ss_cdemo_sk#8, ss_addr_sk#4, ss_store_sk#13, ss_quantity#1, ss_sales_price#12, ss_net_profit#7] - : : : : +- *(5) Filter (((isnotnull(ss_store_sk#13) && isnotnull(ss_cdemo_sk#8)) && isnotnull(ss_addr_sk#4)) && isnotnull(ss_sold_date_sk#2)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#2,ss_cdemo_sk#8,ss_addr_sk#4,ss_store_sk#13,ss_quantity#1,ss_sales_price#12,ss_net_profit#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] - : : +- *(2) Filter isnotnull(cd_demo_sk#9) - : : +- *(2) FileScan parquet default.customer_demographics[cd_demo_sk#9,cd_marital_status#10,cd_education_status#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [ca_address_sk#5, ca_state#6] - : +- *(3) Filter ((isnotnull(ca_country#15) && (ca_country#15 = United States)) && isnotnull(ca_address_sk#5)) - : +- *(3) FileScan parquet default.customer_address[ca_address_sk#5,ca_state#6,ca_country#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(4) Project [d_date_sk#3] - +- *(4) Filter ((isnotnull(d_year#16) && (d_year#16 = 2001)) && isnotnull(d_date_sk#3)) - +- *(4) FileScan parquet default.date_dim[d_date_sk#3,d_year#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/simplified.txt deleted file mode 100644 index b4f78ab5c..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/simplified.txt +++ /dev/null @@ -1,41 +0,0 @@ -WholeStageCodegen - HashAggregate [sum,sum(cast(ss_quantity as bigint))] [sum,sum(cast(ss_quantity as bigint)),sum(ss_quantity)] - InputAdapter - Exchange #1 - WholeStageCodegen - HashAggregate [ss_quantity,sum,sum] [sum,sum] - Project [ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_quantity,ss_sold_date_sk] - BroadcastHashJoin [ca_address_sk,ca_state,ss_addr_sk,ss_net_profit] - Project [ss_addr_sk,ss_net_profit,ss_quantity,ss_sold_date_sk] - BroadcastHashJoin [cd_demo_sk,cd_education_status,cd_marital_status,ss_cdemo_sk,ss_sales_price] - Project [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_addr_sk,ss_cdemo_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_sk] [s_store_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [cd_demo_sk,cd_education_status,cd_marital_status] - Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [ca_address_sk,ca_state] - Filter [ca_address_sk,ca_country] - Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] [ca_address_sk,ca_country,ca_state] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/explain.txt deleted file mode 100644 index 2b0459139..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/explain.txt +++ /dev/null @@ -1,77 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,return_rank#2 ASC NULLS FIRST,currency_rank#3 ASC NULLS FIRST], output=[channel#1,item#4,return_ratio#5,return_rank#2,currency_rank#3]) -+- *(23) HashAggregate(keys=[channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3], functions=[]) - +- Exchange hashpartitioning(channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3, 5) - +- *(22) HashAggregate(keys=[channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3], functions=[]) - +- Union - :- *(7) Project [web AS channel#1, item#4, return_ratio#5, return_rank#2, currency_rank#3] - : +- *(7) Filter ((return_rank#2 <= 10) || (currency_rank#3 <= 10)) - : +- Window [rank(currency_ratio#6) windowspecdefinition(currency_ratio#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#3], [currency_ratio#6 ASC NULLS FIRST] - : +- *(6) Sort [currency_ratio#6 ASC NULLS FIRST], false, 0 - : +- Window [rank(return_ratio#5) windowspecdefinition(return_ratio#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#2], [return_ratio#5 ASC NULLS FIRST] - : +- *(5) Sort [return_ratio#5 ASC NULLS FIRST], false, 0 - : +- Exchange SinglePartition - : +- *(4) HashAggregate(keys=[ws_item_sk#7], functions=[sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), sum(cast(coalesce(ws_quantity#9, 0) as bigint)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) - : +- Exchange hashpartitioning(ws_item_sk#7, 5) - : +- *(3) HashAggregate(keys=[ws_item_sk#7], functions=[partial_sum(cast(coalesce(wr_return_quantity#8, 0) as bigint)), partial_sum(cast(coalesce(ws_quantity#9, 0) as bigint)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#11 as decimal(12,2)), 0.00))]) - : +- *(3) Project [ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] - : +- *(3) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight - : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_quantity#9, ws_net_paid#11, wr_return_quantity#8, wr_return_amt#10] - : : +- *(3) BroadcastHashJoin [cast(ws_order_number#14 as bigint), cast(ws_item_sk#7 as bigint)], [wr_order_number#15, wr_item_sk#16], Inner, BuildRight - : : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#7, ws_order_number#14, ws_quantity#9, ws_net_paid#11] - : : : +- *(3) Filter ((((((((isnotnull(ws_net_profit#17) && isnotnull(ws_net_paid#11)) && isnotnull(ws_quantity#9)) && (ws_net_profit#17 > 1.00)) && (ws_net_paid#11 > 0.00)) && (ws_quantity#9 > 0)) && isnotnull(ws_order_number#14)) && isnotnull(ws_item_sk#7)) && isnotnull(ws_sold_date_sk#12)) - : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#7,ws_order_number#14,ws_quantity#9,ws_net_paid#11,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_net_paid), IsNotNull(ws_quantity), GreaterThan(ws_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(wr_item_sk#16)) && isnotnull(wr_order_number#15)) - : : +- *(1) FileScan parquet default.web_returns[wr_item_sk#16,wr_order_number#15,wr_return_quantity#8,wr_return_amt#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_item_sk), IsNotNull(..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#13] - : +- *(2) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#19)) && (d_year#18 = 2001)) && (d_moy#19 = 12)) && isnotnull(d_date_sk#13)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_year#18,d_moy#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)], ReadSchema: struct - :- *(14) Project [catalog AS channel#20, item#21, return_ratio#22, return_rank#23, currency_rank#24] - : +- *(14) Filter ((return_rank#23 <= 10) || (currency_rank#24 <= 10)) - : +- Window [rank(return_ratio#22) windowspecdefinition(return_ratio#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#23], [return_ratio#22 ASC NULLS FIRST] - : +- *(13) Sort [return_ratio#22 ASC NULLS FIRST], false, 0 - : +- *(13) Project [item#21, return_ratio#22, currency_rank#24] - : +- Window [rank(currency_ratio#25) windowspecdefinition(currency_ratio#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#24], [currency_ratio#25 ASC NULLS FIRST] - : +- *(12) Sort [currency_ratio#25 ASC NULLS FIRST], false, 0 - : +- Exchange SinglePartition - : +- *(11) HashAggregate(keys=[cs_item_sk#26], functions=[sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), sum(cast(coalesce(cs_quantity#28, 0) as bigint)), sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) - : +- Exchange hashpartitioning(cs_item_sk#26, 5) - : +- *(10) HashAggregate(keys=[cs_item_sk#26], functions=[partial_sum(cast(coalesce(cr_return_quantity#27, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#28, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#29 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#30 as decimal(12,2)), 0.00))]) - : +- *(10) Project [cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] - : +- *(10) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#13], Inner, BuildRight - : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_quantity#28, cs_net_paid#30, cr_return_quantity#27, cr_return_amount#29] - : : +- *(10) BroadcastHashJoin [cs_order_number#32, cs_item_sk#26], [cr_order_number#33, cr_item_sk#34], Inner, BuildRight - : : :- *(10) Project [cs_sold_date_sk#31, cs_item_sk#26, cs_order_number#32, cs_quantity#28, cs_net_paid#30] - : : : +- *(10) Filter ((((((((isnotnull(cs_net_paid#30) && isnotnull(cs_quantity#28)) && isnotnull(cs_net_profit#35)) && (cs_net_profit#35 > 1.00)) && (cs_net_paid#30 > 0.00)) && (cs_quantity#28 > 0)) && isnotnull(cs_item_sk#26)) && isnotnull(cs_order_number#32)) && isnotnull(cs_sold_date_sk#31)) - : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#26,cs_order_number#32,cs_quantity#28,cs_net_paid#30,cs_net_profit#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_net_paid), IsNotNull(cs_quantity), IsNotNull(cs_net_profit), GreaterThan(cs_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(cr_order_number#33)) && isnotnull(cr_item_sk#34)) - : : +- *(8) FileScan parquet default.catalog_returns[cr_item_sk#34,cr_order_number#33,cr_return_quantity#27,cr_return_amount#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number),..., ReadSchema: struct - : +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(21) Project [store AS channel#36, item#37, return_ratio#38, return_rank#39, currency_rank#40] - +- *(21) Filter ((return_rank#39 <= 10) || (currency_rank#40 <= 10)) - +- Window [rank(return_ratio#38) windowspecdefinition(return_ratio#38 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#39], [return_ratio#38 ASC NULLS FIRST] - +- *(20) Sort [return_ratio#38 ASC NULLS FIRST], false, 0 - +- *(20) Project [item#37, return_ratio#38, currency_rank#40] - +- Window [rank(currency_ratio#41) windowspecdefinition(currency_ratio#41 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#40], [currency_ratio#41 ASC NULLS FIRST] - +- *(19) Sort [currency_ratio#41 ASC NULLS FIRST], false, 0 - +- Exchange SinglePartition - +- *(18) HashAggregate(keys=[ss_item_sk#42], functions=[sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), sum(cast(coalesce(ss_quantity#44, 0) as bigint)), sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) - +- Exchange hashpartitioning(ss_item_sk#42, 5) - +- *(17) HashAggregate(keys=[ss_item_sk#42], functions=[partial_sum(cast(coalesce(sr_return_quantity#43, 0) as bigint)), partial_sum(cast(coalesce(ss_quantity#44, 0) as bigint)), partial_sum(coalesce(cast(sr_return_amt#45 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#46 as decimal(12,2)), 0.00))]) - +- *(17) Project [ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] - +- *(17) BroadcastHashJoin [ss_sold_date_sk#47], [d_date_sk#13], Inner, BuildRight - :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_quantity#44, ss_net_paid#46, sr_return_quantity#43, sr_return_amt#45] - : +- *(17) BroadcastHashJoin [cast(ss_ticket_number#48 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#49, sr_item_sk#50], Inner, BuildRight - : :- *(17) Project [ss_sold_date_sk#47, ss_item_sk#42, ss_ticket_number#48, ss_quantity#44, ss_net_paid#46] - : : +- *(17) Filter ((((((((isnotnull(ss_net_paid#46) && isnotnull(ss_net_profit#51)) && isnotnull(ss_quantity#44)) && (ss_net_profit#51 > 1.00)) && (ss_net_paid#46 > 0.00)) && (ss_quantity#44 > 0)) && isnotnull(ss_ticket_number#48)) && isnotnull(ss_item_sk#42)) && isnotnull(ss_sold_date_sk#47)) - : : +- *(17) FileScan parquet default.store_sales[ss_sold_date_sk#47,ss_item_sk#42,ss_ticket_number#48,ss_quantity#44,ss_net_paid#46,ss_net_profit#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_net_paid), IsNotNull(ss_net_profit), IsNotNull(ss_quantity), GreaterThan(ss_net_pro..., ReadSchema: struct 10000.00)) && isnotnull(sr_item_sk#50)) && isnotnull(sr_ticket_number#49)) - : +- *(15) FileScan parquet default.store_returns[sr_item_sk#50,sr_ticket_number#49,sr_return_quantity#43,sr_return_amt#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_item_sk), IsNotNull(..., ReadSchema: struct - +- ReusedExchange [d_date_sk#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/simplified.txt deleted file mode 100644 index 238eb448c..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/simplified.txt +++ /dev/null @@ -1,117 +0,0 @@ -TakeOrderedAndProject [channel,currency_rank,item,return_rank,return_ratio] - WholeStageCodegen - HashAggregate [channel,currency_rank,item,return_rank,return_ratio] - InputAdapter - Exchange [channel,currency_rank,item,return_rank,return_ratio] #1 - WholeStageCodegen - HashAggregate [channel,currency_rank,item,return_rank,return_ratio] - InputAdapter - Union - WholeStageCodegen - Project [currency_rank,item,return_rank,return_ratio] - Filter [currency_rank,return_rank] - InputAdapter - Window [currency_ratio] - WholeStageCodegen - Sort [currency_ratio] - InputAdapter - Window [return_ratio] - WholeStageCodegen - Sort [return_ratio] - InputAdapter - Exchange #2 - WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),ws_item_sk] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00))] - InputAdapter - Exchange [ws_item_sk] #3 - WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sum,sum,sum,sum,wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [wr_return_amt,wr_return_quantity,ws_item_sk,ws_net_paid,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Project [ws_item_sk,ws_net_paid,ws_order_number,ws_quantity,ws_sold_date_sk] - Filter [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] - Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] [ws_item_sk,ws_net_paid,ws_net_profit,ws_order_number,ws_quantity,ws_sold_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] - Filter [wr_item_sk,wr_order_number,wr_return_amt] - Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - WholeStageCodegen - Project [currency_rank,item,return_rank,return_ratio] - Filter [currency_rank,return_rank] - InputAdapter - Window [return_ratio] - WholeStageCodegen - Sort [return_ratio] - Project [currency_rank,item,return_ratio] - InputAdapter - Window [currency_ratio] - WholeStageCodegen - Sort [currency_ratio] - InputAdapter - Exchange #6 - WholeStageCodegen - HashAggregate [cs_item_sk,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00))] - InputAdapter - Exchange [cs_item_sk] #7 - WholeStageCodegen - HashAggregate [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cr_return_amount,cr_return_quantity,cs_item_sk,cs_net_paid,cs_quantity,cs_sold_date_sk] - BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] - Project [cs_item_sk,cs_net_paid,cs_order_number,cs_quantity,cs_sold_date_sk] - Filter [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] [cs_item_sk,cs_net_paid,cs_net_profit,cs_order_number,cs_quantity,cs_sold_date_sk] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] - Filter [cr_item_sk,cr_order_number,cr_return_amount] - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #5 - WholeStageCodegen - Project [currency_rank,item,return_rank,return_ratio] - Filter [currency_rank,return_rank] - InputAdapter - Window [return_ratio] - WholeStageCodegen - Sort [return_ratio] - Project [currency_rank,item,return_ratio] - InputAdapter - Window [currency_ratio] - WholeStageCodegen - Sort [currency_ratio] - InputAdapter - Exchange #9 - WholeStageCodegen - HashAggregate [ss_item_sk,sum,sum,sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00))] [currency_ratio,item,return_ratio,sum,sum,sum,sum,sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00))] - InputAdapter - Exchange [ss_item_sk] #10 - WholeStageCodegen - HashAggregate [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [sr_return_amt,sr_return_quantity,ss_item_sk,ss_net_paid,ss_quantity,ss_sold_date_sk] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_item_sk,ss_net_paid,ss_quantity,ss_sold_date_sk,ss_ticket_number] - Filter [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] [ss_item_sk,ss_net_paid,ss_net_profit,ss_quantity,ss_sold_date_sk,ss_ticket_number] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen - Project [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] - Filter [sr_item_sk,sr_return_amt,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/explain.txt deleted file mode 100644 index 0bad61dd2..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/explain.txt +++ /dev/null @@ -1,76 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NULLS FIRST], output=[channel#1,id#2,sales#3,returns#4,profit#5]) -+- *(21) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[sum(sales#7), sum(returns#8), sum(profit#9)]) - +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 5) - +- *(20) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[partial_sum(sales#7), partial_sum(returns#8), partial_sum(profit#9)]) - +- *(20) Expand [List(sales#7, returns#8, profit#9, channel#10, id#11, 0), List(sales#7, returns#8, profit#9, channel#10, null, 1), List(sales#7, returns#8, profit#9, null, null, 3)], [sales#7, returns#8, profit#9, channel#1, id#2, spark_grouping_id#6] - +- Union - :- *(6) HashAggregate(keys=[s_store_id#12], functions=[sum(UnscaledValue(sales_price#13)), sum(UnscaledValue(return_amt#14)), sum(UnscaledValue(profit#15)), sum(UnscaledValue(net_loss#16))]) - : +- Exchange hashpartitioning(s_store_id#12, 5) - : +- *(5) HashAggregate(keys=[s_store_id#12], functions=[partial_sum(UnscaledValue(sales_price#13)), partial_sum(UnscaledValue(return_amt#14)), partial_sum(UnscaledValue(profit#15)), partial_sum(UnscaledValue(net_loss#16))]) - : +- *(5) Project [sales_price#13, profit#15, return_amt#14, net_loss#16, s_store_id#12] - : +- *(5) BroadcastHashJoin [store_sk#17], [cast(s_store_sk#18 as bigint)], Inner, BuildRight - : :- *(5) Project [store_sk#17, sales_price#13, profit#15, return_amt#14, net_loss#16] - : : +- *(5) BroadcastHashJoin [date_sk#19], [cast(d_date_sk#20 as bigint)], Inner, BuildRight - : : :- Union - : : : :- *(1) Project [cast(ss_store_sk#21 as bigint) AS store_sk#17, cast(ss_sold_date_sk#22 as bigint) AS date_sk#19, ss_ext_sales_price#23 AS sales_price#13, ss_net_profit#24 AS profit#15, 0.00 AS return_amt#14, 0.00 AS net_loss#16] - : : : : +- *(1) Filter (isnotnull(cast(ss_sold_date_sk#22 as bigint)) && isnotnull(cast(ss_store_sk#21 as bigint))) - : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_store_sk#21,ss_ext_sales_price#23,ss_net_profit#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [], ReadSchema: struct= 11192)) && (d_date#35 <= 11206)) && isnotnull(d_date_sk#20)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_date#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), Is..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [s_store_sk#18, s_store_id#12] - : +- *(4) Filter isnotnull(s_store_sk#18) - : +- *(4) FileScan parquet default.store[s_store_sk#18,s_store_id#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - :- *(12) HashAggregate(keys=[cp_catalog_page_id#36], functions=[sum(UnscaledValue(sales_price#37)), sum(UnscaledValue(return_amt#38)), sum(UnscaledValue(profit#39)), sum(UnscaledValue(net_loss#40))]) - : +- Exchange hashpartitioning(cp_catalog_page_id#36, 5) - : +- *(11) HashAggregate(keys=[cp_catalog_page_id#36], functions=[partial_sum(UnscaledValue(sales_price#37)), partial_sum(UnscaledValue(return_amt#38)), partial_sum(UnscaledValue(profit#39)), partial_sum(UnscaledValue(net_loss#40))]) - : +- *(11) Project [sales_price#37, profit#39, return_amt#38, net_loss#40, cp_catalog_page_id#36] - : +- *(11) BroadcastHashJoin [page_sk#41], [cp_catalog_page_sk#42], Inner, BuildRight - : :- *(11) Project [page_sk#41, sales_price#37, profit#39, return_amt#38, net_loss#40] - : : +- *(11) BroadcastHashJoin [date_sk#43], [d_date_sk#20], Inner, BuildRight - : : :- Union - : : : :- *(7) Project [cs_catalog_page_sk#44 AS page_sk#41, cs_sold_date_sk#45 AS date_sk#43, cs_ext_sales_price#46 AS sales_price#37, cs_net_profit#47 AS profit#39, 0.00 AS return_amt#38, 0.00 AS net_loss#40] - : : : : +- *(7) Filter (isnotnull(cs_sold_date_sk#45) && isnotnull(cs_catalog_page_sk#44)) - : : : : +- *(7) FileScan parquet default.catalog_sales[cs_sold_date_sk#45,cs_catalog_page_sk#44,cs_ext_sales_price#46,cs_net_profit#47] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk)], ReadSchema: struct= 11192)) && (d_date#35 <= 11206)) && isnotnull(d_date_sk#20)) - : : +- *(9) FileScan parquet default.date_dim[d_date_sk#20,d_date#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), Is..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(10) Project [cp_catalog_page_sk#42, cp_catalog_page_id#36] - : +- *(10) Filter isnotnull(cp_catalog_page_sk#42) - : +- *(10) FileScan parquet default.catalog_page[cp_catalog_page_sk#42,cp_catalog_page_id#36] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_page], PartitionFilters: [], PushedFilters: [IsNotNull(cp_catalog_page_sk)], ReadSchema: struct - +- *(19) HashAggregate(keys=[web_site_id#58], functions=[sum(UnscaledValue(sales_price#59)), sum(UnscaledValue(return_amt#60)), sum(UnscaledValue(profit#61)), sum(UnscaledValue(net_loss#62))]) - +- Exchange hashpartitioning(web_site_id#58, 5) - +- *(18) HashAggregate(keys=[web_site_id#58], functions=[partial_sum(UnscaledValue(sales_price#59)), partial_sum(UnscaledValue(return_amt#60)), partial_sum(UnscaledValue(profit#61)), partial_sum(UnscaledValue(net_loss#62))]) - +- *(18) Project [sales_price#59, profit#61, return_amt#60, net_loss#62, web_site_id#58] - +- *(18) BroadcastHashJoin [wsr_web_site_sk#63], [web_site_sk#64], Inner, BuildRight - :- *(18) Project [wsr_web_site_sk#63, sales_price#59, profit#61, return_amt#60, net_loss#62] - : +- *(18) BroadcastHashJoin [date_sk#65], [cast(d_date_sk#20 as bigint)], Inner, BuildRight - : :- Union - : : :- *(13) Project [ws_web_site_sk#66 AS wsr_web_site_sk#63, cast(ws_sold_date_sk#67 as bigint) AS date_sk#65, ws_ext_sales_price#68 AS sales_price#59, ws_net_profit#69 AS profit#61, 0.00 AS return_amt#60, 0.00 AS net_loss#62] - : : : +- *(13) Filter (isnotnull(cast(ws_sold_date_sk#67 as bigint)) && isnotnull(ws_web_site_sk#66)) - : : : +- *(13) FileScan parquet default.web_sales[ws_sold_date_sk#67,ws_web_site_sk#66,ws_ext_sales_price#68,ws_net_profit#69] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_web_site_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(17) Project [web_site_sk#64, web_site_id#58] - +- *(17) Filter isnotnull(web_site_sk#64) - +- *(17) FileScan parquet default.web_site[web_site_sk#64,web_site_id#58] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/simplified.txt deleted file mode 100644 index 257c8efa6..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/simplified.txt +++ /dev/null @@ -1,110 +0,0 @@ -TakeOrderedAndProject [channel,id,profit,returns,sales] - WholeStageCodegen - HashAggregate [channel,id,spark_grouping_id,sum,sum,sum,sum(profit),sum(returns),sum(sales)] [profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] - InputAdapter - Exchange [channel,id,spark_grouping_id] #1 - WholeStageCodegen - HashAggregate [channel,id,profit,returns,sales,spark_grouping_id,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Expand [channel,id,profit,returns,sales] - InputAdapter - Union - WholeStageCodegen - HashAggregate [s_store_id,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] [RETURNS,channel,id,profit,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] - InputAdapter - Exchange [s_store_id] #2 - WholeStageCodegen - HashAggregate [net_loss,profit,return_amt,s_store_id,sales_price,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [net_loss,profit,return_amt,s_store_id,sales_price] - BroadcastHashJoin [s_store_sk,store_sk] - Project [net_loss,profit,return_amt,sales_price,store_sk] - BroadcastHashJoin [d_date_sk,date_sk] - InputAdapter - Union - WholeStageCodegen - Project [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] - Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] - WholeStageCodegen - Project [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] - Filter [sr_returned_date_sk,sr_store_sk] - Scan parquet default.store_returns [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [s_store_id,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_id,s_store_sk] [s_store_id,s_store_sk] - WholeStageCodegen - HashAggregate [cp_catalog_page_id,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] [RETURNS,channel,id,profit,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] - InputAdapter - Exchange [cp_catalog_page_id] #5 - WholeStageCodegen - HashAggregate [cp_catalog_page_id,net_loss,profit,return_amt,sales_price,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [cp_catalog_page_id,net_loss,profit,return_amt,sales_price] - BroadcastHashJoin [cp_catalog_page_sk,page_sk] - Project [net_loss,page_sk,profit,return_amt,sales_price] - BroadcastHashJoin [d_date_sk,date_sk] - InputAdapter - Union - WholeStageCodegen - Project [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] - Filter [cs_catalog_page_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] - WholeStageCodegen - Project [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] - Filter [cr_catalog_page_sk,cr_returned_date_sk] - Scan parquet default.catalog_returns [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [cp_catalog_page_id,cp_catalog_page_sk] - Filter [cp_catalog_page_sk] - Scan parquet default.catalog_page [cp_catalog_page_id,cp_catalog_page_sk] [cp_catalog_page_id,cp_catalog_page_sk] - WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price)),web_site_id] [RETURNS,channel,id,profit,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] - InputAdapter - Exchange [web_site_id] #8 - WholeStageCodegen - HashAggregate [net_loss,profit,return_amt,sales_price,sum,sum,sum,sum,sum,sum,sum,sum,web_site_id] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [net_loss,profit,return_amt,sales_price,web_site_id] - BroadcastHashJoin [web_site_sk,wsr_web_site_sk] - Project [net_loss,profit,return_amt,sales_price,wsr_web_site_sk] - BroadcastHashJoin [d_date_sk,date_sk] - InputAdapter - Union - WholeStageCodegen - Project [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] - Filter [ws_sold_date_sk,ws_web_site_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] - WholeStageCodegen - Project [wr_net_loss,wr_return_amt,wr_returned_date_sk,ws_web_site_sk] - BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Project [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt,wr_returned_date_sk] - Filter [wr_returned_date_sk] - Scan parquet default.web_returns [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt,wr_returned_date_sk] [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt,wr_returned_date_sk] - InputAdapter - BroadcastExchange #9 - WholeStageCodegen - Project [ws_item_sk,ws_order_number,ws_web_site_sk] - Filter [ws_item_sk,ws_order_number,ws_web_site_sk] - Scan parquet default.web_sales [ws_item_sk,ws_order_number,ws_web_site_sk] [ws_item_sk,ws_order_number,ws_web_site_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 - InputAdapter - BroadcastExchange #10 - WholeStageCodegen - Project [web_site_id,web_site_sk] - Filter [web_site_sk] - Scan parquet default.web_site [web_site_id,web_site_sk] [web_site_id,web_site_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/explain.txt deleted file mode 100644 index 6716b98d6..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/explain.txt +++ /dev/null @@ -1,32 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,s_company_id#2 ASC NULLS FIRST,s_street_number#3 ASC NULLS FIRST,s_street_name#4 ASC NULLS FIRST,s_street_type#5 ASC NULLS FIRST,s_suite_number#6 ASC NULLS FIRST,s_city#7 ASC NULLS FIRST,s_county#8 ASC NULLS FIRST,s_state#9 ASC NULLS FIRST,s_zip#10 ASC NULLS FIRST], output=[s_store_name#1,s_company_id#2,s_street_number#3,s_street_name#4,s_street_type#5,s_suite_number#6,s_city#7,s_county#8,s_state#9,s_zip#10,30 days #11,31 - 60 days #12,61 - 90 days #13,91 - 120 days #14,>120 days #15]) -+- *(6) HashAggregate(keys=[s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10], functions=[sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 30) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 60) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 90) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))]) - +- Exchange hashpartitioning(s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10, 5) - +- *(5) HashAggregate(keys=[s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10], functions=[partial_sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 30) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 60) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 90) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))]) - +- *(5) Project [ss_sold_date_sk#17, sr_returned_date_sk#16, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] - +- *(5) BroadcastHashJoin [sr_returned_date_sk#16], [cast(d_date_sk#18 as bigint)], Inner, BuildRight - :- *(5) Project [ss_sold_date_sk#17, sr_returned_date_sk#16, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] - : +- *(5) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#19], Inner, BuildRight - : :- *(5) Project [ss_sold_date_sk#17, sr_returned_date_sk#16, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] - : : +- *(5) BroadcastHashJoin [ss_store_sk#20], [s_store_sk#21], Inner, BuildRight - : : :- *(5) Project [ss_sold_date_sk#17, ss_store_sk#20, sr_returned_date_sk#16] - : : : +- *(5) BroadcastHashJoin [cast(ss_ticket_number#22 as bigint), cast(ss_item_sk#23 as bigint), cast(ss_customer_sk#24 as bigint)], [sr_ticket_number#25, sr_item_sk#26, sr_customer_sk#27], Inner, BuildRight - : : : :- *(5) Project [ss_sold_date_sk#17, ss_item_sk#23, ss_customer_sk#24, ss_store_sk#20, ss_ticket_number#22] - : : : : +- *(5) Filter ((((isnotnull(ss_ticket_number#22) && isnotnull(ss_item_sk#23)) && isnotnull(ss_customer_sk#24)) && isnotnull(ss_store_sk#20)) && isnotnull(ss_sold_date_sk#17)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_item_sk#23,ss_customer_sk#24,ss_store_sk#20,ss_ticket_number#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_stor..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[3, bigint, true], input[1, bigint, true], input[2, bigint, true])) - : : : +- *(1) Project [sr_returned_date_sk#16, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#25] - : : : +- *(1) Filter (((isnotnull(sr_ticket_number#25) && isnotnull(sr_customer_sk#27)) && isnotnull(sr_item_sk#26)) && isnotnull(sr_returned_date_sk#16)) - : : : +- *(1) FileScan parquet default.store_returns[sr_returned_date_sk#16,sr_item_sk#26,sr_customer_sk#27,sr_ticket_number#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_retu..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [s_store_sk#21, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] - : : +- *(2) Filter isnotnull(s_store_sk#21) - : : +- *(2) FileScan parquet default.store[s_store_sk#21,s_store_name#1,s_company_id#2,s_street_number#3,s_street_name#4,s_street_type#5,s_suite_number#6,s_city#7,s_county#8,s_state#9,s_zip#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(4) Project [d_date_sk#18] - +- *(4) Filter ((((isnotnull(d_year#28) && isnotnull(d_moy#29)) && (d_year#28 = 2001)) && (d_moy#29 = 8)) && isnotnull(d_date_sk#18)) - +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#28,d_moy#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/simplified.txt deleted file mode 100644 index c3f728d0a..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/simplified.txt +++ /dev/null @@ -1,42 +0,0 @@ -TakeOrderedAndProject [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] - WholeStageCodegen - HashAggregate [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] - InputAdapter - Exchange [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] #1 - WholeStageCodegen - HashAggregate [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] - Project [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [sr_returned_date_sk,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] - Project [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] - Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [s_city,s_company_id,s_county,s_state,s_store_name,s_store_sk,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] - Filter [s_store_sk] - Scan parquet default.store [s_city,s_company_id,s_county,s_state,s_store_name,s_store_sk,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] [s_city,s_company_id,s_county,s_state,s_store_name,s_store_sk,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk] - Scan parquet default.date_dim [d_date_sk] [d_date_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/explain.txt deleted file mode 100644 index 548263929..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/explain.txt +++ /dev/null @@ -1,41 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[item_sk#1 ASC NULLS FIRST,d_date#2 ASC NULLS FIRST], output=[item_sk#1,d_date#2,web_sales#3,store_sales#4,web_cumulative#5,store_cumulative#6]) -+- *(15) Filter ((isnotnull(web_cumulative#5) && isnotnull(store_cumulative#6)) && (web_cumulative#5 > store_cumulative#6)) - +- Window [max(web_sales#3) windowspecdefinition(item_sk#1, d_date#2 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#5, max(store_sales#4) windowspecdefinition(item_sk#1, d_date#2 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#6], [item_sk#1], [d_date#2 ASC NULLS FIRST] - +- *(14) Sort [item_sk#1 ASC NULLS FIRST, d_date#2 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(item_sk#1, 5) - +- *(13) Project [CASE WHEN isnotnull(item_sk#7) THEN item_sk#7 ELSE item_sk#8 END AS item_sk#1, CASE WHEN isnotnull(d_date#9) THEN d_date#9 ELSE d_date#10 END AS d_date#2, cume_sales#11 AS web_sales#3, cume_sales#12 AS store_sales#4] - +- SortMergeJoin [item_sk#7, d_date#9], [item_sk#8, d_date#10], FullOuter - :- *(6) Sort [item_sk#7 ASC NULLS FIRST, d_date#9 ASC NULLS FIRST], false, 0 - : +- Exchange hashpartitioning(item_sk#7, d_date#9, 5) - : +- *(5) Project [item_sk#7, d_date#9, cume_sales#11] - : +- Window [sum(_w0#13) windowspecdefinition(ws_item_sk#14, d_date#9 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#11], [ws_item_sk#14], [d_date#9 ASC NULLS FIRST] - : +- *(4) Sort [ws_item_sk#14 ASC NULLS FIRST, d_date#9 ASC NULLS FIRST], false, 0 - : +- Exchange hashpartitioning(ws_item_sk#14, 5) - : +- *(3) HashAggregate(keys=[ws_item_sk#14, d_date#9], functions=[sum(UnscaledValue(ws_sales_price#15))]) - : +- Exchange hashpartitioning(ws_item_sk#14, d_date#9, 5) - : +- *(2) HashAggregate(keys=[ws_item_sk#14, d_date#9], functions=[partial_sum(UnscaledValue(ws_sales_price#15))]) - : +- *(2) Project [ws_item_sk#14, ws_sales_price#15, d_date#9] - : +- *(2) BroadcastHashJoin [ws_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight - : :- *(2) Project [ws_sold_date_sk#16, ws_item_sk#14, ws_sales_price#15] - : : +- *(2) Filter (isnotnull(ws_item_sk#14) && isnotnull(ws_sold_date_sk#16)) - : : +- *(2) FileScan parquet default.web_sales[ws_sold_date_sk#16,ws_item_sk#14,ws_sales_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [d_date_sk#17, d_date#9] - : +- *(1) Filter (((isnotnull(d_month_seq#18) && (d_month_seq#18 >= 1200)) && (d_month_seq#18 <= 1211)) && isnotnull(d_date_sk#17)) - : +- *(1) FileScan parquet default.date_dim[d_date_sk#17,d_date#9,d_month_seq#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct - +- *(12) Sort [item_sk#8 ASC NULLS FIRST, d_date#10 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(item_sk#8, d_date#10, 5) - +- *(11) Project [item_sk#8, d_date#10, cume_sales#12] - +- Window [sum(_w0#19) windowspecdefinition(ss_item_sk#20, d_date#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#12], [ss_item_sk#20], [d_date#10 ASC NULLS FIRST] - +- *(10) Sort [ss_item_sk#20 ASC NULLS FIRST, d_date#10 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(ss_item_sk#20, 5) - +- *(9) HashAggregate(keys=[ss_item_sk#20, d_date#10], functions=[sum(UnscaledValue(ss_sales_price#21))]) - +- Exchange hashpartitioning(ss_item_sk#20, d_date#10, 5) - +- *(8) HashAggregate(keys=[ss_item_sk#20, d_date#10], functions=[partial_sum(UnscaledValue(ss_sales_price#21))]) - +- *(8) Project [ss_item_sk#20, ss_sales_price#21, d_date#10] - +- *(8) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#23], Inner, BuildRight - :- *(8) Project [ss_sold_date_sk#22, ss_item_sk#20, ss_sales_price#21] - : +- *(8) Filter (isnotnull(ss_item_sk#20) && isnotnull(ss_sold_date_sk#22)) - : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_item_sk#20,ss_sales_price#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - +- ReusedExchange [d_date_sk#23, d_date#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/simplified.txt deleted file mode 100644 index 2b126d38a..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/simplified.txt +++ /dev/null @@ -1,67 +0,0 @@ -TakeOrderedAndProject [d_date,item_sk,store_cumulative,store_sales,web_cumulative,web_sales] - WholeStageCodegen - Filter [store_cumulative,web_cumulative] - InputAdapter - Window [d_date,item_sk,store_sales,web_sales] - WholeStageCodegen - Sort [d_date,item_sk] - InputAdapter - Exchange [item_sk] #1 - WholeStageCodegen - Project [cume_sales,cume_sales,d_date,d_date,item_sk,item_sk] - InputAdapter - SortMergeJoin [d_date,d_date,item_sk,item_sk] - WholeStageCodegen - Sort [d_date,item_sk] - InputAdapter - Exchange [d_date,item_sk] #2 - WholeStageCodegen - Project [cume_sales,d_date,item_sk] - InputAdapter - Window [_w0,d_date,ws_item_sk] - WholeStageCodegen - Sort [d_date,ws_item_sk] - InputAdapter - Exchange [ws_item_sk] #3 - WholeStageCodegen - HashAggregate [d_date,sum,sum(UnscaledValue(ws_sales_price)),ws_item_sk] [_w0,item_sk,sum,sum(UnscaledValue(ws_sales_price))] - InputAdapter - Exchange [d_date,ws_item_sk] #4 - WholeStageCodegen - HashAggregate [d_date,sum,sum,ws_item_sk,ws_sales_price] [sum,sum] - Project [d_date,ws_item_sk,ws_sales_price] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_item_sk,ws_sales_price,ws_sold_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_item_sk,ws_sales_price,ws_sold_date_sk] [ws_item_sk,ws_sales_price,ws_sold_date_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date,d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] [d_date,d_date_sk,d_month_seq] - WholeStageCodegen - Sort [d_date,item_sk] - InputAdapter - Exchange [d_date,item_sk] #6 - WholeStageCodegen - Project [cume_sales,d_date,item_sk] - InputAdapter - Window [_w0,d_date,ss_item_sk] - WholeStageCodegen - Sort [d_date,ss_item_sk] - InputAdapter - Exchange [ss_item_sk] #7 - WholeStageCodegen - HashAggregate [d_date,ss_item_sk,sum,sum(UnscaledValue(ss_sales_price))] [_w0,item_sk,sum,sum(UnscaledValue(ss_sales_price))] - InputAdapter - Exchange [d_date,ss_item_sk] #8 - WholeStageCodegen - HashAggregate [d_date,ss_item_sk,ss_sales_price,sum,sum] [sum,sum] - Project [d_date,ss_item_sk,ss_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_item_sk,ss_sales_price,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk] - InputAdapter - ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/explain.txt deleted file mode 100644 index 7a9e19e6c..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/explain.txt +++ /dev/null @@ -1,20 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[d_year#1 ASC NULLS FIRST,ext_price#2 DESC NULLS LAST,brand_id#3 ASC NULLS FIRST], output=[d_year#1,brand_id#3,brand#4,ext_price#2]) -+- *(4) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[sum(UnscaledValue(ss_ext_sales_price#7))]) - +- Exchange hashpartitioning(d_year#1, i_brand#5, i_brand_id#6, 5) - +- *(3) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#7))]) - +- *(3) Project [d_year#1, ss_ext_sales_price#7, i_brand_id#6, i_brand#5] - +- *(3) BroadcastHashJoin [ss_item_sk#8], [i_item_sk#9], Inner, BuildRight - :- *(3) Project [d_year#1, ss_item_sk#8, ss_ext_sales_price#7] - : +- *(3) BroadcastHashJoin [d_date_sk#10], [ss_sold_date_sk#11], Inner, BuildRight - : :- *(3) Project [d_date_sk#10, d_year#1] - : : +- *(3) Filter ((((isnotnull(d_moy#12) && isnotnull(d_year#1)) && (d_moy#12 = 11)) && (d_year#1 = 2000)) && isnotnull(d_date_sk#10)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#10,d_year#1,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [ss_sold_date_sk#11, ss_item_sk#8, ss_ext_sales_price#7] - : +- *(1) Filter (isnotnull(ss_sold_date_sk#11) && isnotnull(ss_item_sk#8)) - : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#8,ss_ext_sales_price#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(2) Project [i_item_sk#9, i_brand_id#6, i_brand#5] - +- *(2) Filter ((isnotnull(i_manager_id#13) && (i_manager_id#13 = 1)) && isnotnull(i_item_sk#9)) - +- *(2) FileScan parquet default.item[i_item_sk#9,i_brand_id#6,i_brand#5,i_manager_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/simplified.txt deleted file mode 100644 index 583e5acae..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/simplified.txt +++ /dev/null @@ -1,26 +0,0 @@ -TakeOrderedAndProject [brand,brand_id,d_year,ext_price] - WholeStageCodegen - HashAggregate [d_year,i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] - InputAdapter - Exchange [d_year,i_brand,i_brand_id] #1 - WholeStageCodegen - HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [d_year,i_brand,i_brand_id,ss_ext_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [d_year,ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [d_date_sk,d_year] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [i_brand,i_brand_id,i_item_sk] - Filter [i_item_sk,i_manager_id] - Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] [i_brand,i_brand_id,i_item_sk,i_manager_id] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/explain.txt deleted file mode 100644 index f31d6ec98..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/explain.txt +++ /dev/null @@ -1,31 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[avg_quarterly_sales#1 ASC NULLS FIRST,sum_sales#2 ASC NULLS FIRST,i_manufact_id#3 ASC NULLS FIRST], output=[i_manufact_id#3,sum_sales#2,avg_quarterly_sales#1]) -+- *(7) Project [i_manufact_id#3, sum_sales#2, avg_quarterly_sales#1] - +- *(7) Filter (CASE WHEN (avg_quarterly_sales#1 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#2 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#1 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#1 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000) - +- Window [avg(_w0#4) windowspecdefinition(i_manufact_id#3, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#1], [i_manufact_id#3] - +- *(6) Sort [i_manufact_id#3 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(i_manufact_id#3, 5) - +- *(5) HashAggregate(keys=[i_manufact_id#3, d_qoy#5], functions=[sum(UnscaledValue(ss_sales_price#6))]) - +- Exchange hashpartitioning(i_manufact_id#3, d_qoy#5, 5) - +- *(4) HashAggregate(keys=[i_manufact_id#3, d_qoy#5], functions=[partial_sum(UnscaledValue(ss_sales_price#6))]) - +- *(4) Project [i_manufact_id#3, ss_sales_price#6, d_qoy#5] - +- *(4) BroadcastHashJoin [ss_store_sk#7], [s_store_sk#8], Inner, BuildRight - :- *(4) Project [i_manufact_id#3, ss_store_sk#7, ss_sales_price#6, d_qoy#5] - : +- *(4) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight - : :- *(4) Project [i_manufact_id#3, ss_sold_date_sk#9, ss_store_sk#7, ss_sales_price#6] - : : +- *(4) BroadcastHashJoin [i_item_sk#11], [ss_item_sk#12], Inner, BuildRight - : : :- *(4) Project [i_item_sk#11, i_manufact_id#3] - : : : +- *(4) Filter ((((i_category#13 IN (Books,Children,Electronics) && i_class#14 IN (personal,portable,reference,self-help)) && i_brand#15 IN (scholaramalgamalg #16,scholaramalgamalg #17,exportiunivamalg #18,scholaramalgamalg #18)) || ((i_category#13 IN (Women,Music,Men) && i_class#14 IN (accessories,classical,fragrances,pants)) && i_brand#15 IN (amalgimporto #19,edu packscholar #19,exportiimporto #19,importoamalg #19))) && isnotnull(i_item_sk#11)) - : : : +- *(4) FileScan parquet default.item[i_item_sk#11,i_brand#15,i_class#14,i_category#13,i_manufact_id#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,reference..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : +- *(1) Project [ss_sold_date_sk#9, ss_item_sk#12, ss_store_sk#7, ss_sales_price#6] - : : +- *(1) Filter ((isnotnull(ss_item_sk#12) && isnotnull(ss_sold_date_sk#9)) && isnotnull(ss_store_sk#7)) - : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#12,ss_store_sk#7,ss_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#10, d_qoy#5] - : +- *(2) Filter (d_month_seq#20 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) && isnotnull(d_date_sk#10)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#10,d_month_seq#20,d_qoy#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [s_store_sk#8] - +- *(3) Filter isnotnull(s_store_sk#8) - +- *(3) FileScan parquet default.store[s_store_sk#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/simplified.txt deleted file mode 100644 index ace96279f..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/simplified.txt +++ /dev/null @@ -1,43 +0,0 @@ -TakeOrderedAndProject [avg_quarterly_sales,i_manufact_id,sum_sales] - WholeStageCodegen - Project [avg_quarterly_sales,i_manufact_id,sum_sales] - Filter [avg_quarterly_sales,sum_sales] - InputAdapter - Window [_w0,i_manufact_id] - WholeStageCodegen - Sort [i_manufact_id] - InputAdapter - Exchange [i_manufact_id] #1 - WholeStageCodegen - HashAggregate [d_qoy,i_manufact_id,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] - InputAdapter - Exchange [d_qoy,i_manufact_id] #2 - WholeStageCodegen - HashAggregate [d_qoy,i_manufact_id,ss_sales_price,sum,sum] [sum,sum] - Project [d_qoy,i_manufact_id,ss_sales_price] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [d_qoy,i_manufact_id,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_manufact_id,ss_sales_price,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_item_sk,i_manufact_id] - Filter [i_brand,i_category,i_class,i_item_sk] - Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_manufact_id] [i_brand,i_category,i_class,i_item_sk,i_manufact_id] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk,d_qoy] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq,d_qoy] [d_date_sk,d_month_seq,d_qoy] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_sk] [s_store_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt deleted file mode 100644 index e94d5f4c0..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt +++ /dev/null @@ -1,88 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[segment#1 ASC NULLS FIRST,num_customers#2 ASC NULLS FIRST], output=[segment#1,num_customers#2,segment_base#3]) -+- *(13) HashAggregate(keys=[segment#1], functions=[count(1)]) - +- Exchange hashpartitioning(segment#1, 5) - +- *(12) HashAggregate(keys=[segment#1], functions=[partial_count(1)]) - +- *(12) HashAggregate(keys=[c_customer_sk#4], functions=[sum(UnscaledValue(ss_ext_sales_price#5))]) - +- Exchange hashpartitioning(c_customer_sk#4, 5) - +- *(11) HashAggregate(keys=[c_customer_sk#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#5))]) - +- *(11) Project [c_customer_sk#4, ss_ext_sales_price#5] - +- *(11) BroadcastHashJoin [ss_sold_date_sk#6], [d_date_sk#7], Inner, BuildRight - :- *(11) Project [c_customer_sk#4, ss_sold_date_sk#6, ss_ext_sales_price#5] - : +- *(11) BroadcastHashJoin [ca_county#8, ca_state#9], [s_county#10, s_state#11], Inner, BuildRight - : :- *(11) Project [c_customer_sk#4, ss_sold_date_sk#6, ss_ext_sales_price#5, ca_county#8, ca_state#9] - : : +- *(11) BroadcastHashJoin [c_current_addr_sk#12], [ca_address_sk#13], Inner, BuildRight - : : :- *(11) Project [c_customer_sk#4, c_current_addr_sk#12, ss_sold_date_sk#6, ss_ext_sales_price#5] - : : : +- *(11) BroadcastHashJoin [c_customer_sk#4], [ss_customer_sk#14], Inner, BuildRight - : : : :- *(11) HashAggregate(keys=[c_customer_sk#4, c_current_addr_sk#12], functions=[]) - : : : : +- Exchange hashpartitioning(c_customer_sk#4, c_current_addr_sk#12, 5) - : : : : +- *(6) HashAggregate(keys=[c_customer_sk#4, c_current_addr_sk#12], functions=[]) - : : : : +- *(6) Project [c_customer_sk#4, c_current_addr_sk#12] - : : : : +- *(6) BroadcastHashJoin [customer_sk#15], [c_customer_sk#4], Inner, BuildRight - : : : : :- *(6) Project [customer_sk#15] - : : : : : +- *(6) BroadcastHashJoin [sold_date_sk#16], [d_date_sk#7], Inner, BuildRight - : : : : : :- *(6) Project [sold_date_sk#16, customer_sk#15] - : : : : : : +- *(6) BroadcastHashJoin [item_sk#17], [i_item_sk#18], Inner, BuildRight - : : : : : : :- Union - : : : : : : : :- *(1) Project [cs_sold_date_sk#19 AS sold_date_sk#16, cs_bill_customer_sk#20 AS customer_sk#15, cs_item_sk#21 AS item_sk#17] - : : : : : : : : +- *(1) Filter ((isnotnull(cs_item_sk#21) && isnotnull(cs_sold_date_sk#19)) && isnotnull(cs_bill_customer_sk#20)) - : : : : : : : : +- *(1) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_customer_sk#20,cs_item_sk#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct - : : : : : : : +- *(2) Project [ws_sold_date_sk#22 AS sold_date_sk#23, ws_bill_customer_sk#24 AS customer_sk#25, ws_item_sk#26 AS item_sk#27] - : : : : : : : +- *(2) Filter ((isnotnull(ws_item_sk#26) && isnotnull(ws_sold_date_sk#22)) && isnotnull(ws_bill_customer_sk#24)) - : : : : : : : +- *(2) FileScan parquet default.web_sales[ws_sold_date_sk#22,ws_item_sk#26,ws_bill_customer_sk#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct - : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : +- *(3) Project [i_item_sk#18] - : : : : : : +- *(3) Filter ((((isnotnull(i_category#28) && isnotnull(i_class#29)) && (i_category#28 = Women)) && (i_class#29 = maternity)) && isnotnull(i_item_sk#18)) - : : : : : : +- *(3) FileScan parquet default.item[i_item_sk#18,i_class#29,i_category#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women), EqualTo(i_class,maternity)..., ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(4) Project [d_date_sk#7] - : : : : : +- *(4) Filter ((((isnotnull(d_moy#30) && isnotnull(d_year#31)) && (d_moy#30 = 12)) && (d_year#31 = 1998)) && isnotnull(d_date_sk#7)) - : : : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#7,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(5) Project [c_customer_sk#4, c_current_addr_sk#12] - : : : : +- *(5) Filter (isnotnull(c_customer_sk#4) && isnotnull(c_current_addr_sk#12)) - : : : : +- *(5) FileScan parquet default.customer[c_customer_sk#4,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : : +- *(7) Project [ss_sold_date_sk#6, ss_customer_sk#14, ss_ext_sales_price#5] - : : : +- *(7) Filter (isnotnull(ss_customer_sk#14) && isnotnull(ss_sold_date_sk#6)) - : : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#6,ss_customer_sk#14,ss_ext_sales_price#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(8) Project [ca_address_sk#13, ca_county#8, ca_state#9] - : : +- *(8) Filter ((isnotnull(ca_address_sk#13) && isnotnull(ca_state#9)) && isnotnull(ca_county#8)) - : : +- *(8) FileScan parquet default.customer_address[ca_address_sk#13,ca_county#8,ca_state#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state), IsNotNull(ca_county)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true])) - : +- *(9) Project [s_county#10, s_state#11] - : +- *(9) Filter (isnotnull(s_state#11) && isnotnull(s_county#10)) - : +- *(9) FileScan parquet default.store[s_county#10,s_state#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), IsNotNull(s_county)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(10) Project [d_date_sk#7] - +- *(10) Filter (((isnotnull(d_month_seq#32) && (d_month_seq#32 >= Subquery subquery10089)) && (d_month_seq#32 <= Subquery subquery10090)) && isnotnull(d_date_sk#7)) - : :- Subquery subquery10089 - : : +- *(2) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) - : : +- Exchange hashpartitioning((d_month_seq + 1)#33, 5) - : : +- *(1) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) - : : +- *(1) Project [(d_month_seq#32 + 1) AS (d_month_seq + 1)#33] - : : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) - : : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct - : +- Subquery subquery10090 - : +- *(2) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) - : +- Exchange hashpartitioning((d_month_seq + 3)#34, 5) - : +- *(1) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) - : +- *(1) Project [(d_month_seq#32 + 3) AS (d_month_seq + 3)#34] - : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) - : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct - +- *(10) FileScan parquet default.date_dim[d_date_sk#7,d_month_seq#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct - :- Subquery subquery10089 - : +- *(2) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) - : +- Exchange hashpartitioning((d_month_seq + 1)#33, 5) - : +- *(1) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) - : +- *(1) Project [(d_month_seq#32 + 1) AS (d_month_seq + 1)#33] - : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) - : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct - +- Subquery subquery10090 - +- *(2) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) - +- Exchange hashpartitioning((d_month_seq + 3)#34, 5) - +- *(1) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) - +- *(1) Project [(d_month_seq#32 + 3) AS (d_month_seq + 3)#34] - +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) - +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/simplified.txt deleted file mode 100644 index 4abf2165f..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/simplified.txt +++ /dev/null @@ -1,123 +0,0 @@ -TakeOrderedAndProject [num_customers,segment,segment_base] - WholeStageCodegen - HashAggregate [count,count(1),segment] [count,count(1),num_customers,segment_base] - InputAdapter - Exchange [segment] #1 - WholeStageCodegen - HashAggregate [count,count,segment] [count,count] - HashAggregate [c_customer_sk,sum,sum(UnscaledValue(ss_ext_sales_price))] [segment,sum,sum(UnscaledValue(ss_ext_sales_price))] - InputAdapter - Exchange [c_customer_sk] #2 - WholeStageCodegen - HashAggregate [c_customer_sk,ss_ext_sales_price,sum,sum] [sum,sum] - Project [c_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [c_customer_sk,ss_ext_sales_price,ss_sold_date_sk] - BroadcastHashJoin [ca_county,ca_state,s_county,s_state] - Project [c_customer_sk,ca_county,ca_state,ss_ext_sales_price,ss_sold_date_sk] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,c_customer_sk,ss_ext_sales_price,ss_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - HashAggregate [c_current_addr_sk,c_customer_sk] - InputAdapter - Exchange [c_current_addr_sk,c_customer_sk] #3 - WholeStageCodegen - HashAggregate [c_current_addr_sk,c_customer_sk] - Project [c_current_addr_sk,c_customer_sk] - BroadcastHashJoin [c_customer_sk,customer_sk] - Project [customer_sk] - BroadcastHashJoin [d_date_sk,sold_date_sk] - Project [customer_sk,sold_date_sk] - BroadcastHashJoin [i_item_sk,item_sk] - InputAdapter - Union - WholeStageCodegen - Project [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - WholeStageCodegen - Project [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] - Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [i_item_sk] - Filter [i_category,i_class,i_item_sk] - Scan parquet default.item [i_category,i_class,i_item_sk] [i_category,i_class,i_item_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [c_current_addr_sk,c_customer_sk] - Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] - Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [ca_address_sk,ca_county,ca_state] - Filter [ca_address_sk,ca_county,ca_state] - Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state] [ca_address_sk,ca_county,ca_state] - InputAdapter - BroadcastExchange #9 - WholeStageCodegen - Project [s_county,s_state] - Filter [s_county,s_state] - Scan parquet default.store [s_county,s_state] [s_county,s_state] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_month_seq] - Subquery #1 - WholeStageCodegen - HashAggregate [(d_month_seq + 1)] - InputAdapter - Exchange [(d_month_seq + 1)] #11 - WholeStageCodegen - HashAggregate [(d_month_seq + 1)] - Project [d_month_seq] - Filter [d_moy,d_year] - Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] - Subquery #2 - WholeStageCodegen - HashAggregate [(d_month_seq + 3)] - InputAdapter - Exchange [(d_month_seq + 3)] #12 - WholeStageCodegen - HashAggregate [(d_month_seq + 3)] - Project [d_month_seq] - Filter [d_moy,d_year] - Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] - Subquery #1 - WholeStageCodegen - HashAggregate [(d_month_seq + 1)] - InputAdapter - Exchange [(d_month_seq + 1)] #11 - WholeStageCodegen - HashAggregate [(d_month_seq + 1)] - Project [d_month_seq] - Filter [d_moy,d_year] - Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] - Subquery #2 - WholeStageCodegen - HashAggregate [(d_month_seq + 3)] - InputAdapter - Exchange [(d_month_seq + 3)] #12 - WholeStageCodegen - HashAggregate [(d_month_seq + 3)] - Project [d_month_seq] - Filter [d_moy,d_year] - Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/explain.txt deleted file mode 100644 index 32402fb5f..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/explain.txt +++ /dev/null @@ -1,20 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[ext_price#1 DESC NULLS LAST,brand_id#2 ASC NULLS FIRST], output=[brand_id#2,brand#3,ext_price#1]) -+- *(4) HashAggregate(keys=[i_brand#4, i_brand_id#5], functions=[sum(UnscaledValue(ss_ext_sales_price#6))]) - +- Exchange hashpartitioning(i_brand#4, i_brand_id#5, 5) - +- *(3) HashAggregate(keys=[i_brand#4, i_brand_id#5], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#6))]) - +- *(3) Project [ss_ext_sales_price#6, i_brand_id#5, i_brand#4] - +- *(3) BroadcastHashJoin [ss_item_sk#7], [i_item_sk#8], Inner, BuildRight - :- *(3) Project [ss_item_sk#7, ss_ext_sales_price#6] - : +- *(3) BroadcastHashJoin [d_date_sk#9], [ss_sold_date_sk#10], Inner, BuildRight - : :- *(3) Project [d_date_sk#9] - : : +- *(3) Filter ((((isnotnull(d_moy#11) && isnotnull(d_year#12)) && (d_moy#11 = 11)) && (d_year#12 = 1999)) && isnotnull(d_date_sk#9)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#9,d_year#12,d_moy#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [ss_sold_date_sk#10, ss_item_sk#7, ss_ext_sales_price#6] - : +- *(1) Filter (isnotnull(ss_sold_date_sk#10) && isnotnull(ss_item_sk#7)) - : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#10,ss_item_sk#7,ss_ext_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(2) Project [i_item_sk#8, i_brand_id#5, i_brand#4] - +- *(2) Filter ((isnotnull(i_manager_id#13) && (i_manager_id#13 = 28)) && isnotnull(i_item_sk#8)) - +- *(2) FileScan parquet default.item[i_item_sk#8,i_brand_id#5,i_brand#4,i_manager_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/simplified.txt deleted file mode 100644 index 478e4a54b..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/simplified.txt +++ /dev/null @@ -1,26 +0,0 @@ -TakeOrderedAndProject [brand,brand_id,ext_price] - WholeStageCodegen - HashAggregate [i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] - InputAdapter - Exchange [i_brand,i_brand_id] #1 - WholeStageCodegen - HashAggregate [i_brand,i_brand_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [i_brand,i_brand_id,ss_ext_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [i_brand,i_brand_id,i_item_sk] - Filter [i_item_sk,i_manager_id] - Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] [i_brand,i_brand_id,i_item_sk,i_manager_id] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/explain.txt deleted file mode 100644 index 8d1ef0a64..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/explain.txt +++ /dev/null @@ -1,65 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output=[i_item_id#2,total_sales#1]) -+- *(20) HashAggregate(keys=[i_item_id#2], functions=[sum(total_sales#3)]) - +- Exchange hashpartitioning(i_item_id#2, 5) - +- *(19) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(total_sales#3)]) - +- Union - :- *(6) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- Exchange hashpartitioning(i_item_id#2, 5) - : +- *(5) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- *(5) Project [ss_ext_sales_price#4, i_item_id#2] - : +- *(5) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight - : :- *(5) Project [ss_item_sk#5, ss_ext_sales_price#4] - : : +- *(5) BroadcastHashJoin [ss_addr_sk#7], [ca_address_sk#8], Inner, BuildRight - : : :- *(5) Project [ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] - : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight - : : : :- *(5) Project [ss_sold_date_sk#9, ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] - : : : : +- *(5) Filter ((isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#7)) && isnotnull(ss_item_sk#5)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#10] - : : : +- *(1) Filter ((((isnotnull(d_year#11) && isnotnull(d_moy#12)) && (d_year#11 = 2001)) && (d_moy#12 = 2)) && isnotnull(d_date_sk#10)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [ca_address_sk#8] - : : +- *(2) Filter ((isnotnull(ca_gmt_offset#13) && (ca_gmt_offset#13 = -5.00)) && isnotnull(ca_address_sk#8)) - : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) BroadcastHashJoin [i_item_id#2], [i_item_id#2#14], LeftSemi, BuildRight - : :- *(4) Project [i_item_sk#6, i_item_id#2] - : : +- *(4) Filter isnotnull(i_item_sk#6) - : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_item_id#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : +- *(3) Project [i_item_id#2 AS i_item_id#2#14] - : +- *(3) Filter i_color#15 IN (slate,blanched,burnished) - : +- *(3) FileScan parquet default.item[i_item_id#2,i_color#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_color, [slate,blanched,burnished])], ReadSchema: struct - :- *(12) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) - : +- Exchange hashpartitioning(i_item_id#2, 5) - : +- *(11) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) - : +- *(11) Project [cs_ext_sales_price#16, i_item_id#2] - : +- *(11) BroadcastHashJoin [cs_item_sk#17], [i_item_sk#6], Inner, BuildRight - : :- *(11) Project [cs_item_sk#17, cs_ext_sales_price#16] - : : +- *(11) BroadcastHashJoin [cs_bill_addr_sk#18], [ca_address_sk#8], Inner, BuildRight - : : :- *(11) Project [cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] - : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#19], [d_date_sk#10], Inner, BuildRight - : : : :- *(11) Project [cs_sold_date_sk#19, cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] - : : : : +- *(11) Filter ((isnotnull(cs_sold_date_sk#19) && isnotnull(cs_bill_addr_sk#18)) && isnotnull(cs_item_sk#17)) - : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [i_item_sk#6, i_item_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(18) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(ws_ext_sales_price#20))]) - +- Exchange hashpartitioning(i_item_id#2, 5) - +- *(17) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#20))]) - +- *(17) Project [ws_ext_sales_price#20, i_item_id#2] - +- *(17) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#6], Inner, BuildRight - :- *(17) Project [ws_item_sk#21, ws_ext_sales_price#20] - : +- *(17) BroadcastHashJoin [ws_bill_addr_sk#22], [ca_address_sk#8], Inner, BuildRight - : :- *(17) Project [ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] - : : +- *(17) BroadcastHashJoin [ws_sold_date_sk#23], [d_date_sk#10], Inner, BuildRight - : : :- *(17) Project [ws_sold_date_sk#23, ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] - : : : +- *(17) Filter ((isnotnull(ws_sold_date_sk#23) && isnotnull(ws_bill_addr_sk#22)) && isnotnull(ws_item_sk#21)) - : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [i_item_sk#6, i_item_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/simplified.txt deleted file mode 100644 index 0433f198d..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/simplified.txt +++ /dev/null @@ -1,91 +0,0 @@ -TakeOrderedAndProject [i_item_id,total_sales] - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(total_sales)] [sum,sum(total_sales),total_sales] - InputAdapter - Exchange [i_item_id] #1 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum,total_sales] [sum,sum] - InputAdapter - Union - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_ext_sales_price)),total_sales] - InputAdapter - Exchange [i_item_id] #2 - WholeStageCodegen - HashAggregate [i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [i_item_id,ss_ext_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - Filter [ss_addr_sk,ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [ca_address_sk] - Filter [ca_address_sk,ca_gmt_offset] - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - BroadcastHashJoin [i_item_id,i_item_id] - Project [i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [i_item_id] - Filter [i_color] - Scan parquet default.item [i_color,i_item_id] [i_color,i_item_id] - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum,sum(UnscaledValue(cs_ext_sales_price)),total_sales] - InputAdapter - Exchange [i_item_id] #7 - WholeStageCodegen - HashAggregate [cs_ext_sales_price,i_item_id,sum,sum] [sum,sum] - Project [cs_ext_sales_price,i_item_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_ext_sales_price,cs_item_sk] - BroadcastHashJoin [ca_address_sk,cs_bill_addr_sk] - Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] - Filter [cs_bill_addr_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 - InputAdapter - ReusedExchange [ca_address_sk] [ca_address_sk] #4 - InputAdapter - ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #5 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),total_sales] - InputAdapter - Exchange [i_item_id] #8 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum,ws_ext_sales_price] [sum,sum] - Project [i_item_id,ws_ext_sales_price] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_ext_sales_price,ws_item_sk] - BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] - Filter [ws_bill_addr_sk,ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 - InputAdapter - ReusedExchange [ca_address_sk] [ca_address_sk] #4 - InputAdapter - ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/explain.txt deleted file mode 100644 index 275c06c2d..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/explain.txt +++ /dev/null @@ -1,51 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,cc_name#3 ASC NULLS FIRST], output=[i_category#4,i_brand#5,cc_name#3,d_year#6,d_moy#7,avg_monthly_sales#2,sum_sales#1,psum#8,nsum#9]) -+- *(22) Project [i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, avg_monthly_sales#2, sum_sales#1, sum_sales#10 AS psum#8, sum_sales#11 AS nsum#9] - +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, cc_name#3, rn#12], [i_category#13, i_brand#14, cc_name#15, (rn#16 - 1)], Inner, BuildRight - :- *(22) Project [i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, sum_sales#1, avg_monthly_sales#2, rn#12, sum_sales#10] - : +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, cc_name#3, rn#12], [i_category#17, i_brand#18, cc_name#19, (rn#20 + 1)], Inner, BuildRight - : :- *(22) Project [i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, sum_sales#1, avg_monthly_sales#2, rn#12] - : : +- *(22) Filter (((isnotnull(avg_monthly_sales#2) && (avg_monthly_sales#2 > 0.000000)) && (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000)) && isnotnull(rn#12)) - : : +- Window [avg(_w0#21) windowspecdefinition(i_category#4, i_brand#5, cc_name#3, d_year#6, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#5, cc_name#3, d_year#6] - : : +- *(7) Filter (isnotnull(d_year#6) && (d_year#6 = 1999)) - : : +- Window [rank(d_year#6, d_moy#7) windowspecdefinition(i_category#4, i_brand#5, cc_name#3, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#12], [i_category#4, i_brand#5, cc_name#3], [d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST] - : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, cc_name#3 ASC NULLS FIRST, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST], false, 0 - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, 5) - : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7], functions=[sum(UnscaledValue(cs_sales_price#22))]) - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, 5) - : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7], functions=[partial_sum(UnscaledValue(cs_sales_price#22))]) - : : +- *(4) Project [i_brand#5, i_category#4, cs_sales_price#22, d_year#6, d_moy#7, cc_name#3] - : : +- *(4) BroadcastHashJoin [cs_call_center_sk#23], [cc_call_center_sk#24], Inner, BuildRight - : : :- *(4) Project [i_brand#5, i_category#4, cs_call_center_sk#23, cs_sales_price#22, d_year#6, d_moy#7] - : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#26], Inner, BuildRight - : : : :- *(4) Project [i_brand#5, i_category#4, cs_sold_date_sk#25, cs_call_center_sk#23, cs_sales_price#22] - : : : : +- *(4) BroadcastHashJoin [i_item_sk#27], [cs_item_sk#28], Inner, BuildRight - : : : : :- *(4) Project [i_item_sk#27, i_brand#5, i_category#4] - : : : : : +- *(4) Filter ((isnotnull(i_item_sk#27) && isnotnull(i_brand#5)) && isnotnull(i_category#4)) - : : : : : +- *(4) FileScan parquet default.item[i_item_sk#27,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) - : : : : +- *(1) Project [cs_sold_date_sk#25, cs_call_center_sk#23, cs_item_sk#28, cs_sales_price#22] - : : : : +- *(1) Filter ((isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#25)) && isnotnull(cs_call_center_sk#23)) - : : : : +- *(1) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_call_center_sk#23,cs_item_sk#28,cs_sales_price#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_call_center_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [d_date_sk#26, d_year#6, d_moy#7] - : : : +- *(2) Filter ((((d_year#6 = 1999) || ((d_year#6 = 1998) && (d_moy#7 = 12))) || ((d_year#6 = 2000) && (d_moy#7 = 1))) && isnotnull(d_date_sk#26)) - : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#26,d_year#6,d_moy#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [cc_call_center_sk#24, cc_name#3] - : : +- *(3) Filter (isnotnull(cc_call_center_sk#24) && isnotnull(cc_name#3)) - : : +- *(3) FileScan parquet default.call_center[cc_call_center_sk#24,cc_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] + 1))) - : +- *(14) Project [i_category#17, i_brand#18, cc_name#19, sum_sales#10, rn#20] - : +- *(14) Filter isnotnull(rn#20) - : +- Window [rank(d_year#29, d_moy#30) windowspecdefinition(i_category#17, i_brand#18, cc_name#19, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#20], [i_category#17, i_brand#18, cc_name#19], [d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] - : +- *(13) Sort [i_category#17 ASC NULLS FIRST, i_brand#18 ASC NULLS FIRST, cc_name#19 ASC NULLS FIRST, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST], false, 0 - : +- Exchange hashpartitioning(i_category#17, i_brand#18, cc_name#19, 5) - : +- *(12) HashAggregate(keys=[i_category#17, i_brand#18, cc_name#19, d_year#29, d_moy#30], functions=[sum(UnscaledValue(cs_sales_price#22))]) - : +- ReusedExchange [i_category#17, i_brand#18, cc_name#19, d_year#29, d_moy#30, sum#31], Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, 5) - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] - 1))) - +- *(21) Project [i_category#13, i_brand#14, cc_name#15, sum_sales#11, rn#16] - +- *(21) Filter isnotnull(rn#16) - +- Window [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#13, i_brand#14, cc_name#15, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#16], [i_category#13, i_brand#14, cc_name#15], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] - +- *(20) Sort [i_category#13 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, cc_name#15 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 - +- ReusedExchange [i_category#13, i_brand#14, cc_name#15, d_year#32, d_moy#33, sum_sales#11], Exchange hashpartitioning(i_category#17, i_brand#18, cc_name#19, 5) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/simplified.txt deleted file mode 100644 index 3d71eb1d9..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/simplified.txt +++ /dev/null @@ -1,77 +0,0 @@ -TakeOrderedAndProject [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,nsum,psum,sum_sales] - WholeStageCodegen - Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,sum_sales,sum_sales,sum_sales] - BroadcastHashJoin [cc_name,cc_name,i_brand,i_brand,i_category,i_category,rn,rn] - Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,rn,sum_sales,sum_sales] - BroadcastHashJoin [cc_name,cc_name,i_brand,i_brand,i_category,i_category,rn,rn] - Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,rn,sum_sales] - Filter [avg_monthly_sales,rn,sum_sales] - InputAdapter - Window [_w0,cc_name,d_year,i_brand,i_category] - WholeStageCodegen - Filter [d_year] - InputAdapter - Window [cc_name,d_moy,d_year,i_brand,i_category] - WholeStageCodegen - Sort [cc_name,d_moy,d_year,i_brand,i_category] - InputAdapter - Exchange [cc_name,i_brand,i_category] #1 - WholeStageCodegen - HashAggregate [cc_name,d_moy,d_year,i_brand,i_category,sum,sum(UnscaledValue(cs_sales_price))] [_w0,sum,sum(UnscaledValue(cs_sales_price)),sum_sales] - InputAdapter - Exchange [cc_name,d_moy,d_year,i_brand,i_category] #2 - WholeStageCodegen - HashAggregate [cc_name,cs_sales_price,d_moy,d_year,i_brand,i_category,sum,sum] [sum,sum] - Project [cc_name,cs_sales_price,d_moy,d_year,i_brand,i_category] - BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] - Project [cs_call_center_sk,cs_sales_price,d_moy,d_year,i_brand,i_category] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_call_center_sk,cs_sales_price,cs_sold_date_sk,i_brand,i_category] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [i_brand,i_category,i_item_sk] - Filter [i_brand,i_category,i_item_sk] - Scan parquet default.item [i_brand,i_category,i_item_sk] [i_brand,i_category,i_item_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] - Filter [cs_call_center_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk,d_moy,d_year] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [cc_call_center_sk,cc_name] - Filter [cc_call_center_sk,cc_name] - Scan parquet default.call_center [cc_call_center_sk,cc_name] [cc_call_center_sk,cc_name] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [cc_name,i_brand,i_category,rn,sum_sales] - Filter [rn] - InputAdapter - Window [cc_name,d_moy,d_year,i_brand,i_category] - WholeStageCodegen - Sort [cc_name,d_moy,d_year,i_brand,i_category] - InputAdapter - Exchange [cc_name,i_brand,i_category] #7 - WholeStageCodegen - HashAggregate [cc_name,d_moy,d_year,i_brand,i_category,sum,sum(UnscaledValue(cs_sales_price))] [sum,sum(UnscaledValue(cs_sales_price)),sum_sales] - InputAdapter - ReusedExchange [cc_name,d_moy,d_year,i_brand,i_category,sum] [cc_name,d_moy,d_year,i_brand,i_category,sum] #2 - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [cc_name,i_brand,i_category,rn,sum_sales] - Filter [rn] - InputAdapter - Window [cc_name,d_moy,d_year,i_brand,i_category] - WholeStageCodegen - Sort [cc_name,d_moy,d_year,i_brand,i_category] - InputAdapter - ReusedExchange [cc_name,d_moy,d_year,i_brand,i_category,sum_sales] [cc_name,d_moy,d_year,i_brand,i_category,sum_sales] #7 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt deleted file mode 100644 index db623ff8f..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt +++ /dev/null @@ -1,101 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,ss_item_rev#2 ASC NULLS FIRST], output=[item_id#1,ss_item_rev#2,ss_dev#3,cs_item_rev#4,cs_dev#5,ws_item_rev#6,ws_dev#7,average#8]) -+- *(15) Project [item_id#1, ss_item_rev#2, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ss_dev#3, cs_item_rev#4, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(cs_item_rev#4 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS cs_dev#5, ws_item_rev#6, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ws_item_rev#6 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ws_dev#7, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2))) / 3.00), DecimalType(23,6)) AS average#8] - +- *(15) BroadcastHashJoin [item_id#1], [item_id#9], Inner, BuildRight, ((((((((cast(ss_item_rev#2 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#6)), DecimalType(19,3))) && (cast(ss_item_rev#2 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#6)), DecimalType(20,3)))) && (cast(cs_item_rev#4 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#6)), DecimalType(19,3)))) && (cast(cs_item_rev#4 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#6)), DecimalType(20,3)))) && (cast(ws_item_rev#6 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#2)), DecimalType(19,3)))) && (cast(ws_item_rev#6 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#2)), DecimalType(20,3)))) && (cast(ws_item_rev#6 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#4)), DecimalType(19,3)))) && (cast(ws_item_rev#6 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#4)), DecimalType(20,3)))) - :- *(15) Project [item_id#1, ss_item_rev#2, cs_item_rev#4] - : +- *(15) BroadcastHashJoin [item_id#1], [item_id#10], Inner, BuildRight, ((((cast(ss_item_rev#2 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#4)), DecimalType(19,3))) && (cast(ss_item_rev#2 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#4)), DecimalType(20,3)))) && (cast(cs_item_rev#4 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#2)), DecimalType(19,3)))) && (cast(cs_item_rev#4 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#2)), DecimalType(20,3)))) - : :- *(15) Filter isnotnull(ss_item_rev#2) - : : +- *(15) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(ss_ext_sales_price#12))]) - : : +- Exchange hashpartitioning(i_item_id#11, 5) - : : +- *(4) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#12))]) - : : +- *(4) Project [ss_ext_sales_price#12, i_item_id#11] - : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight - : : :- *(4) Project [ss_sold_date_sk#13, ss_ext_sales_price#12, i_item_id#11] - : : : +- *(4) BroadcastHashJoin [ss_item_sk#15], [i_item_sk#16], Inner, BuildRight - : : : :- *(4) Project [ss_sold_date_sk#13, ss_item_sk#15, ss_ext_sales_price#12] - : : : : +- *(4) Filter (isnotnull(ss_item_sk#15) && isnotnull(ss_sold_date_sk#13)) - : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_item_sk#15,ss_ext_sales_price#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [i_item_sk#16, i_item_id#11] - : : : +- *(1) Filter (isnotnull(i_item_sk#16) && isnotnull(i_item_id#11)) - : : : +- *(1) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [d_date_sk#14] - : : +- *(3) BroadcastHashJoin [d_date#17], [d_date#17#18], LeftSemi, BuildRight - : : :- *(3) Project [d_date_sk#14, d_date#17] - : : : +- *(3) Filter isnotnull(d_date_sk#14) - : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) - : : +- *(2) Project [d_date#17 AS d_date#17#18] - : : +- *(2) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12416)) - : : : +- Subquery subquery12416 - : : : +- *(1) Project [d_week_seq#19] - : : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) - : : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct - : : +- *(2) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct - : : +- Subquery subquery12416 - : : +- *(1) Project [d_week_seq#19] - : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) - : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : +- *(9) Filter isnotnull(cs_item_rev#4) - : +- *(9) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(cs_ext_sales_price#20))]) - : +- Exchange hashpartitioning(i_item_id#11, 5) - : +- *(8) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#20))]) - : +- *(8) Project [cs_ext_sales_price#20, i_item_id#11] - : +- *(8) BroadcastHashJoin [cs_sold_date_sk#21], [d_date_sk#14], Inner, BuildRight - : :- *(8) Project [cs_sold_date_sk#21, cs_ext_sales_price#20, i_item_id#11] - : : +- *(8) BroadcastHashJoin [cs_item_sk#22], [i_item_sk#16], Inner, BuildRight - : : :- *(8) Project [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#20] - : : : +- *(8) Filter (isnotnull(cs_item_sk#22) && isnotnull(cs_sold_date_sk#21)) - : : : +- *(8) FileScan parquet default.catalog_sales[cs_sold_date_sk#21,cs_item_sk#22,cs_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [i_item_sk#16, i_item_id#11], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(7) Project [d_date_sk#14] - : +- *(7) BroadcastHashJoin [d_date#17], [d_date#17#23], LeftSemi, BuildRight - : :- *(7) Project [d_date_sk#14, d_date#17] - : : +- *(7) Filter isnotnull(d_date_sk#14) - : : +- *(7) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) - : +- *(6) Project [d_date#17 AS d_date#17#23] - : +- *(6) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12420)) - : : +- Subquery subquery12420 - : : +- *(1) Project [d_week_seq#19] - : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) - : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct - : +- *(6) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct - : +- Subquery subquery12420 - : +- *(1) Project [d_week_seq#19] - : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) - : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - +- *(14) Filter isnotnull(ws_item_rev#6) - +- *(14) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(ws_ext_sales_price#24))]) - +- Exchange hashpartitioning(i_item_id#11, 5) - +- *(13) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#24))]) - +- *(13) Project [ws_ext_sales_price#24, i_item_id#11] - +- *(13) BroadcastHashJoin [ws_sold_date_sk#25], [d_date_sk#14], Inner, BuildRight - :- *(13) Project [ws_sold_date_sk#25, ws_ext_sales_price#24, i_item_id#11] - : +- *(13) BroadcastHashJoin [ws_item_sk#26], [i_item_sk#16], Inner, BuildRight - : :- *(13) Project [ws_sold_date_sk#25, ws_item_sk#26, ws_ext_sales_price#24] - : : +- *(13) Filter (isnotnull(ws_item_sk#26) && isnotnull(ws_sold_date_sk#25)) - : : +- *(13) FileScan parquet default.web_sales[ws_sold_date_sk#25,ws_item_sk#26,ws_ext_sales_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : +- ReusedExchange [i_item_sk#16, i_item_id#11], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(12) Project [d_date_sk#14] - +- *(12) BroadcastHashJoin [d_date#17], [d_date#17#27], LeftSemi, BuildRight - :- *(12) Project [d_date_sk#14, d_date#17] - : +- *(12) Filter isnotnull(d_date_sk#14) - : +- *(12) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) - +- *(11) Project [d_date#17 AS d_date#17#27] - +- *(11) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12424)) - : +- Subquery subquery12424 - : +- *(1) Project [d_week_seq#19] - : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) - : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct - +- *(11) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct - +- Subquery subquery12424 - +- *(1) Project [d_week_seq#19] - +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) - +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/simplified.txt deleted file mode 100644 index ea45e8665..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/simplified.txt +++ /dev/null @@ -1,133 +0,0 @@ -TakeOrderedAndProject [average,cs_dev,cs_item_rev,item_id,ss_dev,ss_item_rev,ws_dev,ws_item_rev] - WholeStageCodegen - Project [cs_item_rev,item_id,ss_item_rev,ws_item_rev] - BroadcastHashJoin [cs_item_rev,item_id,item_id,ss_item_rev,ws_item_rev] - Project [cs_item_rev,item_id,ss_item_rev] - BroadcastHashJoin [cs_item_rev,item_id,item_id,ss_item_rev] - Filter [ss_item_rev] - HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [item_id,ss_item_rev,sum,sum(UnscaledValue(ss_ext_sales_price))] - InputAdapter - Exchange [i_item_id] #1 - WholeStageCodegen - HashAggregate [i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [i_item_id,ss_ext_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_item_id,ss_ext_sales_price,ss_sold_date_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [i_item_id,i_item_sk] - Filter [i_item_id,i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk] - BroadcastHashJoin [d_date,d_date] - Project [d_date,d_date_sk] - Filter [d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date] - Filter [d_week_seq] - Subquery #1 - WholeStageCodegen - Project [d_week_seq] - Filter [d_date] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] - Subquery #1 - WholeStageCodegen - Project [d_week_seq] - Filter [d_date] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Filter [cs_item_rev] - HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [cs_item_rev,item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] - InputAdapter - Exchange [i_item_id] #6 - WholeStageCodegen - HashAggregate [cs_ext_sales_price,i_item_id,sum,sum] [sum,sum] - Project [cs_ext_sales_price,i_item_id] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_ext_sales_price,cs_sold_date_sk,i_item_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] - Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] - InputAdapter - ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #2 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [d_date_sk] - BroadcastHashJoin [d_date,d_date] - Project [d_date,d_date_sk] - Filter [d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [d_date] - Filter [d_week_seq] - Subquery #2 - WholeStageCodegen - Project [d_week_seq] - Filter [d_date] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] - Subquery #2 - WholeStageCodegen - Project [d_week_seq] - Filter [d_date] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] - InputAdapter - BroadcastExchange #9 - WholeStageCodegen - Filter [ws_item_rev] - HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [item_id,sum,sum(UnscaledValue(ws_ext_sales_price)),ws_item_rev] - InputAdapter - Exchange [i_item_id] #10 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum,ws_ext_sales_price] [sum,sum] - Project [i_item_id,ws_ext_sales_price] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [i_item_id,ws_ext_sales_price,ws_sold_date_sk] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] - InputAdapter - ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #2 - InputAdapter - BroadcastExchange #11 - WholeStageCodegen - Project [d_date_sk] - BroadcastHashJoin [d_date,d_date] - Project [d_date,d_date_sk] - Filter [d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - BroadcastExchange #12 - WholeStageCodegen - Project [d_date] - Filter [d_week_seq] - Subquery #3 - WholeStageCodegen - Project [d_week_seq] - Filter [d_date] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] - Subquery #3 - WholeStageCodegen - Project [d_week_seq] - Filter [d_date] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/explain.txt deleted file mode 100644 index 9236b71dd..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/explain.txt +++ /dev/null @@ -1,43 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[s_store_name1#1 ASC NULLS FIRST,s_store_id1#2 ASC NULLS FIRST,d_week_seq1#3 ASC NULLS FIRST], output=[s_store_name1#1,s_store_id1#2,d_week_seq1#3,(sun_sales1 / sun_sales2)#4,(mon_sales1 / mon_sales2)#5,(tue_sales1 / tue_sales2)#6,(wed_sales1 / wed_sales2)#7,(thu_sales1 / thu_sales2)#8,(fri_sales1 / fri_sales2)#9,(sat_sales1 / sat_sales2)#10]) -+- *(10) Project [s_store_name1#1, s_store_id1#2, d_week_seq1#3, CheckOverflow((promote_precision(sun_sales1#11) / promote_precision(sun_sales2#12)), DecimalType(37,20)) AS (sun_sales1 / sun_sales2)#4, CheckOverflow((promote_precision(mon_sales1#13) / promote_precision(mon_sales2#14)), DecimalType(37,20)) AS (mon_sales1 / mon_sales2)#5, CheckOverflow((promote_precision(tue_sales1#15) / promote_precision(tue_sales2#16)), DecimalType(37,20)) AS (tue_sales1 / tue_sales2)#6, CheckOverflow((promote_precision(wed_sales1#17) / promote_precision(wed_sales2#18)), DecimalType(37,20)) AS (wed_sales1 / wed_sales2)#7, CheckOverflow((promote_precision(thu_sales1#19) / promote_precision(thu_sales2#20)), DecimalType(37,20)) AS (thu_sales1 / thu_sales2)#8, CheckOverflow((promote_precision(fri_sales1#21) / promote_precision(fri_sales2#22)), DecimalType(37,20)) AS (fri_sales1 / fri_sales2)#9, CheckOverflow((promote_precision(sat_sales1#23) / promote_precision(sat_sales2#24)), DecimalType(37,20)) AS (sat_sales1 / sat_sales2)#10] - +- *(10) BroadcastHashJoin [s_store_id1#2, d_week_seq1#3], [s_store_id2#25, (d_week_seq2#26 - 52)], Inner, BuildRight - :- *(10) Project [s_store_name#27 AS s_store_name1#1, d_week_seq#28 AS d_week_seq1#3, s_store_id#29 AS s_store_id1#2, sun_sales#30 AS sun_sales1#11, mon_sales#31 AS mon_sales1#13, tue_sales#32 AS tue_sales1#15, wed_sales#33 AS wed_sales1#17, thu_sales#34 AS thu_sales1#19, fri_sales#35 AS fri_sales1#21, sat_sales#36 AS sat_sales1#23] - : +- *(10) BroadcastHashJoin [d_week_seq#28], [d_week_seq#37], Inner, BuildRight - : :- *(10) Project [d_week_seq#28, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#29, s_store_name#27] - : : +- *(10) BroadcastHashJoin [ss_store_sk#38], [s_store_sk#39], Inner, BuildRight - : : :- *(10) HashAggregate(keys=[d_week_seq#28, ss_store_sk#38], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#40 = Sunday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Monday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Tuesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Wednesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Thursday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Friday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Saturday) THEN ss_sales_price#41 ELSE null END))]) - : : : +- Exchange hashpartitioning(d_week_seq#28, ss_store_sk#38, 5) - : : : +- *(2) HashAggregate(keys=[d_week_seq#28, ss_store_sk#38], functions=[partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Sunday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Monday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Tuesday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Wednesday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Thursday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Friday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Saturday) THEN ss_sales_price#41 ELSE null END))]) - : : : +- *(2) Project [ss_store_sk#38, ss_sales_price#41, d_week_seq#28, d_day_name#40] - : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#42], [d_date_sk#43], Inner, BuildRight - : : : :- *(2) Project [ss_sold_date_sk#42, ss_store_sk#38, ss_sales_price#41] - : : : : +- *(2) Filter (isnotnull(ss_sold_date_sk#42) && isnotnull(ss_store_sk#38)) - : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#42,ss_store_sk#38,ss_sales_price#41] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#43, d_week_seq#28, d_day_name#40] - : : : +- *(1) Filter (isnotnull(d_date_sk#43) && isnotnull(d_week_seq#28)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#43,d_week_seq#28,d_day_name#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [s_store_sk#39, s_store_id#29, s_store_name#27] - : : +- *(3) Filter (isnotnull(s_store_sk#39) && isnotnull(s_store_id#29)) - : : +- *(3) FileScan parquet default.store[s_store_sk#39,s_store_id#29,s_store_name#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [d_week_seq#37] - : +- *(4) Filter (((isnotnull(d_month_seq#44) && (d_month_seq#44 >= 1212)) && (d_month_seq#44 <= 1223)) && isnotnull(d_week_seq#37)) - : +- *(4) FileScan parquet default.date_dim[d_month_seq#44,d_week_seq#37] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223),..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52))) - +- *(9) Project [d_week_seq#28 AS d_week_seq2#26, s_store_id#29 AS s_store_id2#25, sun_sales#30 AS sun_sales2#12, mon_sales#31 AS mon_sales2#14, tue_sales#32 AS tue_sales2#16, wed_sales#33 AS wed_sales2#18, thu_sales#34 AS thu_sales2#20, fri_sales#35 AS fri_sales2#22, sat_sales#36 AS sat_sales2#24] - +- *(9) BroadcastHashJoin [d_week_seq#28], [d_week_seq#45], Inner, BuildRight - :- *(9) Project [d_week_seq#28, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#29] - : +- *(9) BroadcastHashJoin [ss_store_sk#38], [s_store_sk#39], Inner, BuildRight - : :- *(9) HashAggregate(keys=[d_week_seq#28, ss_store_sk#38], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#40 = Sunday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Monday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Tuesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Wednesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Thursday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Friday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Saturday) THEN ss_sales_price#41 ELSE null END))]) - : : +- ReusedExchange [d_week_seq#28, ss_store_sk#38, sum#46, sum#47, sum#48, sum#49, sum#50, sum#51, sum#52], Exchange hashpartitioning(d_week_seq#28, ss_store_sk#38, 5) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(7) Project [s_store_sk#39, s_store_id#29] - : +- *(7) Filter (isnotnull(s_store_sk#39) && isnotnull(s_store_id#29)) - : +- *(7) FileScan parquet default.store[s_store_sk#39,s_store_id#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(8) Project [d_week_seq#45] - +- *(8) Filter (((isnotnull(d_month_seq#53) && (d_month_seq#53 >= 1224)) && (d_month_seq#53 <= 1235)) && isnotnull(d_week_seq#45)) - +- *(8) FileScan parquet default.date_dim[d_month_seq#53,d_week_seq#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1224), LessThanOrEqual(d_month_seq,1235),..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/simplified.txt deleted file mode 100644 index 038219a53..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/simplified.txt +++ /dev/null @@ -1,58 +0,0 @@ -TakeOrderedAndProject [(fri_sales1 / fri_sales2),(mon_sales1 / mon_sales2),(sat_sales1 / sat_sales2),(sun_sales1 / sun_sales2),(thu_sales1 / thu_sales2),(tue_sales1 / tue_sales2),(wed_sales1 / wed_sales2),d_week_seq1,s_store_id1,s_store_name1] - WholeStageCodegen - Project [d_week_seq1,fri_sales1,fri_sales2,mon_sales1,mon_sales2,s_store_id1,s_store_name1,sat_sales1,sat_sales2,sun_sales1,sun_sales2,thu_sales1,thu_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2] - BroadcastHashJoin [d_week_seq1,d_week_seq2,s_store_id1,s_store_id2] - Project [d_week_seq,fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] - BroadcastHashJoin [d_week_seq,d_week_seq] - Project [d_week_seq,fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] - BroadcastHashJoin [s_store_sk,ss_store_sk] - HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] - InputAdapter - Exchange [d_week_seq,ss_store_sk] #1 - WholeStageCodegen - HashAggregate [d_day_name,d_week_seq,ss_sales_price,ss_store_sk,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [d_day_name,d_week_seq,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_sales_price,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [d_date_sk,d_day_name,d_week_seq] - Filter [d_date_sk,d_week_seq] - Scan parquet default.date_dim [d_date_sk,d_day_name,d_week_seq] [d_date_sk,d_day_name,d_week_seq] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [s_store_id,s_store_name,s_store_sk] - Filter [s_store_id,s_store_sk] - Scan parquet default.store [s_store_id,s_store_name,s_store_sk] [s_store_id,s_store_name,s_store_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_week_seq] - Filter [d_month_seq,d_week_seq] - Scan parquet default.date_dim [d_month_seq,d_week_seq] [d_month_seq,d_week_seq] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_week_seq,fri_sales,mon_sales,s_store_id,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] - BroadcastHashJoin [d_week_seq,d_week_seq] - Project [d_week_seq,fri_sales,mon_sales,s_store_id,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] - BroadcastHashJoin [s_store_sk,ss_store_sk] - HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] - InputAdapter - ReusedExchange [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] #1 - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [s_store_id,s_store_sk] - Filter [s_store_id,s_store_sk] - Scan parquet default.store [s_store_id,s_store_sk] [s_store_id,s_store_sk] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [d_week_seq] - Filter [d_month_seq,d_week_seq] - Scan parquet default.date_dim [d_month_seq,d_week_seq] [d_month_seq,d_week_seq] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/explain.txt deleted file mode 100644 index eb06bf9b7..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/explain.txt +++ /dev/null @@ -1,58 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[cnt#1 ASC NULLS FIRST], output=[state#2,cnt#1]) -+- *(8) Project [state#2, cnt#1] - +- *(8) Filter (count(1)#3 >= 10) - +- *(8) HashAggregate(keys=[ca_state#4], functions=[count(1)]) - +- Exchange hashpartitioning(ca_state#4, 5) - +- *(7) HashAggregate(keys=[ca_state#4], functions=[partial_count(1)]) - +- *(7) Project [ca_state#4] - +- *(7) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight - :- *(7) Project [ca_state#4, ss_item_sk#5] - : +- *(7) BroadcastHashJoin [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight - : :- *(7) Project [ca_state#4, ss_sold_date_sk#7, ss_item_sk#5] - : : +- *(7) BroadcastHashJoin [c_customer_sk#9], [ss_customer_sk#10], Inner, BuildRight - : : :- *(7) Project [ca_state#4, c_customer_sk#9] - : : : +- *(7) BroadcastHashJoin [ca_address_sk#11], [c_current_addr_sk#12], Inner, BuildRight - : : : :- *(7) Project [ca_address_sk#11, ca_state#4] - : : : : +- *(7) Filter isnotnull(ca_address_sk#11) - : : : : +- *(7) FileScan parquet default.customer_address[ca_address_sk#11,ca_state#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : : +- *(1) Project [c_customer_sk#9, c_current_addr_sk#12] - : : : +- *(1) Filter (isnotnull(c_current_addr_sk#12) && isnotnull(c_customer_sk#9)) - : : : +- *(1) FileScan parquet default.customer[c_customer_sk#9,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) - : : +- *(2) Project [ss_sold_date_sk#7, ss_item_sk#5, ss_customer_sk#10] - : : +- *(2) Filter ((isnotnull(ss_customer_sk#10) && isnotnull(ss_sold_date_sk#7)) && isnotnull(ss_item_sk#5)) - : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#7,ss_item_sk#5,ss_customer_sk#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [d_date_sk#8] - : +- *(3) Filter ((isnotnull(d_month_seq#13) && (d_month_seq#13 = Subquery subquery982)) && isnotnull(d_date_sk#8)) - : : +- Subquery subquery982 - : : +- *(2) HashAggregate(keys=[d_month_seq#13], functions=[]) - : : +- Exchange hashpartitioning(d_month_seq#13, 5) - : : +- *(1) HashAggregate(keys=[d_month_seq#13], functions=[]) - : : +- *(1) Project [d_month_seq#13] - : : +- *(1) Filter (((isnotnull(d_year#14) && isnotnull(d_moy#15)) && (d_year#14 = 2000)) && (d_moy#15 = 1)) - : : +- *(1) FileScan parquet default.date_dim[d_month_seq#13,d_year#14,d_moy#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)], ReadSchema: struct - : +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_month_seq#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct - : +- Subquery subquery982 - : +- *(2) HashAggregate(keys=[d_month_seq#13], functions=[]) - : +- Exchange hashpartitioning(d_month_seq#13, 5) - : +- *(1) HashAggregate(keys=[d_month_seq#13], functions=[]) - : +- *(1) Project [d_month_seq#13] - : +- *(1) Filter (((isnotnull(d_year#14) && isnotnull(d_moy#15)) && (d_year#14 = 2000)) && (d_moy#15 = 1)) - : +- *(1) FileScan parquet default.date_dim[d_month_seq#13,d_year#14,d_moy#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(6) Project [i_item_sk#6] - +- *(6) Filter (cast(i_current_price#16 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#17)), DecimalType(14,7))) - +- *(6) BroadcastHashJoin [i_category#18], [i_category#18#19], LeftOuter, BuildRight - :- *(6) Project [i_item_sk#6, i_current_price#16, i_category#18] - : +- *(6) Filter (isnotnull(i_current_price#16) && isnotnull(i_item_sk#6)) - : +- *(6) FileScan parquet default.item[i_item_sk#6,i_current_price#16,i_category#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) - +- *(5) HashAggregate(keys=[i_category#18], functions=[avg(UnscaledValue(i_current_price#16))]) - +- Exchange hashpartitioning(i_category#18, 5) - +- *(4) HashAggregate(keys=[i_category#18], functions=[partial_avg(UnscaledValue(i_current_price#16))]) - +- *(4) Project [i_current_price#16, i_category#18] - +- *(4) Filter isnotnull(i_category#18) - +- *(4) FileScan parquet default.item[i_current_price#16,i_category#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/simplified.txt deleted file mode 100644 index 2bf134280..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/simplified.txt +++ /dev/null @@ -1,78 +0,0 @@ -TakeOrderedAndProject [cnt,state] - WholeStageCodegen - Project [cnt,state] - Filter [count(1)] - HashAggregate [ca_state,count,count(1)] [cnt,count,count(1),count(1),state] - InputAdapter - Exchange [ca_state] #1 - WholeStageCodegen - HashAggregate [ca_state,count,count] [count,count] - Project [ca_state] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ca_state,ss_item_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ca_state,ss_item_sk,ss_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_customer_sk,ca_state] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [ca_address_sk,ca_state] - Filter [ca_address_sk] - Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [c_current_addr_sk,c_customer_sk] - Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [ss_customer_sk,ss_item_sk,ss_sold_date_sk] - Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk] [ss_customer_sk,ss_item_sk,ss_sold_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_month_seq] - Subquery #1 - WholeStageCodegen - HashAggregate [d_month_seq] - InputAdapter - Exchange [d_month_seq] #5 - WholeStageCodegen - HashAggregate [d_month_seq] - Project [d_month_seq] - Filter [d_moy,d_year] - Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] - Subquery #1 - WholeStageCodegen - HashAggregate [d_month_seq] - InputAdapter - Exchange [d_month_seq] #5 - WholeStageCodegen - HashAggregate [d_month_seq] - Project [d_month_seq] - Filter [d_moy,d_year] - Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [i_item_sk] - Filter [avg(i_current_price),i_current_price] - BroadcastHashJoin [i_category,i_category] - Project [i_category,i_current_price,i_item_sk] - Filter [i_current_price,i_item_sk] - Scan parquet default.item [i_category,i_current_price,i_item_sk] [i_category,i_current_price,i_item_sk] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(i_current_price)),count,i_category,sum] [avg(UnscaledValue(i_current_price)),avg(i_current_price),count,i_category,sum] - InputAdapter - Exchange [i_category] #8 - WholeStageCodegen - HashAggregate [count,count,i_category,i_current_price,sum,sum] [count,count,sum,sum] - Project [i_category,i_current_price] - Filter [i_category] - Scan parquet default.item [i_category,i_current_price] [i_category,i_current_price] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/explain.txt deleted file mode 100644 index e4e212165..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/explain.txt +++ /dev/null @@ -1,65 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,total_sales#2 ASC NULLS FIRST], output=[i_item_id#1,total_sales#2]) -+- *(20) HashAggregate(keys=[i_item_id#1], functions=[sum(total_sales#3)]) - +- Exchange hashpartitioning(i_item_id#1, 5) - +- *(19) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(total_sales#3)]) - +- Union - :- *(6) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- Exchange hashpartitioning(i_item_id#1, 5) - : +- *(5) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- *(5) Project [ss_ext_sales_price#4, i_item_id#1] - : +- *(5) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight - : :- *(5) Project [ss_item_sk#5, ss_ext_sales_price#4] - : : +- *(5) BroadcastHashJoin [ss_addr_sk#7], [ca_address_sk#8], Inner, BuildRight - : : :- *(5) Project [ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] - : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight - : : : :- *(5) Project [ss_sold_date_sk#9, ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] - : : : : +- *(5) Filter ((isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#7)) && isnotnull(ss_item_sk#5)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#10] - : : : +- *(1) Filter ((((isnotnull(d_year#11) && isnotnull(d_moy#12)) && (d_year#11 = 1998)) && (d_moy#12 = 9)) && isnotnull(d_date_sk#10)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,9), IsNotNull(d_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [ca_address_sk#8] - : : +- *(2) Filter ((isnotnull(ca_gmt_offset#13) && (ca_gmt_offset#13 = -5.00)) && isnotnull(ca_address_sk#8)) - : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) BroadcastHashJoin [i_item_id#1], [i_item_id#1#14], LeftSemi, BuildRight - : :- *(4) Project [i_item_sk#6, i_item_id#1] - : : +- *(4) Filter isnotnull(i_item_sk#6) - : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_item_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : +- *(3) Project [i_item_id#1 AS i_item_id#1#14] - : +- *(3) Filter (isnotnull(i_category#15) && (i_category#15 = Music)) - : +- *(3) FileScan parquet default.item[i_item_id#1,i_category#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Music)], ReadSchema: struct - :- *(12) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) - : +- Exchange hashpartitioning(i_item_id#1, 5) - : +- *(11) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) - : +- *(11) Project [cs_ext_sales_price#16, i_item_id#1] - : +- *(11) BroadcastHashJoin [cs_item_sk#17], [i_item_sk#6], Inner, BuildRight - : :- *(11) Project [cs_item_sk#17, cs_ext_sales_price#16] - : : +- *(11) BroadcastHashJoin [cs_bill_addr_sk#18], [ca_address_sk#8], Inner, BuildRight - : : :- *(11) Project [cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] - : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#19], [d_date_sk#10], Inner, BuildRight - : : : :- *(11) Project [cs_sold_date_sk#19, cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] - : : : : +- *(11) Filter ((isnotnull(cs_sold_date_sk#19) && isnotnull(cs_bill_addr_sk#18)) && isnotnull(cs_item_sk#17)) - : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [i_item_sk#6, i_item_id#1], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(18) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(ws_ext_sales_price#20))]) - +- Exchange hashpartitioning(i_item_id#1, 5) - +- *(17) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#20))]) - +- *(17) Project [ws_ext_sales_price#20, i_item_id#1] - +- *(17) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#6], Inner, BuildRight - :- *(17) Project [ws_item_sk#21, ws_ext_sales_price#20] - : +- *(17) BroadcastHashJoin [ws_bill_addr_sk#22], [ca_address_sk#8], Inner, BuildRight - : :- *(17) Project [ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] - : : +- *(17) BroadcastHashJoin [ws_sold_date_sk#23], [d_date_sk#10], Inner, BuildRight - : : :- *(17) Project [ws_sold_date_sk#23, ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] - : : : +- *(17) Filter ((isnotnull(ws_sold_date_sk#23) && isnotnull(ws_bill_addr_sk#22)) && isnotnull(ws_item_sk#21)) - : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [i_item_sk#6, i_item_id#1], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/simplified.txt deleted file mode 100644 index 01ea963ce..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/simplified.txt +++ /dev/null @@ -1,91 +0,0 @@ -TakeOrderedAndProject [i_item_id,total_sales] - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(total_sales)] [sum,sum(total_sales),total_sales] - InputAdapter - Exchange [i_item_id] #1 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum,total_sales] [sum,sum] - InputAdapter - Union - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_ext_sales_price)),total_sales] - InputAdapter - Exchange [i_item_id] #2 - WholeStageCodegen - HashAggregate [i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [i_item_id,ss_ext_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - Filter [ss_addr_sk,ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [ca_address_sk] - Filter [ca_address_sk,ca_gmt_offset] - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - BroadcastHashJoin [i_item_id,i_item_id] - Project [i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [i_item_id] - Filter [i_category] - Scan parquet default.item [i_category,i_item_id] [i_category,i_item_id] - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum,sum(UnscaledValue(cs_ext_sales_price)),total_sales] - InputAdapter - Exchange [i_item_id] #7 - WholeStageCodegen - HashAggregate [cs_ext_sales_price,i_item_id,sum,sum] [sum,sum] - Project [cs_ext_sales_price,i_item_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_ext_sales_price,cs_item_sk] - BroadcastHashJoin [ca_address_sk,cs_bill_addr_sk] - Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] - Filter [cs_bill_addr_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 - InputAdapter - ReusedExchange [ca_address_sk] [ca_address_sk] #4 - InputAdapter - ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #5 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),total_sales] - InputAdapter - Exchange [i_item_id] #8 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum,ws_ext_sales_price] [sum,sum] - Project [i_item_id,ws_ext_sales_price] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_ext_sales_price,ws_item_sk] - BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] - Filter [ws_bill_addr_sk,ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 - InputAdapter - ReusedExchange [ca_address_sk] [ca_address_sk] #4 - InputAdapter - ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/explain.txt deleted file mode 100644 index 49bd37330..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/explain.txt +++ /dev/null @@ -1,68 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[promotions#1 ASC NULLS FIRST,total#2 ASC NULLS FIRST], output=[promotions#1,total#2,(CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#3]) -+- *(16) Project [promotions#1, total#2, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#1 as decimal(15,4))) / promote_precision(cast(total#2 as decimal(15,4)))), DecimalType(35,20))) * 100.00000000000000000000), DecimalType(38,19)) AS (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#3] - +- BroadcastNestedLoopJoin BuildRight, Inner - :- *(8) HashAggregate(keys=[], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- Exchange SinglePartition - : +- *(7) HashAggregate(keys=[], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- *(7) Project [ss_ext_sales_price#4] - : +- *(7) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight - : :- *(7) Project [ss_item_sk#5, ss_ext_sales_price#4] - : : +- *(7) BroadcastHashJoin [c_current_addr_sk#7], [ca_address_sk#8], Inner, BuildRight - : : :- *(7) Project [ss_item_sk#5, ss_ext_sales_price#4, c_current_addr_sk#7] - : : : +- *(7) BroadcastHashJoin [ss_customer_sk#9], [c_customer_sk#10], Inner, BuildRight - : : : :- *(7) Project [ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] - : : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#11], [d_date_sk#12], Inner, BuildRight - : : : : :- *(7) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] - : : : : : +- *(7) BroadcastHashJoin [ss_promo_sk#13], [p_promo_sk#14], Inner, BuildRight - : : : : : :- *(7) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_promo_sk#13, ss_ext_sales_price#4] - : : : : : : +- *(7) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight - : : : : : : :- *(7) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_store_sk#15, ss_promo_sk#13, ss_ext_sales_price#4] - : : : : : : : +- *(7) Filter ((((isnotnull(ss_store_sk#15) && isnotnull(ss_promo_sk#13)) && isnotnull(ss_sold_date_sk#11)) && isnotnull(ss_customer_sk#9)) && isnotnull(ss_item_sk#5)) - : : : : : : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#5,ss_customer_sk#9,ss_store_sk#15,ss_promo_sk#13,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_custome..., ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(2) Project [p_promo_sk#14] - : : : : : +- *(2) Filter ((((p_channel_dmail#18 = Y) || (p_channel_email#19 = Y)) || (p_channel_tv#20 = Y)) && isnotnull(p_promo_sk#14)) - : : : : : +- *(2) FileScan parquet default.promotion[p_promo_sk#14,p_channel_dmail#18,p_channel_email#19,p_channel_tv#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(p_channel_dmail,Y),EqualTo(p_channel_email,Y)),EqualTo(p_channel_tv,Y)), IsNotNull..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(3) Project [d_date_sk#12] - : : : : +- *(3) Filter ((((isnotnull(d_year#21) && isnotnull(d_moy#22)) && (d_year#21 = 1998)) && (d_moy#22 = 11)) && isnotnull(d_date_sk#12)) - : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_year#21,d_moy#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(4) Project [c_customer_sk#10, c_current_addr_sk#7] - : : : +- *(4) Filter (isnotnull(c_customer_sk#10) && isnotnull(c_current_addr_sk#7)) - : : : +- *(4) FileScan parquet default.customer[c_customer_sk#10,c_current_addr_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(5) Project [ca_address_sk#8] - : : +- *(5) Filter ((isnotnull(ca_gmt_offset#23) && (ca_gmt_offset#23 = -5.00)) && isnotnull(ca_address_sk#8)) - : : +- *(5) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(6) Project [i_item_sk#6] - : +- *(6) Filter ((isnotnull(i_category#24) && (i_category#24 = Jewelry)) && isnotnull(i_item_sk#6)) - : +- *(6) FileScan parquet default.item[i_item_sk#6,i_category#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry), IsNotNull(i_item_sk)], ReadSchema: struct - +- BroadcastExchange IdentityBroadcastMode - +- *(15) HashAggregate(keys=[], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) - +- Exchange SinglePartition - +- *(14) HashAggregate(keys=[], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) - +- *(14) Project [ss_ext_sales_price#4] - +- *(14) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight - :- *(14) Project [ss_item_sk#5, ss_ext_sales_price#4] - : +- *(14) BroadcastHashJoin [c_current_addr_sk#7], [ca_address_sk#8], Inner, BuildRight - : :- *(14) Project [ss_item_sk#5, ss_ext_sales_price#4, c_current_addr_sk#7] - : : +- *(14) BroadcastHashJoin [ss_customer_sk#9], [c_customer_sk#10], Inner, BuildRight - : : :- *(14) Project [ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] - : : : +- *(14) BroadcastHashJoin [ss_sold_date_sk#11], [d_date_sk#12], Inner, BuildRight - : : : :- *(14) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] - : : : : +- *(14) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight - : : : : :- *(14) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_store_sk#15, ss_ext_sales_price#4] - : : : : : +- *(14) Filter (((isnotnull(ss_store_sk#15) && isnotnull(ss_sold_date_sk#11)) && isnotnull(ss_customer_sk#9)) && isnotnull(ss_item_sk#5)) - : : : : : +- *(14) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#5,ss_customer_sk#9,ss_store_sk#15,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item..., ReadSchema: struct120 days #8]) -+- *(6) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3], functions=[sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 30) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 60) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 90) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) - +- Exchange hashpartitioning(substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3, 5) - +- *(5) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20) AS substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3], functions=[partial_sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 30) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 60) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 90) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) - +- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, w_warehouse_name#9, sm_type#2, web_name#3] - +- *(5) BroadcastHashJoin [ws_ship_date_sk#11], [d_date_sk#13], Inner, BuildRight - :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, w_warehouse_name#9, sm_type#2, web_name#3] - : +- *(5) BroadcastHashJoin [ws_web_site_sk#14], [web_site_sk#15], Inner, BuildRight - : :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, ws_web_site_sk#14, w_warehouse_name#9, sm_type#2] - : : +- *(5) BroadcastHashJoin [ws_ship_mode_sk#16], [sm_ship_mode_sk#17], Inner, BuildRight - : : :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, ws_web_site_sk#14, ws_ship_mode_sk#16, w_warehouse_name#9] - : : : +- *(5) BroadcastHashJoin [ws_warehouse_sk#18], [w_warehouse_sk#19], Inner, BuildRight - : : : :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, ws_web_site_sk#14, ws_ship_mode_sk#16, ws_warehouse_sk#18] - : : : : +- *(5) Filter (((isnotnull(ws_warehouse_sk#18) && isnotnull(ws_ship_mode_sk#16)) && isnotnull(ws_web_site_sk#14)) && isnotnull(ws_ship_date_sk#11)) - : : : : +- *(5) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_ship_date_sk#11,ws_web_site_sk#14,ws_ship_mode_sk#16,ws_warehouse_sk#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_ship_mode_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [sm_ship_mode_sk#17, sm_type#2] - : : +- *(2) Filter isnotnull(sm_ship_mode_sk#17) - : : +- *(2) FileScan parquet default.ship_mode[sm_ship_mode_sk#17,sm_type#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/ship_mode], PartitionFilters: [], PushedFilters: [IsNotNull(sm_ship_mode_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [web_site_sk#15, web_name#3] - : +- *(3) Filter isnotnull(web_site_sk#15) - : +- *(3) FileScan parquet default.web_site[web_site_sk#15,web_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(4) Project [d_date_sk#13] - +- *(4) Filter (((isnotnull(d_month_seq#20) && (d_month_seq#20 >= 1200)) && (d_month_seq#20 <= 1211)) && isnotnull(d_date_sk#13)) - +- *(4) FileScan parquet default.date_dim[d_date_sk#13,d_month_seq#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/simplified.txt deleted file mode 100644 index b8879730e..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/simplified.txt +++ /dev/null @@ -1,42 +0,0 @@ -TakeOrderedAndProject [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sm_type,substring(w_warehouse_name, 1, 20),web_name] - WholeStageCodegen - HashAggregate [sm_type,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),web_name] [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint))] - InputAdapter - Exchange [sm_type,substring(w_warehouse_name, 1, 20),web_name] #1 - WholeStageCodegen - HashAggregate [sm_type,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_warehouse_name,web_name,ws_ship_date_sk,ws_sold_date_sk] [substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [sm_type,w_warehouse_name,web_name,ws_ship_date_sk,ws_sold_date_sk] - BroadcastHashJoin [d_date_sk,ws_ship_date_sk] - Project [sm_type,w_warehouse_name,web_name,ws_ship_date_sk,ws_sold_date_sk] - BroadcastHashJoin [web_site_sk,ws_web_site_sk] - Project [sm_type,w_warehouse_name,ws_ship_date_sk,ws_sold_date_sk,ws_web_site_sk] - BroadcastHashJoin [sm_ship_mode_sk,ws_ship_mode_sk] - Project [w_warehouse_name,ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_web_site_sk] - BroadcastHashJoin [w_warehouse_sk,ws_warehouse_sk] - Project [ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_web_site_sk] - Filter [ws_ship_date_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_web_site_sk] - Scan parquet default.web_sales [ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_web_site_sk] [ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_web_site_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [w_warehouse_name,w_warehouse_sk] - Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [sm_ship_mode_sk,sm_type] - Filter [sm_ship_mode_sk] - Scan parquet default.ship_mode [sm_ship_mode_sk,sm_type] [sm_ship_mode_sk,sm_type] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [web_name,web_site_sk] - Filter [web_site_sk] - Scan parquet default.web_site [web_name,web_site_sk] [web_name,web_site_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/explain.txt deleted file mode 100644 index 98e27704a..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/explain.txt +++ /dev/null @@ -1,31 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_manager_id#1 ASC NULLS FIRST,avg_monthly_sales#2 ASC NULLS FIRST,sum_sales#3 ASC NULLS FIRST], output=[i_manager_id#1,sum_sales#3,avg_monthly_sales#2]) -+- *(7) Project [i_manager_id#1, sum_sales#3, avg_monthly_sales#2] - +- *(7) Filter (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#3 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000) - +- Window [avg(_w0#4) windowspecdefinition(i_manager_id#1, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_manager_id#1] - +- *(6) Sort [i_manager_id#1 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(i_manager_id#1, 5) - +- *(5) HashAggregate(keys=[i_manager_id#1, d_moy#5], functions=[sum(UnscaledValue(ss_sales_price#6))]) - +- Exchange hashpartitioning(i_manager_id#1, d_moy#5, 5) - +- *(4) HashAggregate(keys=[i_manager_id#1, d_moy#5], functions=[partial_sum(UnscaledValue(ss_sales_price#6))]) - +- *(4) Project [i_manager_id#1, ss_sales_price#6, d_moy#5] - +- *(4) BroadcastHashJoin [ss_store_sk#7], [s_store_sk#8], Inner, BuildRight - :- *(4) Project [i_manager_id#1, ss_store_sk#7, ss_sales_price#6, d_moy#5] - : +- *(4) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight - : :- *(4) Project [i_manager_id#1, ss_sold_date_sk#9, ss_store_sk#7, ss_sales_price#6] - : : +- *(4) BroadcastHashJoin [i_item_sk#11], [ss_item_sk#12], Inner, BuildRight - : : :- *(4) Project [i_item_sk#11, i_manager_id#1] - : : : +- *(4) Filter ((((i_category#13 IN (Books,Children,Electronics) && i_class#14 IN (personal,portable,refernece,self-help)) && i_brand#15 IN (scholaramalgamalg #16,scholaramalgamalg #17,exportiunivamalg #18,scholaramalgamalg #18)) || ((i_category#13 IN (Women,Music,Men) && i_class#14 IN (accessories,classical,fragrances,pants)) && i_brand#15 IN (amalgimporto #19,edu packscholar #19,exportiimporto #19,importoamalg #19))) && isnotnull(i_item_sk#11)) - : : : +- *(4) FileScan parquet default.item[i_item_sk#11,i_brand#15,i_class#14,i_category#13,i_manager_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,refernece..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : +- *(1) Project [ss_sold_date_sk#9, ss_item_sk#12, ss_store_sk#7, ss_sales_price#6] - : : +- *(1) Filter ((isnotnull(ss_item_sk#12) && isnotnull(ss_sold_date_sk#9)) && isnotnull(ss_store_sk#7)) - : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#12,ss_store_sk#7,ss_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#10, d_moy#5] - : +- *(2) Filter (d_month_seq#20 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) && isnotnull(d_date_sk#10)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#10,d_month_seq#20,d_moy#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [s_store_sk#8] - +- *(3) Filter isnotnull(s_store_sk#8) - +- *(3) FileScan parquet default.store[s_store_sk#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/simplified.txt deleted file mode 100644 index fc602334c..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/simplified.txt +++ /dev/null @@ -1,43 +0,0 @@ -TakeOrderedAndProject [avg_monthly_sales,i_manager_id,sum_sales] - WholeStageCodegen - Project [avg_monthly_sales,i_manager_id,sum_sales] - Filter [avg_monthly_sales,sum_sales] - InputAdapter - Window [_w0,i_manager_id] - WholeStageCodegen - Sort [i_manager_id] - InputAdapter - Exchange [i_manager_id] #1 - WholeStageCodegen - HashAggregate [d_moy,i_manager_id,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] - InputAdapter - Exchange [d_moy,i_manager_id] #2 - WholeStageCodegen - HashAggregate [d_moy,i_manager_id,ss_sales_price,sum,sum] [sum,sum] - Project [d_moy,i_manager_id,ss_sales_price] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [d_moy,i_manager_id,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_manager_id,ss_sales_price,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_item_sk,i_manager_id] - Filter [i_brand,i_category,i_class,i_item_sk] - Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_manager_id] [i_brand,i_category,i_class,i_item_sk,i_manager_id] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk,d_moy] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy] [d_date_sk,d_month_seq,d_moy] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_sk] [s_store_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt deleted file mode 100644 index 9319775c4..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt +++ /dev/null @@ -1,170 +0,0 @@ -== Physical Plan == -*(43) Sort [product_name#1 ASC NULLS FIRST, store_name#2 ASC NULLS FIRST, cnt#3 ASC NULLS FIRST], true, 0 -+- Exchange rangepartitioning(product_name#1 ASC NULLS FIRST, store_name#2 ASC NULLS FIRST, cnt#3 ASC NULLS FIRST, 5) - +- *(42) Project [product_name#1, store_name#2, store_zip#4, b_street_number#5, b_streen_name#6, b_city#7, b_zip#8, c_street_number#9, c_street_name#10, c_city#11, c_zip#12, syear#13, cnt#14, s1#15, s2#16, s3#17, s1#18, s2#19, s3#20, syear#21, cnt#3] - +- *(42) BroadcastHashJoin [item_sk#22, store_name#2, store_zip#4], [item_sk#23, store_name#24, store_zip#25], Inner, BuildRight, (cnt#3 <= cnt#14) - :- *(42) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[count(1), sum(UnscaledValue(ss_wholesale_cost#41)), sum(UnscaledValue(ss_list_price#42)), sum(UnscaledValue(ss_coupon_amt#43))]) - : +- Exchange hashpartitioning(i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40, 5) - : +- *(20) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#41)), partial_sum(UnscaledValue(ss_list_price#42)), partial_sum(UnscaledValue(ss_coupon_amt#43))]) - : +- *(20) Project [ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, d_year#39, d_year#40, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, i_item_sk#27, i_product_name#26] - : +- *(20) BroadcastHashJoin [ss_item_sk#44], [i_item_sk#27], Inner, BuildRight - : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] - : : +- *(20) BroadcastHashJoin [hd_income_band_sk#45], [ib_income_band_sk#46], Inner, BuildRight - : : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] - : : : +- *(20) BroadcastHashJoin [hd_income_band_sk#47], [ib_income_band_sk#48], Inner, BuildRight - : : : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] - : : : : +- *(20) BroadcastHashJoin [c_current_addr_sk#49], [ca_address_sk#50], Inner, BuildRight - : : : : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33] - : : : : : +- *(20) BroadcastHashJoin [ss_addr_sk#51], [ca_address_sk#52], Inner, BuildRight - : : : : : :- *(20) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45] - : : : : : : +- *(20) BroadcastHashJoin [c_current_hdemo_sk#53], [hd_demo_sk#54], Inner, BuildRight - : : : : : : :- *(20) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47] - : : : : : : : +- *(20) BroadcastHashJoin [ss_hdemo_sk#55], [hd_demo_sk#56], Inner, BuildRight - : : : : : : : :- *(20) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] - : : : : : : : : +- *(20) BroadcastHashJoin [ss_promo_sk#57], [p_promo_sk#58], Inner, BuildRight - : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] - : : : : : : : : : +- *(20) BroadcastHashJoin [c_current_cdemo_sk#59], [cd_demo_sk#60], Inner, BuildRight, NOT (cd_marital_status#61 = cd_marital_status#62) - : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, cd_marital_status#61] - : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_cdemo_sk#63], [cd_demo_sk#64], Inner, BuildRight - : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] - : : : : : : : : : : : +- *(20) BroadcastHashJoin [c_first_shipto_date_sk#65], [d_date_sk#66], Inner, BuildRight - : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, d_year#39] - : : : : : : : : : : : : +- *(20) BroadcastHashJoin [c_first_sales_date_sk#67], [d_date_sk#68], Inner, BuildRight - : : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67] - : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_customer_sk#69], [c_customer_sk#70], Inner, BuildRight - : : : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29] - : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_store_sk#71], [s_store_sk#72], Inner, BuildRight - : : : : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38] - : : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_sold_date_sk#73], [d_date_sk#74], Inner, BuildRight - : : : : : : : : : : : : : : : :- *(20) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] - : : : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_item_sk#44], [cs_item_sk#75], Inner, BuildRight - : : : : : : : : : : : : : : : : :- *(20) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] - : : : : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [cast(ss_item_sk#44 as bigint), cast(ss_ticket_number#76 as bigint)], [sr_item_sk#77, sr_ticket_number#78], Inner, BuildRight - : : : : : : : : : : : : : : : : : :- *(20) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_ticket_number#76, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] - : : : : : : : : : : : : : : : : : : +- *(20) Filter ((((((((isnotnull(ss_item_sk#44) && isnotnull(ss_ticket_number#76)) && isnotnull(ss_sold_date_sk#73)) && isnotnull(ss_store_sk#71)) && isnotnull(ss_customer_sk#69)) && isnotnull(ss_cdemo_sk#63)) && isnotnull(ss_promo_sk#57)) && isnotnull(ss_hdemo_sk#55)) && isnotnull(ss_addr_sk#51)) - : : : : : : : : : : : : : : : : : : +- *(20) FileScan parquet default.store_sales[ss_sold_date_sk#73,ss_item_sk#44,ss_customer_sk#69,ss_cdemo_sk#63,ss_hdemo_sk#55,ss_addr_sk#51,ss_store_sk#71,ss_promo_sk#57,ss_ticket_number#76,ss_wholesale_cost#41,ss_list_price#42,ss_coupon_amt#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_sto..., ReadSchema: struct - : : : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : : : : : : : : : +- *(4) Project [cs_item_sk#75] - : : : : : : : : : : : : : : : : +- *(4) Filter (isnotnull(sum(cs_ext_list_price#79)#80) && (cast(sum(cs_ext_list_price#79)#80 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))#84)), DecimalType(21,2)))) - : : : : : : : : : : : : : : : : +- *(4) HashAggregate(keys=[cs_item_sk#75], functions=[sum(UnscaledValue(cs_ext_list_price#79)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))]) - : : : : : : : : : : : : : : : : +- Exchange hashpartitioning(cs_item_sk#75, 5) - : : : : : : : : : : : : : : : : +- *(3) HashAggregate(keys=[cs_item_sk#75], functions=[partial_sum(UnscaledValue(cs_ext_list_price#79)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))]) - : : : : : : : : : : : : : : : : +- *(3) Project [cs_item_sk#75, cs_ext_list_price#79, cr_refunded_cash#81, cr_reversed_charge#82, cr_store_credit#83] - : : : : : : : : : : : : : : : : +- *(3) BroadcastHashJoin [cs_item_sk#75, cs_order_number#85], [cr_item_sk#86, cr_order_number#87], Inner, BuildRight - : : : : : : : : : : : : : : : : :- *(3) Project [cs_item_sk#75, cs_order_number#85, cs_ext_list_price#79] - : : : : : : : : : : : : : : : : : +- *(3) Filter (isnotnull(cs_item_sk#75) && isnotnull(cs_order_number#85)) - : : : : : : : : : : : : : : : : : +- *(3) FileScan parquet default.catalog_sales[cs_item_sk#75,cs_order_number#85,cs_ext_list_price#79] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)], ReadSchema: struct - : : : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295)))) - : : : : : : : : : : : : : : : : +- *(2) Project [cr_item_sk#86, cr_order_number#87, cr_refunded_cash#81, cr_reversed_charge#82, cr_store_credit#83] - : : : : : : : : : : : : : : : : +- *(2) Filter (isnotnull(cr_item_sk#86) && isnotnull(cr_order_number#87)) - : : : : : : : : : : : : : : : : +- *(2) FileScan parquet default.catalog_returns[cr_item_sk#86,cr_order_number#87,cr_refunded_cash#81,cr_reversed_charge#82,cr_store_credit#83] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct - : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : : : : : : : +- *(6) Project [s_store_sk#72, s_store_name#28, s_zip#29] - : : : : : : : : : : : : : : +- *(6) Filter ((isnotnull(s_store_sk#72) && isnotnull(s_zip#29)) && isnotnull(s_store_name#28)) - : : : : : : : : : : : : : : +- *(6) FileScan parquet default.store[s_store_sk#72,s_store_name#28,s_zip#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip), IsNotNull(s_store_name)], ReadSchema: struct - : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : : : : : : +- *(7) Project [c_customer_sk#70, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67] - : : : : : : : : : : : : : +- *(7) Filter (((((isnotnull(c_customer_sk#70) && isnotnull(c_first_sales_date_sk#67)) && isnotnull(c_first_shipto_date_sk#65)) && isnotnull(c_current_cdemo_sk#59)) && isnotnull(c_current_hdemo_sk#53)) && isnotnull(c_current_addr_sk#49)) - : : : : : : : : : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#70,c_current_cdemo_sk#59,c_current_hdemo_sk#53,c_current_addr_sk#49,c_first_shipto_date_sk#65,c_first_sales_date_sk#67] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), I..., ReadSchema: struct - : : : : : : : : : : : +- ReusedExchange [d_date_sk#66, d_year#40], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : : : +- *(10) Project [cd_demo_sk#64, cd_marital_status#61] - : : : : : : : : : : +- *(10) Filter (isnotnull(cd_demo_sk#64) && isnotnull(cd_marital_status#61)) - : : : : : : : : : : +- *(10) FileScan parquet default.customer_demographics[cd_demo_sk#64,cd_marital_status#61] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)], ReadSchema: struct - : : : : : : : : : +- ReusedExchange [cd_demo_sk#60, cd_marital_status#62], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : +- *(12) Project [p_promo_sk#58] - : : : : : : : : +- *(12) Filter isnotnull(p_promo_sk#58) - : : : : : : : : +- *(12) FileScan parquet default.promotion[p_promo_sk#58] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_promo_sk)], ReadSchema: struct - : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : +- *(13) Project [hd_demo_sk#56, hd_income_band_sk#47] - : : : : : : : +- *(13) Filter (isnotnull(hd_demo_sk#56) && isnotnull(hd_income_band_sk#47)) - : : : : : : : +- *(13) FileScan parquet default.household_demographics[hd_demo_sk#56,hd_income_band_sk#47] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)], ReadSchema: struct - : : : : : : +- ReusedExchange [hd_demo_sk#54, hd_income_band_sk#45], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(15) Project [ca_address_sk#52, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33] - : : : : : +- *(15) Filter isnotnull(ca_address_sk#52) - : : : : : +- *(15) FileScan parquet default.customer_address[ca_address_sk#52,ca_street_number#30,ca_street_name#31,ca_city#32,ca_zip#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct - : : : : +- ReusedExchange [ca_address_sk#50, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(17) Project [ib_income_band_sk#48] - : : : +- *(17) Filter isnotnull(ib_income_band_sk#48) - : : : +- *(17) FileScan parquet default.income_band[ib_income_band_sk#48] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/income_band], PartitionFilters: [], PushedFilters: [IsNotNull(ib_income_band_sk)], ReadSchema: struct - : : +- ReusedExchange [ib_income_band_sk#46], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(19) Project [i_item_sk#27, i_product_name#26] - : +- *(19) Filter ((((((isnotnull(i_current_price#88) && i_color#89 IN (purple,burlywood,indian,spring,floral,medium)) && (i_current_price#88 >= 64.00)) && (cast(i_current_price#88 as decimal(12,2)) <= 74.00)) && (cast(i_current_price#88 as decimal(12,2)) >= 65.00)) && (cast(i_current_price#88 as decimal(12,2)) <= 79.00)) && isnotnull(i_item_sk#27)) - : +- *(19) FileScan parquet default.item[i_item_sk#27,i_current_price#88,i_color#89,i_product_name#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), In(i_color, [purple,burlywood,indian,spring,floral,medium]), Greater..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, string, true], input[2, string, true])) - +- *(41) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[count(1), sum(UnscaledValue(ss_wholesale_cost#41)), sum(UnscaledValue(ss_list_price#42)), sum(UnscaledValue(ss_coupon_amt#43))]) - +- Exchange hashpartitioning(i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40, 5) - +- *(40) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#41)), partial_sum(UnscaledValue(ss_list_price#42)), partial_sum(UnscaledValue(ss_coupon_amt#43))]) - +- *(40) Project [ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, d_year#39, d_year#40, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, i_item_sk#27, i_product_name#26] - +- *(40) BroadcastHashJoin [ss_item_sk#44], [i_item_sk#27], Inner, BuildRight - :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] - : +- *(40) BroadcastHashJoin [hd_income_band_sk#45], [ib_income_band_sk#46], Inner, BuildRight - : :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] - : : +- *(40) BroadcastHashJoin [hd_income_band_sk#47], [ib_income_band_sk#48], Inner, BuildRight - : : :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] - : : : +- *(40) BroadcastHashJoin [c_current_addr_sk#49], [ca_address_sk#50], Inner, BuildRight - : : : :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33] - : : : : +- *(40) BroadcastHashJoin [ss_addr_sk#51], [ca_address_sk#52], Inner, BuildRight - : : : : :- *(40) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45] - : : : : : +- *(40) BroadcastHashJoin [c_current_hdemo_sk#53], [hd_demo_sk#54], Inner, BuildRight - : : : : : :- *(40) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47] - : : : : : : +- *(40) BroadcastHashJoin [ss_hdemo_sk#55], [hd_demo_sk#56], Inner, BuildRight - : : : : : : :- *(40) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] - : : : : : : : +- *(40) BroadcastHashJoin [ss_promo_sk#57], [p_promo_sk#58], Inner, BuildRight - : : : : : : : :- *(40) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] - : : : : : : : : +- *(40) BroadcastHashJoin [c_current_cdemo_sk#59], [cd_demo_sk#60], Inner, BuildRight, NOT (cd_marital_status#61 = cd_marital_status#62) - : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, cd_marital_status#61] - : : : : : : : : : +- *(40) BroadcastHashJoin [ss_cdemo_sk#63], [cd_demo_sk#64], Inner, BuildRight - : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] - : : : : : : : : : : +- *(40) BroadcastHashJoin [c_first_shipto_date_sk#65], [d_date_sk#66], Inner, BuildRight - : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, d_year#39] - : : : : : : : : : : : +- *(40) BroadcastHashJoin [c_first_sales_date_sk#67], [d_date_sk#68], Inner, BuildRight - : : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67] - : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_customer_sk#69], [c_customer_sk#70], Inner, BuildRight - : : : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29] - : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_store_sk#71], [s_store_sk#72], Inner, BuildRight - : : : : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38] - : : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_sold_date_sk#73], [d_date_sk#74], Inner, BuildRight - : : : : : : : : : : : : : : :- *(40) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] - : : : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_item_sk#44], [cs_item_sk#75], Inner, BuildRight - : : : : : : : : : : : : : : : :- *(40) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] - : : : : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [cast(ss_item_sk#44 as bigint), cast(ss_ticket_number#76 as bigint)], [sr_item_sk#77, sr_ticket_number#78], Inner, BuildRight - : : : : : : : : : : : : : : : : :- *(40) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_ticket_number#76, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] - : : : : : : : : : : : : : : : : : +- *(40) Filter ((((((((isnotnull(ss_item_sk#44) && isnotnull(ss_ticket_number#76)) && isnotnull(ss_sold_date_sk#73)) && isnotnull(ss_store_sk#71)) && isnotnull(ss_customer_sk#69)) && isnotnull(ss_cdemo_sk#63)) && isnotnull(ss_promo_sk#57)) && isnotnull(ss_hdemo_sk#55)) && isnotnull(ss_addr_sk#51)) - : : : : : : : : : : : : : : : : : +- *(40) FileScan parquet default.store_sales[ss_sold_date_sk#73,ss_item_sk#44,ss_customer_sk#69,ss_cdemo_sk#63,ss_hdemo_sk#55,ss_addr_sk#51,ss_store_sk#71,ss_promo_sk#57,ss_ticket_number#76,ss_wholesale_cost#41,ss_list_price#42,ss_coupon_amt#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_sto..., ReadSchema: struct - : : : : : : : : : : : : : +- ReusedExchange [s_store_sk#72, s_store_name#28, s_zip#29], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : : : : : +- ReusedExchange [c_customer_sk#70, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : : : : +- ReusedExchange [d_date_sk#68, d_year#39], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : : : +- ReusedExchange [d_date_sk#66, d_year#40], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : : +- ReusedExchange [cd_demo_sk#64, cd_marital_status#61], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : +- ReusedExchange [cd_demo_sk#60, cd_marital_status#62], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : +- ReusedExchange [p_promo_sk#58], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : +- ReusedExchange [hd_demo_sk#56, hd_income_band_sk#47], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- ReusedExchange [hd_demo_sk#54, hd_income_band_sk#45], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- ReusedExchange [ca_address_sk#52, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- ReusedExchange [ca_address_sk#50, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- ReusedExchange [ib_income_band_sk#48], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [ib_income_band_sk#46], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [i_item_sk#27, i_product_name#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/simplified.txt deleted file mode 100644 index 2955e0f78..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/simplified.txt +++ /dev/null @@ -1,229 +0,0 @@ -WholeStageCodegen - Sort [cnt,product_name,store_name] - InputAdapter - Exchange [cnt,product_name,store_name] #1 - WholeStageCodegen - Project [b_city,b_streen_name,b_street_number,b_zip,c_city,c_street_name,c_street_number,c_zip,cnt,cnt,product_name,s1,s1,s2,s2,s3,s3,store_name,store_zip,syear,syear] - BroadcastHashJoin [cnt,cnt,item_sk,item_sk,store_name,store_name,store_zip,store_zip] - HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,count(1),d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost))] [b_city,b_streen_name,b_street_number,b_zip,c_city,c_street_name,c_street_number,c_zip,cnt,count,count(1),item_sk,product_name,s1,s2,s3,store_name,store_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost)),syear] - InputAdapter - Exchange [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip] #2 - WholeStageCodegen - HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,count,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost,sum,sum,sum,sum,sum,sum] [count,count,sum,sum,sum,sum,sum,sum] - Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,ca_city,ca_street_name,ca_street_number,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [c_current_addr_sk,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [p_promo_sk,ss_promo_sk] - Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] - BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cd_marital_status,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] - BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] - BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_shipto_date_sk,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] - BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [d_year,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_store_sk,ss_wholesale_cost] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] - BroadcastHashJoin [cs_item_sk,ss_item_sk] - Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] - Filter [ss_addr_sk,ss_cdemo_sk,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [sr_item_sk,sr_ticket_number] - Filter [sr_item_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [cs_item_sk] - Filter [sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(cs_ext_list_price)] - HashAggregate [cs_item_sk,sum,sum,sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(UnscaledValue(cs_ext_list_price))] [sum,sum,sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(UnscaledValue(cs_ext_list_price)),sum(cs_ext_list_price)] - InputAdapter - Exchange [cs_item_sk] #5 - WholeStageCodegen - HashAggregate [cr_refunded_cash,cr_reversed_charge,cr_store_credit,cs_ext_list_price,cs_item_sk,sum,sum,sum,sum] [sum,sum,sum,sum] - Project [cr_refunded_cash,cr_reversed_charge,cr_store_credit,cs_ext_list_price,cs_item_sk] - BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] - Project [cs_ext_list_price,cs_item_sk,cs_order_number] - Filter [cs_item_sk,cs_order_number] - Scan parquet default.catalog_sales [cs_ext_list_price,cs_item_sk,cs_order_number] [cs_ext_list_price,cs_item_sk,cs_order_number] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] - Filter [cr_item_sk,cr_order_number] - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [d_date_sk,d_year] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [s_store_name,s_store_sk,s_zip] - Filter [s_store_name,s_store_sk,s_zip] - Scan parquet default.store [s_store_name,s_store_sk,s_zip] [s_store_name,s_store_sk,s_zip] - InputAdapter - BroadcastExchange #9 - WholeStageCodegen - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] - Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] - Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen - Project [d_date_sk,d_year] - Filter [d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #10 - InputAdapter - BroadcastExchange #11 - WholeStageCodegen - Project [cd_demo_sk,cd_marital_status] - Filter [cd_demo_sk,cd_marital_status] - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] - InputAdapter - ReusedExchange [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] #11 - InputAdapter - BroadcastExchange #12 - WholeStageCodegen - Project [p_promo_sk] - Filter [p_promo_sk] - Scan parquet default.promotion [p_promo_sk] [p_promo_sk] - InputAdapter - BroadcastExchange #13 - WholeStageCodegen - Project [hd_demo_sk,hd_income_band_sk] - Filter [hd_demo_sk,hd_income_band_sk] - Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] - InputAdapter - ReusedExchange [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] #13 - InputAdapter - BroadcastExchange #14 - WholeStageCodegen - Project [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] - Filter [ca_address_sk] - Scan parquet default.customer_address [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] - InputAdapter - ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #14 - InputAdapter - BroadcastExchange #15 - WholeStageCodegen - Project [ib_income_band_sk] - Filter [ib_income_band_sk] - Scan parquet default.income_band [ib_income_band_sk] [ib_income_band_sk] - InputAdapter - ReusedExchange [ib_income_band_sk] [ib_income_band_sk] #15 - InputAdapter - BroadcastExchange #16 - WholeStageCodegen - Project [i_item_sk,i_product_name] - Filter [i_color,i_current_price,i_item_sk] - Scan parquet default.item [i_color,i_current_price,i_item_sk,i_product_name] [i_color,i_current_price,i_item_sk,i_product_name] - InputAdapter - BroadcastExchange #17 - WholeStageCodegen - HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,count(1),d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost))] [cnt,count,count(1),item_sk,s1,s2,s3,store_name,store_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost)),syear] - InputAdapter - Exchange [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip] #18 - WholeStageCodegen - HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,count,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost,sum,sum,sum,sum,sum,sum] [count,count,sum,sum,sum,sum,sum,sum] - Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,ca_city,ca_street_name,ca_street_number,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [c_current_addr_sk,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [p_promo_sk,ss_promo_sk] - Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] - BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cd_marital_status,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] - BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] - BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_shipto_date_sk,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] - BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [d_year,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_store_sk,ss_wholesale_cost] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] - BroadcastHashJoin [cs_item_sk,ss_item_sk] - Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] - Filter [ss_addr_sk,ss_cdemo_sk,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] - InputAdapter - ReusedExchange [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] #3 - InputAdapter - ReusedExchange [cs_item_sk] [cs_item_sk] #4 - InputAdapter - BroadcastExchange #19 - WholeStageCodegen - Project [d_date_sk,d_year] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - InputAdapter - ReusedExchange [s_store_name,s_store_sk,s_zip] [s_store_name,s_store_sk,s_zip] #8 - InputAdapter - ReusedExchange [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] #9 - InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #10 - InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #10 - InputAdapter - ReusedExchange [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] #11 - InputAdapter - ReusedExchange [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] #11 - InputAdapter - ReusedExchange [p_promo_sk] [p_promo_sk] #12 - InputAdapter - ReusedExchange [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] #13 - InputAdapter - ReusedExchange [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] #13 - InputAdapter - ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #14 - InputAdapter - ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #14 - InputAdapter - ReusedExchange [ib_income_band_sk] [ib_income_band_sk] #15 - InputAdapter - ReusedExchange [ib_income_band_sk] [ib_income_band_sk] #15 - InputAdapter - ReusedExchange [i_item_sk,i_product_name] [i_item_sk,i_product_name] #16 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/explain.txt deleted file mode 100644 index 90e6d0221..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/explain.txt +++ /dev/null @@ -1,42 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST], output=[s_store_name#1,i_item_desc#2,revenue#3,i_current_price#4,i_wholesale_cost#5,i_brand#6]) -+- *(9) Project [s_store_name#1, i_item_desc#2, revenue#3, i_current_price#4, i_wholesale_cost#5, i_brand#6] - +- *(9) BroadcastHashJoin [ss_store_sk#7], [ss_store_sk#8], Inner, BuildRight, (cast(revenue#3 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#9)), DecimalType(23,7))) - :- *(9) Project [s_store_name#1, ss_store_sk#7, revenue#3, i_item_desc#2, i_current_price#4, i_wholesale_cost#5, i_brand#6] - : +- *(9) BroadcastHashJoin [ss_item_sk#10], [i_item_sk#11], Inner, BuildRight - : :- *(9) Project [s_store_name#1, ss_store_sk#7, ss_item_sk#10, revenue#3] - : : +- *(9) BroadcastHashJoin [s_store_sk#12], [ss_store_sk#7], Inner, BuildRight - : : :- *(9) Project [s_store_sk#12, s_store_name#1] - : : : +- *(9) Filter isnotnull(s_store_sk#12) - : : : +- *(9) FileScan parquet default.store[s_store_sk#12,s_store_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Filter isnotnull(revenue#3) - : : +- *(3) HashAggregate(keys=[ss_store_sk#7, ss_item_sk#10], functions=[sum(UnscaledValue(ss_sales_price#13))]) - : : +- Exchange hashpartitioning(ss_store_sk#7, ss_item_sk#10, 5) - : : +- *(2) HashAggregate(keys=[ss_store_sk#7, ss_item_sk#10], functions=[partial_sum(UnscaledValue(ss_sales_price#13))]) - : : +- *(2) Project [ss_item_sk#10, ss_store_sk#7, ss_sales_price#13] - : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight - : : :- *(2) Project [ss_sold_date_sk#14, ss_item_sk#10, ss_store_sk#7, ss_sales_price#13] - : : : +- *(2) Filter ((isnotnull(ss_sold_date_sk#14) && isnotnull(ss_store_sk#7)) && isnotnull(ss_item_sk#10)) - : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#14,ss_item_sk#10,ss_store_sk#7,ss_sales_price#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [d_date_sk#15] - : : +- *(1) Filter (((isnotnull(d_month_seq#16) && (d_month_seq#16 >= 1176)) && (d_month_seq#16 <= 1187)) && isnotnull(d_date_sk#15)) - : : +- *(1) FileScan parquet default.date_dim[d_date_sk#15,d_month_seq#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1176), LessThanOrEqual(d_month_seq,1187),..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [i_item_sk#11, i_item_desc#2, i_current_price#4, i_wholesale_cost#5, i_brand#6] - : +- *(4) Filter isnotnull(i_item_sk#11) - : +- *(4) FileScan parquet default.item[i_item_sk#11,i_item_desc#2,i_current_price#4,i_wholesale_cost#5,i_brand#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - +- ReusedExchange [d_date_sk#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/simplified.txt deleted file mode 100644 index 2250a433c..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/simplified.txt +++ /dev/null @@ -1,57 +0,0 @@ -TakeOrderedAndProject [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name] - WholeStageCodegen - Project [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name] - BroadcastHashJoin [ave,revenue,ss_store_sk,ss_store_sk] - Project [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name,ss_store_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [revenue,s_store_name,ss_item_sk,ss_store_sk] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [s_store_name,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_name,s_store_sk] [s_store_name,s_store_sk] - InputAdapter - BroadcastExchange #1 - WholeStageCodegen - Filter [revenue] - HashAggregate [ss_item_sk,ss_store_sk,sum,sum(UnscaledValue(ss_sales_price))] [revenue,sum,sum(UnscaledValue(ss_sales_price))] - InputAdapter - Exchange [ss_item_sk,ss_store_sk] #2 - WholeStageCodegen - HashAggregate [ss_item_sk,ss_sales_price,ss_store_sk,sum,sum] [sum,sum] - Project [ss_item_sk,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [i_brand,i_current_price,i_item_desc,i_item_sk,i_wholesale_cost] - Filter [i_item_sk] - Scan parquet default.item [i_brand,i_current_price,i_item_desc,i_item_sk,i_wholesale_cost] [i_brand,i_current_price,i_item_desc,i_item_sk,i_wholesale_cost] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - HashAggregate [avg(revenue),count,ss_store_sk,sum] [ave,avg(revenue),count,sum] - InputAdapter - Exchange [ss_store_sk] #6 - WholeStageCodegen - HashAggregate [count,count,revenue,ss_store_sk,sum,sum] [count,count,sum,sum] - HashAggregate [ss_item_sk,ss_store_sk,sum,sum(UnscaledValue(ss_sales_price))] [revenue,sum,sum(UnscaledValue(ss_sales_price))] - InputAdapter - Exchange [ss_item_sk,ss_store_sk] #7 - WholeStageCodegen - HashAggregate [ss_item_sk,ss_sales_price,ss_store_sk,sum,sum] [sum,sum] - Project [ss_item_sk,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/explain.txt deleted file mode 100644 index fb220ece8..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/explain.txt +++ /dev/null @@ -1,54 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[w_warehouse_name#1 ASC NULLS FIRST], output=[w_warehouse_name#1,w_warehouse_sq_ft#2,w_city#3,w_county#4,w_state#5,w_country#6,ship_carriers#7,year#8,jan_sales#9,feb_sales#10,mar_sales#11,apr_sales#12,may_sales#13,jun_sales#14,jul_sales#15,aug_sales#16,sep_sales#17,oct_sales#18,nov_sales#19,dec_sales#20,jan_sales_per_sq_foot#21,feb_sales_per_sq_foot#22,mar_sales_per_sq_foot#23,apr_sales_per_sq_foot#24,... 20 more fields]) -+- *(14) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, ship_carriers#7, year#8], functions=[sum(jan_sales#25), sum(feb_sales#26), sum(mar_sales#27), sum(apr_sales#28), sum(may_sales#29), sum(jun_sales#30), sum(jul_sales#31), sum(aug_sales#32), sum(sep_sales#33), sum(oct_sales#34), sum(nov_sales#35), sum(dec_sales#36), sum(CheckOverflow((promote_precision(jan_sales#25) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(feb_sales#26) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(mar_sales#27) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(apr_sales#28) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(may_sales#29) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jun_sales#30) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jul_sales#31) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(aug_sales#32) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(sep_sales#33) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(oct_sales#34) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(nov_sales#35) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(dec_sales#36) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), ... 12 more fields]) - +- Exchange hashpartitioning(w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, ship_carriers#7, year#8, 5) - +- *(13) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, ship_carriers#7, year#8], functions=[partial_sum(jan_sales#25), partial_sum(feb_sales#26), partial_sum(mar_sales#27), partial_sum(apr_sales#28), partial_sum(may_sales#29), partial_sum(jun_sales#30), partial_sum(jul_sales#31), partial_sum(aug_sales#32), partial_sum(sep_sales#33), partial_sum(oct_sales#34), partial_sum(nov_sales#35), partial_sum(dec_sales#36), partial_sum(CheckOverflow((promote_precision(jan_sales#25) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(feb_sales#26) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(mar_sales#27) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(apr_sales#28) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(may_sales#29) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jun_sales#30) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jul_sales#31) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(aug_sales#32) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(sep_sales#33) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(oct_sales#34) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(nov_sales#35) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(dec_sales#36) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), ... 12 more fields]) - +- Union - :- *(6) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) - : +- Exchange hashpartitioning(w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, 5) - : +- *(5) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) - : +- *(5) Project [ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] - : +- *(5) BroadcastHashJoin [ws_ship_mode_sk#42], [sm_ship_mode_sk#43], Inner, BuildRight - : :- *(5) Project [ws_ship_mode_sk#42, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] - : : +- *(5) BroadcastHashJoin [ws_sold_time_sk#44], [t_time_sk#45], Inner, BuildRight - : : :- *(5) Project [ws_sold_time_sk#44, ws_ship_mode_sk#42, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] - : : : +- *(5) BroadcastHashJoin [ws_sold_date_sk#46], [d_date_sk#47], Inner, BuildRight - : : : :- *(5) Project [ws_sold_date_sk#46, ws_sold_time_sk#44, ws_ship_mode_sk#42, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6] - : : : : +- *(5) BroadcastHashJoin [ws_warehouse_sk#48], [w_warehouse_sk#49], Inner, BuildRight - : : : : :- *(5) Project [ws_sold_date_sk#46, ws_sold_time_sk#44, ws_ship_mode_sk#42, ws_warehouse_sk#48, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41] - : : : : : +- *(5) Filter (((isnotnull(ws_warehouse_sk#48) && isnotnull(ws_sold_date_sk#46)) && isnotnull(ws_sold_time_sk#44)) && isnotnull(ws_ship_mode_sk#42)) - : : : : : +- *(5) FileScan parquet default.web_sales[ws_sold_date_sk#46,ws_sold_time_sk#44,ws_ship_mode_sk#42,ws_warehouse_sk#48,ws_quantity#40,ws_ext_sales_price#39,ws_net_paid#41] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [t_time_sk#45] - : : +- *(3) Filter (((isnotnull(t_time#50) && (t_time#50 >= 30838)) && (t_time#50 <= 59638)) && isnotnull(t_time_sk#45)) - : : +- *(3) FileScan parquet default.time_dim[t_time_sk#45,t_time#50] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_time), GreaterThanOrEqual(t_time,30838), LessThanOrEqual(t_time,59638), IsNotNull(t_..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [sm_ship_mode_sk#43] - : +- *(4) Filter (sm_carrier#51 IN (DHL,BARIAN) && isnotnull(sm_ship_mode_sk#43)) - : +- *(4) FileScan parquet default.ship_mode[sm_ship_mode_sk#43,sm_carrier#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/ship_mode], PartitionFilters: [], PushedFilters: [In(sm_carrier, [DHL,BARIAN]), IsNotNull(sm_ship_mode_sk)], ReadSchema: struct - +- *(12) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) - +- Exchange hashpartitioning(w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, 5) - +- *(11) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) - +- *(11) Project [cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] - +- *(11) BroadcastHashJoin [cs_ship_mode_sk#55], [sm_ship_mode_sk#43], Inner, BuildRight - :- *(11) Project [cs_ship_mode_sk#55, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] - : +- *(11) BroadcastHashJoin [cs_sold_time_sk#56], [t_time_sk#45], Inner, BuildRight - : :- *(11) Project [cs_sold_time_sk#56, cs_ship_mode_sk#55, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] - : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#57], [d_date_sk#47], Inner, BuildRight - : : :- *(11) Project [cs_sold_date_sk#57, cs_sold_time_sk#56, cs_ship_mode_sk#55, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6] - : : : +- *(11) BroadcastHashJoin [cs_warehouse_sk#58], [w_warehouse_sk#49], Inner, BuildRight - : : : :- *(11) Project [cs_sold_date_sk#57, cs_sold_time_sk#56, cs_ship_mode_sk#55, cs_warehouse_sk#58, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54] - : : : : +- *(11) Filter (((isnotnull(cs_warehouse_sk#58) && isnotnull(cs_sold_date_sk#57)) && isnotnull(cs_sold_time_sk#56)) && isnotnull(cs_ship_mode_sk#55)) - : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#57,cs_sold_time_sk#56,cs_ship_mode_sk#55,cs_warehouse_sk#58,cs_quantity#53,cs_sales_price#52,cs_net_paid_inc_tax#54] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_sold_time_sk), IsNotNull(cs..., ReadSchema: struct= 1200)) && (d_month_seq#33 <= 1211)) && isnotnull(d_date_sk#32)) - : : +- *(1) FileScan parquet default.date_dim[d_date_sk#32,d_month_seq#33,d_year#18,d_moy#20,d_qoy#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [s_store_sk#30, s_store_id#26] - : +- *(2) Filter isnotnull(s_store_sk#30) - : +- *(2) FileScan parquet default.store[s_store_sk#30,s_store_id#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [i_item_sk#28, i_brand#24, i_class#23, i_category#22, i_product_name#25] - +- *(3) Filter isnotnull(i_item_sk#28) - +- *(3) FileScan parquet default.item[i_item_sk#28,i_brand#24,i_class#23,i_category#22,i_product_name#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/simplified.txt deleted file mode 100644 index 49dac167f..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/simplified.txt +++ /dev/null @@ -1,43 +0,0 @@ -TakeOrderedAndProject [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,rk,s_store_id,sumsales] - WholeStageCodegen - Filter [rk] - InputAdapter - Window [i_category,sumsales] - WholeStageCodegen - Sort [i_category,sumsales] - InputAdapter - Exchange [i_category] #1 - WholeStageCodegen - HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,spark_grouping_id,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00))] [sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales] - InputAdapter - Exchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,spark_grouping_id] #2 - WholeStageCodegen - HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,spark_grouping_id,ss_quantity,ss_sales_price,sum,sum] [sum,sum] - Expand [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,ss_quantity,ss_sales_price] - Project [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,ss_quantity,ss_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [d_moy,d_qoy,d_year,s_store_id,ss_item_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [d_moy,d_qoy,d_year,ss_item_sk,ss_quantity,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk,d_moy,d_qoy,d_year] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy,d_qoy,d_year] [d_date_sk,d_month_seq,d_moy,d_qoy,d_year] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [s_store_id,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_id,s_store_sk] [s_store_id,s_store_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [i_brand,i_category,i_class,i_item_sk,i_product_name] - Filter [i_item_sk] - Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_product_name] [i_brand,i_category,i_class,i_item_sk,i_product_name] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/explain.txt deleted file mode 100644 index 3d2fcd498..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/explain.txt +++ /dev/null @@ -1,41 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,ss_ticket_number#2 ASC NULLS FIRST], output=[c_last_name#1,c_first_name#3,ca_city#4,bought_city#5,ss_ticket_number#2,extended_price#6,extended_tax#7,list_price#8]) -+- *(8) Project [c_last_name#1, c_first_name#3, ca_city#4, bought_city#5, ss_ticket_number#2, extended_price#6, extended_tax#7, list_price#8] - +- *(8) BroadcastHashJoin [c_current_addr_sk#9], [ca_address_sk#10], Inner, BuildRight, NOT (ca_city#4 = bought_city#5) - :- *(8) Project [ss_ticket_number#2, bought_city#5, extended_price#6, list_price#8, extended_tax#7, c_current_addr_sk#9, c_first_name#3, c_last_name#1] - : +- *(8) BroadcastHashJoin [ss_customer_sk#11], [c_customer_sk#12], Inner, BuildRight - : :- *(8) HashAggregate(keys=[ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4], functions=[sum(UnscaledValue(ss_ext_sales_price#14)), sum(UnscaledValue(ss_ext_list_price#15)), sum(UnscaledValue(ss_ext_tax#16))]) - : : +- Exchange hashpartitioning(ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4, 5) - : : +- *(5) HashAggregate(keys=[ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#14)), partial_sum(UnscaledValue(ss_ext_list_price#15)), partial_sum(UnscaledValue(ss_ext_tax#16))]) - : : +- *(5) Project [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16, ca_city#4] - : : +- *(5) BroadcastHashJoin [ss_addr_sk#13], [ca_address_sk#10], Inner, BuildRight - : : :- *(5) Project [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] - : : : +- *(5) BroadcastHashJoin [ss_hdemo_sk#17], [hd_demo_sk#18], Inner, BuildRight - : : : :- *(5) Project [ss_customer_sk#11, ss_hdemo_sk#17, ss_addr_sk#13, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] - : : : : +- *(5) BroadcastHashJoin [ss_store_sk#19], [s_store_sk#20], Inner, BuildRight - : : : : :- *(5) Project [ss_customer_sk#11, ss_hdemo_sk#17, ss_addr_sk#13, ss_store_sk#19, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] - : : : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight - : : : : : :- *(5) Project [ss_sold_date_sk#21, ss_customer_sk#11, ss_hdemo_sk#17, ss_addr_sk#13, ss_store_sk#19, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] - : : : : : : +- *(5) Filter ((((isnotnull(ss_sold_date_sk#21) && isnotnull(ss_store_sk#19)) && isnotnull(ss_hdemo_sk#17)) && isnotnull(ss_addr_sk#13)) && isnotnull(ss_customer_sk#11)) - : : : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#21,ss_customer_sk#11,ss_hdemo_sk#17,ss_addr_sk#13,ss_store_sk#19,ss_ticket_number#2,ss_ext_sales_price#14,ss_ext_list_price#15,ss_ext_tax#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk..., ReadSchema: struct= 1)) && (d_dom#23 <= 2)) && d_year#24 IN (1999,2000,2001)) && isnotnull(d_date_sk#22)) - : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#22,d_year#24,d_dom#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [s_store_sk#20] - : : : : +- *(2) Filter (s_city#25 IN (Midway,Fairview) && isnotnull(s_store_sk#20)) - : : : : +- *(2) FileScan parquet default.store[s_store_sk#20,s_city#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [In(s_city, [Midway,Fairview]), IsNotNull(s_store_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(3) Project [hd_demo_sk#18] - : : : +- *(3) Filter (((hd_dep_count#26 = 4) || (hd_vehicle_count#27 = 3)) && isnotnull(hd_demo_sk#18)) - : : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#18,hd_dep_count#26,hd_vehicle_count#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(4) Project [ca_address_sk#10, ca_city#4] - : : +- *(4) Filter (isnotnull(ca_address_sk#10) && isnotnull(ca_city#4)) - : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_city#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(6) Project [c_customer_sk#12, c_current_addr_sk#9, c_first_name#3, c_last_name#1] - : +- *(6) Filter (isnotnull(c_customer_sk#12) && isnotnull(c_current_addr_sk#9)) - : +- *(6) FileScan parquet default.customer[c_customer_sk#12,c_current_addr_sk#9,c_first_name#3,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct - +- ReusedExchange [ca_address_sk#10, ca_city#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/simplified.txt deleted file mode 100644 index 579970f83..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/simplified.txt +++ /dev/null @@ -1,54 +0,0 @@ -TakeOrderedAndProject [bought_city,c_first_name,c_last_name,ca_city,extended_price,extended_tax,list_price,ss_ticket_number] - WholeStageCodegen - Project [bought_city,c_first_name,c_last_name,ca_city,extended_price,extended_tax,list_price,ss_ticket_number] - BroadcastHashJoin [bought_city,c_current_addr_sk,ca_address_sk,ca_city] - Project [bought_city,c_current_addr_sk,c_first_name,c_last_name,extended_price,extended_tax,list_price,ss_ticket_number] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum,sum,sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_tax))] [bought_city,extended_price,extended_tax,list_price,sum,sum,sum,sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_tax))] - InputAdapter - Exchange [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #1 - WholeStageCodegen - HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Project [ca_city,ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_ticket_number] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_store_sk,ss_ticket_number] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Filter [ss_addr_sk,ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_dom,d_year] - Scan parquet default.date_dim [d_date_sk,d_dom,d_year] [d_date_sk,d_dom,d_year] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [s_store_sk] - Filter [s_city,s_store_sk] - Scan parquet default.store [s_city,s_store_sk] [s_city,s_store_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [hd_demo_sk] - Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [ca_address_sk,ca_city] - Filter [ca_address_sk,ca_city] - Scan parquet default.customer_address [ca_address_sk,ca_city] [ca_address_sk,ca_city] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] - Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] - InputAdapter - ReusedExchange [ca_address_sk,ca_city] [ca_address_sk,ca_city] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/explain.txt deleted file mode 100644 index ad4a155f0..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/explain.txt +++ /dev/null @@ -1,48 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[cd_gender#1 ASC NULLS FIRST,cd_marital_status#2 ASC NULLS FIRST,cd_education_status#3 ASC NULLS FIRST,cd_purchase_estimate#4 ASC NULLS FIRST,cd_credit_rating#5 ASC NULLS FIRST], output=[cd_gender#1,cd_marital_status#2,cd_education_status#3,cnt1#6,cd_purchase_estimate#4,cnt2#7,cd_credit_rating#5,cnt3#8]) -+- *(10) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5], functions=[count(1)]) - +- Exchange hashpartitioning(cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, 5) - +- *(9) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5], functions=[partial_count(1)]) - +- *(9) Project [cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5] - +- *(9) BroadcastHashJoin [c_current_cdemo_sk#9], [cd_demo_sk#10], Inner, BuildRight - :- *(9) Project [c_current_cdemo_sk#9] - : +- *(9) BroadcastHashJoin [c_current_addr_sk#11], [ca_address_sk#12], Inner, BuildRight - : :- *(9) Project [c_current_cdemo_sk#9, c_current_addr_sk#11] - : : +- *(9) BroadcastHashJoin [c_customer_sk#13], [cs_ship_customer_sk#14], LeftAnti, BuildRight - : : :- *(9) BroadcastHashJoin [c_customer_sk#13], [ws_bill_customer_sk#15], LeftAnti, BuildRight - : : : :- *(9) BroadcastHashJoin [c_customer_sk#13], [ss_customer_sk#16], LeftSemi, BuildRight - : : : : :- *(9) Project [c_customer_sk#13, c_current_cdemo_sk#9, c_current_addr_sk#11] - : : : : : +- *(9) Filter (isnotnull(c_current_addr_sk#11) && isnotnull(c_current_cdemo_sk#9)) - : : : : : +- *(9) FileScan parquet default.customer[c_customer_sk#13,c_current_cdemo_sk#9,c_current_addr_sk#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [ss_customer_sk#16] - : : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight - : : : : :- *(2) Project [ss_sold_date_sk#17, ss_customer_sk#16] - : : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#17) - : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_customer_sk#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(1) Project [d_date_sk#18] - : : : : +- *(1) Filter (((((isnotnull(d_year#19) && isnotnull(d_moy#20)) && (d_year#19 = 2001)) && (d_moy#20 >= 4)) && (d_moy#20 <= 6)) && isnotnull(d_date_sk#18)) - : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_year#19,d_moy#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), GreaterThanOrEqual(d_moy,4), LessThan..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(4) Project [ws_bill_customer_sk#15] - : : : +- *(4) BroadcastHashJoin [ws_sold_date_sk#21], [d_date_sk#18], Inner, BuildRight - : : : :- *(4) Project [ws_sold_date_sk#21, ws_bill_customer_sk#15] - : : : : +- *(4) Filter isnotnull(ws_sold_date_sk#21) - : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(6) Project [cs_ship_customer_sk#14] - : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#22], [d_date_sk#18], Inner, BuildRight - : : :- *(6) Project [cs_sold_date_sk#22, cs_ship_customer_sk#14] - : : : +- *(6) Filter isnotnull(cs_sold_date_sk#22) - : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#22,cs_ship_customer_sk#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(7) Project [ca_address_sk#12] - : +- *(7) Filter (ca_state#23 IN (KY,GA,NM) && isnotnull(ca_address_sk#12)) - : +- *(7) FileScan parquet default.customer_address[ca_address_sk#12,ca_state#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [In(ca_state, [KY,GA,NM]), IsNotNull(ca_address_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(8) Project [cd_demo_sk#10, cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5] - +- *(8) Filter isnotnull(cd_demo_sk#10) - +- *(8) FileScan parquet default.customer_demographics[cd_demo_sk#10,cd_gender#1,cd_marital_status#2,cd_education_status#3,cd_purchase_estimate#4,cd_credit_rating#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [d_date_sk#15] - : : +- *(2) Filter ((isnotnull(d_year#21) && (d_year#21 = 2000)) && isnotnull(d_date_sk#15)) - : : +- *(2) FileScan parquet default.date_dim[d_date_sk#15,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [i_item_sk#13, i_item_id#1] - : +- *(3) Filter isnotnull(i_item_sk#13) - : +- *(3) FileScan parquet default.item[i_item_sk#13,i_item_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(4) Project [p_promo_sk#11] - +- *(4) Filter (((p_channel_email#22 = N) || (p_channel_event#23 = N)) && isnotnull(p_promo_sk#11)) - +- *(4) FileScan parquet default.promotion[p_promo_sk#11,p_channel_email#22,p_channel_event#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/simplified.txt deleted file mode 100644 index 2cf9df4fd..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/simplified.txt +++ /dev/null @@ -1,42 +0,0 @@ -TakeOrderedAndProject [agg1,agg2,agg3,agg4,i_item_id] - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,i_item_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,sum,sum,sum,sum] - InputAdapter - Exchange [i_item_id] #1 - WholeStageCodegen - HashAggregate [count,count,count,count,count,count,count,count,i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] - Project [i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] - BroadcastHashJoin [p_promo_sk,ss_promo_sk] - Project [i_item_id,ss_coupon_amt,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] - BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] - Project [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] - Filter [ss_cdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [cd_demo_sk] - Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] - Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [p_promo_sk] - Filter [p_channel_email,p_channel_event,p_promo_sk] - Scan parquet default.promotion [p_channel_email,p_channel_event,p_promo_sk] [p_channel_email,p_channel_event,p_promo_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/explain.txt deleted file mode 100644 index e0113e54d..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/explain.txt +++ /dev/null @@ -1,46 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WHEN (cast(lochierarchy#1 as int) = 0) THEN s_state#2 END ASC NULLS FIRST,rank_within_parent#3 ASC NULLS FIRST], output=[total_sum#4,s_state#2,s_county#5,lochierarchy#1,rank_within_parent#3]) -+- *(11) Project [total_sum#4, s_state#2, s_county#5, lochierarchy#1, rank_within_parent#3] - +- Window [rank(_w3#6) windowspecdefinition(_w1#7, _w2#8, _w3#6 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#3], [_w1#7, _w2#8], [_w3#6 DESC NULLS LAST] - +- *(10) Sort [_w1#7 ASC NULLS FIRST, _w2#8 ASC NULLS FIRST, _w3#6 DESC NULLS LAST], false, 0 - +- Exchange hashpartitioning(_w1#7, _w2#8, 5) - +- *(9) HashAggregate(keys=[s_state#2, s_county#5, spark_grouping_id#9], functions=[sum(UnscaledValue(ss_net_profit#10))]) - +- Exchange hashpartitioning(s_state#2, s_county#5, spark_grouping_id#9, 5) - +- *(8) HashAggregate(keys=[s_state#2, s_county#5, spark_grouping_id#9], functions=[partial_sum(UnscaledValue(ss_net_profit#10))]) - +- *(8) Expand [List(ss_net_profit#10, s_state#11, s_county#12, 0), List(ss_net_profit#10, s_state#11, null, 1), List(ss_net_profit#10, null, null, 3)], [ss_net_profit#10, s_state#2, s_county#5, spark_grouping_id#9] - +- *(8) Project [ss_net_profit#10, s_state#13 AS s_state#11, s_county#14 AS s_county#12] - +- *(8) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight - :- *(8) Project [ss_store_sk#15, ss_net_profit#10] - : +- *(8) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight - : :- *(8) Project [ss_sold_date_sk#17, ss_store_sk#15, ss_net_profit#10] - : : +- *(8) Filter (isnotnull(ss_sold_date_sk#17) && isnotnull(ss_store_sk#15)) - : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_store_sk#15,ss_net_profit#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [d_date_sk#18] - : +- *(1) Filter (((isnotnull(d_month_seq#19) && (d_month_seq#19 >= 1200)) && (d_month_seq#19 <= 1211)) && isnotnull(d_date_sk#18)) - : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_month_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(7) BroadcastHashJoin [s_state#13], [s_state#20], LeftSemi, BuildRight - :- *(7) Project [s_store_sk#16, s_county#14, s_state#13] - : +- *(7) Filter isnotnull(s_store_sk#16) - : +- *(7) FileScan parquet default.store[s_store_sk#16,s_county#14,s_state#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - +- *(6) Project [s_state#20] - +- *(6) Filter (isnotnull(ranking#21) && (ranking#21 <= 5)) - +- Window [rank(_w2#22) windowspecdefinition(s_state#13, _w2#22 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#21], [s_state#13], [_w2#22 DESC NULLS LAST] - +- *(5) Sort [s_state#13 ASC NULLS FIRST, _w2#22 DESC NULLS LAST], false, 0 - +- *(5) HashAggregate(keys=[s_state#13], functions=[sum(UnscaledValue(ss_net_profit#10))]) - +- Exchange hashpartitioning(s_state#13, 5) - +- *(4) HashAggregate(keys=[s_state#13], functions=[partial_sum(UnscaledValue(ss_net_profit#10))]) - +- *(4) Project [ss_net_profit#10, s_state#13] - +- *(4) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight - :- *(4) Project [ss_sold_date_sk#17, ss_net_profit#10, s_state#13] - : +- *(4) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight - : :- *(4) Project [ss_sold_date_sk#17, ss_store_sk#15, ss_net_profit#10] - : : +- *(4) Filter (isnotnull(ss_store_sk#15) && isnotnull(ss_sold_date_sk#17)) - : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_store_sk#15,ss_net_profit#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [s_store_sk#16, s_state#13] - : +- *(2) Filter isnotnull(s_store_sk#16) - : +- *(2) FileScan parquet default.store[s_store_sk#16,s_state#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - +- ReusedExchange [d_date_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/simplified.txt deleted file mode 100644 index d2c001785..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/simplified.txt +++ /dev/null @@ -1,65 +0,0 @@ -TakeOrderedAndProject [lochierarchy,rank_within_parent,s_county,s_state,total_sum] - WholeStageCodegen - Project [lochierarchy,rank_within_parent,s_county,s_state,total_sum] - InputAdapter - Window [_w1,_w2,_w3] - WholeStageCodegen - Sort [_w1,_w2,_w3] - InputAdapter - Exchange [_w1,_w2] #1 - WholeStageCodegen - HashAggregate [s_county,s_state,spark_grouping_id,sum,sum(UnscaledValue(ss_net_profit))] [_w1,_w2,_w3,lochierarchy,sum,sum(UnscaledValue(ss_net_profit)),total_sum] - InputAdapter - Exchange [s_county,s_state,spark_grouping_id] #2 - WholeStageCodegen - HashAggregate [s_county,s_state,spark_grouping_id,ss_net_profit,sum,sum] [sum,sum] - Expand [s_county,s_state,ss_net_profit] - Project [s_county,s_state,ss_net_profit] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_net_profit,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_net_profit,ss_sold_date_sk,ss_store_sk] - Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_net_profit,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - BroadcastHashJoin [s_state,s_state] - Project [s_county,s_state,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_county,s_state,s_store_sk] [s_county,s_state,s_store_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [s_state] - Filter [ranking] - InputAdapter - Window [_w2,s_state] - WholeStageCodegen - Sort [_w2,s_state] - HashAggregate [s_state,sum,sum(UnscaledValue(ss_net_profit))] [_w2,s_state,sum,sum(UnscaledValue(ss_net_profit))] - InputAdapter - Exchange [s_state] #6 - WholeStageCodegen - HashAggregate [s_state,ss_net_profit,sum,sum] [sum,sum] - Project [s_state,ss_net_profit] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [s_state,ss_net_profit,ss_sold_date_sk] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_net_profit,ss_sold_date_sk,ss_store_sk] - Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_net_profit,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [s_state,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/explain.txt deleted file mode 100644 index f11328994..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/explain.txt +++ /dev/null @@ -1,40 +0,0 @@ -== Physical Plan == -*(11) Sort [ext_price#1 DESC NULLS LAST, brand_id#2 ASC NULLS FIRST], true, 0 -+- Exchange rangepartitioning(ext_price#1 DESC NULLS LAST, brand_id#2 ASC NULLS FIRST, 5) - +- *(10) HashAggregate(keys=[i_brand#3, i_brand_id#4, t_hour#5, t_minute#6], functions=[sum(UnscaledValue(ext_price#7))]) - +- Exchange hashpartitioning(i_brand#3, i_brand_id#4, t_hour#5, t_minute#6, 5) - +- *(9) HashAggregate(keys=[i_brand#3, i_brand_id#4, t_hour#5, t_minute#6], functions=[partial_sum(UnscaledValue(ext_price#7))]) - +- *(9) Project [i_brand_id#4, i_brand#3, ext_price#7, t_hour#5, t_minute#6] - +- *(9) BroadcastHashJoin [time_sk#8], [t_time_sk#9], Inner, BuildRight - :- *(9) Project [i_brand_id#4, i_brand#3, ext_price#7, time_sk#8] - : +- *(9) BroadcastHashJoin [i_item_sk#10], [sold_item_sk#11], Inner, BuildLeft - : :- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [i_item_sk#10, i_brand_id#4, i_brand#3] - : : +- *(1) Filter ((isnotnull(i_manager_id#12) && (i_manager_id#12 = 1)) && isnotnull(i_item_sk#10)) - : : +- *(1) FileScan parquet default.item[i_item_sk#10,i_brand_id#4,i_brand#3,i_manager_id#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct - : +- Union - : :- *(3) Project [ws_ext_sales_price#13 AS ext_price#7, ws_item_sk#14 AS sold_item_sk#11, ws_sold_time_sk#15 AS time_sk#8] - : : +- *(3) BroadcastHashJoin [ws_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight - : : :- *(3) Project [ws_sold_date_sk#16, ws_sold_time_sk#15, ws_item_sk#14, ws_ext_sales_price#13] - : : : +- *(3) Filter ((isnotnull(ws_sold_date_sk#16) && isnotnull(ws_item_sk#14)) && isnotnull(ws_sold_time_sk#15)) - : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#16,ws_sold_time_sk#15,ws_item_sk#14,ws_ext_sales_price#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_time_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [d_date_sk#17] - : : +- *(2) Filter ((((isnotnull(d_moy#18) && isnotnull(d_year#19)) && (d_moy#18 = 11)) && (d_year#19 = 1999)) && isnotnull(d_date_sk#17)) - : : +- *(2) FileScan parquet default.date_dim[d_date_sk#17,d_year#19,d_moy#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct - : :- *(5) Project [cs_ext_sales_price#20 AS ext_price#21, cs_item_sk#22 AS sold_item_sk#23, cs_sold_time_sk#24 AS time_sk#25] - : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#17], Inner, BuildRight - : : :- *(5) Project [cs_sold_date_sk#26, cs_sold_time_sk#24, cs_item_sk#22, cs_ext_sales_price#20] - : : : +- *(5) Filter ((isnotnull(cs_sold_date_sk#26) && isnotnull(cs_item_sk#22)) && isnotnull(cs_sold_time_sk#24)) - : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_sold_time_sk#24,cs_item_sk#22,cs_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_time_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(7) Project [ss_ext_sales_price#27 AS ext_price#28, ss_item_sk#29 AS sold_item_sk#30, ss_sold_time_sk#31 AS time_sk#32] - : +- *(7) BroadcastHashJoin [ss_sold_date_sk#33], [d_date_sk#17], Inner, BuildRight - : :- *(7) Project [ss_sold_date_sk#33, ss_sold_time_sk#31, ss_item_sk#29, ss_ext_sales_price#27] - : : +- *(7) Filter ((isnotnull(ss_sold_date_sk#33) && isnotnull(ss_item_sk#29)) && isnotnull(ss_sold_time_sk#31)) - : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#33,ss_sold_time_sk#31,ss_item_sk#29,ss_ext_sales_price#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_sold_time_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(8) Project [t_time_sk#9, t_hour#5, t_minute#6] - +- *(8) Filter (((t_meal_time#34 = breakfast) || (t_meal_time#34 = dinner)) && isnotnull(t_time_sk#9)) - +- *(8) FileScan parquet default.time_dim[t_time_sk#9,t_hour#5,t_minute#6,t_meal_time#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [Or(EqualTo(t_meal_time,breakfast),EqualTo(t_meal_time,dinner)), IsNotNull(t_time_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/simplified.txt deleted file mode 100644 index c905e2e5d..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/simplified.txt +++ /dev/null @@ -1,56 +0,0 @@ -WholeStageCodegen - Sort [brand_id,ext_price] - InputAdapter - Exchange [brand_id,ext_price] #1 - WholeStageCodegen - HashAggregate [i_brand,i_brand_id,sum,sum(UnscaledValue(ext_price)),t_hour,t_minute] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ext_price))] - InputAdapter - Exchange [i_brand,i_brand_id,t_hour,t_minute] #2 - WholeStageCodegen - HashAggregate [ext_price,i_brand,i_brand_id,sum,sum,t_hour,t_minute] [sum,sum] - Project [ext_price,i_brand,i_brand_id,t_hour,t_minute] - BroadcastHashJoin [t_time_sk,time_sk] - Project [ext_price,i_brand,i_brand_id,time_sk] - BroadcastHashJoin [i_item_sk,sold_item_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [i_brand,i_brand_id,i_item_sk] - Filter [i_item_sk,i_manager_id] - Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] [i_brand,i_brand_id,i_item_sk,i_manager_id] - InputAdapter - Union - WholeStageCodegen - Project [ws_ext_sales_price,ws_item_sk,ws_sold_time_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] - Filter [ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - WholeStageCodegen - Project [cs_ext_sales_price,cs_item_sk,cs_sold_time_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] - Filter [cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] - Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #4 - WholeStageCodegen - Project [ss_ext_sales_price,ss_item_sk,ss_sold_time_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #4 - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [t_hour,t_minute,t_time_sk] - Filter [t_meal_time,t_time_sk] - Scan parquet default.time_dim [t_hour,t_meal_time,t_minute,t_time_sk] [t_hour,t_meal_time,t_minute,t_time_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt deleted file mode 100644 index 0ef6ff570..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt +++ /dev/null @@ -1,68 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[total_cnt#1 DESC NULLS LAST,i_item_desc#2 ASC NULLS FIRST,w_warehouse_name#3 ASC NULLS FIRST,d_week_seq#4 ASC NULLS FIRST], output=[i_item_desc#2,w_warehouse_name#3,d_week_seq#4,no_promo#5,promo#6,total_cnt#1]) -+- *(12) HashAggregate(keys=[i_item_desc#2, w_warehouse_name#3, d_week_seq#4], functions=[count(1)]) - +- Exchange hashpartitioning(i_item_desc#2, w_warehouse_name#3, d_week_seq#4, 5) - +- *(11) HashAggregate(keys=[i_item_desc#2, w_warehouse_name#3, d_week_seq#4], functions=[partial_count(1)]) - +- *(11) Project [w_warehouse_name#3, i_item_desc#2, d_week_seq#4] - +- *(11) BroadcastHashJoin [cs_item_sk#7, cs_order_number#8], [cr_item_sk#9, cr_order_number#10], LeftOuter, BuildRight - :- *(11) Project [cs_item_sk#7, cs_order_number#8, w_warehouse_name#3, i_item_desc#2, d_week_seq#4] - : +- *(11) BroadcastHashJoin [cs_promo_sk#11], [p_promo_sk#12], LeftOuter, BuildRight - : :- *(11) Project [cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, w_warehouse_name#3, i_item_desc#2, d_week_seq#4] - : : +- *(11) BroadcastHashJoin [cs_ship_date_sk#13], [d_date_sk#14], Inner, BuildRight, (d_date#15 > cast(cast(d_date#16 as timestamp) + interval 5 days as date)) - : : :- *(11) Project [cs_ship_date_sk#13, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, w_warehouse_name#3, i_item_desc#2, d_date#16, d_week_seq#4] - : : : +- *(11) BroadcastHashJoin [d_week_seq#4, inv_date_sk#17], [d_week_seq#18, d_date_sk#19], Inner, BuildRight - : : : :- *(11) Project [cs_ship_date_sk#13, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2, d_date#16, d_week_seq#4] - : : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight - : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2] - : : : : : +- *(11) BroadcastHashJoin [cs_bill_hdemo_sk#22], [hd_demo_sk#23], Inner, BuildRight - : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2] - : : : : : : +- *(11) BroadcastHashJoin [cs_bill_cdemo_sk#24], [cd_demo_sk#25], Inner, BuildRight - : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2] - : : : : : : : +- *(11) BroadcastHashJoin [cs_item_sk#7], [i_item_sk#26], Inner, BuildRight - : : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3] - : : : : : : : : +- *(11) BroadcastHashJoin [inv_warehouse_sk#27], [w_warehouse_sk#28], Inner, BuildRight - : : : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, inv_warehouse_sk#27] - : : : : : : : : : +- *(11) BroadcastHashJoin [cs_item_sk#7], [inv_item_sk#29], Inner, BuildRight, (inv_quantity_on_hand#30 < cs_quantity#31) - : : : : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, cs_quantity#31] - : : : : : : : : : : +- *(11) Filter (((((isnotnull(cs_quantity#31) && isnotnull(cs_item_sk#7)) && isnotnull(cs_bill_cdemo_sk#24)) && isnotnull(cs_bill_hdemo_sk#22)) && isnotnull(cs_sold_date_sk#20)) && isnotnull(cs_ship_date_sk#13)) - : : : : : : : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#20,cs_ship_date_sk#13,cs_bill_cdemo_sk#24,cs_bill_hdemo_sk#22,cs_item_sk#7,cs_promo_sk#11,cs_order_number#8,cs_quantity#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hd..., ReadSchema: struct - : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : +- *(2) Project [w_warehouse_sk#28, w_warehouse_name#3] - : : : : : : : : +- *(2) Filter isnotnull(w_warehouse_sk#28) - : : : : : : : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#28,w_warehouse_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct - : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : +- *(3) Project [i_item_sk#26, i_item_desc#2] - : : : : : : : +- *(3) Filter isnotnull(i_item_sk#26) - : : : : : : : +- *(3) FileScan parquet default.item[i_item_sk#26,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : +- *(4) Project [cd_demo_sk#25] - : : : : : : +- *(4) Filter ((isnotnull(cd_marital_status#32) && (cd_marital_status#32 = D)) && isnotnull(cd_demo_sk#25)) - : : : : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#25,cd_marital_status#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,D), IsNotNull(cd_demo_sk)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(5) Project [hd_demo_sk#23] - : : : : : +- *(5) Filter ((isnotnull(hd_buy_potential#33) && (hd_buy_potential#33 = >10000)) && isnotnull(hd_demo_sk#23)) - : : : : : +- *(5) FileScan parquet default.household_demographics[hd_demo_sk#23,hd_buy_potential#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000), IsNotNull(hd_demo_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(6) Project [d_date_sk#21, d_date#16, d_week_seq#4] - : : : : +- *(6) Filter (((isnotnull(d_year#34) && (d_year#34 = 1999)) && isnotnull(d_date_sk#21)) && isnotnull(d_week_seq#4)) - : : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#21,d_date#16,d_week_seq#4,d_year#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) - : : : +- *(7) Project [d_date_sk#19, d_week_seq#18] - : : : +- *(7) Filter (isnotnull(d_week_seq#18) && isnotnull(d_date_sk#19)) - : : : +- *(7) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(8) Project [d_date_sk#14, d_date#15] - : : +- *(8) Filter (isnotnull(d_date#15) && isnotnull(d_date_sk#14)) - : : +- *(8) FileScan parquet default.date_dim[d_date_sk#14,d_date#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(9) Project [p_promo_sk#12] - : +- *(9) Filter isnotnull(p_promo_sk#12) - : +- *(9) FileScan parquet default.promotion[p_promo_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_promo_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295)))) - +- *(10) Project [cr_item_sk#9, cr_order_number#10] - +- *(10) Filter (isnotnull(cr_item_sk#9) && isnotnull(cr_order_number#10)) - +- *(10) FileScan parquet default.catalog_returns[cr_item_sk#9,cr_order_number#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/simplified.txt deleted file mode 100644 index 5a0b8c54a..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/simplified.txt +++ /dev/null @@ -1,90 +0,0 @@ -TakeOrderedAndProject [d_week_seq,i_item_desc,no_promo,promo,total_cnt,w_warehouse_name] - WholeStageCodegen - HashAggregate [count,count(1),d_week_seq,i_item_desc,w_warehouse_name] [count,count(1),no_promo,promo,total_cnt] - InputAdapter - Exchange [d_week_seq,i_item_desc,w_warehouse_name] #1 - WholeStageCodegen - HashAggregate [count,count,d_week_seq,i_item_desc,w_warehouse_name] [count,count] - Project [d_week_seq,i_item_desc,w_warehouse_name] - BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] - Project [cs_item_sk,cs_order_number,d_week_seq,i_item_desc,w_warehouse_name] - BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_item_sk,cs_order_number,cs_promo_sk,d_week_seq,i_item_desc,w_warehouse_name] - BroadcastHashJoin [cs_ship_date_sk,d_date,d_date,d_date_sk] - Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,d_date,d_week_seq,i_item_desc,w_warehouse_name] - BroadcastHashJoin [d_date_sk,d_week_seq,d_week_seq,inv_date_sk] - Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,d_date,d_week_seq,i_item_desc,inv_date_sk,w_warehouse_name] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,i_item_desc,inv_date_sk,w_warehouse_name] - BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] - Project [cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,i_item_desc,inv_date_sk,w_warehouse_name] - BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] - Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,i_item_desc,inv_date_sk,w_warehouse_name] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,w_warehouse_name] - BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,inv_warehouse_sk] - BroadcastHashJoin [cs_item_sk,cs_quantity,inv_item_sk,inv_quantity_on_hand] - Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] - Filter [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [w_warehouse_name,w_warehouse_sk] - Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [i_item_desc,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_desc,i_item_sk] [i_item_desc,i_item_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [cd_demo_sk] - Filter [cd_demo_sk,cd_marital_status] - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [hd_demo_sk] - Filter [hd_buy_potential,hd_demo_sk] - Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk] [hd_buy_potential,hd_demo_sk] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [d_date,d_date_sk,d_week_seq] - Filter [d_date_sk,d_week_seq,d_year] - Scan parquet default.date_dim [d_date,d_date_sk,d_week_seq,d_year] [d_date,d_date_sk,d_week_seq,d_year] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [d_date_sk,d_week_seq] - Filter [d_date_sk,d_week_seq] - Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] - InputAdapter - BroadcastExchange #9 - WholeStageCodegen - Project [d_date,d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen - Project [p_promo_sk] - Filter [p_promo_sk] - Scan parquet default.promotion [p_promo_sk] [p_promo_sk] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen - Project [cr_item_sk,cr_order_number] - Filter [cr_item_sk,cr_order_number] - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] [cr_item_sk,cr_order_number] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/explain.txt deleted file mode 100644 index 898ff3075..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/explain.txt +++ /dev/null @@ -1,34 +0,0 @@ -== Physical Plan == -*(7) Sort [cnt#1 DESC NULLS LAST], true, 0 -+- Exchange rangepartitioning(cnt#1 DESC NULLS LAST, 5) - +- *(6) Project [c_last_name#2, c_first_name#3, c_salutation#4, c_preferred_cust_flag#5, ss_ticket_number#6, cnt#1] - +- *(6) BroadcastHashJoin [ss_customer_sk#7], [c_customer_sk#8], Inner, BuildRight - :- *(6) Filter ((cnt#1 >= 1) && (cnt#1 <= 5)) - : +- *(6) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#7], functions=[count(1)]) - : +- Exchange hashpartitioning(ss_ticket_number#6, ss_customer_sk#7, 5) - : +- *(4) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#7], functions=[partial_count(1)]) - : +- *(4) Project [ss_customer_sk#7, ss_ticket_number#6] - : +- *(4) BroadcastHashJoin [ss_hdemo_sk#9], [hd_demo_sk#10], Inner, BuildRight - : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_ticket_number#6] - : : +- *(4) BroadcastHashJoin [ss_store_sk#11], [s_store_sk#12], Inner, BuildRight - : : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#6] - : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight - : : : :- *(4) Project [ss_sold_date_sk#13, ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#6] - : : : : +- *(4) Filter (((isnotnull(ss_sold_date_sk#13) && isnotnull(ss_store_sk#11)) && isnotnull(ss_hdemo_sk#9)) && isnotnull(ss_customer_sk#7)) - : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_customer_sk#7,ss_hdemo_sk#9,ss_store_sk#11,ss_ticket_number#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#14] - : : : +- *(1) Filter ((((isnotnull(d_dom#15) && (d_dom#15 >= 1)) && (d_dom#15 <= 2)) && d_year#16 IN (1999,2000,2001)) && isnotnull(d_date_sk#14)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_dom#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [s_store_sk#12] - : : +- *(2) Filter (s_county#17 IN (Williamson County,Franklin Parish,Bronx County,Orange County) && isnotnull(s_store_sk#12)) - : : +- *(2) FileScan parquet default.store[s_store_sk#12,s_county#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [In(s_county, [Williamson County,Franklin Parish,Bronx County,Orange County]), IsNotNull(s_store_..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [hd_demo_sk#10] - : +- *(3) Filter ((((isnotnull(hd_vehicle_count#18) && ((hd_buy_potential#19 = >10000) || (hd_buy_potential#19 = unknown))) && (hd_vehicle_count#18 > 0)) && (CASE WHEN (hd_vehicle_count#18 > 0) THEN (cast(hd_dep_count#20 as double) / cast(hd_vehicle_count#18 as double)) ELSE null END > 1.0)) && isnotnull(hd_demo_sk#10)) - : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#10,hd_buy_potential#19,hd_dep_count#20,hd_vehicle_count#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknow..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(5) Project [c_customer_sk#8, c_salutation#4, c_first_name#3, c_last_name#2, c_preferred_cust_flag#5] - +- *(5) Filter isnotnull(c_customer_sk#8) - +- *(5) FileScan parquet default.customer[c_customer_sk#8,c_salutation#4,c_first_name#3,c_last_name#2,c_preferred_cust_flag#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct 0.00) THEN CheckOverflow((promote_precision(year_total#7) / promote_precision(year_total#6)), DecimalType(37,20)) ELSE null END > CASE WHEN (year_total#8 > 0.00) THEN CheckOverflow((promote_precision(year_total#9) / promote_precision(year_total#8)), DecimalType(37,20)) ELSE null END) - :- *(17) Project [customer_id#4, year_total#8, customer_id#1, customer_first_name#2, customer_last_name#3, year_total#9, year_total#6] - : +- *(17) BroadcastHashJoin [customer_id#4], [customer_id#10], Inner, BuildRight - : :- *(17) BroadcastHashJoin [customer_id#4], [customer_id#1], Inner, BuildRight - : : :- Union - : : : :- *(4) Filter (isnotnull(year_total#8) && (year_total#8 > 0.00)) - : : : : +- *(4) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ss_net_paid#15))]) - : : : : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 5) - : : : : +- *(3) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ss_net_paid#15))]) - : : : : +- *(3) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_net_paid#15, d_year#14] - : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight - : : : : :- *(3) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_sold_date_sk#16, ss_net_paid#15] - : : : : : +- *(3) BroadcastHashJoin [c_customer_sk#18], [ss_customer_sk#19], Inner, BuildRight - : : : : : :- *(3) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] - : : : : : : +- *(3) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) - : : : : : : +- *(3) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : : : : +- *(1) Project [ss_sold_date_sk#16, ss_customer_sk#19, ss_net_paid#15] - : : : : : +- *(1) Filter (isnotnull(ss_customer_sk#19) && isnotnull(ss_sold_date_sk#16)) - : : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_customer_sk#19,ss_net_paid#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [d_date_sk#17, d_year#14] - : : : : +- *(2) Filter (((isnotnull(d_year#14) && d_year#14 IN (2001,2002)) && (d_year#14 = 2001)) && isnotnull(d_date_sk#17)) - : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#17,d_year#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), In(d_year, [2001,2002]), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct - : : : +- LocalTableScan , [customer_id#20, year_total#21] - : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : : +- Union - : : :- *(8) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ss_net_paid#15))]) - : : : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 5) - : : : +- *(7) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ss_net_paid#15))]) - : : : +- *(7) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_net_paid#15, d_year#14] - : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight - : : : :- *(7) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_sold_date_sk#16, ss_net_paid#15] - : : : : +- *(7) BroadcastHashJoin [c_customer_sk#18], [ss_customer_sk#19], Inner, BuildRight - : : : : :- *(7) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] - : : : : : +- *(7) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) - : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - : : : : +- ReusedExchange [ss_sold_date_sk#16, ss_customer_sk#19, ss_net_paid#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(6) Project [d_date_sk#17, d_year#14] - : : : +- *(6) Filter (((isnotnull(d_year#14) && d_year#14 IN (2001,2002)) && (d_year#14 = 2002)) && isnotnull(d_date_sk#17)) - : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#17,d_year#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), In(d_year, [2001,2002]), EqualTo(d_year,2002), IsNotNull(d_date_sk)], ReadSchema: struct - : : +- LocalTableScan , [customer_id#20, customer_first_name#22, customer_last_name#23, year_total#21] - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : +- Union - : :- LocalTableScan , [customer_id#10, year_total#6] - : +- *(12) Filter (isnotnull(year_total#21) && (year_total#21 > 0.00)) - : +- *(12) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ws_net_paid#24))]) - : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 5) - : +- *(11) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ws_net_paid#24))]) - : +- *(11) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_net_paid#24, d_year#14] - : +- *(11) BroadcastHashJoin [ws_sold_date_sk#25], [d_date_sk#17], Inner, BuildRight - : :- *(11) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_sold_date_sk#25, ws_net_paid#24] - : : +- *(11) BroadcastHashJoin [c_customer_sk#18], [ws_bill_customer_sk#26], Inner, BuildRight - : : :- *(11) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] - : : : +- *(11) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) - : : : +- *(11) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : +- *(9) Project [ws_sold_date_sk#25, ws_bill_customer_sk#26, ws_net_paid#24] - : : +- *(9) Filter (isnotnull(ws_bill_customer_sk#26) && isnotnull(ws_sold_date_sk#25)) - : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#25,ws_bill_customer_sk#26,ws_net_paid#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#17, d_year#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - +- Union - :- LocalTableScan , [customer_id#5, year_total#7] - +- *(16) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ws_net_paid#24))]) - +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 5) - +- *(15) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ws_net_paid#24))]) - +- *(15) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_net_paid#24, d_year#14] - +- *(15) BroadcastHashJoin [ws_sold_date_sk#25], [d_date_sk#17], Inner, BuildRight - :- *(15) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_sold_date_sk#25, ws_net_paid#24] - : +- *(15) BroadcastHashJoin [c_customer_sk#18], [ws_bill_customer_sk#26], Inner, BuildRight - : :- *(15) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] - : : +- *(15) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) - : : +- *(15) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - : +- ReusedExchange [ws_sold_date_sk#25, ws_bill_customer_sk#26, ws_net_paid#24], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - +- ReusedExchange [d_date_sk#17, d_year#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/simplified.txt deleted file mode 100644 index 5167a92d5..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/simplified.txt +++ /dev/null @@ -1,108 +0,0 @@ -TakeOrderedAndProject [customer_first_name,customer_id,customer_last_name] - WholeStageCodegen - Project [customer_first_name,customer_id,customer_last_name] - BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] - Project [customer_first_name,customer_id,customer_id,customer_last_name,year_total,year_total,year_total] - BroadcastHashJoin [customer_id,customer_id] - BroadcastHashJoin [customer_id,customer_id] - InputAdapter - Union - WholeStageCodegen - Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ss_net_paid))] [customer_id,sum,sum(UnscaledValue(ss_net_paid)),year_total] - InputAdapter - Exchange [c_customer_id,c_first_name,c_last_name,d_year] #1 - WholeStageCodegen - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid,sum,sum] [sum,sum] - Project [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_customer_id,c_customer_sk,c_first_name,c_last_name] - Filter [c_customer_id,c_customer_sk] - Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] [c_customer_id,c_customer_sk,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [ss_customer_sk,ss_net_paid,ss_sold_date_sk] - Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_net_paid,ss_sold_date_sk] [ss_customer_sk,ss_net_paid,ss_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk,d_year] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - LocalTableScan [customer_id,year_total] [customer_id,year_total] - InputAdapter - BroadcastExchange #4 - Union - WholeStageCodegen - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ss_net_paid))] [customer_first_name,customer_id,customer_last_name,sum,sum(UnscaledValue(ss_net_paid)),year_total] - InputAdapter - Exchange [c_customer_id,c_first_name,c_last_name,d_year] #5 - WholeStageCodegen - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid,sum,sum] [sum,sum] - Project [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_customer_id,c_customer_sk,c_first_name,c_last_name] - Filter [c_customer_id,c_customer_sk] - Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] [c_customer_id,c_customer_sk,c_first_name,c_last_name] - InputAdapter - ReusedExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [d_date_sk,d_year] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - LocalTableScan [customer_first_name,customer_id,customer_last_name,year_total] [customer_first_name,customer_id,customer_last_name,year_total] - InputAdapter - BroadcastExchange #7 - Union - LocalTableScan [customer_id,year_total] [customer_id,year_total] - WholeStageCodegen - Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ws_net_paid))] [customer_id,sum,sum(UnscaledValue(ws_net_paid)),year_total] - InputAdapter - Exchange [c_customer_id,c_first_name,c_last_name,d_year] #8 - WholeStageCodegen - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum,ws_net_paid] [sum,sum] - Project [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_customer_id,c_customer_sk,c_first_name,c_last_name] - Filter [c_customer_id,c_customer_sk] - Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] [c_customer_id,c_customer_sk,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #9 - WholeStageCodegen - Project [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] - Filter [ws_bill_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 - InputAdapter - BroadcastExchange #10 - Union - LocalTableScan [customer_id,year_total] [customer_id,year_total] - WholeStageCodegen - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ws_net_paid))] [customer_id,sum,sum(UnscaledValue(ws_net_paid)),year_total] - InputAdapter - Exchange [c_customer_id,c_first_name,c_last_name,d_year] #11 - WholeStageCodegen - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum,ws_net_paid] [sum,sum] - Project [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_customer_id,c_customer_sk,c_first_name,c_last_name] - Filter [c_customer_id,c_customer_sk] - Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] [c_customer_id,c_customer_sk,c_first_name,c_last_name] - InputAdapter - ReusedExchange [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] #9 - InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #6 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/explain.txt deleted file mode 100644 index 5f9929f2d..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/explain.txt +++ /dev/null @@ -1,117 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[sales_cnt_diff#1 ASC NULLS FIRST], output=[prev_year#2,year#3,i_brand_id#4,i_class_id#5,i_category_id#6,i_manufact_id#7,prev_yr_cnt#8,curr_yr_cnt#9,sales_cnt_diff#1,sales_amt_diff#10]) -+- *(34) Project [d_year#11 AS prev_year#2, d_year#12 AS year#3, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#13 AS prev_yr_cnt#8, sales_cnt#14 AS curr_yr_cnt#9, (sales_cnt#14 - sales_cnt#13) AS sales_cnt_diff#1, CheckOverflow((promote_precision(cast(sales_amt#15 as decimal(19,2))) - promote_precision(cast(sales_amt#16 as decimal(19,2)))), DecimalType(19,2)) AS sales_amt_diff#10] - +- *(34) BroadcastHashJoin [i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7], [i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], Inner, BuildRight, (CheckOverflow((promote_precision(cast(sales_cnt#14 as decimal(17,2))) / promote_precision(cast(sales_cnt#13 as decimal(17,2)))), DecimalType(37,20)) < 0.90000000000000000000) - :- *(34) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7], functions=[sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))]) - : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, 5) - : +- *(16) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7], functions=[partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))]) - : +- *(16) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) - : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22, 5) - : +- *(15) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) - : +- Union - : :- *(10) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) - : : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22, 5) - : : +- *(9) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) - : : +- Union - : : :- *(4) Project [d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, (cs_quantity#23 - coalesce(cr_return_quantity#24, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#25 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#26, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#22] - : : : +- *(4) BroadcastHashJoin [cs_order_number#27, cs_item_sk#28], [cr_order_number#29, cr_item_sk#30], LeftOuter, BuildRight - : : : :- *(4) Project [cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, d_year#12] - : : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight - : : : : :- *(4) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7] - : : : : : +- *(4) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#33], Inner, BuildRight - : : : : : :- *(4) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25] - : : : : : : +- *(4) Filter (isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#31)) - : : : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#28,cs_order_number#27,cs_quantity#23,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) - : : : +- *(3) Project [cr_item_sk#30, cr_order_number#29, cr_return_quantity#24, cr_return_amount#26] - : : : +- *(3) Filter (isnotnull(cr_item_sk#30) && isnotnull(cr_order_number#29)) - : : : +- *(3) FileScan parquet default.catalog_returns[cr_item_sk#30,cr_order_number#29,cr_return_quantity#24,cr_return_amount#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct - : : +- *(8) Project [d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, (ss_quantity#35 - coalesce(sr_return_quantity#36, 0)) AS sales_cnt#37, CheckOverflow((promote_precision(cast(ss_ext_sales_price#38 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#39, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#40] - : : +- *(8) BroadcastHashJoin [cast(ss_ticket_number#41 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#43, sr_item_sk#44], LeftOuter, BuildRight - : : :- *(8) Project [ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, d_year#12] - : : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#45], [d_date_sk#32], Inner, BuildRight - : : : :- *(8) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7] - : : : : +- *(8) BroadcastHashJoin [ss_item_sk#42], [i_item_sk#33], Inner, BuildRight - : : : : :- *(8) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38] - : : : : : +- *(8) Filter (isnotnull(ss_item_sk#42) && isnotnull(ss_sold_date_sk#45)) - : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#45,ss_item_sk#42,ss_ticket_number#41,ss_quantity#35,ss_ext_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : +- *(14) Project [d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, (ws_quantity#46 - coalesce(wr_return_quantity#47, 0)) AS sales_cnt#48, CheckOverflow((promote_precision(cast(ws_ext_sales_price#49 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#50, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#51] - : +- *(14) BroadcastHashJoin [cast(ws_order_number#52 as bigint), cast(ws_item_sk#53 as bigint)], [wr_order_number#54, wr_item_sk#55], LeftOuter, BuildRight - : :- *(14) Project [ws_item_sk#53, ws_order_number#52, ws_quantity#46, ws_ext_sales_price#49, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, d_year#12] - : : +- *(14) BroadcastHashJoin [ws_sold_date_sk#56], [d_date_sk#32], Inner, BuildRight - : : :- *(14) Project [ws_sold_date_sk#56, ws_item_sk#53, ws_order_number#52, ws_quantity#46, ws_ext_sales_price#49, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7] - : : : +- *(14) BroadcastHashJoin [ws_item_sk#53], [i_item_sk#33], Inner, BuildRight - : : : :- *(14) Project [ws_sold_date_sk#56, ws_item_sk#53, ws_order_number#52, ws_quantity#46, ws_ext_sales_price#49] - : : : : +- *(14) Filter (isnotnull(ws_item_sk#53) && isnotnull(ws_sold_date_sk#56)) - : : : : +- *(14) FileScan parquet default.web_sales[ws_sold_date_sk#56,ws_item_sk#53,ws_order_number#52,ws_quantity#46,ws_ext_sales_price#49] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true], input[4, int, true])) - +- *(33) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], functions=[sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))]) - +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, 5) - +- *(32) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], functions=[partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))]) - +- *(32) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) - +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22, 5) - +- *(31) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) - +- Union - :- *(26) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) - : +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22, 5) - : +- *(25) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) - : +- Union - : :- *(20) Project [d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, (cs_quantity#23 - coalesce(cr_return_quantity#24, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#25 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#26, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#22] - : : +- *(20) BroadcastHashJoin [cs_order_number#27, cs_item_sk#28], [cr_order_number#29, cr_item_sk#30], LeftOuter, BuildRight - : : :- *(20) Project [cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, d_year#11] - : : : +- *(20) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#57], Inner, BuildRight - : : : :- *(20) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20] - : : : : +- *(20) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#58], Inner, BuildRight - : : : : :- *(20) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25] - : : : : : +- *(20) Filter (isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#31)) - : : : : : +- *(20) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#28,cs_order_number#27,cs_quantity#23,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [cr_item_sk#30, cr_order_number#29, cr_return_quantity#24, cr_return_amount#26], BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) - : +- *(24) Project [d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, (ss_quantity#35 - coalesce(sr_return_quantity#36, 0)) AS sales_cnt#37, CheckOverflow((promote_precision(cast(ss_ext_sales_price#38 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#39, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#40] - : +- *(24) BroadcastHashJoin [cast(ss_ticket_number#41 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#43, sr_item_sk#44], LeftOuter, BuildRight - : :- *(24) Project [ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, d_year#11] - : : +- *(24) BroadcastHashJoin [ss_sold_date_sk#45], [d_date_sk#57], Inner, BuildRight - : : :- *(24) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20] - : : : +- *(24) BroadcastHashJoin [ss_item_sk#42], [i_item_sk#58], Inner, BuildRight - : : : :- *(24) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38] - : : : : +- *(24) Filter (isnotnull(ss_item_sk#42) && isnotnull(ss_sold_date_sk#45)) - : : : : +- *(24) FileScan parquet default.store_sales[ss_sold_date_sk#45,ss_item_sk#42,ss_ticket_number#41,ss_quantity#35,ss_ext_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [i_item_sk#14, i_category#5] - : : +- *(1) Filter isnotnull(i_item_sk#14) - : : +- *(1) FileScan parquet default.item[i_item_sk#14,i_category#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#12, d_year#3, d_qoy#4] - : +- *(2) Filter isnotnull(d_date_sk#12) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#12,d_year#3,d_qoy#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct - :- *(6) Project [web AS channel#15, ws_ship_customer_sk#16 AS col_name#17, d_year#3, d_qoy#4, i_category#5, ws_ext_sales_price#18 AS ext_sales_price#19] - : +- *(6) BroadcastHashJoin [ws_sold_date_sk#20], [d_date_sk#12], Inner, BuildRight - : :- *(6) Project [ws_sold_date_sk#20, ws_ship_customer_sk#16, ws_ext_sales_price#18, i_category#5] - : : +- *(6) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#14], Inner, BuildRight - : : :- *(6) Project [ws_sold_date_sk#20, ws_item_sk#21, ws_ship_customer_sk#16, ws_ext_sales_price#18] - : : : +- *(6) Filter ((isnull(ws_ship_customer_sk#16) && isnotnull(ws_item_sk#21)) && isnotnull(ws_sold_date_sk#20)) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#20,ws_item_sk#21,ws_ship_customer_sk#16,ws_ext_sales_price#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNull(ws_ship_customer_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [i_item_sk#14, i_category#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [d_date_sk#12, d_year#3, d_qoy#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(9) Project [catalog AS channel#22, cs_ship_addr_sk#23 AS col_name#24, d_year#3, d_qoy#4, i_category#5, cs_ext_sales_price#25 AS ext_sales_price#26] - +- *(9) BroadcastHashJoin [cs_sold_date_sk#27], [d_date_sk#12], Inner, BuildRight - :- *(9) Project [cs_sold_date_sk#27, cs_ship_addr_sk#23, cs_ext_sales_price#25, i_category#5] - : +- *(9) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#14], Inner, BuildRight - : :- *(9) Project [cs_sold_date_sk#27, cs_ship_addr_sk#23, cs_item_sk#28, cs_ext_sales_price#25] - : : +- *(9) Filter ((isnull(cs_ship_addr_sk#23) && isnotnull(cs_item_sk#28)) && isnotnull(cs_sold_date_sk#27)) - : : +- *(9) FileScan parquet default.catalog_sales[cs_sold_date_sk#27,cs_ship_addr_sk#23,cs_item_sk#28,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNull(cs_ship_addr_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : +- ReusedExchange [i_item_sk#14, i_category#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [d_date_sk#12, d_year#3, d_qoy#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/simplified.txt deleted file mode 100644 index 5b7872ad7..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/simplified.txt +++ /dev/null @@ -1,53 +0,0 @@ -TakeOrderedAndProject [channel,col_name,d_qoy,d_year,i_category,sales_amt,sales_cnt] - WholeStageCodegen - HashAggregate [channel,col_name,count,count(1),d_qoy,d_year,i_category,sum,sum(UnscaledValue(ext_sales_price))] [count,count(1),sales_amt,sales_cnt,sum,sum(UnscaledValue(ext_sales_price))] - InputAdapter - Exchange [channel,col_name,d_qoy,d_year,i_category] #1 - WholeStageCodegen - HashAggregate [channel,col_name,count,count,d_qoy,d_year,ext_sales_price,i_category,sum,sum] [count,count,sum,sum] - InputAdapter - Union - WholeStageCodegen - Project [d_qoy,d_year,i_category,ss_ext_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_category,ss_ext_sales_price,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [i_category,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_category,i_item_sk] [i_category,i_item_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk,d_qoy,d_year] - Filter [d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] - WholeStageCodegen - Project [d_qoy,d_year,i_category,ws_ext_sales_price,ws_ship_customer_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [i_category,ws_ext_sales_price,ws_ship_customer_sk,ws_sold_date_sk] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_ext_sales_price,ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] - Filter [ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] [ws_ext_sales_price,ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] - InputAdapter - ReusedExchange [i_category,i_item_sk] [i_category,i_item_sk] #2 - InputAdapter - ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #3 - WholeStageCodegen - Project [cs_ext_sales_price,cs_ship_addr_sk,d_qoy,d_year,i_category] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_ext_sales_price,cs_ship_addr_sk,cs_sold_date_sk,i_category] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_ext_sales_price,cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] - Filter [cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] [cs_ext_sales_price,cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] - InputAdapter - ReusedExchange [i_category,i_item_sk] [i_category,i_item_sk] #2 - InputAdapter - ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #3 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/explain.txt deleted file mode 100644 index 0b9dd6982..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/explain.txt +++ /dev/null @@ -1,100 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NULLS FIRST], output=[channel#1,id#2,sales#3,returns#4,profit#5]) -+- *(25) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[sum(sales#7), sum(returns#8), sum(profit#9)]) - +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 5) - +- *(24) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[partial_sum(sales#7), partial_sum(returns#8), partial_sum(profit#9)]) - +- *(24) Expand [List(sales#7, returns#8, profit#9, channel#10, id#11, 0), List(sales#7, returns#8, profit#9, channel#10, null, 1), List(sales#7, returns#8, profit#9, null, null, 3)], [sales#7, returns#8, profit#9, channel#1, id#2, spark_grouping_id#6] - +- Union - :- *(8) Project [sales#7, coalesce(returns#12, 0.00) AS returns#8, CheckOverflow((promote_precision(cast(profit#13 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#14, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#9, store channel AS channel#10, s_store_sk#15 AS id#11] - : +- *(8) BroadcastHashJoin [s_store_sk#15], [s_store_sk#16], LeftOuter, BuildRight - : :- *(8) HashAggregate(keys=[s_store_sk#15], functions=[sum(UnscaledValue(ss_ext_sales_price#17)), sum(UnscaledValue(ss_net_profit#18))]) - : : +- Exchange hashpartitioning(s_store_sk#15, 5) - : : +- *(3) HashAggregate(keys=[s_store_sk#15], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#17)), partial_sum(UnscaledValue(ss_net_profit#18))]) - : : +- *(3) Project [ss_ext_sales_price#17, ss_net_profit#18, s_store_sk#15] - : : +- *(3) BroadcastHashJoin [ss_store_sk#19], [s_store_sk#15], Inner, BuildRight - : : :- *(3) Project [ss_store_sk#19, ss_ext_sales_price#17, ss_net_profit#18] - : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight - : : : :- *(3) Project [ss_sold_date_sk#20, ss_store_sk#19, ss_ext_sales_price#17, ss_net_profit#18] - : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#20) && isnotnull(ss_store_sk#19)) - : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#20,ss_store_sk#19,ss_ext_sales_price#17,ss_net_profit#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct= 11172)) && (d_date#22 <= 11202)) && isnotnull(d_date_sk#21)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#21,d_date#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), Is..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [s_store_sk#15] - : : +- *(2) Filter isnotnull(s_store_sk#15) - : : +- *(2) FileScan parquet default.store[s_store_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(7) HashAggregate(keys=[s_store_sk#16], functions=[sum(UnscaledValue(sr_return_amt#23)), sum(UnscaledValue(sr_net_loss#24))]) - : +- Exchange hashpartitioning(s_store_sk#16, 5) - : +- *(6) HashAggregate(keys=[s_store_sk#16], functions=[partial_sum(UnscaledValue(sr_return_amt#23)), partial_sum(UnscaledValue(sr_net_loss#24))]) - : +- *(6) Project [sr_return_amt#23, sr_net_loss#24, s_store_sk#16] - : +- *(6) BroadcastHashJoin [sr_store_sk#25], [cast(s_store_sk#16 as bigint)], Inner, BuildRight - : :- *(6) Project [sr_store_sk#25, sr_return_amt#23, sr_net_loss#24] - : : +- *(6) BroadcastHashJoin [sr_returned_date_sk#26], [cast(d_date_sk#21 as bigint)], Inner, BuildRight - : : :- *(6) Project [sr_returned_date_sk#26, sr_store_sk#25, sr_return_amt#23, sr_net_loss#24] - : : : +- *(6) Filter (isnotnull(sr_returned_date_sk#26) && isnotnull(sr_store_sk#25)) - : : : +- *(6) FileScan parquet default.store_returns[sr_returned_date_sk#26,sr_store_sk#25,sr_return_amt#23,sr_net_loss#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)], ReadSchema: struct= 11172)) && (d_date#22 <= 11202)) && isnotnull(d_date_sk#21)) - : : +- *(4) FileScan parquet default.date_dim[d_date_sk#21,d_date#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), Is..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(5) Project [s_store_sk#16] - : +- *(5) Filter isnotnull(s_store_sk#16) - : +- *(5) FileScan parquet default.store[s_store_sk#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - :- *(15) Project [sales#27, returns#28, CheckOverflow((promote_precision(cast(profit#29 as decimal(18,2))) - promote_precision(cast(profit_loss#30 as decimal(18,2)))), DecimalType(18,2)) AS profit#31, catalog channel AS channel#32, cs_call_center_sk#33 AS id#34] - : +- BroadcastNestedLoopJoin BuildLeft, Inner - : :- BroadcastExchange IdentityBroadcastMode - : : +- *(11) HashAggregate(keys=[cs_call_center_sk#33], functions=[sum(UnscaledValue(cs_ext_sales_price#35)), sum(UnscaledValue(cs_net_profit#36))]) - : : +- Exchange hashpartitioning(cs_call_center_sk#33, 5) - : : +- *(10) HashAggregate(keys=[cs_call_center_sk#33], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#35)), partial_sum(UnscaledValue(cs_net_profit#36))]) - : : +- *(10) Project [cs_call_center_sk#33, cs_ext_sales_price#35, cs_net_profit#36] - : : +- *(10) BroadcastHashJoin [cs_sold_date_sk#37], [d_date_sk#21], Inner, BuildRight - : : :- *(10) Project [cs_sold_date_sk#37, cs_call_center_sk#33, cs_ext_sales_price#35, cs_net_profit#36] - : : : +- *(10) Filter isnotnull(cs_sold_date_sk#37) - : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#37,cs_call_center_sk#33,cs_ext_sales_price#35,cs_net_profit#36] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#21], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(23) Project [sales#41, coalesce(returns#42, 0.00) AS returns#43, CheckOverflow((promote_precision(cast(profit#44 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#45, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#46, web channel AS channel#47, wp_web_page_sk#48 AS id#49] - +- *(23) BroadcastHashJoin [wp_web_page_sk#48], [wp_web_page_sk#50], LeftOuter, BuildRight - :- *(23) HashAggregate(keys=[wp_web_page_sk#48], functions=[sum(UnscaledValue(ws_ext_sales_price#51)), sum(UnscaledValue(ws_net_profit#52))]) - : +- Exchange hashpartitioning(wp_web_page_sk#48, 5) - : +- *(18) HashAggregate(keys=[wp_web_page_sk#48], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#51)), partial_sum(UnscaledValue(ws_net_profit#52))]) - : +- *(18) Project [ws_ext_sales_price#51, ws_net_profit#52, wp_web_page_sk#48] - : +- *(18) BroadcastHashJoin [ws_web_page_sk#53], [wp_web_page_sk#48], Inner, BuildRight - : :- *(18) Project [ws_web_page_sk#53, ws_ext_sales_price#51, ws_net_profit#52] - : : +- *(18) BroadcastHashJoin [ws_sold_date_sk#54], [d_date_sk#21], Inner, BuildRight - : : :- *(18) Project [ws_sold_date_sk#54, ws_web_page_sk#53, ws_ext_sales_price#51, ws_net_profit#52] - : : : +- *(18) Filter (isnotnull(ws_sold_date_sk#54) && isnotnull(ws_web_page_sk#53)) - : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#54,ws_web_page_sk#53,ws_ext_sales_price#51,ws_net_profit#52] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(22) HashAggregate(keys=[wp_web_page_sk#50], functions=[sum(UnscaledValue(wr_return_amt#55)), sum(UnscaledValue(wr_net_loss#56))]) - +- Exchange hashpartitioning(wp_web_page_sk#50, 5) - +- *(21) HashAggregate(keys=[wp_web_page_sk#50], functions=[partial_sum(UnscaledValue(wr_return_amt#55)), partial_sum(UnscaledValue(wr_net_loss#56))]) - +- *(21) Project [wr_return_amt#55, wr_net_loss#56, wp_web_page_sk#50] - +- *(21) BroadcastHashJoin [wr_web_page_sk#57], [cast(wp_web_page_sk#50 as bigint)], Inner, BuildRight - :- *(21) Project [wr_web_page_sk#57, wr_return_amt#55, wr_net_loss#56] - : +- *(21) BroadcastHashJoin [wr_returned_date_sk#58], [cast(d_date_sk#21 as bigint)], Inner, BuildRight - : :- *(21) Project [wr_returned_date_sk#58, wr_web_page_sk#57, wr_return_amt#55, wr_net_loss#56] - : : +- *(21) Filter (isnotnull(wr_returned_date_sk#58) && isnotnull(wr_web_page_sk#57)) - : : +- *(21) FileScan parquet default.web_returns[wr_returned_date_sk#58,wr_web_page_sk#57,wr_return_amt#55,wr_net_loss#56] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_web_page_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/simplified.txt deleted file mode 100644 index ef07e38e8..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/simplified.txt +++ /dev/null @@ -1,141 +0,0 @@ -TakeOrderedAndProject [channel,id,profit,returns,sales] - WholeStageCodegen - HashAggregate [channel,id,spark_grouping_id,sum,sum,sum,sum(profit),sum(returns),sum(sales)] [profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] - InputAdapter - Exchange [channel,id,spark_grouping_id] #1 - WholeStageCodegen - HashAggregate [channel,id,profit,returns,sales,spark_grouping_id,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Expand [channel,id,profit,returns,sales] - InputAdapter - Union - WholeStageCodegen - Project [profit,profit_loss,returns,s_store_sk,sales] - BroadcastHashJoin [s_store_sk,s_store_sk] - HashAggregate [s_store_sk,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] [profit,sales,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] - InputAdapter - Exchange [s_store_sk] #2 - WholeStageCodegen - HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit,sum,sum,sum,sum] [sum,sum,sum,sum] - Project [s_store_sk,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_ext_sales_price,ss_net_profit,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] - Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_sk] [s_store_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - HashAggregate [s_store_sk,sum,sum,sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(sr_return_amt))] [profit_loss,returns,sum,sum,sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(sr_return_amt))] - InputAdapter - Exchange [s_store_sk] #6 - WholeStageCodegen - HashAggregate [s_store_sk,sr_net_loss,sr_return_amt,sum,sum,sum,sum] [sum,sum,sum,sum] - Project [s_store_sk,sr_net_loss,sr_return_amt] - BroadcastHashJoin [s_store_sk,sr_store_sk] - Project [sr_net_loss,sr_return_amt,sr_store_sk] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] - Project [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] - Filter [sr_returned_date_sk,sr_store_sk] - Scan parquet default.store_returns [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_sk] [s_store_sk] - WholeStageCodegen - Project [cs_call_center_sk,profit,profit_loss,returns,sales] - InputAdapter - BroadcastNestedLoopJoin - BroadcastExchange #9 - WholeStageCodegen - HashAggregate [cs_call_center_sk,sum,sum,sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit))] [profit,sales,sum,sum,sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit))] - InputAdapter - Exchange [cs_call_center_sk] #10 - WholeStageCodegen - HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,sum,sum,sum,sum] [sum,sum,sum,sum] - Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 - WholeStageCodegen - HashAggregate [sum,sum,sum(UnscaledValue(cr_net_loss)),sum(UnscaledValue(cr_return_amount))] [profit_loss,returns,sum,sum,sum(UnscaledValue(cr_net_loss)),sum(UnscaledValue(cr_return_amount))] - InputAdapter - Exchange #11 - WholeStageCodegen - HashAggregate [cr_net_loss,cr_return_amount,sum,sum,sum,sum] [sum,sum,sum,sum] - Project [cr_net_loss,cr_return_amount] - BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Project [cr_net_loss,cr_return_amount,cr_returned_date_sk] - Filter [cr_returned_date_sk] - Scan parquet default.catalog_returns [cr_net_loss,cr_return_amount,cr_returned_date_sk] [cr_net_loss,cr_return_amount,cr_returned_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 - WholeStageCodegen - Project [profit,profit_loss,returns,sales,wp_web_page_sk] - BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] - HashAggregate [sum,sum,sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),wp_web_page_sk] [profit,sales,sum,sum,sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit))] - InputAdapter - Exchange [wp_web_page_sk] #12 - WholeStageCodegen - HashAggregate [sum,sum,sum,sum,wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] - Project [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] - BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] - Project [ws_ext_sales_price,ws_net_profit,ws_web_page_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_page_sk] - Filter [ws_sold_date_sk,ws_web_page_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_page_sk] [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_page_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 - InputAdapter - BroadcastExchange #13 - WholeStageCodegen - Project [wp_web_page_sk] - Filter [wp_web_page_sk] - Scan parquet default.web_page [wp_web_page_sk] [wp_web_page_sk] - InputAdapter - BroadcastExchange #14 - WholeStageCodegen - HashAggregate [sum,sum,sum(UnscaledValue(wr_net_loss)),sum(UnscaledValue(wr_return_amt)),wp_web_page_sk] [profit_loss,returns,sum,sum,sum(UnscaledValue(wr_net_loss)),sum(UnscaledValue(wr_return_amt))] - InputAdapter - Exchange [wp_web_page_sk] #15 - WholeStageCodegen - HashAggregate [sum,sum,sum,sum,wp_web_page_sk,wr_net_loss,wr_return_amt] [sum,sum,sum,sum] - Project [wp_web_page_sk,wr_net_loss,wr_return_amt] - BroadcastHashJoin [wp_web_page_sk,wr_web_page_sk] - Project [wr_net_loss,wr_return_amt,wr_web_page_sk] - BroadcastHashJoin [d_date_sk,wr_returned_date_sk] - Project [wr_net_loss,wr_return_amt,wr_returned_date_sk,wr_web_page_sk] - Filter [wr_returned_date_sk,wr_web_page_sk] - Scan parquet default.web_returns [wr_net_loss,wr_return_amt,wr_returned_date_sk,wr_web_page_sk] [wr_net_loss,wr_return_amt,wr_returned_date_sk,wr_web_page_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #7 - InputAdapter - BroadcastExchange #16 - WholeStageCodegen - Project [wp_web_page_sk] - Filter [wp_web_page_sk] - Scan parquet default.web_page [wp_web_page_sk] [wp_web_page_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt deleted file mode 100644 index 4c22f6a08..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt +++ /dev/null @@ -1,61 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[ratio#1 ASC NULLS FIRST,ss_qty#2 DESC NULLS LAST,ss_wc#3 DESC NULLS LAST,ss_sp#4 DESC NULLS LAST,other_chan_qty#5 ASC NULLS FIRST,other_chan_wholesale_cost#6 ASC NULLS FIRST,other_chan_sales_price#7 ASC NULLS FIRST,round((cast(ss_qty#2 as double) / cast(coalesce((ws_qty#8 + cs_qty#9), 1) as double)), 2) ASC NULLS FIRST], output=[ratio#1,store_qty#10,store_wholesale_cost#11,store_sales_price#12,other_chan_qty#5,other_chan_wholesale_cost#6,other_chan_sales_price#7]) -+- *(12) Project [round((cast(ss_qty#2 as double) / cast(coalesce((ws_qty#8 + cs_qty#9), 1) as double)), 2) AS ratio#1, ss_qty#2 AS store_qty#10, ss_wc#3 AS store_wholesale_cost#11, ss_sp#4 AS store_sales_price#12, (coalesce(ws_qty#8, 0) + coalesce(cs_qty#9, 0)) AS other_chan_qty#5, CheckOverflow((promote_precision(cast(coalesce(ws_wc#13, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#14, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_wholesale_cost#6, CheckOverflow((promote_precision(cast(coalesce(ws_sp#15, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#16, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_sales_price#7, cs_qty#9, ws_qty#8, ss_qty#2, ss_sp#4, ss_wc#3] - +- *(12) BroadcastHashJoin [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19], [cs_sold_year#20, cs_item_sk#21, cs_customer_sk#22], Inner, BuildRight - :- *(12) Project [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19, ss_qty#2, ss_wc#3, ss_sp#4, ws_qty#8, ws_wc#13, ws_sp#15] - : +- *(12) BroadcastHashJoin [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19], [ws_sold_year#23, ws_item_sk#24, ws_customer_sk#25], Inner, BuildRight - : :- *(12) HashAggregate(keys=[d_year#26, ss_item_sk#18, ss_customer_sk#19], functions=[sum(cast(ss_quantity#27 as bigint)), sum(UnscaledValue(ss_wholesale_cost#28)), sum(UnscaledValue(ss_sales_price#29))]) - : : +- Exchange hashpartitioning(d_year#26, ss_item_sk#18, ss_customer_sk#19, 5) - : : +- *(3) HashAggregate(keys=[d_year#26, ss_item_sk#18, ss_customer_sk#19], functions=[partial_sum(cast(ss_quantity#27 as bigint)), partial_sum(UnscaledValue(ss_wholesale_cost#28)), partial_sum(UnscaledValue(ss_sales_price#29))]) - : : +- *(3) Project [ss_item_sk#18, ss_customer_sk#19, ss_quantity#27, ss_wholesale_cost#28, ss_sales_price#29, d_year#26] - : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#30], [d_date_sk#31], Inner, BuildRight - : : :- *(3) Project [ss_sold_date_sk#30, ss_item_sk#18, ss_customer_sk#19, ss_quantity#27, ss_wholesale_cost#28, ss_sales_price#29] - : : : +- *(3) Filter isnull(sr_ticket_number#32) - : : : +- *(3) BroadcastHashJoin [cast(ss_ticket_number#33 as bigint), cast(ss_item_sk#18 as bigint)], [sr_ticket_number#32, sr_item_sk#34], LeftOuter, BuildRight - : : : :- *(3) Project [ss_sold_date_sk#30, ss_item_sk#18, ss_customer_sk#19, ss_ticket_number#33, ss_quantity#27, ss_wholesale_cost#28, ss_sales_price#29] - : : : : +- *(3) Filter ((isnotnull(ss_sold_date_sk#30) && isnotnull(ss_item_sk#18)) && isnotnull(ss_customer_sk#19)) - : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#30,ss_item_sk#18,ss_customer_sk#19,ss_ticket_number#33,ss_quantity#27,ss_wholesale_cost#28,ss_sales_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [d_date_sk#31, d_year#26] - : : +- *(2) Filter ((isnotnull(d_year#26) && (d_year#26 = 2000)) && isnotnull(d_date_sk#31)) - : : +- *(2) FileScan parquet default.date_dim[d_date_sk#31,d_year#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) - : +- *(7) Filter (coalesce(ws_qty#8, 0) > 0) - : +- *(7) HashAggregate(keys=[d_year#26, ws_item_sk#24, ws_bill_customer_sk#35], functions=[sum(cast(ws_quantity#36 as bigint)), sum(UnscaledValue(ws_wholesale_cost#37)), sum(UnscaledValue(ws_sales_price#38))]) - : +- Exchange hashpartitioning(d_year#26, ws_item_sk#24, ws_bill_customer_sk#35, 5) - : +- *(6) HashAggregate(keys=[d_year#26, ws_item_sk#24, ws_bill_customer_sk#35], functions=[partial_sum(cast(ws_quantity#36 as bigint)), partial_sum(UnscaledValue(ws_wholesale_cost#37)), partial_sum(UnscaledValue(ws_sales_price#38))]) - : +- *(6) Project [ws_item_sk#24, ws_bill_customer_sk#35, ws_quantity#36, ws_wholesale_cost#37, ws_sales_price#38, d_year#26] - : +- *(6) BroadcastHashJoin [ws_sold_date_sk#39], [d_date_sk#31], Inner, BuildRight - : :- *(6) Project [ws_sold_date_sk#39, ws_item_sk#24, ws_bill_customer_sk#35, ws_quantity#36, ws_wholesale_cost#37, ws_sales_price#38] - : : +- *(6) Filter isnull(wr_order_number#40) - : : +- *(6) BroadcastHashJoin [cast(ws_order_number#41 as bigint), cast(ws_item_sk#24 as bigint)], [wr_order_number#40, wr_item_sk#42], LeftOuter, BuildRight - : : :- *(6) Project [ws_sold_date_sk#39, ws_item_sk#24, ws_bill_customer_sk#35, ws_order_number#41, ws_quantity#36, ws_wholesale_cost#37, ws_sales_price#38] - : : : +- *(6) Filter ((isnotnull(ws_sold_date_sk#39) && isnotnull(ws_item_sk#24)) && isnotnull(ws_bill_customer_sk#35)) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#39,ws_item_sk#24,ws_bill_customer_sk#35,ws_order_number#41,ws_quantity#36,ws_wholesale_cost#37,ws_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#31, d_year#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) - +- *(11) Filter (coalesce(cs_qty#9, 0) > 0) - +- *(11) HashAggregate(keys=[d_year#26, cs_item_sk#21, cs_bill_customer_sk#43], functions=[sum(cast(cs_quantity#44 as bigint)), sum(UnscaledValue(cs_wholesale_cost#45)), sum(UnscaledValue(cs_sales_price#46))]) - +- Exchange hashpartitioning(d_year#26, cs_item_sk#21, cs_bill_customer_sk#43, 5) - +- *(10) HashAggregate(keys=[d_year#26, cs_item_sk#21, cs_bill_customer_sk#43], functions=[partial_sum(cast(cs_quantity#44 as bigint)), partial_sum(UnscaledValue(cs_wholesale_cost#45)), partial_sum(UnscaledValue(cs_sales_price#46))]) - +- *(10) Project [cs_bill_customer_sk#43, cs_item_sk#21, cs_quantity#44, cs_wholesale_cost#45, cs_sales_price#46, d_year#26] - +- *(10) BroadcastHashJoin [cs_sold_date_sk#47], [d_date_sk#31], Inner, BuildRight - :- *(10) Project [cs_sold_date_sk#47, cs_bill_customer_sk#43, cs_item_sk#21, cs_quantity#44, cs_wholesale_cost#45, cs_sales_price#46] - : +- *(10) Filter isnull(cr_order_number#48) - : +- *(10) BroadcastHashJoin [cs_order_number#49, cs_item_sk#21], [cr_order_number#48, cr_item_sk#50], LeftOuter, BuildRight - : :- *(10) Project [cs_sold_date_sk#47, cs_bill_customer_sk#43, cs_item_sk#21, cs_order_number#49, cs_quantity#44, cs_wholesale_cost#45, cs_sales_price#46] - : : +- *(10) Filter ((isnotnull(cs_sold_date_sk#47) && isnotnull(cs_item_sk#21)) && isnotnull(cs_bill_customer_sk#43)) - : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#47,cs_bill_customer_sk#43,cs_item_sk#21,cs_order_number#49,cs_quantity#44,cs_wholesale_cost#45,cs_sales_price#46] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct - +- ReusedExchange [d_date_sk#31, d_year#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/simplified.txt deleted file mode 100644 index fc8bc4f3f..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/simplified.txt +++ /dev/null @@ -1,81 +0,0 @@ -TakeOrderedAndProject [cs_qty,other_chan_qty,other_chan_sales_price,other_chan_wholesale_cost,ratio,ss_qty,ss_sp,ss_wc,store_qty,store_sales_price,store_wholesale_cost,ws_qty] - WholeStageCodegen - Project [cs_qty,cs_sp,cs_wc,ss_qty,ss_sp,ss_wc,ws_qty,ws_sp,ws_wc] - BroadcastHashJoin [cs_customer_sk,cs_item_sk,cs_sold_year,ss_customer_sk,ss_item_sk,ss_sold_year] - Project [ss_customer_sk,ss_item_sk,ss_qty,ss_sold_year,ss_sp,ss_wc,ws_qty,ws_sp,ws_wc] - BroadcastHashJoin [ss_customer_sk,ss_item_sk,ss_sold_year,ws_customer_sk,ws_item_sk,ws_sold_year] - HashAggregate [d_year,ss_customer_sk,ss_item_sk,sum,sum,sum,sum(UnscaledValue(ss_sales_price)),sum(UnscaledValue(ss_wholesale_cost)),sum(cast(ss_quantity as bigint))] [ss_qty,ss_sold_year,ss_sp,ss_wc,sum,sum,sum,sum(UnscaledValue(ss_sales_price)),sum(UnscaledValue(ss_wholesale_cost)),sum(cast(ss_quantity as bigint))] - InputAdapter - Exchange [d_year,ss_customer_sk,ss_item_sk] #1 - WholeStageCodegen - HashAggregate [d_year,ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_wholesale_cost,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Project [d_year,ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_wholesale_cost] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost] - Filter [sr_ticket_number] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_ticket_number,ss_wholesale_cost] - Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_ticket_number,ss_wholesale_cost] [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_ticket_number,ss_wholesale_cost] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [sr_item_sk,sr_ticket_number] - Filter [sr_item_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk,d_year] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Filter [ws_qty] - HashAggregate [d_year,sum,sum,sum,sum(UnscaledValue(ws_sales_price)),sum(UnscaledValue(ws_wholesale_cost)),sum(cast(ws_quantity as bigint)),ws_bill_customer_sk,ws_item_sk] [sum,sum,sum,sum(UnscaledValue(ws_sales_price)),sum(UnscaledValue(ws_wholesale_cost)),sum(cast(ws_quantity as bigint)),ws_customer_sk,ws_qty,ws_sold_year,ws_sp,ws_wc] - InputAdapter - Exchange [d_year,ws_bill_customer_sk,ws_item_sk] #5 - WholeStageCodegen - HashAggregate [d_year,sum,sum,sum,sum,sum,sum,ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_wholesale_cost] [sum,sum,sum,sum,sum,sum] - Project [d_year,ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_wholesale_cost] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] - Filter [wr_order_number] - BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Project [ws_bill_customer_sk,ws_item_sk,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] - Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] [ws_bill_customer_sk,ws_item_sk,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [wr_item_sk,wr_order_number] - Filter [wr_item_sk,wr_order_number] - Scan parquet default.web_returns [wr_item_sk,wr_order_number] [wr_item_sk,wr_order_number] - InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Filter [cs_qty] - HashAggregate [cs_bill_customer_sk,cs_item_sk,d_year,sum,sum,sum,sum(UnscaledValue(cs_sales_price)),sum(UnscaledValue(cs_wholesale_cost)),sum(cast(cs_quantity as bigint))] [cs_customer_sk,cs_qty,cs_sold_year,cs_sp,cs_wc,sum,sum,sum,sum(UnscaledValue(cs_sales_price)),sum(UnscaledValue(cs_wholesale_cost)),sum(cast(cs_quantity as bigint))] - InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk,d_year] #8 - WholeStageCodegen - HashAggregate [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_wholesale_cost,d_year,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_wholesale_cost,d_year] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] - Filter [cr_order_number] - BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] - Project [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] - InputAdapter - BroadcastExchange #9 - WholeStageCodegen - Project [cr_item_sk,cr_order_number] - Filter [cr_item_sk,cr_order_number] - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] [cr_item_sk,cr_order_number] - InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/explain.txt deleted file mode 100644 index 8e43f862c..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/explain.txt +++ /dev/null @@ -1,32 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_name#2 ASC NULLS FIRST,substring(s_city#3, 1, 30) ASC NULLS FIRST,profit#4 ASC NULLS FIRST], output=[c_last_name#1,c_first_name#2,substring(s_city, 1, 30)#5,ss_ticket_number#6,amt#7,profit#4]) -+- *(6) Project [c_last_name#1, c_first_name#2, substring(s_city#3, 1, 30) AS substring(s_city, 1, 30)#5, ss_ticket_number#6, amt#7, profit#4, s_city#3] - +- *(6) BroadcastHashJoin [ss_customer_sk#8], [c_customer_sk#9], Inner, BuildRight - :- *(6) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3], functions=[sum(UnscaledValue(ss_coupon_amt#11)), sum(UnscaledValue(ss_net_profit#12))]) - : +- Exchange hashpartitioning(ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3, 5) - : +- *(4) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3], functions=[partial_sum(UnscaledValue(ss_coupon_amt#11)), partial_sum(UnscaledValue(ss_net_profit#12))]) - : +- *(4) Project [ss_customer_sk#8, ss_addr_sk#10, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12, s_city#3] - : +- *(4) BroadcastHashJoin [ss_hdemo_sk#13], [hd_demo_sk#14], Inner, BuildRight - : :- *(4) Project [ss_customer_sk#8, ss_hdemo_sk#13, ss_addr_sk#10, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12, s_city#3] - : : +- *(4) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight - : : :- *(4) Project [ss_customer_sk#8, ss_hdemo_sk#13, ss_addr_sk#10, ss_store_sk#15, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12] - : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight - : : : :- *(4) Project [ss_sold_date_sk#17, ss_customer_sk#8, ss_hdemo_sk#13, ss_addr_sk#10, ss_store_sk#15, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12] - : : : : +- *(4) Filter (((isnotnull(ss_sold_date_sk#17) && isnotnull(ss_store_sk#15)) && isnotnull(ss_hdemo_sk#13)) && isnotnull(ss_customer_sk#8)) - : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_customer_sk#8,ss_hdemo_sk#13,ss_addr_sk#10,ss_store_sk#15,ss_ticket_number#6,ss_coupon_amt#11,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [s_store_sk#16, s_city#3] - : : +- *(2) Filter (((isnotnull(s_number_employees#21) && (s_number_employees#21 >= 200)) && (s_number_employees#21 <= 295)) && isnotnull(s_store_sk#16)) - : : +- *(2) FileScan parquet default.store[s_store_sk#16,s_number_employees#21,s_city#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_number_employees), GreaterThanOrEqual(s_number_employees,200), LessThanOrEqual(s_num..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [hd_demo_sk#14] - : +- *(3) Filter (((hd_dep_count#22 = 6) || (hd_vehicle_count#23 > 2)) && isnotnull(hd_demo_sk#14)) - : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#14,hd_dep_count#22,hd_vehicle_count#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,6),GreaterThan(hd_vehicle_count,2)), IsNotNull(hd_demo_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(5) Project [c_customer_sk#9, c_first_name#2, c_last_name#1] - +- *(5) Filter isnotnull(c_customer_sk#9) - +- *(5) FileScan parquet default.customer[c_customer_sk#9,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/simplified.txt deleted file mode 100644 index 01ce92730..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/simplified.txt +++ /dev/null @@ -1,42 +0,0 @@ -TakeOrderedAndProject [amt,c_first_name,c_last_name,profit,s_city,ss_ticket_number,substring(s_city, 1, 30)] - WholeStageCodegen - Project [amt,c_first_name,c_last_name,profit,s_city,ss_ticket_number] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - HashAggregate [s_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] [amt,profit,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] - InputAdapter - Exchange [s_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #1 - WholeStageCodegen - HashAggregate [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number,sum,sum,sum,sum] [sum,sum,sum,sum] - Project [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_ticket_number] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_store_sk,ss_ticket_number] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_dow,d_year] - Scan parquet default.date_dim [d_date_sk,d_dow,d_year] [d_date_sk,d_dow,d_year] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [s_city,s_store_sk] - Filter [s_number_employees,s_store_sk] - Scan parquet default.store [s_city,s_number_employees,s_store_sk] [s_city,s_number_employees,s_store_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [hd_demo_sk] - Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [c_customer_sk,c_first_name,c_last_name] - Filter [c_customer_sk] - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/explain.txt deleted file mode 100644 index 482275fd7..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/explain.txt +++ /dev/null @@ -1,45 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST], output=[s_store_name#1,sum(ss_net_profit)#2]) -+- *(9) HashAggregate(keys=[s_store_name#1], functions=[sum(UnscaledValue(ss_net_profit#3))]) - +- Exchange hashpartitioning(s_store_name#1, 5) - +- *(8) HashAggregate(keys=[s_store_name#1], functions=[partial_sum(UnscaledValue(ss_net_profit#3))]) - +- *(8) Project [ss_net_profit#3, s_store_name#1] - +- *(8) BroadcastHashJoin [substring(s_zip#4, 1, 2)], [substring(ca_zip#5, 1, 2)], Inner, BuildRight - :- *(8) Project [ss_net_profit#3, s_store_name#1, s_zip#4] - : +- *(8) BroadcastHashJoin [ss_store_sk#6], [s_store_sk#7], Inner, BuildRight - : :- *(8) Project [ss_store_sk#6, ss_net_profit#3] - : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight - : : :- *(8) Project [ss_sold_date_sk#8, ss_store_sk#6, ss_net_profit#3] - : : : +- *(8) Filter (isnotnull(ss_sold_date_sk#8) && isnotnull(ss_store_sk#6)) - : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#8,ss_store_sk#6,ss_net_profit#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [d_date_sk#9] - : : +- *(1) Filter ((((isnotnull(d_qoy#10) && isnotnull(d_year#11)) && (d_qoy#10 = 2)) && (d_year#11 = 1998)) && isnotnull(d_date_sk#9)) - : : +- *(1) FileScan parquet default.date_dim[d_date_sk#9,d_year#11,d_qoy#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [s_store_sk#7, s_store_name#1, s_zip#4] - : +- *(2) Filter (isnotnull(s_store_sk#7) && isnotnull(s_zip#4)) - : +- *(2) FileScan parquet default.store[s_store_sk#7,s_store_name#1,s_zip#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(substring(input[0, string, true], 1, 2))) - +- *(7) HashAggregate(keys=[ca_zip#5], functions=[]) - +- Exchange hashpartitioning(ca_zip#5, 5) - +- *(6) HashAggregate(keys=[ca_zip#5], functions=[]) - +- *(6) BroadcastHashJoin [coalesce(ca_zip#5, )], [coalesce(ca_zip#12, )], LeftSemi, BuildRight, (ca_zip#5 <=> ca_zip#12) - :- *(6) Project [substring(ca_zip#13, 1, 5) AS ca_zip#5] - : +- *(6) Filter (substring(ca_zip#13, 1, 5) INSET (56910,69952,63792,39371,74351,11101,25003,97189,57834,73134,62377,51200,32754,22752,86379,14171,91110,40162,98569,28709,13394,66162,25733,25782,26065,18383,51949,87343,50298,83849,33786,64528,23470,67030,46136,25280,46820,77721,99076,18426,31880,17871,98235,45748,49156,18652,72013,51622,43848,78567,41248,13695,44165,67853,54917,53179,64034,10567,71791,68908,55565,59402,64147,85816,57855,61547,27700,68100,28810,58263,15723,83933,51103,58058,90578,82276,81096,81426,96451,77556,38607,76638,18906,62971,57047,48425,35576,11928,30625,83444,73520,51650,57647,60099,30122,94983,24128,10445,41368,26233,26859,21756,24676,19849,36420,38193,58470,39127,13595,87501,24317,15455,69399,98025,81019,48033,11376,39516,67875,92712,14867,38122,29741,42961,30469,51211,56458,15559,16021,33123,33282,33515,72823,54601,76698,56240,72175,60279,20004,68806,72325,28488,43933,50412,45200,22246,78668,79777,96765,67301,73273,49448,82636,23932,47305,29839,39192,18799,61265,37125,58943,64457,88424,24610,84935,89360,68893,30431,28898,10336,90257,59166,46081,26105,96888,36634,86284,35258,39972,22927,73241,53268,24206,27385,99543,31671,14663,30903,39861,24996,63089,88086,83921,21076,67897,66708,45721,60576,25103,52867,30450,36233,30010,96576,73171,56571,56575,64544,13955,78451,43285,18119,16725,83041,76107,79994,54364,35942,56691,19769,63435,34102,18845,22744,13354,75691,45549,23968,31387,83144,13375,15765,28577,88190,19736,73650,37930,25989,83926,94898,51798,39736,22437,55253,38415,71256,18376,42029,25858,44438,19515,38935,51649,71954,15882,18767,63193,25486,49130,37126,40604,34425,17043,12305,11634,26653,94167,36446,10516,67473,66864,72425,63981,18842,22461,42666,47770,69035,70372,28587,45266,15371,15798,45375,90225,16807,31016,68014,21337,19505,50016,10144,84093,21286,19430,34322,91068,94945,72305,24671,58048,65084,28545,21195,20548,22245,77191,96976,48583,76231,15734,61810,11356,68621,68786,98359,41367,26689,69913,76614,68101,88885,50308,79077,18270,28915,29178,53672,62878,10390,14922,68341,56529,41766,68309,56616,15126,61860,97789,11489,45692,41918,72151,72550,27156,36495,70738,17879,53535,17920,68880,78890,35850,14089,58078,65164,27068,26231,13376,57665,32213,77610,87816,21309,15146,86198,91137,55307,67467,40558,94627,82136,22351,89091,20260,23006,91393,47537,62496,98294,18840,71286,81312,31029,70466,35458,14060,22685,28286,25631,19512,40081,63837,14328,35474,22152,76232,51061,86057,17183) && isnotnull(substring(ca_zip#13, 1, 5))) - : +- *(6) FileScan parquet default.customer_address[ca_zip#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ))) - +- *(5) Project [ca_zip#12] - +- *(5) Filter (count(1)#14 > 10) - +- *(5) HashAggregate(keys=[ca_zip#13], functions=[count(1)]) - +- Exchange hashpartitioning(ca_zip#13, 5) - +- *(4) HashAggregate(keys=[ca_zip#13], functions=[partial_count(1)]) - +- *(4) Project [ca_zip#13] - +- *(4) BroadcastHashJoin [ca_address_sk#15], [c_current_addr_sk#16], Inner, BuildRight - :- *(4) Project [ca_address_sk#15, ca_zip#13] - : +- *(4) Filter isnotnull(ca_address_sk#15) - : +- *(4) FileScan parquet default.customer_address[ca_address_sk#15,ca_zip#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [c_current_addr_sk#16] - +- *(3) Filter ((isnotnull(c_preferred_cust_flag#17) && (c_preferred_cust_flag#17 = Y)) && isnotnull(c_current_addr_sk#16)) - +- *(3) FileScan parquet default.customer[c_current_addr_sk#16,c_preferred_cust_flag#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_preferred_cust_flag), EqualTo(c_preferred_cust_flag,Y), IsNotNull(c_current_addr_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/simplified.txt deleted file mode 100644 index fd00e42e4..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/simplified.txt +++ /dev/null @@ -1,61 +0,0 @@ -TakeOrderedAndProject [s_store_name,sum(ss_net_profit)] - WholeStageCodegen - HashAggregate [s_store_name,sum,sum(UnscaledValue(ss_net_profit))] [sum,sum(UnscaledValue(ss_net_profit)),sum(ss_net_profit)] - InputAdapter - Exchange [s_store_name] #1 - WholeStageCodegen - HashAggregate [s_store_name,ss_net_profit,sum,sum] [sum,sum] - Project [s_store_name,ss_net_profit] - BroadcastHashJoin [ca_zip,s_zip] - Project [s_store_name,s_zip,ss_net_profit] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_net_profit,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_net_profit,ss_sold_date_sk,ss_store_sk] - Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_net_profit,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_qoy,d_year] - Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [s_store_name,s_store_sk,s_zip] - Filter [s_store_sk,s_zip] - Scan parquet default.store [s_store_name,s_store_sk,s_zip] [s_store_name,s_store_sk,s_zip] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - HashAggregate [ca_zip] - InputAdapter - Exchange [ca_zip] #5 - WholeStageCodegen - HashAggregate [ca_zip] - BroadcastHashJoin [ca_zip,ca_zip] - Project [ca_zip] - Filter [ca_zip] - Scan parquet default.customer_address [ca_zip] [ca_zip] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [ca_zip] - Filter [count(1)] - HashAggregate [ca_zip,count,count(1)] [ca_zip,count,count(1),count(1)] - InputAdapter - Exchange [ca_zip] #7 - WholeStageCodegen - HashAggregate [ca_zip,count,count] [count,count] - Project [ca_zip] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [ca_address_sk,ca_zip] - Filter [ca_address_sk] - Scan parquet default.customer_address [ca_address_sk,ca_zip] [ca_address_sk,ca_zip] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [c_current_addr_sk] - Filter [c_current_addr_sk,c_preferred_cust_flag] - Scan parquet default.customer [c_current_addr_sk,c_preferred_cust_flag] [c_current_addr_sk,c_preferred_cust_flag] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/explain.txt deleted file mode 100644 index 41613515c..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/explain.txt +++ /dev/null @@ -1,97 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NULLS FIRST], output=[channel#1,id#2,sales#3,returns#4,profit#5]) -+- *(23) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[sum(sales#7), sum(returns#8), sum(profit#9)]) - +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 5) - +- *(22) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[partial_sum(sales#7), partial_sum(returns#8), partial_sum(profit#9)]) - +- *(22) Expand [List(sales#7, returns#8, profit#9, channel#10, id#11, 0), List(sales#7, returns#8, profit#9, channel#10, null, 1), List(sales#7, returns#8, profit#9, null, null, 3)], [sales#7, returns#8, profit#9, channel#1, id#2, spark_grouping_id#6] - +- Union - :- *(7) HashAggregate(keys=[s_store_id#12], functions=[sum(UnscaledValue(ss_ext_sales_price#13)), sum(coalesce(cast(sr_return_amt#14 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#15 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#16 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) - : +- Exchange hashpartitioning(s_store_id#12, 5) - : +- *(6) HashAggregate(keys=[s_store_id#12], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#13)), partial_sum(coalesce(cast(sr_return_amt#14 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#15 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#16 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) - : +- *(6) Project [ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16, s_store_id#12] - : +- *(6) BroadcastHashJoin [ss_promo_sk#17], [p_promo_sk#18], Inner, BuildRight - : :- *(6) Project [ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16, s_store_id#12] - : : +- *(6) BroadcastHashJoin [ss_item_sk#19], [i_item_sk#20], Inner, BuildRight - : : :- *(6) Project [ss_item_sk#19, ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16, s_store_id#12] - : : : +- *(6) BroadcastHashJoin [ss_store_sk#21], [s_store_sk#22], Inner, BuildRight - : : : :- *(6) Project [ss_item_sk#19, ss_store_sk#21, ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16] - : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#23], [d_date_sk#24], Inner, BuildRight - : : : : :- *(6) Project [ss_sold_date_sk#23, ss_item_sk#19, ss_store_sk#21, ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16] - : : : : : +- *(6) BroadcastHashJoin [cast(ss_item_sk#19 as bigint), cast(ss_ticket_number#25 as bigint)], [sr_item_sk#26, sr_ticket_number#27], LeftOuter, BuildRight - : : : : : :- *(6) Project [ss_sold_date_sk#23, ss_item_sk#19, ss_store_sk#21, ss_promo_sk#17, ss_ticket_number#25, ss_ext_sales_price#13, ss_net_profit#15] - : : : : : : +- *(6) Filter (((isnotnull(ss_sold_date_sk#23) && isnotnull(ss_store_sk#21)) && isnotnull(ss_item_sk#19)) && isnotnull(ss_promo_sk#17)) - : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#23,ss_item_sk#19,ss_store_sk#21,ss_promo_sk#17,ss_ticket_number#25,ss_ext_sales_price#13,ss_net_profit#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)], ReadSchema: struct= 11192)) && (d_date#28 <= 11222)) && isnotnull(d_date_sk#24)) - : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#24,d_date#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), Is..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(3) Project [s_store_sk#22, s_store_id#12] - : : : +- *(3) Filter isnotnull(s_store_sk#22) - : : : +- *(3) FileScan parquet default.store[s_store_sk#22,s_store_id#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(4) Project [i_item_sk#20] - : : +- *(4) Filter ((isnotnull(i_current_price#29) && (i_current_price#29 > 50.00)) && isnotnull(i_item_sk#20)) - : : +- *(4) FileScan parquet default.item[i_item_sk#20,i_current_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(5) Project [p_promo_sk#18] - : +- *(5) Filter ((isnotnull(p_channel_tv#30) && (p_channel_tv#30 = N)) && isnotnull(p_promo_sk#18)) - : +- *(5) FileScan parquet default.promotion[p_promo_sk#18,p_channel_tv#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)], ReadSchema: struct - :- *(14) HashAggregate(keys=[cp_catalog_page_id#31], functions=[sum(UnscaledValue(cs_ext_sales_price#32)), sum(coalesce(cast(cr_return_amount#33 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#34 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#35 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) - : +- Exchange hashpartitioning(cp_catalog_page_id#31, 5) - : +- *(13) HashAggregate(keys=[cp_catalog_page_id#31], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#32)), partial_sum(coalesce(cast(cr_return_amount#33 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#34 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#35 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) - : +- *(13) Project [cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35, cp_catalog_page_id#31] - : +- *(13) BroadcastHashJoin [cs_promo_sk#36], [p_promo_sk#18], Inner, BuildRight - : :- *(13) Project [cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35, cp_catalog_page_id#31] - : : +- *(13) BroadcastHashJoin [cs_item_sk#37], [i_item_sk#20], Inner, BuildRight - : : :- *(13) Project [cs_item_sk#37, cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35, cp_catalog_page_id#31] - : : : +- *(13) BroadcastHashJoin [cs_catalog_page_sk#38], [cp_catalog_page_sk#39], Inner, BuildRight - : : : :- *(13) Project [cs_catalog_page_sk#38, cs_item_sk#37, cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35] - : : : : +- *(13) BroadcastHashJoin [cs_sold_date_sk#40], [d_date_sk#24], Inner, BuildRight - : : : : :- *(13) Project [cs_sold_date_sk#40, cs_catalog_page_sk#38, cs_item_sk#37, cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35] - : : : : : +- *(13) BroadcastHashJoin [cs_item_sk#37, cs_order_number#41], [cr_item_sk#42, cr_order_number#43], LeftOuter, BuildRight - : : : : : :- *(13) Project [cs_sold_date_sk#40, cs_catalog_page_sk#38, cs_item_sk#37, cs_promo_sk#36, cs_order_number#41, cs_ext_sales_price#32, cs_net_profit#34] - : : : : : : +- *(13) Filter (((isnotnull(cs_sold_date_sk#40) && isnotnull(cs_catalog_page_sk#38)) && isnotnull(cs_item_sk#37)) && isnotnull(cs_promo_sk#36)) - : : : : : : +- *(13) FileScan parquet default.catalog_sales[cs_sold_date_sk#40,cs_catalog_page_sk#38,cs_item_sk#37,cs_promo_sk#36,cs_order_number#41,cs_ext_sales_price#32,cs_net_profit#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_p..., ReadSchema: struct - : : : : +- ReusedExchange [d_date_sk#24], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(10) Project [cp_catalog_page_sk#39, cp_catalog_page_id#31] - : : : +- *(10) Filter isnotnull(cp_catalog_page_sk#39) - : : : +- *(10) FileScan parquet default.catalog_page[cp_catalog_page_sk#39,cp_catalog_page_id#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_page], PartitionFilters: [], PushedFilters: [IsNotNull(cp_catalog_page_sk)], ReadSchema: struct - : : +- ReusedExchange [i_item_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [p_promo_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(21) HashAggregate(keys=[web_site_id#44], functions=[sum(UnscaledValue(ws_ext_sales_price#45)), sum(coalesce(cast(wr_return_amt#46 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#47 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#48 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) - +- Exchange hashpartitioning(web_site_id#44, 5) - +- *(20) HashAggregate(keys=[web_site_id#44], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#45)), partial_sum(coalesce(cast(wr_return_amt#46 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#47 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#48 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) - +- *(20) Project [ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48, web_site_id#44] - +- *(20) BroadcastHashJoin [ws_promo_sk#49], [p_promo_sk#18], Inner, BuildRight - :- *(20) Project [ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48, web_site_id#44] - : +- *(20) BroadcastHashJoin [ws_item_sk#50], [i_item_sk#20], Inner, BuildRight - : :- *(20) Project [ws_item_sk#50, ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48, web_site_id#44] - : : +- *(20) BroadcastHashJoin [ws_web_site_sk#51], [web_site_sk#52], Inner, BuildRight - : : :- *(20) Project [ws_item_sk#50, ws_web_site_sk#51, ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48] - : : : +- *(20) BroadcastHashJoin [ws_sold_date_sk#53], [d_date_sk#24], Inner, BuildRight - : : : :- *(20) Project [ws_sold_date_sk#53, ws_item_sk#50, ws_web_site_sk#51, ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48] - : : : : +- *(20) BroadcastHashJoin [cast(ws_item_sk#50 as bigint), cast(ws_order_number#54 as bigint)], [wr_item_sk#55, wr_order_number#56], LeftOuter, BuildRight - : : : : :- *(20) Project [ws_sold_date_sk#53, ws_item_sk#50, ws_web_site_sk#51, ws_promo_sk#49, ws_order_number#54, ws_ext_sales_price#45, ws_net_profit#47] - : : : : : +- *(20) Filter (((isnotnull(ws_sold_date_sk#53) && isnotnull(ws_web_site_sk#51)) && isnotnull(ws_item_sk#50)) && isnotnull(ws_promo_sk#49)) - : : : : : +- *(20) FileScan parquet default.web_sales[ws_sold_date_sk#53,ws_item_sk#50,ws_web_site_sk#51,ws_promo_sk#49,ws_order_number#54,ws_ext_sales_price#45,ws_net_profit#47] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo..., ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#24], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(17) Project [web_site_sk#52, web_site_id#44] - : : +- *(17) Filter isnotnull(web_site_sk#52) - : : +- *(17) FileScan parquet default.web_site[web_site_sk#52,web_site_id#44] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct - : +- ReusedExchange [i_item_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [p_promo_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/simplified.txt deleted file mode 100644 index 28b1a009b..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/simplified.txt +++ /dev/null @@ -1,133 +0,0 @@ -TakeOrderedAndProject [channel,id,profit,returns,sales] - WholeStageCodegen - HashAggregate [channel,id,spark_grouping_id,sum,sum,sum,sum(profit),sum(returns),sum(sales)] [profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] - InputAdapter - Exchange [channel,id,spark_grouping_id] #1 - WholeStageCodegen - HashAggregate [channel,id,profit,returns,sales,spark_grouping_id,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Expand [channel,id,profit,returns,sales] - InputAdapter - Union - WholeStageCodegen - HashAggregate [s_store_id,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00))] [channel,id,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00))] - InputAdapter - Exchange [s_store_id] #2 - WholeStageCodegen - HashAggregate [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [p_promo_sk,ss_promo_sk] - Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit,ss_promo_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Filter [ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [sr_item_sk,sr_net_loss,sr_return_amt,sr_ticket_number] - Filter [sr_item_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_net_loss,sr_return_amt,sr_ticket_number] [sr_item_sk,sr_net_loss,sr_return_amt,sr_ticket_number] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [s_store_id,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_id,s_store_sk] [s_store_id,s_store_sk] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [i_item_sk] - Filter [i_current_price,i_item_sk] - Scan parquet default.item [i_current_price,i_item_sk] [i_current_price,i_item_sk] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [p_promo_sk] - Filter [p_channel_tv,p_promo_sk] - Scan parquet default.promotion [p_channel_tv,p_promo_sk] [p_channel_tv,p_promo_sk] - WholeStageCodegen - HashAggregate [cp_catalog_page_id,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00))] [channel,id,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00))] - InputAdapter - Exchange [cp_catalog_page_id] #8 - WholeStageCodegen - HashAggregate [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit] - BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit,cs_promo_sk] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk] - BroadcastHashJoin [cp_catalog_page_sk,cs_catalog_page_sk] - Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk,cs_sold_date_sk] - BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] - Project [cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_order_number,cs_promo_sk,cs_sold_date_sk] - Filter [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_order_number,cs_promo_sk,cs_sold_date_sk] [cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_order_number,cs_promo_sk,cs_sold_date_sk] - InputAdapter - BroadcastExchange #9 - WholeStageCodegen - Project [cr_item_sk,cr_net_loss,cr_order_number,cr_return_amount] - Filter [cr_item_sk,cr_order_number] - Scan parquet default.catalog_returns [cr_item_sk,cr_net_loss,cr_order_number,cr_return_amount] [cr_item_sk,cr_net_loss,cr_order_number,cr_return_amount] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #4 - InputAdapter - BroadcastExchange #10 - WholeStageCodegen - Project [cp_catalog_page_id,cp_catalog_page_sk] - Filter [cp_catalog_page_sk] - Scan parquet default.catalog_page [cp_catalog_page_id,cp_catalog_page_sk] [cp_catalog_page_id,cp_catalog_page_sk] - InputAdapter - ReusedExchange [i_item_sk] [i_item_sk] #6 - InputAdapter - ReusedExchange [p_promo_sk] [p_promo_sk] #7 - WholeStageCodegen - HashAggregate [sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),web_site_id] [channel,id,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00))] - InputAdapter - Exchange [web_site_id] #11 - WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sum,sum,web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum,sum,sum] - Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit] - BroadcastHashJoin [p_promo_sk,ws_promo_sk] - Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit,ws_promo_sk] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk] - BroadcastHashJoin [web_site_sk,ws_web_site_sk] - Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk,ws_web_site_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] - BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Project [ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_order_number,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] - Filter [ws_item_sk,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_order_number,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] [ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_order_number,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] - InputAdapter - BroadcastExchange #12 - WholeStageCodegen - Project [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt] - Filter [wr_item_sk,wr_order_number] - Scan parquet default.web_returns [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt] [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #4 - InputAdapter - BroadcastExchange #13 - WholeStageCodegen - Project [web_site_id,web_site_sk] - Filter [web_site_sk] - Scan parquet default.web_site [web_site_id,web_site_sk] [web_site_id,web_site_sk] - InputAdapter - ReusedExchange [i_item_sk] [i_item_sk] #6 - InputAdapter - ReusedExchange [p_promo_sk] [p_promo_sk] #7 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/explain.txt deleted file mode 100644 index 93900040c..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/explain.txt +++ /dev/null @@ -1,52 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST,c_salutation#2 ASC NULLS FIRST,c_first_name#3 ASC NULLS FIRST,c_last_name#4 ASC NULLS FIRST,ca_street_number#5 ASC NULLS FIRST,ca_street_name#6 ASC NULLS FIRST,ca_street_type#7 ASC NULLS FIRST,ca_suite_number#8 ASC NULLS FIRST,ca_city#9 ASC NULLS FIRST,ca_county#10 ASC NULLS FIRST,ca_state#11 ASC NULLS FIRST,ca_zip#12 ASC NULLS FIRST,ca_country#13 ASC NULLS FIRST,ca_gmt_offset#14 ASC NULLS FIRST,ca_location_type#15 ASC NULLS FIRST,ctr_total_return#16 ASC NULLS FIRST], output=[c_customer_id#1,c_salutation#2,c_first_name#3,c_last_name#4,ca_street_number#5,ca_street_name#6,ca_street_type#7,ca_suite_number#8,ca_city#9,ca_county#10,ca_state#11,ca_zip#12,ca_country#13,ca_gmt_offset#14,ca_location_type#15,ctr_total_return#16]) -+- *(11) Project [c_customer_id#1, c_salutation#2, c_first_name#3, c_last_name#4, ca_street_number#5, ca_street_name#6, ca_street_type#7, ca_suite_number#8, ca_city#9, ca_county#10, ca_state#11, ca_zip#12, ca_country#13, ca_gmt_offset#14, ca_location_type#15, ctr_total_return#16] - +- *(11) BroadcastHashJoin [c_current_addr_sk#17], [ca_address_sk#18], Inner, BuildRight - :- *(11) Project [ctr_total_return#16, c_customer_id#1, c_current_addr_sk#17, c_salutation#2, c_first_name#3, c_last_name#4] - : +- *(11) BroadcastHashJoin [ctr_customer_sk#19], [c_customer_sk#20], Inner, BuildRight - : :- *(11) Project [ctr_customer_sk#19, ctr_total_return#16] - : : +- *(11) BroadcastHashJoin [ctr_state#21], [ctr_state#21#22], Inner, BuildRight, (cast(ctr_total_return#16 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#23) - : : :- *(11) Filter isnotnull(ctr_total_return#16) - : : : +- *(11) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[sum(UnscaledValue(cr_return_amt_inc_tax#25))]) - : : : +- Exchange hashpartitioning(cr_returning_customer_sk#24, ca_state#11, 5) - : : : +- *(3) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[partial_sum(UnscaledValue(cr_return_amt_inc_tax#25))]) - : : : +- *(3) Project [cr_returning_customer_sk#24, cr_return_amt_inc_tax#25, ca_state#11] - : : : +- *(3) BroadcastHashJoin [cr_returning_addr_sk#26], [ca_address_sk#18], Inner, BuildRight - : : : :- *(3) Project [cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] - : : : : +- *(3) BroadcastHashJoin [cr_returned_date_sk#27], [d_date_sk#28], Inner, BuildRight - : : : : :- *(3) Project [cr_returned_date_sk#27, cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] - : : : : : +- *(3) Filter ((isnotnull(cr_returned_date_sk#27) && isnotnull(cr_returning_addr_sk#26)) && isnotnull(cr_returning_customer_sk#24)) - : : : : : +- *(3) FileScan parquet default.catalog_returns[cr_returned_date_sk#27,cr_returning_customer_sk#24,cr_returning_addr_sk#26,cr_return_amt_inc_tax#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk), IsNotNull(cr_returning_customer..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [ca_address_sk#18, ca_state#11] - : : : +- *(2) Filter (isnotnull(ca_address_sk#18) && isnotnull(ca_state#11)) - : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#18,ca_state#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) - : : +- *(8) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#23) - : : +- *(8) HashAggregate(keys=[ctr_state#21], functions=[avg(ctr_total_return#16)]) - : : +- Exchange hashpartitioning(ctr_state#21, 5) - : : +- *(7) HashAggregate(keys=[ctr_state#21], functions=[partial_avg(ctr_total_return#16)]) - : : +- *(7) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[sum(UnscaledValue(cr_return_amt_inc_tax#25))]) - : : +- Exchange hashpartitioning(cr_returning_customer_sk#24, ca_state#11, 5) - : : +- *(6) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[partial_sum(UnscaledValue(cr_return_amt_inc_tax#25))]) - : : +- *(6) Project [cr_returning_customer_sk#24, cr_return_amt_inc_tax#25, ca_state#11] - : : +- *(6) BroadcastHashJoin [cr_returning_addr_sk#26], [ca_address_sk#18], Inner, BuildRight - : : :- *(6) Project [cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] - : : : +- *(6) BroadcastHashJoin [cr_returned_date_sk#27], [d_date_sk#28], Inner, BuildRight - : : : :- *(6) Project [cr_returned_date_sk#27, cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] - : : : : +- *(6) Filter (isnotnull(cr_returned_date_sk#27) && isnotnull(cr_returning_addr_sk#26)) - : : : : +- *(6) FileScan parquet default.catalog_returns[cr_returned_date_sk#27,cr_returning_customer_sk#24,cr_returning_addr_sk#26,cr_return_amt_inc_tax#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk)], ReadSchema: struct= 62.00)) && (cast(i_current_price#3 as decimal(12,2)) <= 92.00)) && i_manufact_id#9 IN (129,270,821,423)) && isnotnull(i_item_sk#4)) - : : : +- *(4) FileScan parquet default.item[i_item_sk#4,i_item_id#1,i_item_desc#2,i_current_price#3,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,62.00), In(i_manufact_id, [129,27..., ReadSchema: struct= 100)) && (inv_quantity_on_hand#10 <= 500)) && isnotnull(inv_item_sk#8)) && isnotnull(inv_date_sk#6)) - : : +- *(1) FileScan parquet default.inventory[inv_date_sk#6,inv_item_sk#8,inv_quantity_on_hand#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(i..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#7] - : +- *(2) Filter (((isnotnull(d_date#11) && (d_date#11 >= 11102)) && (d_date#11 <= 11162)) && isnotnull(d_date_sk#7)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_date#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-05-25), LessThanOrEqual(d_date,2000-07-24), Is..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [ss_item_sk#5] - +- *(3) Filter isnotnull(ss_item_sk#5) - +- *(3) FileScan parquet default.store_sales[ss_item_sk#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/simplified.txt deleted file mode 100644 index e0e54c6c0..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/simplified.txt +++ /dev/null @@ -1,34 +0,0 @@ -TakeOrderedAndProject [i_current_price,i_item_desc,i_item_id] - WholeStageCodegen - HashAggregate [i_current_price,i_item_desc,i_item_id] - InputAdapter - Exchange [i_current_price,i_item_desc,i_item_id] #1 - WholeStageCodegen - HashAggregate [i_current_price,i_item_desc,i_item_id] - Project [i_current_price,i_item_desc,i_item_id] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_current_price,i_item_desc,i_item_id,i_item_sk] - BroadcastHashJoin [d_date_sk,inv_date_sk] - Project [i_current_price,i_item_desc,i_item_id,i_item_sk,inv_date_sk] - BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [i_current_price,i_item_desc,i_item_id,i_item_sk] - Filter [i_current_price,i_item_sk,i_manufact_id] - Scan parquet default.item [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [inv_date_sk,inv_item_sk] - Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_quantity_on_hand] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [ss_item_sk] - Filter [ss_item_sk] - Scan parquet default.store_sales [ss_item_sk] [ss_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/explain.txt deleted file mode 100644 index 8b674b50e..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/explain.txt +++ /dev/null @@ -1,69 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,sr_item_qty#2 ASC NULLS FIRST], output=[item_id#1,sr_item_qty#2,sr_dev#3,cr_item_qty#4,cr_dev#5,wr_item_qty#6,wr_dev#7,average#8]) -+- *(18) Project [item_id#1, sr_item_qty#2, (((cast(sr_item_qty#2 as double) / cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as double)) / 3.0) * 100.0) AS sr_dev#3, cr_item_qty#4, (((cast(cr_item_qty#4 as double) / cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as double)) / 3.0) * 100.0) AS cr_dev#5, wr_item_qty#6, (((cast(wr_item_qty#6 as double) / cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as double)) / 3.0) * 100.0) AS wr_dev#7, CheckOverflow((promote_precision(cast(cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as decimal(20,0)) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#8] - +- *(18) BroadcastHashJoin [item_id#1], [item_id#9], Inner, BuildRight - :- *(18) Project [item_id#1, sr_item_qty#2, cr_item_qty#4] - : +- *(18) BroadcastHashJoin [item_id#1], [item_id#10], Inner, BuildRight - : :- *(18) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(sr_return_quantity#12 as bigint))]) - : : +- Exchange hashpartitioning(i_item_id#11, 5) - : : +- *(5) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(cast(sr_return_quantity#12 as bigint))]) - : : +- *(5) Project [sr_return_quantity#12, i_item_id#11] - : : +- *(5) BroadcastHashJoin [sr_returned_date_sk#13], [cast(d_date_sk#14 as bigint)], Inner, BuildRight - : : :- *(5) Project [sr_returned_date_sk#13, sr_return_quantity#12, i_item_id#11] - : : : +- *(5) BroadcastHashJoin [sr_item_sk#15], [cast(i_item_sk#16 as bigint)], Inner, BuildRight - : : : :- *(5) Project [sr_returned_date_sk#13, sr_item_sk#15, sr_return_quantity#12] - : : : : +- *(5) Filter (isnotnull(sr_item_sk#15) && isnotnull(sr_returned_date_sk#13)) - : : : : +- *(5) FileScan parquet default.store_returns[sr_returned_date_sk#13,sr_item_sk#15,sr_return_quantity#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [i_item_sk#16, i_item_id#11] - : : : +- *(1) Filter (isnotnull(i_item_sk#16) && isnotnull(i_item_id#11)) - : : : +- *(1) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(4) Project [d_date_sk#14] - : : +- *(4) BroadcastHashJoin [d_date#17], [d_date#17#18], LeftSemi, BuildRight - : : :- *(4) Project [d_date_sk#14, d_date#17] - : : : +- *(4) Filter isnotnull(d_date_sk#14) - : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) - : : +- *(3) Project [d_date#17 AS d_date#17#18] - : : +- *(3) BroadcastHashJoin [d_week_seq#19], [d_week_seq#19#20], LeftSemi, BuildRight - : : :- *(3) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [d_week_seq#19 AS d_week_seq#19#20] - : : +- *(2) Filter cast(d_date#17 as string) IN (2000-06-30,2000-09-27,2000-11-17) - : : +- *(2) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : +- *(11) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(cr_return_quantity#21 as bigint))]) - : +- Exchange hashpartitioning(i_item_id#11, 5) - : +- *(10) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(cast(cr_return_quantity#21 as bigint))]) - : +- *(10) Project [cr_return_quantity#21, i_item_id#11] - : +- *(10) BroadcastHashJoin [cr_returned_date_sk#22], [d_date_sk#14], Inner, BuildRight - : :- *(10) Project [cr_returned_date_sk#22, cr_return_quantity#21, i_item_id#11] - : : +- *(10) BroadcastHashJoin [cr_item_sk#23], [i_item_sk#16], Inner, BuildRight - : : :- *(10) Project [cr_returned_date_sk#22, cr_item_sk#23, cr_return_quantity#21] - : : : +- *(10) Filter (isnotnull(cr_item_sk#23) && isnotnull(cr_returned_date_sk#22)) - : : : +- *(10) FileScan parquet default.catalog_returns[cr_returned_date_sk#22,cr_item_sk#23,cr_return_quantity#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_returned_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(6) Project [i_item_sk#16, i_item_id#11] - : : +- *(6) Filter (isnotnull(i_item_sk#16) && isnotnull(i_item_id#11)) - : : +- *(6) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(9) Project [d_date_sk#14] - : +- *(9) BroadcastHashJoin [d_date#17], [d_date#17#24], LeftSemi, BuildRight - : :- *(9) Project [d_date_sk#14, d_date#17] - : : +- *(9) Filter isnotnull(d_date_sk#14) - : : +- *(9) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct - : +- ReusedExchange [d_date#17#24], BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - +- *(17) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(wr_return_quantity#25 as bigint))]) - +- Exchange hashpartitioning(i_item_id#11, 5) - +- *(16) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(cast(wr_return_quantity#25 as bigint))]) - +- *(16) Project [wr_return_quantity#25, i_item_id#11] - +- *(16) BroadcastHashJoin [wr_returned_date_sk#26], [cast(d_date_sk#14 as bigint)], Inner, BuildRight - :- *(16) Project [wr_returned_date_sk#26, wr_return_quantity#25, i_item_id#11] - : +- *(16) BroadcastHashJoin [wr_item_sk#27], [cast(i_item_sk#16 as bigint)], Inner, BuildRight - : :- *(16) Project [wr_returned_date_sk#26, wr_item_sk#27, wr_return_quantity#25] - : : +- *(16) Filter (isnotnull(wr_item_sk#27) && isnotnull(wr_returned_date_sk#26)) - : : +- *(16) FileScan parquet default.web_returns[wr_returned_date_sk#26,wr_item_sk#27,wr_return_quantity#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_returned_date_sk)], ReadSchema: struct - : +- ReusedExchange [i_item_sk#16, i_item_id#11], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [d_date_sk#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/simplified.txt deleted file mode 100644 index 94ffc5da9..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/simplified.txt +++ /dev/null @@ -1,94 +0,0 @@ -TakeOrderedAndProject [average,cr_dev,cr_item_qty,item_id,sr_dev,sr_item_qty,wr_dev,wr_item_qty] - WholeStageCodegen - Project [cr_item_qty,item_id,sr_item_qty,wr_item_qty] - BroadcastHashJoin [item_id,item_id] - Project [cr_item_qty,item_id,sr_item_qty] - BroadcastHashJoin [item_id,item_id] - HashAggregate [i_item_id,sum,sum(cast(sr_return_quantity as bigint))] [item_id,sr_item_qty,sum,sum(cast(sr_return_quantity as bigint))] - InputAdapter - Exchange [i_item_id] #1 - WholeStageCodegen - HashAggregate [i_item_id,sr_return_quantity,sum,sum] [sum,sum] - Project [i_item_id,sr_return_quantity] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] - Project [i_item_id,sr_return_quantity,sr_returned_date_sk] - BroadcastHashJoin [i_item_sk,sr_item_sk] - Project [sr_item_sk,sr_return_quantity,sr_returned_date_sk] - Filter [sr_item_sk,sr_returned_date_sk] - Scan parquet default.store_returns [sr_item_sk,sr_return_quantity,sr_returned_date_sk] [sr_item_sk,sr_return_quantity,sr_returned_date_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [i_item_id,i_item_sk] - Filter [i_item_id,i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk] - BroadcastHashJoin [d_date,d_date] - Project [d_date,d_date_sk] - Filter [d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date] - BroadcastHashJoin [d_week_seq,d_week_seq] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_week_seq] - Filter [d_date] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(cast(cr_return_quantity as bigint))] [cr_item_qty,item_id,sum,sum(cast(cr_return_quantity as bigint))] - InputAdapter - Exchange [i_item_id] #7 - WholeStageCodegen - HashAggregate [cr_return_quantity,i_item_id,sum,sum] [sum,sum] - Project [cr_return_quantity,i_item_id] - BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Project [cr_return_quantity,cr_returned_date_sk,i_item_id] - BroadcastHashJoin [cr_item_sk,i_item_sk] - Project [cr_item_sk,cr_return_quantity,cr_returned_date_sk] - Filter [cr_item_sk,cr_returned_date_sk] - Scan parquet default.catalog_returns [cr_item_sk,cr_return_quantity,cr_returned_date_sk] [cr_item_sk,cr_return_quantity,cr_returned_date_sk] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [i_item_id,i_item_sk] - Filter [i_item_id,i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] - InputAdapter - BroadcastExchange #9 - WholeStageCodegen - Project [d_date_sk] - BroadcastHashJoin [d_date,d_date] - Project [d_date,d_date_sk] - Filter [d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - ReusedExchange [d_date] [d_date] #4 - InputAdapter - BroadcastExchange #10 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(cast(wr_return_quantity as bigint))] [item_id,sum,sum(cast(wr_return_quantity as bigint)),wr_item_qty] - InputAdapter - Exchange [i_item_id] #11 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum,wr_return_quantity] [sum,sum] - Project [i_item_id,wr_return_quantity] - BroadcastHashJoin [d_date_sk,wr_returned_date_sk] - Project [i_item_id,wr_return_quantity,wr_returned_date_sk] - BroadcastHashJoin [i_item_sk,wr_item_sk] - Project [wr_item_sk,wr_return_quantity,wr_returned_date_sk] - Filter [wr_item_sk,wr_returned_date_sk] - Scan parquet default.web_returns [wr_item_sk,wr_return_quantity,wr_returned_date_sk] [wr_item_sk,wr_return_quantity,wr_returned_date_sk] - InputAdapter - ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #2 - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/explain.txt deleted file mode 100644 index c0cde7a5d..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/explain.txt +++ /dev/null @@ -1,35 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST], output=[customer_id#2,customername#3]) -+- *(6) Project [c_customer_id#1 AS customer_id#2, concat(c_last_name#4, , , c_first_name#5) AS customername#3, c_customer_id#1] - +- *(6) BroadcastHashJoin [cast(cd_demo_sk#6 as bigint)], [sr_cdemo_sk#7], Inner, BuildRight - :- *(6) Project [c_customer_id#1, c_first_name#5, c_last_name#4, cd_demo_sk#6] - : +- *(6) BroadcastHashJoin [hd_income_band_sk#8], [ib_income_band_sk#9], Inner, BuildRight - : :- *(6) Project [c_customer_id#1, c_first_name#5, c_last_name#4, cd_demo_sk#6, hd_income_band_sk#8] - : : +- *(6) BroadcastHashJoin [c_current_hdemo_sk#10], [hd_demo_sk#11], Inner, BuildRight - : : :- *(6) Project [c_customer_id#1, c_current_hdemo_sk#10, c_first_name#5, c_last_name#4, cd_demo_sk#6] - : : : +- *(6) BroadcastHashJoin [c_current_cdemo_sk#12], [cd_demo_sk#6], Inner, BuildRight - : : : :- *(6) Project [c_customer_id#1, c_current_cdemo_sk#12, c_current_hdemo_sk#10, c_first_name#5, c_last_name#4] - : : : : +- *(6) BroadcastHashJoin [c_current_addr_sk#13], [ca_address_sk#14], Inner, BuildRight - : : : : :- *(6) Project [c_customer_id#1, c_current_cdemo_sk#12, c_current_hdemo_sk#10, c_current_addr_sk#13, c_first_name#5, c_last_name#4] - : : : : : +- *(6) Filter ((isnotnull(c_current_addr_sk#13) && isnotnull(c_current_cdemo_sk#12)) && isnotnull(c_current_hdemo_sk#10)) - : : : : : +- *(6) FileScan parquet default.customer[c_customer_id#1,c_current_cdemo_sk#12,c_current_hdemo_sk#10,c_current_addr_sk#13,c_first_name#5,c_last_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [cd_demo_sk#6] - : : : +- *(2) Filter isnotnull(cd_demo_sk#6) - : : : +- *(2) FileScan parquet default.customer_demographics[cd_demo_sk#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [hd_demo_sk#11, hd_income_band_sk#8] - : : +- *(3) Filter (isnotnull(hd_demo_sk#11) && isnotnull(hd_income_band_sk#8)) - : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#11,hd_income_band_sk#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [ib_income_band_sk#9] - : +- *(4) Filter ((((isnotnull(ib_lower_bound#16) && isnotnull(ib_upper_bound#17)) && (ib_lower_bound#16 >= 38128)) && (ib_upper_bound#17 <= 88128)) && isnotnull(ib_income_band_sk#9)) - : +- *(4) FileScan parquet default.income_band[ib_income_band_sk#9,ib_lower_bound#16,ib_upper_bound#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/income_band], PartitionFilters: [], PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), ..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])) - +- *(5) Project [sr_cdemo_sk#7] - +- *(5) Filter isnotnull(sr_cdemo_sk#7) - +- *(5) FileScan parquet default.store_returns[sr_cdemo_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_cdemo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/simplified.txt deleted file mode 100644 index 894b15298..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/simplified.txt +++ /dev/null @@ -1,45 +0,0 @@ -TakeOrderedAndProject [c_customer_id,customer_id,customername] - WholeStageCodegen - Project [c_customer_id,c_first_name,c_last_name] - BroadcastHashJoin [cd_demo_sk,sr_cdemo_sk] - Project [c_customer_id,c_first_name,c_last_name,cd_demo_sk] - BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [c_customer_id,c_first_name,c_last_name,cd_demo_sk,hd_income_band_sk] - BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name,cd_demo_sk] - BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] - Project [c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] - Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] - Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #1 - WholeStageCodegen - Project [ca_address_sk] - Filter [ca_address_sk,ca_city] - Scan parquet default.customer_address [ca_address_sk,ca_city] [ca_address_sk,ca_city] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [cd_demo_sk] - Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_demo_sk] [cd_demo_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [hd_demo_sk,hd_income_band_sk] - Filter [hd_demo_sk,hd_income_band_sk] - Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [ib_income_band_sk] - Filter [ib_income_band_sk,ib_lower_bound,ib_upper_bound] - Scan parquet default.income_band [ib_income_band_sk,ib_lower_bound,ib_upper_bound] [ib_income_band_sk,ib_lower_bound,ib_upper_bound] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [sr_cdemo_sk] - Filter [sr_cdemo_sk] - Scan parquet default.store_returns [sr_cdemo_sk] [sr_cdemo_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/explain.txt deleted file mode 100644 index 574dc3012..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/explain.txt +++ /dev/null @@ -1,50 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[substring(r_reason_desc, 1, 20)#1 ASC NULLS FIRST,aggOrder#2 ASC NULLS FIRST,avg(wr_refunded_cash)#3 ASC NULLS FIRST,avg(wr_fee)#4 ASC NULLS FIRST], output=[substring(r_reason_desc, 1, 20)#1,avg(ws_quantity)#5,avg(wr_refunded_cash)#3,avg(wr_fee)#4]) -+- *(9) HashAggregate(keys=[r_reason_desc#6], functions=[avg(cast(ws_quantity#7 as bigint)), avg(UnscaledValue(wr_refunded_cash#8)), avg(UnscaledValue(wr_fee#9))]) - +- Exchange hashpartitioning(r_reason_desc#6, 5) - +- *(8) HashAggregate(keys=[r_reason_desc#6], functions=[partial_avg(cast(ws_quantity#7 as bigint)), partial_avg(UnscaledValue(wr_refunded_cash#8)), partial_avg(UnscaledValue(wr_fee#9))]) - +- *(8) Project [ws_quantity#7, wr_fee#9, wr_refunded_cash#8, r_reason_desc#6] - +- *(8) BroadcastHashJoin [wr_reason_sk#10], [cast(r_reason_sk#11 as bigint)], Inner, BuildRight - :- *(8) Project [ws_quantity#7, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] - : +- *(8) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight - : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] - : : +- *(8) BroadcastHashJoin [wr_refunded_addr_sk#14], [cast(ca_address_sk#15 as bigint)], Inner, BuildRight, ((((ca_state#16 IN (IN,OH,NJ) && (ws_net_profit#17 >= 100.00)) && (ws_net_profit#17 <= 200.00)) || ((ca_state#16 IN (WI,CT,KY) && (ws_net_profit#17 >= 150.00)) && (ws_net_profit#17 <= 300.00))) || ((ca_state#16 IN (LA,IA,AR) && (ws_net_profit#17 >= 50.00)) && (ws_net_profit#17 <= 250.00))) - : : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, ws_net_profit#17, wr_refunded_addr_sk#14, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] - : : : +- *(8) BroadcastHashJoin [wr_returning_cdemo_sk#18, cd_marital_status#19, cd_education_status#20], [cast(cd_demo_sk#21 as bigint), cd_marital_status#22, cd_education_status#23], Inner, BuildRight - : : : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, ws_net_profit#17, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#18, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8, cd_marital_status#19, cd_education_status#20] - : : : : +- *(8) BroadcastHashJoin [wr_refunded_cdemo_sk#24], [cast(cd_demo_sk#25 as bigint)], Inner, BuildRight, ((((((cd_marital_status#19 = M) && (cd_education_status#20 = Advanced Degree)) && (ws_sales_price#26 >= 100.00)) && (ws_sales_price#26 <= 150.00)) || ((((cd_marital_status#19 = S) && (cd_education_status#20 = College)) && (ws_sales_price#26 >= 50.00)) && (ws_sales_price#26 <= 100.00))) || ((((cd_marital_status#19 = W) && (cd_education_status#20 = 2 yr Degree)) && (ws_sales_price#26 >= 150.00)) && (ws_sales_price#26 <= 200.00))) - : : : : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, ws_sales_price#26, ws_net_profit#17, wr_refunded_cdemo_sk#24, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#18, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] - : : : : : +- *(8) BroadcastHashJoin [ws_web_page_sk#27], [wp_web_page_sk#28], Inner, BuildRight - : : : : : :- *(8) Project [ws_sold_date_sk#12, ws_web_page_sk#27, ws_quantity#7, ws_sales_price#26, ws_net_profit#17, wr_refunded_cdemo_sk#24, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#18, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] - : : : : : : +- *(8) BroadcastHashJoin [cast(ws_item_sk#29 as bigint), cast(ws_order_number#30 as bigint)], [wr_item_sk#31, wr_order_number#32], Inner, BuildRight - : : : : : : :- *(8) Project [ws_sold_date_sk#12, ws_item_sk#29, ws_web_page_sk#27, ws_order_number#30, ws_quantity#7, ws_sales_price#26, ws_net_profit#17] - : : : : : : : +- *(8) Filter (((isnotnull(ws_item_sk#29) && isnotnull(ws_order_number#30)) && isnotnull(ws_web_page_sk#27)) && isnotnull(ws_sold_date_sk#12)) - : : : : : : : +- *(8) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#29,ws_web_page_sk#27,ws_order_number#30,ws_quantity#7,ws_sales_price#26,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), IsNotNull(ws_sold_..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(3) Project [cd_demo_sk#25, cd_marital_status#19, cd_education_status#20] - : : : : +- *(3) Filter ((isnotnull(cd_demo_sk#25) && isnotnull(cd_education_status#20)) && isnotnull(cd_marital_status#19)) - : : : : +- *(3) FileScan parquet default.customer_demographics[cd_demo_sk#25,cd_marital_status#19,cd_education_status#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_education_status), IsNotNull(cd_marital_status)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint), input[1, string, true], input[2, string, true])) - : : : +- *(4) Project [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] - : : : +- *(4) Filter ((isnotnull(cd_education_status#23) && isnotnull(cd_marital_status#22)) && isnotnull(cd_demo_sk#21)) - : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#21,cd_marital_status#22,cd_education_status#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_education_status), IsNotNull(cd_marital_status), IsNotNull(cd_demo_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(5) Project [ca_address_sk#15, ca_state#16] - : : +- *(5) Filter ((isnotnull(ca_country#33) && (ca_country#33 = United States)) && isnotnull(ca_address_sk#15)) - : : +- *(5) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#16,ca_country#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(6) Project [d_date_sk#13] - : +- *(6) Filter ((isnotnull(d_year#34) && (d_year#34 = 2000)) && isnotnull(d_date_sk#13)) - : +- *(6) FileScan parquet default.date_dim[d_date_sk#13,d_year#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(7) Project [r_reason_sk#11, r_reason_desc#6] - +- *(7) Filter isnotnull(r_reason_sk#11) - +- *(7) FileScan parquet default.reason[r_reason_sk#11,r_reason_desc#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/simplified.txt deleted file mode 100644 index 86a427106..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/simplified.txt +++ /dev/null @@ -1,66 +0,0 @@ -TakeOrderedAndProject [aggOrder,avg(wr_fee),avg(wr_refunded_cash),avg(ws_quantity),substring(r_reason_desc, 1, 20)] - WholeStageCodegen - HashAggregate [avg(UnscaledValue(wr_fee)),avg(UnscaledValue(wr_refunded_cash)),avg(cast(ws_quantity as bigint)),count,count,count,r_reason_desc,sum,sum,sum] [aggOrder,avg(UnscaledValue(wr_fee)),avg(UnscaledValue(wr_refunded_cash)),avg(cast(ws_quantity as bigint)),avg(wr_fee),avg(wr_refunded_cash),avg(ws_quantity),count,count,count,substring(r_reason_desc, 1, 20),sum,sum,sum] - InputAdapter - Exchange [r_reason_desc] #1 - WholeStageCodegen - HashAggregate [count,count,count,count,count,count,r_reason_desc,sum,sum,sum,sum,sum,sum,wr_fee,wr_refunded_cash,ws_quantity] [count,count,count,count,count,count,sum,sum,sum,sum,sum,sum] - Project [r_reason_desc,wr_fee,wr_refunded_cash,ws_quantity] - BroadcastHashJoin [r_reason_sk,wr_reason_sk] - Project [wr_fee,wr_reason_sk,wr_refunded_cash,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [wr_fee,wr_reason_sk,wr_refunded_cash,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [ca_address_sk,ca_state,wr_refunded_addr_sk,ws_net_profit] - Project [wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,ws_net_profit,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [cd_demo_sk,cd_education_status,cd_education_status,cd_marital_status,cd_marital_status,wr_returning_cdemo_sk] - Project [cd_education_status,cd_marital_status,wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_returning_cdemo_sk,ws_net_profit,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [cd_demo_sk,cd_education_status,cd_marital_status,wr_refunded_cdemo_sk,ws_sales_price] - Project [wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,ws_net_profit,ws_quantity,ws_sales_price,ws_sold_date_sk] - BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] - Project [wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,ws_net_profit,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] - BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Project [ws_item_sk,ws_net_profit,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] - Filter [ws_item_sk,ws_order_number,ws_sold_date_sk,ws_web_page_sk] - Scan parquet default.web_sales [ws_item_sk,ws_net_profit,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] [ws_item_sk,ws_net_profit,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [wr_fee,wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] - Filter [wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] - Scan parquet default.web_returns [wr_fee,wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] [wr_fee,wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [wp_web_page_sk] - Filter [wp_web_page_sk] - Scan parquet default.web_page [wp_web_page_sk] [wp_web_page_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [cd_demo_sk,cd_education_status,cd_marital_status] - Filter [cd_demo_sk,cd_education_status,cd_marital_status] - Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [cd_demo_sk,cd_education_status,cd_marital_status] - Filter [cd_demo_sk,cd_education_status,cd_marital_status] - Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [ca_address_sk,ca_state] - Filter [ca_address_sk,ca_country] - Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] [ca_address_sk,ca_country,ca_state] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [r_reason_desc,r_reason_sk] - Filter [r_reason_sk] - Scan parquet default.reason [r_reason_desc,r_reason_sk] [r_reason_desc,r_reason_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/explain.txt deleted file mode 100644 index 61285df23..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/explain.txt +++ /dev/null @@ -1,25 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WHEN (cast(lochierarchy#1 as int) = 0) THEN i_category#2 END ASC NULLS FIRST,rank_within_parent#3 ASC NULLS FIRST], output=[total_sum#4,i_category#2,i_class#5,lochierarchy#1,rank_within_parent#3]) -+- *(6) Project [total_sum#4, i_category#2, i_class#5, lochierarchy#1, rank_within_parent#3] - +- Window [rank(_w3#6) windowspecdefinition(_w1#7, _w2#8, _w3#6 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#3], [_w1#7, _w2#8], [_w3#6 DESC NULLS LAST] - +- *(5) Sort [_w1#7 ASC NULLS FIRST, _w2#8 ASC NULLS FIRST, _w3#6 DESC NULLS LAST], false, 0 - +- Exchange hashpartitioning(_w1#7, _w2#8, 5) - +- *(4) HashAggregate(keys=[i_category#2, i_class#5, spark_grouping_id#9], functions=[sum(UnscaledValue(ws_net_paid#10))]) - +- Exchange hashpartitioning(i_category#2, i_class#5, spark_grouping_id#9, 5) - +- *(3) HashAggregate(keys=[i_category#2, i_class#5, spark_grouping_id#9], functions=[partial_sum(UnscaledValue(ws_net_paid#10))]) - +- *(3) Expand [List(ws_net_paid#10, i_category#11, i_class#12, 0), List(ws_net_paid#10, i_category#11, null, 1), List(ws_net_paid#10, null, null, 3)], [ws_net_paid#10, i_category#2, i_class#5, spark_grouping_id#9] - +- *(3) Project [ws_net_paid#10, i_category#13 AS i_category#11, i_class#14 AS i_class#12] - +- *(3) BroadcastHashJoin [ws_item_sk#15], [i_item_sk#16], Inner, BuildRight - :- *(3) Project [ws_item_sk#15, ws_net_paid#10] - : +- *(3) BroadcastHashJoin [ws_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight - : :- *(3) Project [ws_sold_date_sk#17, ws_item_sk#15, ws_net_paid#10] - : : +- *(3) Filter (isnotnull(ws_sold_date_sk#17) && isnotnull(ws_item_sk#15)) - : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#17,ws_item_sk#15,ws_net_paid#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [d_date_sk#18] - : +- *(1) Filter (((isnotnull(d_month_seq#19) && (d_month_seq#19 >= 1200)) && (d_month_seq#19 <= 1211)) && isnotnull(d_date_sk#18)) - : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_month_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(2) Project [i_item_sk#16, i_class#14, i_category#13] - +- *(2) Filter isnotnull(i_item_sk#16) - +- *(2) FileScan parquet default.item[i_item_sk#16,i_class#14,i_category#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/simplified.txt deleted file mode 100644 index 5ec040bda..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/simplified.txt +++ /dev/null @@ -1,35 +0,0 @@ -TakeOrderedAndProject [i_category,i_class,lochierarchy,rank_within_parent,total_sum] - WholeStageCodegen - Project [i_category,i_class,lochierarchy,rank_within_parent,total_sum] - InputAdapter - Window [_w1,_w2,_w3] - WholeStageCodegen - Sort [_w1,_w2,_w3] - InputAdapter - Exchange [_w1,_w2] #1 - WholeStageCodegen - HashAggregate [i_category,i_class,spark_grouping_id,sum,sum(UnscaledValue(ws_net_paid))] [_w1,_w2,_w3,lochierarchy,sum,sum(UnscaledValue(ws_net_paid)),total_sum] - InputAdapter - Exchange [i_category,i_class,spark_grouping_id] #2 - WholeStageCodegen - HashAggregate [i_category,i_class,spark_grouping_id,sum,sum,ws_net_paid] [sum,sum] - Expand [i_category,i_class,ws_net_paid] - Project [i_category,i_class,ws_net_paid] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_item_sk,ws_net_paid] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_item_sk,ws_net_paid,ws_sold_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_sold_date_sk] [ws_item_sk,ws_net_paid,ws_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [i_category,i_class,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_category,i_class,i_item_sk] [i_category,i_class,i_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/explain.txt deleted file mode 100644 index c47ee7616..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/explain.txt +++ /dev/null @@ -1,54 +0,0 @@ -== Physical Plan == -*(13) HashAggregate(keys=[], functions=[count(1)]) -+- Exchange SinglePartition - +- *(12) HashAggregate(keys=[], functions=[partial_count(1)]) - +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#4, ), coalesce(c_first_name#5, ), coalesce(d_date#6, 0)], LeftAnti, BuildRight, (((c_last_name#1 <=> c_last_name#4) && (c_first_name#2 <=> c_first_name#5)) && (d_date#3 <=> d_date#6)) - :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - : +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - : +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#7, ), coalesce(c_first_name#8, ), coalesce(d_date#9, 0)], LeftAnti, BuildRight, (((c_last_name#1 <=> c_last_name#7) && (c_first_name#2 <=> c_first_name#8)) && (d_date#3 <=> d_date#9)) - : :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - : : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, d_date#3, 5) - : : +- *(3) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - : : +- *(3) Project [c_last_name#1, c_first_name#2, d_date#3] - : : +- *(3) BroadcastHashJoin [ss_customer_sk#10], [c_customer_sk#11], Inner, BuildRight - : : :- *(3) Project [ss_customer_sk#10, d_date#3] - : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight - : : : :- *(3) Project [ss_sold_date_sk#12, ss_customer_sk#10] - : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#12) && isnotnull(ss_customer_sk#10)) - : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_customer_sk#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#13, d_date#3] - : : : +- *(1) Filter (((isnotnull(d_month_seq#14) && (d_month_seq#14 >= 1200)) && (d_month_seq#14 <= 1211)) && isnotnull(d_date_sk#13)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#13,d_date#3,d_month_seq#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [c_customer_sk#11, c_first_name#2, c_last_name#1] - : : +- *(2) Filter isnotnull(c_customer_sk#11) - : : +- *(2) FileScan parquet default.customer[c_customer_sk#11,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) - : +- *(7) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) - : +- Exchange hashpartitioning(c_last_name#7, c_first_name#8, d_date#9, 5) - : +- *(6) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) - : +- *(6) Project [c_last_name#7, c_first_name#8, d_date#9] - : +- *(6) BroadcastHashJoin [cs_bill_customer_sk#15], [c_customer_sk#16], Inner, BuildRight - : :- *(6) Project [cs_bill_customer_sk#15, d_date#9] - : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight - : : :- *(6) Project [cs_sold_date_sk#17, cs_bill_customer_sk#15] - : : : +- *(6) Filter (isnotnull(cs_sold_date_sk#17) && isnotnull(cs_bill_customer_sk#15)) - : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#17,cs_bill_customer_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#18, d_date#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [c_customer_sk#16, c_first_name#8, c_last_name#7], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) - +- *(11) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) - +- Exchange hashpartitioning(c_last_name#4, c_first_name#5, d_date#6, 5) - +- *(10) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) - +- *(10) Project [c_last_name#4, c_first_name#5, d_date#6] - +- *(10) BroadcastHashJoin [ws_bill_customer_sk#19], [c_customer_sk#20], Inner, BuildRight - :- *(10) Project [ws_bill_customer_sk#19, d_date#6] - : +- *(10) BroadcastHashJoin [ws_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight - : :- *(10) Project [ws_sold_date_sk#21, ws_bill_customer_sk#19] - : : +- *(10) Filter (isnotnull(ws_sold_date_sk#21) && isnotnull(ws_bill_customer_sk#19)) - : : +- *(10) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#22, d_date#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [c_customer_sk#20, c_first_name#5, c_last_name#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/simplified.txt deleted file mode 100644 index 1a7672916..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/simplified.txt +++ /dev/null @@ -1,74 +0,0 @@ -WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),count(1)] - InputAdapter - Exchange #1 - WholeStageCodegen - HashAggregate [count,count] [count,count] - HashAggregate [c_first_name,c_last_name,d_date] - HashAggregate [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] - HashAggregate [c_first_name,c_last_name,d_date] - HashAggregate [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] - HashAggregate [c_first_name,c_last_name,d_date] - InputAdapter - Exchange [c_first_name,c_last_name,d_date] #2 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - Project [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [d_date,ss_customer_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_customer_sk,ss_sold_date_sk] - Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date,d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] [d_date,d_date_sk,d_month_seq] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [c_customer_sk,c_first_name,c_last_name] - Filter [c_customer_sk] - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - InputAdapter - Exchange [c_first_name,c_last_name,d_date] #6 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - Project [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_bill_customer_sk,cs_sold_date_sk] - Filter [cs_bill_customer_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] [cs_bill_customer_sk,cs_sold_date_sk] - InputAdapter - ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #3 - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - InputAdapter - Exchange [c_first_name,c_last_name,d_date] #8 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - Project [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [d_date,ws_bill_customer_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_customer_sk,ws_sold_date_sk] - Filter [ws_bill_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_sold_date_sk] - InputAdapter - ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #3 - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/explain.txt deleted file mode 100644 index d729f0b3f..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/explain.txt +++ /dev/null @@ -1,165 +0,0 @@ -== Physical Plan == -BroadcastNestedLoopJoin BuildRight, Inner -:- BroadcastNestedLoopJoin BuildRight, Inner -: :- BroadcastNestedLoopJoin BuildRight, Inner -: : :- BroadcastNestedLoopJoin BuildRight, Inner -: : : :- BroadcastNestedLoopJoin BuildRight, Inner -: : : : :- BroadcastNestedLoopJoin BuildRight, Inner -: : : : : :- BroadcastNestedLoopJoin BuildRight, Inner -: : : : : : :- *(5) HashAggregate(keys=[], functions=[count(1)]) -: : : : : : : +- Exchange SinglePartition -: : : : : : : +- *(4) HashAggregate(keys=[], functions=[partial_count(1)]) -: : : : : : : +- *(4) Project -: : : : : : : +- *(4) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight -: : : : : : : :- *(4) Project [ss_store_sk#1] -: : : : : : : : +- *(4) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight -: : : : : : : : :- *(4) Project [ss_sold_time_sk#3, ss_store_sk#1] -: : : : : : : : : +- *(4) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight -: : : : : : : : : :- *(4) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] -: : : : : : : : : : +- *(4) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : : : : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct -: : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : : : : : +- *(1) Project [hd_demo_sk#6] -: : : : : : : : : +- *(1) Filter (((((hd_dep_count#7 = 4) && (hd_vehicle_count#8 <= 6)) || ((hd_dep_count#7 = 2) && (hd_vehicle_count#8 <= 4))) || ((hd_dep_count#7 = 0) && (hd_vehicle_count#8 <= 2))) && isnotnull(hd_demo_sk#6)) -: : : : : : : : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#6,hd_dep_count#7,hd_vehicle_count#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(Or(And(EqualTo(hd_dep_count,4),LessThanOrEqual(hd_vehicle_count,6)),And(EqualTo(hd_dep_count,..., ReadSchema: struct -: : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : : : : +- *(2) Project [t_time_sk#4] -: : : : : : : : +- *(2) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 8)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) -: : : : : : : : +- *(2) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,8), GreaterThanOrEqual(t_minute,30), IsNo..., ReadSchema: struct -: : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : : : +- *(3) Project [s_store_sk#2] -: : : : : : : +- *(3) Filter ((isnotnull(s_store_name#11) && (s_store_name#11 = ese)) && isnotnull(s_store_sk#2)) -: : : : : : : +- *(3) FileScan parquet default.store[s_store_sk#2,s_store_name#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)], ReadSchema: struct -: : : : : : +- BroadcastExchange IdentityBroadcastMode -: : : : : : +- *(10) HashAggregate(keys=[], functions=[count(1)]) -: : : : : : +- Exchange SinglePartition -: : : : : : +- *(9) HashAggregate(keys=[], functions=[partial_count(1)]) -: : : : : : +- *(9) Project -: : : : : : +- *(9) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight -: : : : : : :- *(9) Project [ss_store_sk#1] -: : : : : : : +- *(9) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight -: : : : : : : :- *(9) Project [ss_sold_time_sk#3, ss_store_sk#1] -: : : : : : : : +- *(9) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight -: : : : : : : : :- *(9) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] -: : : : : : : : : +- *(9) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : : : : : : +- *(9) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct -: : : : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : : : +- *(7) Project [t_time_sk#4] -: : : : : : : +- *(7) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 9)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) -: : : : : : : +- *(7) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), LessThan(t_minute,30), IsNotNull(t_ti..., ReadSchema: struct -: : : : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : +- BroadcastExchange IdentityBroadcastMode -: : : : : +- *(15) HashAggregate(keys=[], functions=[count(1)]) -: : : : : +- Exchange SinglePartition -: : : : : +- *(14) HashAggregate(keys=[], functions=[partial_count(1)]) -: : : : : +- *(14) Project -: : : : : +- *(14) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight -: : : : : :- *(14) Project [ss_store_sk#1] -: : : : : : +- *(14) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight -: : : : : : :- *(14) Project [ss_sold_time_sk#3, ss_store_sk#1] -: : : : : : : +- *(14) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight -: : : : : : : :- *(14) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] -: : : : : : : : +- *(14) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : : : : : +- *(14) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct -: : : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : : +- *(12) Project [t_time_sk#4] -: : : : : : +- *(12) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 9)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) -: : : : : : +- *(12) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), GreaterThanOrEqual(t_minute,30), IsNo..., ReadSchema: struct -: : : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : +- BroadcastExchange IdentityBroadcastMode -: : : : +- *(20) HashAggregate(keys=[], functions=[count(1)]) -: : : : +- Exchange SinglePartition -: : : : +- *(19) HashAggregate(keys=[], functions=[partial_count(1)]) -: : : : +- *(19) Project -: : : : +- *(19) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight -: : : : :- *(19) Project [ss_store_sk#1] -: : : : : +- *(19) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight -: : : : : :- *(19) Project [ss_sold_time_sk#3, ss_store_sk#1] -: : : : : : +- *(19) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight -: : : : : : :- *(19) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] -: : : : : : : +- *(19) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : : : : +- *(19) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct -: : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : +- *(17) Project [t_time_sk#4] -: : : : : +- *(17) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 10)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) -: : : : : +- *(17) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct -: : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : +- BroadcastExchange IdentityBroadcastMode -: : : +- *(25) HashAggregate(keys=[], functions=[count(1)]) -: : : +- Exchange SinglePartition -: : : +- *(24) HashAggregate(keys=[], functions=[partial_count(1)]) -: : : +- *(24) Project -: : : +- *(24) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight -: : : :- *(24) Project [ss_store_sk#1] -: : : : +- *(24) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight -: : : : :- *(24) Project [ss_sold_time_sk#3, ss_store_sk#1] -: : : : : +- *(24) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight -: : : : : :- *(24) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] -: : : : : : +- *(24) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : : : +- *(24) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct -: : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : +- *(22) Project [t_time_sk#4] -: : : : +- *(22) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 10)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) -: : : : +- *(22) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct -: : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : +- BroadcastExchange IdentityBroadcastMode -: : +- *(30) HashAggregate(keys=[], functions=[count(1)]) -: : +- Exchange SinglePartition -: : +- *(29) HashAggregate(keys=[], functions=[partial_count(1)]) -: : +- *(29) Project -: : +- *(29) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight -: : :- *(29) Project [ss_store_sk#1] -: : : +- *(29) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight -: : : :- *(29) Project [ss_sold_time_sk#3, ss_store_sk#1] -: : : : +- *(29) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight -: : : : :- *(29) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] -: : : : : +- *(29) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : : +- *(29) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct -: : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : +- *(27) Project [t_time_sk#4] -: : : +- *(27) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 11)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) -: : : +- *(27) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct -: : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: +- BroadcastExchange IdentityBroadcastMode -: +- *(35) HashAggregate(keys=[], functions=[count(1)]) -: +- Exchange SinglePartition -: +- *(34) HashAggregate(keys=[], functions=[partial_count(1)]) -: +- *(34) Project -: +- *(34) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight -: :- *(34) Project [ss_store_sk#1] -: : +- *(34) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight -: : :- *(34) Project [ss_sold_time_sk#3, ss_store_sk#1] -: : : +- *(34) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight -: : : :- *(34) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] -: : : : +- *(34) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : +- *(34) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct -: : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : +- *(32) Project [t_time_sk#4] -: : +- *(32) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 11)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) -: : +- *(32) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct -: +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -+- BroadcastExchange IdentityBroadcastMode - +- *(40) HashAggregate(keys=[], functions=[count(1)]) - +- Exchange SinglePartition - +- *(39) HashAggregate(keys=[], functions=[partial_count(1)]) - +- *(39) Project - +- *(39) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight - :- *(39) Project [ss_store_sk#1] - : +- *(39) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight - : :- *(39) Project [ss_sold_time_sk#3, ss_store_sk#1] - : : +- *(39) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight - : : :- *(39) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] - : : : +- *(39) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) - : : : +- *(39) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(37) Project [t_time_sk#4] - : +- *(37) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 12)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) - : +- *(37) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,12), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct - +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/simplified.txt deleted file mode 100644 index 77bbef312..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/simplified.txt +++ /dev/null @@ -1,222 +0,0 @@ -BroadcastNestedLoopJoin - BroadcastNestedLoopJoin - BroadcastNestedLoopJoin - BroadcastNestedLoopJoin - BroadcastNestedLoopJoin - BroadcastNestedLoopJoin - BroadcastNestedLoopJoin - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),h8_30_to_9] - InputAdapter - Exchange #1 - WholeStageCodegen - HashAggregate [count,count] [count,count] - Project - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_store_sk] - BroadcastHashJoin [ss_sold_time_sk,t_time_sk] - Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [hd_demo_sk] - Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [t_time_sk] - Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [s_store_sk] - Filter [s_store_name,s_store_sk] - Scan parquet default.store [s_store_name,s_store_sk] [s_store_name,s_store_sk] - BroadcastExchange #5 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),h9_to_9_30] - InputAdapter - Exchange #6 - WholeStageCodegen - HashAggregate [count,count] [count,count] - Project - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_store_sk] - BroadcastHashJoin [ss_sold_time_sk,t_time_sk] - Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - InputAdapter - ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [t_time_sk] - Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] - InputAdapter - ReusedExchange [s_store_sk] [s_store_sk] #4 - BroadcastExchange #8 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),h9_30_to_10] - InputAdapter - Exchange #9 - WholeStageCodegen - HashAggregate [count,count] [count,count] - Project - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_store_sk] - BroadcastHashJoin [ss_sold_time_sk,t_time_sk] - Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - InputAdapter - ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 - InputAdapter - BroadcastExchange #10 - WholeStageCodegen - Project [t_time_sk] - Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] - InputAdapter - ReusedExchange [s_store_sk] [s_store_sk] #4 - BroadcastExchange #11 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),h10_to_10_30] - InputAdapter - Exchange #12 - WholeStageCodegen - HashAggregate [count,count] [count,count] - Project - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_store_sk] - BroadcastHashJoin [ss_sold_time_sk,t_time_sk] - Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - InputAdapter - ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 - InputAdapter - BroadcastExchange #13 - WholeStageCodegen - Project [t_time_sk] - Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] - InputAdapter - ReusedExchange [s_store_sk] [s_store_sk] #4 - BroadcastExchange #14 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),h10_30_to_11] - InputAdapter - Exchange #15 - WholeStageCodegen - HashAggregate [count,count] [count,count] - Project - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_store_sk] - BroadcastHashJoin [ss_sold_time_sk,t_time_sk] - Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - InputAdapter - ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 - InputAdapter - BroadcastExchange #16 - WholeStageCodegen - Project [t_time_sk] - Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] - InputAdapter - ReusedExchange [s_store_sk] [s_store_sk] #4 - BroadcastExchange #17 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),h11_to_11_30] - InputAdapter - Exchange #18 - WholeStageCodegen - HashAggregate [count,count] [count,count] - Project - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_store_sk] - BroadcastHashJoin [ss_sold_time_sk,t_time_sk] - Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - InputAdapter - ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 - InputAdapter - BroadcastExchange #19 - WholeStageCodegen - Project [t_time_sk] - Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] - InputAdapter - ReusedExchange [s_store_sk] [s_store_sk] #4 - BroadcastExchange #20 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),h11_30_to_12] - InputAdapter - Exchange #21 - WholeStageCodegen - HashAggregate [count,count] [count,count] - Project - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_store_sk] - BroadcastHashJoin [ss_sold_time_sk,t_time_sk] - Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - InputAdapter - ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 - InputAdapter - BroadcastExchange #22 - WholeStageCodegen - Project [t_time_sk] - Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] - InputAdapter - ReusedExchange [s_store_sk] [s_store_sk] #4 - BroadcastExchange #23 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),h12_to_12_30] - InputAdapter - Exchange #24 - WholeStageCodegen - HashAggregate [count,count] [count,count] - Project - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_store_sk] - BroadcastHashJoin [ss_sold_time_sk,t_time_sk] - Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - InputAdapter - ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 - InputAdapter - BroadcastExchange #25 - WholeStageCodegen - Project [t_time_sk] - Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] - InputAdapter - ReusedExchange [s_store_sk] [s_store_sk] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/explain.txt deleted file mode 100644 index fb0d68dce..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/explain.txt +++ /dev/null @@ -1,31 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,s_store_name#3 ASC NULLS FIRST], output=[i_category#4,i_class#5,i_brand#6,s_store_name#3,s_company_name#7,d_moy#8,sum_sales#1,avg_monthly_sales#2]) -+- *(7) Project [i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8, sum_sales#1, avg_monthly_sales#2] - +- *(7) Filter (CASE WHEN NOT (avg_monthly_sales#2 = 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000) - +- Window [avg(_w0#9) windowspecdefinition(i_category#4, i_brand#6, s_store_name#3, s_company_name#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#6, s_store_name#3, s_company_name#7] - +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#6 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#7 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(i_category#4, i_brand#6, s_store_name#3, s_company_name#7, 5) - +- *(5) HashAggregate(keys=[i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#10))]) - +- Exchange hashpartitioning(i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8, 5) - +- *(4) HashAggregate(keys=[i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#10))]) - +- *(4) Project [i_brand#6, i_class#5, i_category#4, ss_sales_price#10, d_moy#8, s_store_name#3, s_company_name#7] - +- *(4) BroadcastHashJoin [ss_store_sk#11], [s_store_sk#12], Inner, BuildRight - :- *(4) Project [i_brand#6, i_class#5, i_category#4, ss_store_sk#11, ss_sales_price#10, d_moy#8] - : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight - : :- *(4) Project [i_brand#6, i_class#5, i_category#4, ss_sold_date_sk#13, ss_store_sk#11, ss_sales_price#10] - : : +- *(4) BroadcastHashJoin [i_item_sk#15], [ss_item_sk#16], Inner, BuildRight - : : :- *(4) Project [i_item_sk#15, i_brand#6, i_class#5, i_category#4] - : : : +- *(4) Filter (((i_category#4 IN (Books,Electronics,Sports) && i_class#5 IN (computers,stereo,football)) || (i_category#4 IN (Men,Jewelry,Women) && i_class#5 IN (shirts,birdal,dresses))) && isnotnull(i_item_sk#15)) - : : : +- *(4) FileScan parquet default.item[i_item_sk#15,i_brand#6,i_class#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(And(In(i_category, [Books,Electronics,Sports]),In(i_class, [computers,stereo,football])),And(..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : +- *(1) Project [ss_sold_date_sk#13, ss_item_sk#16, ss_store_sk#11, ss_sales_price#10] - : : +- *(1) Filter ((isnotnull(ss_item_sk#16) && isnotnull(ss_sold_date_sk#13)) && isnotnull(ss_store_sk#11)) - : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_item_sk#16,ss_store_sk#11,ss_sales_price#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#14, d_moy#8] - : +- *(2) Filter ((isnotnull(d_year#17) && (d_year#17 = 1999)) && isnotnull(d_date_sk#14)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#14,d_year#17,d_moy#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [s_store_sk#12, s_store_name#3, s_company_name#7] - +- *(3) Filter isnotnull(s_store_sk#12) - +- *(3) FileScan parquet default.store[s_store_sk#12,s_store_name#3,s_company_name#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/simplified.txt deleted file mode 100644 index 33e1ef0af..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/simplified.txt +++ /dev/null @@ -1,43 +0,0 @@ -TakeOrderedAndProject [avg_monthly_sales,d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum_sales] - WholeStageCodegen - Project [avg_monthly_sales,d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum_sales] - Filter [avg_monthly_sales,sum_sales] - InputAdapter - Window [_w0,i_brand,i_category,s_company_name,s_store_name] - WholeStageCodegen - Sort [i_brand,i_category,s_company_name,s_store_name] - InputAdapter - Exchange [i_brand,i_category,s_company_name,s_store_name] #1 - WholeStageCodegen - HashAggregate [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] - InputAdapter - Exchange [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name] #2 - WholeStageCodegen - HashAggregate [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,ss_sales_price,sum,sum] [sum,sum] - Project [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,ss_sales_price] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [d_moy,i_brand,i_category,i_class,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_brand,i_category,i_class,ss_sales_price,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_brand,i_category,i_class,i_item_sk] - Filter [i_category,i_class,i_item_sk] - Scan parquet default.item [i_brand,i_category,i_class,i_item_sk] [i_brand,i_category,i_class,i_item_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk,d_moy] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [s_company_name,s_store_name,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_company_name,s_store_name,s_store_sk] [s_company_name,s_store_name,s_store_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/explain.txt deleted file mode 100644 index 0ed6b1727..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/explain.txt +++ /dev/null @@ -1,109 +0,0 @@ -== Physical Plan == -*(1) Project [CASE WHEN (Subquery subquery1150 > 62316685) THEN Subquery subquery1151 ELSE Subquery subquery1152 END AS bucket1#1, CASE WHEN (Subquery subquery1154 > 19045798) THEN Subquery subquery1155 ELSE Subquery subquery1156 END AS bucket2#2, CASE WHEN (Subquery subquery1158 > 365541424) THEN Subquery subquery1159 ELSE Subquery subquery1160 END AS bucket3#3, CASE WHEN (Subquery subquery1162 > 216357808) THEN Subquery subquery1163 ELSE Subquery subquery1164 END AS bucket4#4, CASE WHEN (Subquery subquery1166 > 184483884) THEN Subquery subquery1167 ELSE Subquery subquery1168 END AS bucket5#5] -: :- Subquery subquery1150 -: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) -: : +- *(1) Project -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct -: :- Subquery subquery1151 -: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- *(1) Project [ss_ext_discount_amt#7] -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct -: :- Subquery subquery1152 -: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) -: : +- *(1) Project [ss_net_paid#8] -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct -: :- Subquery subquery1154 -: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) -: : +- *(1) Project -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct -: :- Subquery subquery1155 -: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- *(1) Project [ss_ext_discount_amt#7] -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct -: :- Subquery subquery1156 -: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) -: : +- *(1) Project [ss_net_paid#8] -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct -: :- Subquery subquery1158 -: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) -: : +- *(1) Project -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct -: :- Subquery subquery1159 -: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- *(1) Project [ss_ext_discount_amt#7] -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct -: :- Subquery subquery1160 -: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) -: : +- *(1) Project [ss_net_paid#8] -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct -: :- Subquery subquery1162 -: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) -: : +- *(1) Project -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct -: :- Subquery subquery1163 -: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- *(1) Project [ss_ext_discount_amt#7] -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct -: :- Subquery subquery1164 -: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) -: : +- *(1) Project [ss_net_paid#8] -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct -: :- Subquery subquery1166 -: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) -: : +- *(1) Project -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct -: :- Subquery subquery1167 -: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- *(1) Project [ss_ext_discount_amt#7] -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct -: +- Subquery subquery1168 -: +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) -: +- Exchange SinglePartition -: +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) -: +- *(1) Project [ss_net_paid#8] -: +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) -: +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct -+- *(1) Filter (isnotnull(r_reason_sk#9) && (r_reason_sk#9 = 1)) - +- *(1) FileScan parquet default.reason[r_reason_sk#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk), EqualTo(r_reason_sk,1)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/simplified.txt deleted file mode 100644 index f0bccc7ec..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/simplified.txt +++ /dev/null @@ -1,154 +0,0 @@ -WholeStageCodegen - Project - Subquery #1 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),count(1)] - InputAdapter - Exchange #1 - WholeStageCodegen - HashAggregate [count,count] [count,count] - Project - Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity] [ss_quantity] - Subquery #2 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] - InputAdapter - Exchange #2 - WholeStageCodegen - HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] - Project [ss_ext_discount_amt] - Filter [ss_quantity] - Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] - Subquery #3 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] - InputAdapter - Exchange #3 - WholeStageCodegen - HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] - Project [ss_net_paid] - Filter [ss_quantity] - Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] - Subquery #4 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),count(1)] - InputAdapter - Exchange #4 - WholeStageCodegen - HashAggregate [count,count] [count,count] - Project - Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity] [ss_quantity] - Subquery #5 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] - InputAdapter - Exchange #5 - WholeStageCodegen - HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] - Project [ss_ext_discount_amt] - Filter [ss_quantity] - Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] - Subquery #6 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] - InputAdapter - Exchange #6 - WholeStageCodegen - HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] - Project [ss_net_paid] - Filter [ss_quantity] - Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] - Subquery #7 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),count(1)] - InputAdapter - Exchange #7 - WholeStageCodegen - HashAggregate [count,count] [count,count] - Project - Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity] [ss_quantity] - Subquery #8 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] - InputAdapter - Exchange #8 - WholeStageCodegen - HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] - Project [ss_ext_discount_amt] - Filter [ss_quantity] - Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] - Subquery #9 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] - InputAdapter - Exchange #9 - WholeStageCodegen - HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] - Project [ss_net_paid] - Filter [ss_quantity] - Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] - Subquery #10 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),count(1)] - InputAdapter - Exchange #10 - WholeStageCodegen - HashAggregate [count,count] [count,count] - Project - Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity] [ss_quantity] - Subquery #11 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] - InputAdapter - Exchange #11 - WholeStageCodegen - HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] - Project [ss_ext_discount_amt] - Filter [ss_quantity] - Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] - Subquery #12 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] - InputAdapter - Exchange #12 - WholeStageCodegen - HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] - Project [ss_net_paid] - Filter [ss_quantity] - Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] - Subquery #13 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),count(1)] - InputAdapter - Exchange #13 - WholeStageCodegen - HashAggregate [count,count] [count,count] - Project - Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity] [ss_quantity] - Subquery #14 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] - InputAdapter - Exchange #14 - WholeStageCodegen - HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] - Project [ss_ext_discount_amt] - Filter [ss_quantity] - Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] - Subquery #15 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] - InputAdapter - Exchange #15 - WholeStageCodegen - HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] - Project [ss_net_paid] - Filter [ss_quantity] - Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] - Filter [r_reason_sk] - Scan parquet default.reason [r_reason_sk] [r_reason_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/explain.txt deleted file mode 100644 index 0d0c65f33..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/explain.txt +++ /dev/null @@ -1,47 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[am_pm_ratio#1 ASC NULLS FIRST], output=[am_pm_ratio#1]) -+- *(11) Project [CheckOverflow((promote_precision(cast(amc#2 as decimal(15,4))) / promote_precision(cast(pmc#3 as decimal(15,4)))), DecimalType(35,20)) AS am_pm_ratio#1] - +- BroadcastNestedLoopJoin BuildRight, Inner - :- *(5) HashAggregate(keys=[], functions=[count(1)]) - : +- Exchange SinglePartition - : +- *(4) HashAggregate(keys=[], functions=[partial_count(1)]) - : +- *(4) Project - : +- *(4) BroadcastHashJoin [ws_web_page_sk#4], [wp_web_page_sk#5], Inner, BuildRight - : :- *(4) Project [ws_web_page_sk#4] - : : +- *(4) BroadcastHashJoin [ws_sold_time_sk#6], [t_time_sk#7], Inner, BuildRight - : : :- *(4) Project [ws_sold_time_sk#6, ws_web_page_sk#4] - : : : +- *(4) BroadcastHashJoin [ws_ship_hdemo_sk#8], [hd_demo_sk#9], Inner, BuildRight - : : : :- *(4) Project [ws_sold_time_sk#6, ws_ship_hdemo_sk#8, ws_web_page_sk#4] - : : : : +- *(4) Filter ((isnotnull(ws_ship_hdemo_sk#8) && isnotnull(ws_sold_time_sk#6)) && isnotnull(ws_web_page_sk#4)) - : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_time_sk#6,ws_ship_hdemo_sk#8,ws_web_page_sk#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [hd_demo_sk#9] - : : : +- *(1) Filter ((isnotnull(hd_dep_count#10) && (hd_dep_count#10 = 6)) && isnotnull(hd_demo_sk#9)) - : : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#9,hd_dep_count#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,6), IsNotNull(hd_demo_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [t_time_sk#7] - : : +- *(2) Filter (((isnotnull(t_hour#11) && (t_hour#11 >= 8)) && (t_hour#11 <= 9)) && isnotnull(t_time_sk#7)) - : : +- *(2) FileScan parquet default.time_dim[t_time_sk#7,t_hour#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,8), LessThanOrEqual(t_hour,9), IsNotNull(t_time_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [wp_web_page_sk#5] - : +- *(3) Filter (((isnotnull(wp_char_count#12) && (wp_char_count#12 >= 5000)) && (wp_char_count#12 <= 5200)) && isnotnull(wp_web_page_sk#5)) - : +- *(3) FileScan parquet default.web_page[wp_web_page_sk#5,wp_char_count#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_page], PartitionFilters: [], PushedFilters: [IsNotNull(wp_char_count), GreaterThanOrEqual(wp_char_count,5000), LessThanOrEqual(wp_char_count,..., ReadSchema: struct - +- BroadcastExchange IdentityBroadcastMode - +- *(10) HashAggregate(keys=[], functions=[count(1)]) - +- Exchange SinglePartition - +- *(9) HashAggregate(keys=[], functions=[partial_count(1)]) - +- *(9) Project - +- *(9) BroadcastHashJoin [ws_web_page_sk#4], [wp_web_page_sk#5], Inner, BuildRight - :- *(9) Project [ws_web_page_sk#4] - : +- *(9) BroadcastHashJoin [ws_sold_time_sk#6], [t_time_sk#7], Inner, BuildRight - : :- *(9) Project [ws_sold_time_sk#6, ws_web_page_sk#4] - : : +- *(9) BroadcastHashJoin [ws_ship_hdemo_sk#8], [hd_demo_sk#9], Inner, BuildRight - : : :- *(9) Project [ws_sold_time_sk#6, ws_ship_hdemo_sk#8, ws_web_page_sk#4] - : : : +- *(9) Filter ((isnotnull(ws_ship_hdemo_sk#8) && isnotnull(ws_sold_time_sk#6)) && isnotnull(ws_web_page_sk#4)) - : : : +- *(9) FileScan parquet default.web_sales[ws_sold_time_sk#6,ws_ship_hdemo_sk#8,ws_web_page_sk#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct - : : +- ReusedExchange [hd_demo_sk#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(7) Project [t_time_sk#7] - : +- *(7) Filter (((isnotnull(t_hour#11) && (t_hour#11 >= 19)) && (t_hour#11 <= 20)) && isnotnull(t_time_sk#7)) - : +- *(7) FileScan parquet default.time_dim[t_time_sk#7,t_hour#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,19), LessThanOrEqual(t_hour,20), IsNotNull(t_time_sk)], ReadSchema: struct - +- ReusedExchange [wp_web_page_sk#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/simplified.txt deleted file mode 100644 index 1d0dc7ad2..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/simplified.txt +++ /dev/null @@ -1,64 +0,0 @@ -TakeOrderedAndProject [am_pm_ratio] - WholeStageCodegen - Project [amc,pmc] - InputAdapter - BroadcastNestedLoopJoin - WholeStageCodegen - HashAggregate [count,count(1)] [amc,count,count(1)] - InputAdapter - Exchange #1 - WholeStageCodegen - HashAggregate [count,count] [count,count] - Project - BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] - Project [ws_web_page_sk] - BroadcastHashJoin [t_time_sk,ws_sold_time_sk] - Project [ws_sold_time_sk,ws_web_page_sk] - BroadcastHashJoin [hd_demo_sk,ws_ship_hdemo_sk] - Project [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - Scan parquet default.web_sales [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [hd_demo_sk] - Filter [hd_demo_sk,hd_dep_count] - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] [hd_demo_sk,hd_dep_count] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [t_time_sk] - Filter [t_hour,t_time_sk] - Scan parquet default.time_dim [t_hour,t_time_sk] [t_hour,t_time_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [wp_web_page_sk] - Filter [wp_char_count,wp_web_page_sk] - Scan parquet default.web_page [wp_char_count,wp_web_page_sk] [wp_char_count,wp_web_page_sk] - BroadcastExchange #5 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),pmc] - InputAdapter - Exchange #6 - WholeStageCodegen - HashAggregate [count,count] [count,count] - Project - BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] - Project [ws_web_page_sk] - BroadcastHashJoin [t_time_sk,ws_sold_time_sk] - Project [ws_sold_time_sk,ws_web_page_sk] - BroadcastHashJoin [hd_demo_sk,ws_ship_hdemo_sk] - Project [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - Scan parquet default.web_sales [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - InputAdapter - ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [t_time_sk] - Filter [t_hour,t_time_sk] - Scan parquet default.time_dim [t_hour,t_time_sk] [t_hour,t_time_sk] - InputAdapter - ReusedExchange [wp_web_page_sk] [wp_web_page_sk] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/explain.txt deleted file mode 100644 index fd5605d2e..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/explain.txt +++ /dev/null @@ -1,45 +0,0 @@ -== Physical Plan == -*(9) Sort [Returns_Loss#1 DESC NULLS LAST], true, 0 -+- Exchange rangepartitioning(Returns_Loss#1 DESC NULLS LAST, 5) - +- *(8) HashAggregate(keys=[cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6], functions=[sum(UnscaledValue(cr_net_loss#7))]) - +- Exchange hashpartitioning(cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6, 5) - +- *(7) HashAggregate(keys=[cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6], functions=[partial_sum(UnscaledValue(cr_net_loss#7))]) - +- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#5, cd_education_status#6] - +- *(7) BroadcastHashJoin [c_current_hdemo_sk#8], [hd_demo_sk#9], Inner, BuildRight - :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#8, cd_marital_status#5, cd_education_status#6] - : +- *(7) BroadcastHashJoin [c_current_cdemo_sk#10], [cd_demo_sk#11], Inner, BuildRight - : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#10, c_current_hdemo_sk#8] - : : +- *(7) BroadcastHashJoin [c_current_addr_sk#12], [ca_address_sk#13], Inner, BuildRight - : : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#10, c_current_hdemo_sk#8, c_current_addr_sk#12] - : : : +- *(7) BroadcastHashJoin [cr_returning_customer_sk#14], [c_customer_sk#15], Inner, BuildRight - : : : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#14, cr_net_loss#7] - : : : : +- *(7) BroadcastHashJoin [cr_returned_date_sk#16], [d_date_sk#17], Inner, BuildRight - : : : : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returned_date_sk#16, cr_returning_customer_sk#14, cr_net_loss#7] - : : : : : +- *(7) BroadcastHashJoin [cc_call_center_sk#18], [cr_call_center_sk#19], Inner, BuildRight - : : : : : :- *(7) Project [cc_call_center_sk#18, cc_call_center_id#2, cc_name#3, cc_manager#4] - : : : : : : +- *(7) Filter isnotnull(cc_call_center_sk#18) - : : : : : : +- *(7) FileScan parquet default.call_center[cc_call_center_sk#18,cc_call_center_id#2,cc_name#3,cc_manager#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) - : : : : : +- *(1) Project [cr_returned_date_sk#16, cr_returning_customer_sk#14, cr_call_center_sk#19, cr_net_loss#7] - : : : : : +- *(1) Filter ((isnotnull(cr_call_center_sk#19) && isnotnull(cr_returned_date_sk#16)) && isnotnull(cr_returning_customer_sk#14)) - : : : : : +- *(1) FileScan parquet default.catalog_returns[cr_returned_date_sk#16,cr_returning_customer_sk#14,cr_call_center_sk#19,cr_net_loss#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_customer_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(3) Project [c_customer_sk#15, c_current_cdemo_sk#10, c_current_hdemo_sk#8, c_current_addr_sk#12] - : : : +- *(3) Filter (((isnotnull(c_customer_sk#15) && isnotnull(c_current_addr_sk#12)) && isnotnull(c_current_cdemo_sk#10)) && isnotnull(c_current_hdemo_sk#8)) - : : : +- *(3) FileScan parquet default.customer[c_customer_sk#15,c_current_cdemo_sk#10,c_current_hdemo_sk#8,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(4) Project [ca_address_sk#13] - : : +- *(4) Filter ((isnotnull(ca_gmt_offset#22) && (ca_gmt_offset#22 = -7.00)) && isnotnull(ca_address_sk#13)) - : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#13,ca_gmt_offset#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(5) Project [cd_demo_sk#11, cd_marital_status#5, cd_education_status#6] - : +- *(5) Filter ((((cd_marital_status#5 = M) && (cd_education_status#6 = Unknown)) || ((cd_marital_status#5 = W) && (cd_education_status#6 = Advanced Degree))) && isnotnull(cd_demo_sk#11)) - : +- *(5) FileScan parquet default.customer_demographics[cd_demo_sk#11,cd_marital_status#5,cd_education_status#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(6) Project [hd_demo_sk#9] - +- *(6) Filter ((isnotnull(hd_buy_potential#23) && StartsWith(hd_buy_potential#23, Unknown)) && isnotnull(hd_demo_sk#9)) - +- *(6) FileScan parquet default.household_demographics[hd_demo_sk#9,hd_buy_potential#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/simplified.txt deleted file mode 100644 index 1030b9095..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/simplified.txt +++ /dev/null @@ -1,61 +0,0 @@ -WholeStageCodegen - Sort [Returns_Loss] - InputAdapter - Exchange [Returns_Loss] #1 - WholeStageCodegen - HashAggregate [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,sum,sum(UnscaledValue(cr_net_loss))] [Call_Center,Call_Center_Name,Manager,Returns_Loss,sum,sum(UnscaledValue(cr_net_loss))] - InputAdapter - Exchange [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status] #2 - WholeStageCodegen - HashAggregate [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,cr_net_loss,sum,sum] [sum,sum] - Project [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,cr_net_loss] - BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [c_current_hdemo_sk,cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,cr_net_loss] - BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] - Project [c_current_cdemo_sk,c_current_hdemo_sk,cc_call_center_id,cc_manager,cc_name,cr_net_loss] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cc_call_center_id,cc_manager,cc_name,cr_net_loss] - BroadcastHashJoin [c_customer_sk,cr_returning_customer_sk] - Project [cc_call_center_id,cc_manager,cc_name,cr_net_loss,cr_returning_customer_sk] - BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Project [cc_call_center_id,cc_manager,cc_name,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] - BroadcastHashJoin [cc_call_center_sk,cr_call_center_sk] - Project [cc_call_center_id,cc_call_center_sk,cc_manager,cc_name] - Filter [cc_call_center_sk] - Scan parquet default.call_center [cc_call_center_id,cc_call_center_sk,cc_manager,cc_name] [cc_call_center_id,cc_call_center_sk,cc_manager,cc_name] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [cr_call_center_sk,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] - Filter [cr_call_center_sk,cr_returned_date_sk,cr_returning_customer_sk] - Scan parquet default.catalog_returns [cr_call_center_sk,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] [cr_call_center_sk,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] - Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] - Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [ca_address_sk] - Filter [ca_address_sk,ca_gmt_offset] - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [cd_demo_sk,cd_education_status,cd_marital_status] - Filter [cd_demo_sk,cd_education_status,cd_marital_status] - Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [hd_demo_sk] - Filter [hd_buy_potential,hd_demo_sk] - Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk] [hd_buy_potential,hd_demo_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/explain.txt deleted file mode 100644 index c27934561..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/explain.txt +++ /dev/null @@ -1,33 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[Excess Discount Amount #1 ASC NULLS FIRST], output=[Excess Discount Amount #1]) -+- *(7) HashAggregate(keys=[], functions=[sum(UnscaledValue(ws_ext_discount_amt#2))]) - +- Exchange SinglePartition - +- *(6) HashAggregate(keys=[], functions=[partial_sum(UnscaledValue(ws_ext_discount_amt#2))]) - +- *(6) Project [ws_ext_discount_amt#2] - +- *(6) BroadcastHashJoin [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight - :- *(6) Project [ws_sold_date_sk#3, ws_ext_discount_amt#2] - : +- *(6) BroadcastHashJoin [i_item_sk#5], [ws_item_sk#6#7], Inner, BuildRight, (cast(ws_ext_discount_amt#2 as decimal(14,7)) > (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#8) - : :- *(6) Project [ws_sold_date_sk#3, ws_ext_discount_amt#2, i_item_sk#5] - : : +- *(6) BroadcastHashJoin [ws_item_sk#6], [i_item_sk#5], Inner, BuildRight - : : :- *(6) Project [ws_sold_date_sk#3, ws_item_sk#6, ws_ext_discount_amt#2] - : : : +- *(6) Filter ((isnotnull(ws_item_sk#6) && isnotnull(ws_ext_discount_amt#2)) && isnotnull(ws_sold_date_sk#3)) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#3,ws_item_sk#6,ws_ext_discount_amt#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_ext_discount_amt), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [i_item_sk#5] - : : +- *(1) Filter ((isnotnull(i_manufact_id#9) && (i_manufact_id#9 = 350)) && isnotnull(i_item_sk#5)) - : : +- *(1) FileScan parquet default.item[i_item_sk#5,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,350), IsNotNull(i_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : +- *(4) Filter isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#8) - : +- *(4) HashAggregate(keys=[ws_item_sk#6], functions=[avg(UnscaledValue(ws_ext_discount_amt#2))]) - : +- Exchange hashpartitioning(ws_item_sk#6, 5) - : +- *(3) HashAggregate(keys=[ws_item_sk#6], functions=[partial_avg(UnscaledValue(ws_ext_discount_amt#2))]) - : +- *(3) Project [ws_item_sk#6, ws_ext_discount_amt#2] - : +- *(3) BroadcastHashJoin [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight - : :- *(3) Project [ws_sold_date_sk#3, ws_item_sk#6, ws_ext_discount_amt#2] - : : +- *(3) Filter (isnotnull(ws_sold_date_sk#3) && isnotnull(ws_item_sk#6)) - : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#3,ws_item_sk#6,ws_ext_discount_amt#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#4] - : +- *(2) Filter (((isnotnull(d_date#10) && (cast(d_date#10 as string) >= 2000-01-27)) && (d_date#10 <= 11073)) && isnotnull(d_date_sk#4)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#4,d_date#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)], ReadSchema: struct - +- ReusedExchange [d_date_sk#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/simplified.txt deleted file mode 100644 index cab3234b8..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/simplified.txt +++ /dev/null @@ -1,44 +0,0 @@ -TakeOrderedAndProject [Excess Discount Amount ] - WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(ws_ext_discount_amt))] [Excess Discount Amount ,sum,sum(UnscaledValue(ws_ext_discount_amt))] - InputAdapter - Exchange #1 - WholeStageCodegen - HashAggregate [sum,sum,ws_ext_discount_amt] [sum,sum] - Project [ws_ext_discount_amt] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_ext_discount_amt,ws_sold_date_sk] - BroadcastHashJoin [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),i_item_sk,ws_ext_discount_amt,ws_item_sk] - Project [i_item_sk,ws_ext_discount_amt,ws_sold_date_sk] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] - Filter [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [i_item_sk] - Filter [i_item_sk,i_manufact_id] - Scan parquet default.item [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))] - HashAggregate [avg(UnscaledValue(ws_ext_discount_amt)),count,sum,ws_item_sk] [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),avg(UnscaledValue(ws_ext_discount_amt)),count,sum,ws_item_sk] - InputAdapter - Exchange [ws_item_sk] #4 - WholeStageCodegen - HashAggregate [count,count,sum,sum,ws_ext_discount_amt,ws_item_sk] [count,count,sum,sum] - Project [ws_ext_discount_amt,ws_item_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/explain.txt deleted file mode 100644 index 5cbc9a6c1..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/explain.txt +++ /dev/null @@ -1,18 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[sumsales#1 ASC NULLS FIRST,ss_customer_sk#2 ASC NULLS FIRST], output=[ss_customer_sk#2,sumsales#1]) -+- *(4) HashAggregate(keys=[ss_customer_sk#2], functions=[sum(act_sales#3)]) - +- Exchange hashpartitioning(ss_customer_sk#2, 5) - +- *(3) HashAggregate(keys=[ss_customer_sk#2], functions=[partial_sum(act_sales#3)]) - +- *(3) Project [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#4) THEN CheckOverflow((promote_precision(cast(cast((ss_quantity#5 - sr_return_quantity#4) as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#6 as decimal(12,2)))), DecimalType(18,2)) ELSE CheckOverflow((promote_precision(cast(cast(ss_quantity#5 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#6 as decimal(12,2)))), DecimalType(18,2)) END AS act_sales#3] - +- *(3) BroadcastHashJoin [sr_reason_sk#7], [cast(r_reason_sk#8 as bigint)], Inner, BuildRight - :- *(3) Project [ss_customer_sk#2, ss_quantity#5, ss_sales_price#6, sr_reason_sk#7, sr_return_quantity#4] - : +- *(3) BroadcastHashJoin [cast(ss_item_sk#9 as bigint), cast(ss_ticket_number#10 as bigint)], [sr_item_sk#11, sr_ticket_number#12], Inner, BuildRight - : :- *(3) FileScan parquet default.store_sales[ss_item_sk#9,ss_customer_sk#2,ss_ticket_number#10,ss_quantity#5,ss_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(2) Project [r_reason_sk#8] - +- *(2) Filter ((isnotnull(r_reason_desc#13) && (r_reason_desc#13 = reason 28)) && isnotnull(r_reason_sk#8)) - +- *(2) FileScan parquet default.reason[r_reason_sk#8,r_reason_desc#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_desc), EqualTo(r_reason_desc,reason 28), IsNotNull(r_reason_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/simplified.txt deleted file mode 100644 index 6b2505ff0..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/simplified.txt +++ /dev/null @@ -1,24 +0,0 @@ -TakeOrderedAndProject [ss_customer_sk,sumsales] - WholeStageCodegen - HashAggregate [ss_customer_sk,sum,sum(act_sales)] [sum,sum(act_sales),sumsales] - InputAdapter - Exchange [ss_customer_sk] #1 - WholeStageCodegen - HashAggregate [act_sales,ss_customer_sk,sum,sum] [sum,sum] - Project [sr_return_quantity,ss_customer_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [r_reason_sk,sr_reason_sk] - Project [sr_reason_sk,sr_return_quantity,ss_customer_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_ticket_number] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [sr_item_sk,sr_reason_sk,sr_return_quantity,sr_ticket_number] - Filter [sr_item_sk,sr_reason_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_reason_sk,sr_return_quantity,sr_ticket_number] [sr_item_sk,sr_reason_sk,sr_return_quantity,sr_ticket_number] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [r_reason_sk] - Filter [r_reason_desc,r_reason_sk] - Scan parquet default.reason [r_reason_desc,r_reason_sk] [r_reason_desc,r_reason_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/explain.txt deleted file mode 100644 index 2e6006488..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/explain.txt +++ /dev/null @@ -1,37 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], output=[order count #1,total shipping cost #2,total net profit #3]) -+- *(8) HashAggregate(keys=[], functions=[sum(UnscaledValue(ws_ext_ship_cost#4)), sum(UnscaledValue(ws_net_profit#5)), count(distinct ws_order_number#6)]) - +- Exchange SinglePartition - +- *(7) HashAggregate(keys=[], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5)), partial_count(distinct ws_order_number#6)]) - +- *(7) HashAggregate(keys=[ws_order_number#6], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5))]) - +- Exchange hashpartitioning(ws_order_number#6, 5) - +- *(6) HashAggregate(keys=[ws_order_number#6], functions=[partial_sum(UnscaledValue(ws_ext_ship_cost#4)), partial_sum(UnscaledValue(ws_net_profit#5))]) - +- *(6) Project [ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] - +- *(6) BroadcastHashJoin [ws_web_site_sk#7], [web_site_sk#8], Inner, BuildRight - :- *(6) Project [ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] - : +- *(6) BroadcastHashJoin [ws_ship_addr_sk#9], [ca_address_sk#10], Inner, BuildRight - : :- *(6) Project [ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] - : : +- *(6) BroadcastHashJoin [ws_ship_date_sk#11], [d_date_sk#12], Inner, BuildRight - : : :- *(6) BroadcastHashJoin [cast(ws_order_number#6 as bigint)], [wr_order_number#13], LeftAnti, BuildRight - : : : :- *(6) Project [ws_ship_date_sk#11, ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] - : : : : +- *(6) BroadcastHashJoin [ws_order_number#6], [ws_order_number#6#14], LeftSemi, BuildRight, NOT (ws_warehouse_sk#15 = ws_warehouse_sk#15#16) - : : : : :- *(6) Project [ws_ship_date_sk#11, ws_ship_addr_sk#9, ws_web_site_sk#7, ws_warehouse_sk#15, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] - : : : : : +- *(6) Filter ((isnotnull(ws_ship_date_sk#11) && isnotnull(ws_ship_addr_sk#9)) && isnotnull(ws_web_site_sk#7)) - : : : : : +- *(6) FileScan parquet default.web_sales[ws_ship_date_sk#11,ws_ship_addr_sk#9,ws_web_site_sk#7,ws_warehouse_sk#15,ws_order_number#6,ws_ext_ship_cost#4,ws_net_profit#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])) - : : : +- *(2) FileScan parquet default.web_returns[wr_order_number#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [d_date_sk#12] - : : +- *(3) Filter (((isnotnull(d_date#17) && (cast(d_date#17 as string) >= 1999-02-01)) && (d_date#17 <= 10683)) && isnotnull(d_date_sk#12)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [ca_address_sk#10] - : +- *(4) Filter ((isnotnull(ca_state#18) && (ca_state#18 = IL)) && isnotnull(ca_address_sk#10)) - : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(5) Project [web_site_sk#8] - +- *(5) Filter ((isnotnull(web_company_name#19) && (web_company_name#19 = pri)) && isnotnull(web_site_sk#8)) - +- *(5) FileScan parquet default.web_site[web_site_sk#8,web_company_name#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/simplified.txt deleted file mode 100644 index c7a5e5d90..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/simplified.txt +++ /dev/null @@ -1,51 +0,0 @@ -TakeOrderedAndProject [order count ,total net profit ,total shipping cost ] - WholeStageCodegen - HashAggregate [count,count(ws_order_number),sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] [count,count(ws_order_number),order count ,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),total net profit ,total shipping cost ] - InputAdapter - Exchange #1 - WholeStageCodegen - HashAggregate [count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_order_number] [count,count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] - HashAggregate [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] - InputAdapter - Exchange [ws_order_number] #2 - WholeStageCodegen - HashAggregate [sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_ext_ship_cost,ws_net_profit,ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] - Project [ws_ext_ship_cost,ws_net_profit,ws_order_number] - BroadcastHashJoin [web_site_sk,ws_web_site_sk] - Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_web_site_sk] - BroadcastHashJoin [ca_address_sk,ws_ship_addr_sk] - Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_web_site_sk] - BroadcastHashJoin [d_date_sk,ws_ship_date_sk] - BroadcastHashJoin [wr_order_number,ws_order_number] - Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] - BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] - Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_warehouse_sk,ws_web_site_sk] - Filter [ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] - Scan parquet default.web_sales [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_warehouse_sk,ws_web_site_sk] [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_warehouse_sk,ws_web_site_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [ws_order_number,ws_warehouse_sk] - Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Scan parquet default.web_returns [wr_order_number] [wr_order_number] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [ca_address_sk] - Filter [ca_address_sk,ca_state] - Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [web_site_sk] - Filter [web_company_name,web_site_sk] - Scan parquet default.web_site [web_company_name,web_site_sk] [web_company_name,web_site_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/explain.txt deleted file mode 100644 index 2d497b75e..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/explain.txt +++ /dev/null @@ -1,54 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], output=[order count #1,total shipping cost #2,total net profit #3]) -+- *(11) HashAggregate(keys=[], functions=[sum(UnscaledValue(ws_ext_ship_cost#4)), sum(UnscaledValue(ws_net_profit#5)), count(distinct ws_order_number#6)]) - +- Exchange SinglePartition - +- *(10) HashAggregate(keys=[], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5)), partial_count(distinct ws_order_number#6)]) - +- *(10) HashAggregate(keys=[ws_order_number#6], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5))]) - +- Exchange hashpartitioning(ws_order_number#6, 5) - +- *(9) HashAggregate(keys=[ws_order_number#6], functions=[partial_sum(UnscaledValue(ws_ext_ship_cost#4)), partial_sum(UnscaledValue(ws_net_profit#5))]) - +- *(9) Project [ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] - +- *(9) BroadcastHashJoin [ws_web_site_sk#7], [web_site_sk#8], Inner, BuildRight - :- *(9) Project [ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] - : +- *(9) BroadcastHashJoin [ws_ship_addr_sk#9], [ca_address_sk#10], Inner, BuildRight - : :- *(9) Project [ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] - : : +- *(9) BroadcastHashJoin [ws_ship_date_sk#11], [d_date_sk#12], Inner, BuildRight - : : :- *(9) BroadcastHashJoin [cast(ws_order_number#6 as bigint)], [wr_order_number#13], LeftSemi, BuildRight - : : : :- *(9) BroadcastHashJoin [ws_order_number#6], [ws_order_number#6#14], LeftSemi, BuildRight - : : : : :- *(9) Project [ws_ship_date_sk#11, ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] - : : : : : +- *(9) Filter ((isnotnull(ws_ship_date_sk#11) && isnotnull(ws_ship_addr_sk#9)) && isnotnull(ws_web_site_sk#7)) - : : : : : +- *(9) FileScan parquet default.web_sales[ws_ship_date_sk#11,ws_ship_addr_sk#9,ws_web_site_sk#7,ws_order_number#6,ws_ext_ship_cost#4,ws_net_profit#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : : : +- *(1) Project [ws_warehouse_sk#17, ws_order_number#15] - : : : : +- *(1) Filter (isnotnull(ws_order_number#15) && isnotnull(ws_warehouse_sk#17)) - : : : : +- *(1) FileScan parquet default.web_sales[ws_warehouse_sk#17,ws_order_number#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])) - : : : +- *(5) Project [wr_order_number#13] - : : : +- *(5) BroadcastHashJoin [wr_order_number#13], [cast(ws_order_number#6 as bigint)], Inner, BuildRight - : : : :- *(5) Project [wr_order_number#13] - : : : : +- *(5) Filter isnotnull(wr_order_number#13) - : : : : +- *(5) FileScan parquet default.web_returns[wr_order_number#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_order_number)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(4) Project [ws_order_number#6] - : : : +- *(4) BroadcastHashJoin [ws_order_number#6], [ws_order_number#15], Inner, BuildRight, NOT (ws_warehouse_sk#16 = ws_warehouse_sk#17) - : : : :- *(4) Project [ws_warehouse_sk#16, ws_order_number#6] - : : : : +- *(4) Filter (isnotnull(ws_order_number#6) && isnotnull(ws_warehouse_sk#16)) - : : : : +- *(4) FileScan parquet default.web_sales[ws_warehouse_sk#16,ws_order_number#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)], ReadSchema: struct - : : : +- ReusedExchange [ws_warehouse_sk#17, ws_order_number#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(6) Project [d_date_sk#12] - : : +- *(6) Filter (((isnotnull(d_date#18) && (cast(d_date#18 as string) >= 1999-02-01)) && (d_date#18 <= 10683)) && isnotnull(d_date_sk#12)) - : : +- *(6) FileScan parquet default.date_dim[d_date_sk#12,d_date#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(7) Project [ca_address_sk#10] - : +- *(7) Filter ((isnotnull(ca_state#19) && (ca_state#19 = IL)) && isnotnull(ca_address_sk#10)) - : +- *(7) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(8) Project [web_site_sk#8] - +- *(8) Filter ((isnotnull(web_company_name#20) && (web_company_name#20 = pri)) && isnotnull(web_site_sk#8)) - +- *(8) FileScan parquet default.web_site[web_site_sk#8,web_company_name#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/simplified.txt deleted file mode 100644 index 343982d5a..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/simplified.txt +++ /dev/null @@ -1,73 +0,0 @@ -TakeOrderedAndProject [order count ,total net profit ,total shipping cost ] - WholeStageCodegen - HashAggregate [count,count(ws_order_number),sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] [count,count(ws_order_number),order count ,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),total net profit ,total shipping cost ] - InputAdapter - Exchange #1 - WholeStageCodegen - HashAggregate [count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_order_number] [count,count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] - HashAggregate [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] - InputAdapter - Exchange [ws_order_number] #2 - WholeStageCodegen - HashAggregate [sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_ext_ship_cost,ws_net_profit,ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] - Project [ws_ext_ship_cost,ws_net_profit,ws_order_number] - BroadcastHashJoin [web_site_sk,ws_web_site_sk] - Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_web_site_sk] - BroadcastHashJoin [ca_address_sk,ws_ship_addr_sk] - Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_web_site_sk] - BroadcastHashJoin [d_date_sk,ws_ship_date_sk] - BroadcastHashJoin [wr_order_number,ws_order_number] - BroadcastHashJoin [ws_order_number,ws_order_number] - Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] - Filter [ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] - Scan parquet default.web_sales [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [ws_order_number] - BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] - Project [ws_order_number,ws_warehouse_sk] - Filter [ws_order_number,ws_warehouse_sk] - Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [ws_order_number,ws_warehouse_sk] - Filter [ws_order_number,ws_warehouse_sk] - Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [wr_order_number] - BroadcastHashJoin [wr_order_number,ws_order_number] - Project [wr_order_number] - Filter [wr_order_number] - Scan parquet default.web_returns [wr_order_number] [wr_order_number] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [ws_order_number] - BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] - Project [ws_order_number,ws_warehouse_sk] - Filter [ws_order_number,ws_warehouse_sk] - Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] - InputAdapter - ReusedExchange [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] #4 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [ca_address_sk] - Filter [ca_address_sk,ca_state] - Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] - InputAdapter - BroadcastExchange #9 - WholeStageCodegen - Project [web_site_sk] - Filter [web_company_name,web_site_sk] - Scan parquet default.web_site [web_company_name,web_site_sk] [web_company_name,web_site_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/explain.txt deleted file mode 100644 index c66e7e331..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/explain.txt +++ /dev/null @@ -1,26 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[count(1)#1 ASC NULLS FIRST], output=[count(1)#1]) -+- *(5) HashAggregate(keys=[], functions=[count(1)]) - +- Exchange SinglePartition - +- *(4) HashAggregate(keys=[], functions=[partial_count(1)]) - +- *(4) Project - +- *(4) BroadcastHashJoin [ss_store_sk#2], [s_store_sk#3], Inner, BuildRight - :- *(4) Project [ss_store_sk#2] - : +- *(4) BroadcastHashJoin [ss_sold_time_sk#4], [t_time_sk#5], Inner, BuildRight - : :- *(4) Project [ss_sold_time_sk#4, ss_store_sk#2] - : : +- *(4) BroadcastHashJoin [ss_hdemo_sk#6], [hd_demo_sk#7], Inner, BuildRight - : : :- *(4) Project [ss_sold_time_sk#4, ss_hdemo_sk#6, ss_store_sk#2] - : : : +- *(4) Filter ((isnotnull(ss_hdemo_sk#6) && isnotnull(ss_sold_time_sk#4)) && isnotnull(ss_store_sk#2)) - : : : +- *(4) FileScan parquet default.store_sales[ss_sold_time_sk#4,ss_hdemo_sk#6,ss_store_sk#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [hd_demo_sk#7] - : : +- *(1) Filter ((isnotnull(hd_dep_count#8) && (hd_dep_count#8 = 7)) && isnotnull(hd_demo_sk#7)) - : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#7,hd_dep_count#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,7), IsNotNull(hd_demo_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [t_time_sk#5] - : +- *(2) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 20)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#5)) - : +- *(2) FileScan parquet default.time_dim[t_time_sk#5,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,20), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [s_store_sk#3] - +- *(3) Filter ((isnotnull(s_store_name#11) && (s_store_name#11 = ese)) && isnotnull(s_store_sk#3)) - +- *(3) FileScan parquet default.store[s_store_sk#3,s_store_name#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/simplified.txt deleted file mode 100644 index 7f1a66829..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/simplified.txt +++ /dev/null @@ -1,34 +0,0 @@ -TakeOrderedAndProject [count(1)] - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),count(1)] - InputAdapter - Exchange #1 - WholeStageCodegen - HashAggregate [count,count] [count,count] - Project - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_store_sk] - BroadcastHashJoin [ss_sold_time_sk,t_time_sk] - Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [hd_demo_sk] - Filter [hd_demo_sk,hd_dep_count] - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] [hd_demo_sk,hd_dep_count] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [t_time_sk] - Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [s_store_sk] - Filter [s_store_name,s_store_sk] - Scan parquet default.store [s_store_name,s_store_sk] [s_store_name,s_store_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/explain.txt deleted file mode 100644 index 0d528d336..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/explain.txt +++ /dev/null @@ -1,32 +0,0 @@ -== Physical Plan == -CollectLimit 100 -+- *(10) HashAggregate(keys=[], functions=[sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint))]) - +- Exchange SinglePartition - +- *(9) HashAggregate(keys=[], functions=[partial_sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint))]) - +- *(9) Project [customer_sk#1, customer_sk#2] - +- SortMergeJoin [customer_sk#1, item_sk#3], [customer_sk#2, item_sk#4], FullOuter - :- *(4) Sort [customer_sk#1 ASC NULLS FIRST, item_sk#3 ASC NULLS FIRST], false, 0 - : +- Exchange hashpartitioning(customer_sk#1, item_sk#3, 5) - : +- *(3) HashAggregate(keys=[ss_customer_sk#5, ss_item_sk#6], functions=[]) - : +- Exchange hashpartitioning(ss_customer_sk#5, ss_item_sk#6, 5) - : +- *(2) HashAggregate(keys=[ss_customer_sk#5, ss_item_sk#6], functions=[]) - : +- *(2) Project [ss_item_sk#6, ss_customer_sk#5] - : +- *(2) BroadcastHashJoin [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight - : :- *(2) Project [ss_sold_date_sk#7, ss_item_sk#6, ss_customer_sk#5] - : : +- *(2) Filter isnotnull(ss_sold_date_sk#7) - : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#7,ss_item_sk#6,ss_customer_sk#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [d_date_sk#8] - : +- *(1) Filter (((isnotnull(d_month_seq#9) && (d_month_seq#9 >= 1200)) && (d_month_seq#9 <= 1211)) && isnotnull(d_date_sk#8)) - : +- *(1) FileScan parquet default.date_dim[d_date_sk#8,d_month_seq#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct - +- *(8) Sort [customer_sk#2 ASC NULLS FIRST, item_sk#4 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(customer_sk#2, item_sk#4, 5) - +- *(7) HashAggregate(keys=[cs_bill_customer_sk#10, cs_item_sk#11], functions=[]) - +- Exchange hashpartitioning(cs_bill_customer_sk#10, cs_item_sk#11, 5) - +- *(6) HashAggregate(keys=[cs_bill_customer_sk#10, cs_item_sk#11], functions=[]) - +- *(6) Project [cs_bill_customer_sk#10, cs_item_sk#11] - +- *(6) BroadcastHashJoin [cs_sold_date_sk#12], [d_date_sk#8], Inner, BuildRight - :- *(6) Project [cs_sold_date_sk#12, cs_bill_customer_sk#10, cs_item_sk#11] - : +- *(6) Filter isnotnull(cs_sold_date_sk#12) - : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_bill_customer_sk#10,cs_item_sk#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - +- ReusedExchange [d_date_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/simplified.txt deleted file mode 100644 index 40d75add7..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/simplified.txt +++ /dev/null @@ -1,48 +0,0 @@ -CollectLimit - WholeStageCodegen - HashAggregate [sum,sum,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] [catalog_only,store_and_catalog,store_only,sum,sum,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] - InputAdapter - Exchange #1 - WholeStageCodegen - HashAggregate [customer_sk,customer_sk,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Project [customer_sk,customer_sk] - InputAdapter - SortMergeJoin [customer_sk,customer_sk,item_sk,item_sk] - WholeStageCodegen - Sort [customer_sk,item_sk] - InputAdapter - Exchange [customer_sk,item_sk] #2 - WholeStageCodegen - HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [ss_customer_sk,ss_item_sk] #3 - WholeStageCodegen - HashAggregate [ss_customer_sk,ss_item_sk] - Project [ss_customer_sk,ss_item_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_customer_sk,ss_item_sk,ss_sold_date_sk] - Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk] [ss_customer_sk,ss_item_sk,ss_sold_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] - WholeStageCodegen - Sort [customer_sk,item_sk] - InputAdapter - Exchange [customer_sk,item_sk] #5 - WholeStageCodegen - HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk] #6 - WholeStageCodegen - HashAggregate [cs_bill_customer_sk,cs_item_sk] - Project [cs_bill_customer_sk,cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/explain.txt deleted file mode 100644 index 2ffc242b5..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/explain.txt +++ /dev/null @@ -1,26 +0,0 @@ -== Physical Plan == -*(7) Project [i_item_desc#1, i_category#2, i_class#3, i_current_price#4, itemrevenue#5, revenueratio#6] -+- *(7) Sort [i_category#2 ASC NULLS FIRST, i_class#3 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#1 ASC NULLS FIRST, revenueratio#6 ASC NULLS FIRST], true, 0 - +- Exchange rangepartitioning(i_category#2 ASC NULLS FIRST, i_class#3 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#1 ASC NULLS FIRST, revenueratio#6 ASC NULLS FIRST, 5) - +- *(6) Project [i_item_desc#1, i_category#2, i_class#3, i_current_price#4, itemrevenue#5, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#8) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#9)), DecimalType(38,17)) AS revenueratio#6, i_item_id#7] - +- Window [sum(_w1#10) windowspecdefinition(i_class#3, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#9], [i_class#3] - +- *(5) Sort [i_class#3 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(i_class#3, 5) - +- *(4) HashAggregate(keys=[i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4], functions=[sum(UnscaledValue(ss_ext_sales_price#11))]) - +- Exchange hashpartitioning(i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4, 5) - +- *(3) HashAggregate(keys=[i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#11))]) - +- *(3) Project [ss_ext_sales_price#11, i_item_id#7, i_item_desc#1, i_current_price#4, i_class#3, i_category#2] - +- *(3) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight - :- *(3) Project [ss_sold_date_sk#12, ss_ext_sales_price#11, i_item_id#7, i_item_desc#1, i_current_price#4, i_class#3, i_category#2] - : +- *(3) BroadcastHashJoin [ss_item_sk#14], [i_item_sk#15], Inner, BuildRight - : :- *(3) Project [ss_sold_date_sk#12, ss_item_sk#14, ss_ext_sales_price#11] - : : +- *(3) Filter (isnotnull(ss_item_sk#14) && isnotnull(ss_sold_date_sk#12)) - : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_item_sk#14,ss_ext_sales_price#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [i_item_sk#15, i_item_id#7, i_item_desc#1, i_current_price#4, i_class#3, i_category#2] - : +- *(1) Filter (i_category#2 IN (Sports,Books,Home) && isnotnull(i_item_sk#15)) - : +- *(1) FileScan parquet default.item[i_item_sk#15,i_item_id#7,i_item_desc#1,i_current_price#4,i_class#3,i_category#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)], ReadSchema: struct= 10644)) && (d_date#16 <= 10674)) && isnotnull(d_date_sk#13)) - +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/simplified.txt deleted file mode 100644 index 914d8934b..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/simplified.txt +++ /dev/null @@ -1,38 +0,0 @@ -WholeStageCodegen - Project [i_category,i_class,i_current_price,i_item_desc,itemrevenue,revenueratio] - Sort [i_category,i_class,i_item_desc,i_item_id,revenueratio] - InputAdapter - Exchange [i_category,i_class,i_item_desc,i_item_id,revenueratio] #1 - WholeStageCodegen - Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] - InputAdapter - Window [_w1,i_class] - WholeStageCodegen - Sort [i_class] - InputAdapter - Exchange [i_class] #2 - WholeStageCodegen - HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(ss_ext_sales_price))] - InputAdapter - Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #3 - WholeStageCodegen - HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price,ss_sold_date_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] - Filter [i_category,i_item_sk] - Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/explain.txt deleted file mode 100644 index 2106af4de..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/explain.txt +++ /dev/null @@ -1,32 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[substring(w_warehouse_name, 1, 20)#1 ASC NULLS FIRST,sm_type#2 ASC NULLS FIRST,cc_name#3 ASC NULLS FIRST], output=[substring(w_warehouse_name, 1, 20)#1,sm_type#2,cc_name#3,30 days #4,31 - 60 days #5,61 - 90 days #6,91 - 120 days #7,>120 days #8]) -+- *(6) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3], functions=[sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 30) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 60) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 90) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) - +- Exchange hashpartitioning(substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3, 5) - +- *(5) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20) AS substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3], functions=[partial_sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 30) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 60) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 90) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) - +- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, w_warehouse_name#9, sm_type#2, cc_name#3] - +- *(5) BroadcastHashJoin [cs_ship_date_sk#11], [d_date_sk#13], Inner, BuildRight - :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, w_warehouse_name#9, sm_type#2, cc_name#3] - : +- *(5) BroadcastHashJoin [cs_call_center_sk#14], [cc_call_center_sk#15], Inner, BuildRight - : :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, cs_call_center_sk#14, w_warehouse_name#9, sm_type#2] - : : +- *(5) BroadcastHashJoin [cs_ship_mode_sk#16], [sm_ship_mode_sk#17], Inner, BuildRight - : : :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, cs_call_center_sk#14, cs_ship_mode_sk#16, w_warehouse_name#9] - : : : +- *(5) BroadcastHashJoin [cs_warehouse_sk#18], [w_warehouse_sk#19], Inner, BuildRight - : : : :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, cs_call_center_sk#14, cs_ship_mode_sk#16, cs_warehouse_sk#18] - : : : : +- *(5) Filter (((isnotnull(cs_warehouse_sk#18) && isnotnull(cs_ship_mode_sk#16)) && isnotnull(cs_call_center_sk#14)) && isnotnull(cs_ship_date_sk#11)) - : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_ship_date_sk#11,cs_call_center_sk#14,cs_ship_mode_sk#16,cs_warehouse_sk#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_ship_mode_sk), IsNotNull(cs_call_center_sk), IsNotNull(..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [sm_ship_mode_sk#17, sm_type#2] - : : +- *(2) Filter isnotnull(sm_ship_mode_sk#17) - : : +- *(2) FileScan parquet default.ship_mode[sm_ship_mode_sk#17,sm_type#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/ship_mode], PartitionFilters: [], PushedFilters: [IsNotNull(sm_ship_mode_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [cc_call_center_sk#15, cc_name#3] - : +- *(3) Filter isnotnull(cc_call_center_sk#15) - : +- *(3) FileScan parquet default.call_center[cc_call_center_sk#15,cc_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(4) Project [d_date_sk#13] - +- *(4) Filter (((isnotnull(d_month_seq#20) && (d_month_seq#20 >= 1200)) && (d_month_seq#20 <= 1211)) && isnotnull(d_date_sk#13)) - +- *(4) FileScan parquet default.date_dim[d_date_sk#13,d_month_seq#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/simplified.txt deleted file mode 100644 index 4e22db2fe..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/simplified.txt +++ /dev/null @@ -1,42 +0,0 @@ -TakeOrderedAndProject [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,cc_name,sm_type,substring(w_warehouse_name, 1, 20)] - WholeStageCodegen - HashAggregate [cc_name,sm_type,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint))] [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint))] - InputAdapter - Exchange [cc_name,sm_type,substring(w_warehouse_name, 1, 20)] #1 - WholeStageCodegen - HashAggregate [cc_name,cs_ship_date_sk,cs_sold_date_sk,sm_type,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_warehouse_name] [substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [cc_name,cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name] - BroadcastHashJoin [cs_ship_date_sk,d_date_sk] - Project [cc_name,cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name] - BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] - Project [cs_call_center_sk,cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name] - BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] - Project [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,w_warehouse_name] - BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] - Project [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,cs_warehouse_sk] - Filter [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_warehouse_sk] - Scan parquet default.catalog_sales [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,cs_warehouse_sk] [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,cs_warehouse_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [w_warehouse_name,w_warehouse_sk] - Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [sm_ship_mode_sk,sm_type] - Filter [sm_ship_mode_sk] - Scan parquet default.ship_mode [sm_ship_mode_sk,sm_type] [sm_ship_mode_sk,sm_type] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [cc_call_center_sk,cc_name] - Filter [cc_call_center_sk] - Scan parquet default.call_center [cc_call_center_sk,cc_name] [cc_call_center_sk,cc_name] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] From 0b40853b581e3ee2d416a072ad7ce2da846b0d14 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Fri, 12 Mar 2021 20:06:36 -0800 Subject: [PATCH 21/31] updated plans based on the plan stability suite --- .../approved-plans-v1_4/q1/explain.txt | 296 +---- .../approved-plans-v1_4/q1/simplified.txt | 73 +- .../approved-plans-v1_4/q10/explain.txt | 326 +---- .../approved-plans-v1_4/q10/simplified.txt | 83 +- .../approved-plans-v1_4/q11/explain.txt | 494 ++------ .../approved-plans-v1_4/q11/simplified.txt | 181 +-- .../approved-plans-v1_4/q12/explain.txt | 159 +-- .../approved-plans-v1_4/q12/simplified.txt | 48 +- .../approved-plans-v1_4/q13/explain.txt | 251 +--- .../approved-plans-v1_4/q13/simplified.txt | 78 +- .../approved-plans-v1_4/q14a/explain.txt | 988 +++------------ .../approved-plans-v1_4/q14a/simplified.txt | 377 +++--- .../approved-plans-v1_4/q14b/explain.txt | 927 +++----------- .../approved-plans-v1_4/q14b/simplified.txt | 328 ++--- .../approved-plans-v1_4/q15/explain.txt | 174 +-- .../approved-plans-v1_4/q15/simplified.txt | 51 +- .../approved-plans-v1_4/q16/explain.txt | 270 +--- .../approved-plans-v1_4/q16/simplified.txt | 73 +- .../approved-plans-v1_4/q17/explain.txt | 317 +---- .../approved-plans-v1_4/q17/simplified.txt | 107 +- .../approved-plans-v1_4/q18/explain.txt | 307 +---- .../approved-plans-v1_4/q18/simplified.txt | 90 +- .../approved-plans-v1_4/q19/explain.txt | 257 +--- .../approved-plans-v1_4/q19/simplified.txt | 80 +- .../approved-plans-v1_4/q2/explain.txt | 252 +--- .../approved-plans-v1_4/q2/simplified.txt | 67 +- .../approved-plans-v1_4/q20/explain.txt | 159 +-- .../approved-plans-v1_4/q20/simplified.txt | 44 +- .../approved-plans-v1_4/q21/explain.txt | 180 +-- .../approved-plans-v1_4/q21/simplified.txt | 55 +- .../approved-plans-v1_4/q22/explain.txt | 180 +-- .../approved-plans-v1_4/q22/simplified.txt | 57 +- .../approved-plans-v1_4/q23a/explain.txt | 624 ++------- .../approved-plans-v1_4/q23a/simplified.txt | 260 ++-- .../approved-plans-v1_4/q23b/explain.txt | 788 ++---------- .../approved-plans-v1_4/q23b/simplified.txt | 244 ++-- .../approved-plans-v1_4/q24a/explain.txt | 557 ++------ .../approved-plans-v1_4/q24a/simplified.txt | 188 ++- .../approved-plans-v1_4/q24b/explain.txt | 557 ++------ .../approved-plans-v1_4/q24b/simplified.txt | 188 ++- .../approved-plans-v1_4/q25/explain.txt | 317 +---- .../approved-plans-v1_4/q25/simplified.txt | 107 +- .../approved-plans-v1_4/q26/explain.txt | 223 +--- .../approved-plans-v1_4/q26/simplified.txt | 58 +- .../approved-plans-v1_4/q27/explain.txt | 224 +--- .../approved-plans-v1_4/q27/simplified.txt | 69 +- .../approved-plans-v1_4/q28/explain.txt | 501 +------- .../approved-plans-v1_4/q28/simplified.txt | 120 +- .../approved-plans-v1_4/q29/explain.txt | 340 +---- .../approved-plans-v1_4/q29/simplified.txt | 105 +- .../approved-plans-v1_4/q3/explain.txt | 140 +- .../approved-plans-v1_4/q3/simplified.txt | 41 +- .../approved-plans-v1_4/q30/explain.txt | 353 +----- .../approved-plans-v1_4/q30/simplified.txt | 104 +- .../approved-plans-v1_4/q31/explain.txt | 661 ++-------- .../approved-plans-v1_4/q31/simplified.txt | 200 ++- .../approved-plans-v1_4/q32/explain.txt | 203 +-- .../approved-plans-v1_4/q32/simplified.txt | 46 +- .../approved-plans-v1_4/q33/explain.txt | 441 +------ .../approved-plans-v1_4/q33/simplified.txt | 128 +- .../approved-plans-v1_4/q34/explain.txt | 235 +--- .../approved-plans-v1_4/q34/simplified.txt | 64 +- .../approved-plans-v1_4/q35/explain.txt | 321 +---- .../approved-plans-v1_4/q35/simplified.txt | 84 +- .../approved-plans-v1_4/q36/explain.txt | 209 +-- .../approved-plans-v1_4/q36/simplified.txt | 60 +- .../approved-plans-v1_4/q37/explain.txt | 184 +-- .../approved-plans-v1_4/q37/simplified.txt | 53 +- .../approved-plans-v1_4/q38/explain.txt | 376 +----- .../approved-plans-v1_4/q38/simplified.txt | 151 ++- .../approved-plans-v1_4/q39a/explain.txt | 341 +---- .../approved-plans-v1_4/q39a/simplified.txt | 104 +- .../approved-plans-v1_4/q39b/explain.txt | 341 +---- .../approved-plans-v1_4/q39b/simplified.txt | 104 +- .../approved-plans-v1_4/q4/explain.txt | 728 ++--------- .../approved-plans-v1_4/q4/simplified.txt | 265 ++-- .../approved-plans-v1_4/q40/explain.txt | 213 +--- .../approved-plans-v1_4/q40/simplified.txt | 64 +- .../approved-plans-v1_4/q41/explain.txt | 137 +- .../approved-plans-v1_4/q41/simplified.txt | 26 +- .../approved-plans-v1_4/q42/explain.txt | 140 +- .../approved-plans-v1_4/q42/simplified.txt | 43 +- .../approved-plans-v1_4/q43/explain.txt | 140 +- .../approved-plans-v1_4/q43/simplified.txt | 41 +- .../approved-plans-v1_4/q44/explain.txt | 296 +---- .../approved-plans-v1_4/q44/simplified.txt | 82 +- .../approved-plans-v1_4/q45/explain.txt | 263 +--- .../approved-plans-v1_4/q45/simplified.txt | 76 +- .../approved-plans-v1_4/q46/explain.txt | 280 +--- .../approved-plans-v1_4/q46/simplified.txt | 85 +- .../approved-plans-v1_4/q47/explain.txt | 327 +---- .../approved-plans-v1_4/q47/simplified.txt | 143 +-- .../approved-plans-v1_4/q48/explain.txt | 212 +--- .../approved-plans-v1_4/q48/simplified.txt | 63 +- .../approved-plans-v1_4/q5/explain.txt | 509 ++------ .../approved-plans-v1_4/q5/simplified.txt | 173 ++- .../approved-plans-v1_4/q50/explain.txt | 213 +--- .../approved-plans-v1_4/q50/simplified.txt | 70 +- .../approved-plans-v1_4/q51/explain.txt | 267 +--- .../approved-plans-v1_4/q51/simplified.txt | 104 +- .../approved-plans-v1_4/q52/explain.txt | 140 +- .../approved-plans-v1_4/q52/simplified.txt | 41 +- .../approved-plans-v1_4/q53/explain.txt | 209 +-- .../approved-plans-v1_4/q53/simplified.txt | 60 +- .../approved-plans-v1_4/q54/explain.txt | 545 ++------ .../approved-plans-v1_4/q54/simplified.txt | 160 +-- .../approved-plans-v1_4/q55/explain.txt | 140 +- .../approved-plans-v1_4/q55/simplified.txt | 41 +- .../approved-plans-v1_4/q56/explain.txt | 441 +------ .../approved-plans-v1_4/q56/simplified.txt | 128 +- .../approved-plans-v1_4/q57/explain.txt | 327 +---- .../approved-plans-v1_4/q57/simplified.txt | 143 +-- .../approved-plans-v1_4/q58/explain.txt | 576 ++------- .../approved-plans-v1_4/q58/simplified.txt | 164 +-- .../approved-plans-v1_4/q59/explain.txt | 290 +---- .../approved-plans-v1_4/q59/simplified.txt | 82 +- .../approved-plans-v1_4/q6/explain.txt | 357 +----- .../approved-plans-v1_4/q6/simplified.txt | 99 +- .../approved-plans-v1_4/q60/explain.txt | 441 +------ .../approved-plans-v1_4/q60/simplified.txt | 126 +- .../approved-plans-v1_4/q61/explain.txt | 462 +------ .../approved-plans-v1_4/q61/simplified.txt | 123 +- .../approved-plans-v1_4/q62/explain.txt | 213 +--- .../approved-plans-v1_4/q62/simplified.txt | 70 +- .../approved-plans-v1_4/q63/explain.txt | 209 +-- .../approved-plans-v1_4/q63/simplified.txt | 60 +- .../approved-plans-v1_4/q64/explain.txt | 1086 +++------------- .../approved-plans-v1_4/q64/simplified.txt | 331 +++-- .../approved-plans-v1_4/q65/explain.txt | 285 +---- .../approved-plans-v1_4/q65/simplified.txt | 88 +- .../approved-plans-v1_4/q66/explain.txt | 362 +----- .../approved-plans-v1_4/q66/simplified.txt | 108 +- .../approved-plans-v1_4/q67/explain.txt | 204 +-- .../approved-plans-v1_4/q67/simplified.txt | 67 +- .../approved-plans-v1_4/q68/explain.txt | 280 +--- .../approved-plans-v1_4/q68/simplified.txt | 85 +- .../approved-plans-v1_4/q69/explain.txt | 320 +---- .../approved-plans-v1_4/q69/simplified.txt | 83 +- .../approved-plans-v1_4/q7/explain.txt | 223 +--- .../approved-plans-v1_4/q7/simplified.txt | 64 +- .../approved-plans-v1_4/q70/explain.txt | 308 +---- .../approved-plans-v1_4/q70/simplified.txt | 105 +- .../approved-plans-v1_4/q71/explain.txt | 270 +--- .../approved-plans-v1_4/q71/simplified.txt | 81 +- .../approved-plans-v1_4/q72/explain.txt | 457 +------ .../approved-plans-v1_4/q72/simplified.txt | 136 +- .../approved-plans-v1_4/q73/explain.txt | 235 +--- .../approved-plans-v1_4/q73/simplified.txt | 60 +- .../approved-plans-v1_4/q74/explain.txt | 488 ++----- .../approved-plans-v1_4/q74/simplified.txt | 181 +-- .../approved-plans-v1_4/q75/explain.txt | 762 ++--------- .../approved-plans-v1_4/q75/simplified.txt | 259 ++-- .../approved-plans-v1_4/q76/explain.txt | 246 +--- .../approved-plans-v1_4/q76/simplified.txt | 85 +- .../approved-plans-v1_4/q77/explain.txt | 618 ++------- .../approved-plans-v1_4/q77/simplified.txt | 208 +-- .../approved-plans-v1_4/q78/explain.txt | 400 +----- .../approved-plans-v1_4/q78/simplified.txt | 123 +- .../approved-plans-v1_4/q79/explain.txt | 223 +--- .../approved-plans-v1_4/q79/simplified.txt | 68 +- .../approved-plans-v1_4/q8/explain.txt | 315 +---- .../approved-plans-v1_4/q8/simplified.txt | 99 +- .../approved-plans-v1_4/q80/explain.txt | 648 ++-------- .../approved-plans-v1_4/q80/simplified.txt | 195 ++- .../approved-plans-v1_4/q81/explain.txt | 348 +---- .../approved-plans-v1_4/q81/simplified.txt | 101 +- .../approved-plans-v1_4/q82/explain.txt | 184 +-- .../approved-plans-v1_4/q82/simplified.txt | 51 +- .../approved-plans-v1_4/q83/explain.txt | 411 +----- .../approved-plans-v1_4/q83/simplified.txt | 123 +- .../approved-plans-v1_4/q84/explain.txt | 233 +--- .../approved-plans-v1_4/q84/simplified.txt | 58 +- .../approved-plans-v1_4/q85/explain.txt | 335 +---- .../approved-plans-v1_4/q85/simplified.txt | 106 +- .../approved-plans-v1_4/q86/explain.txt | 165 +-- .../approved-plans-v1_4/q86/simplified.txt | 50 +- .../approved-plans-v1_4/q87/explain.txt | 375 +----- .../approved-plans-v1_4/q87/simplified.txt | 138 +- .../approved-plans-v1_4/q88/explain.txt | 1123 +++-------------- .../approved-plans-v1_4/q88/simplified.txt | 242 ++-- .../approved-plans-v1_4/q89/explain.txt | 204 +-- .../approved-plans-v1_4/q89/simplified.txt | 67 +- .../approved-plans-v1_4/q9/explain.txt | 825 ++---------- .../approved-plans-v1_4/q9/simplified.txt | 186 ++- .../approved-plans-v1_4/q90/explain.txt | 325 +---- .../approved-plans-v1_4/q90/simplified.txt | 74 +- .../approved-plans-v1_4/q91/explain.txt | 307 +---- .../approved-plans-v1_4/q91/simplified.txt | 82 +- .../approved-plans-v1_4/q92/explain.txt | 227 +--- .../approved-plans-v1_4/q92/simplified.txt | 62 +- .../approved-plans-v1_4/q93/explain.txt | 127 +- .../approved-plans-v1_4/q93/simplified.txt | 37 +- .../approved-plans-v1_4/q94/explain.txt | 270 +--- .../approved-plans-v1_4/q94/simplified.txt | 73 +- .../approved-plans-v1_4/q95/explain.txt | 370 +----- .../approved-plans-v1_4/q95/simplified.txt | 95 +- .../approved-plans-v1_4/q96/explain.txt | 184 +-- .../approved-plans-v1_4/q96/simplified.txt | 41 +- .../approved-plans-v1_4/q97/explain.txt | 204 +-- .../approved-plans-v1_4/q97/simplified.txt | 89 +- .../approved-plans-v1_4/q98/explain.txt | 171 +-- .../approved-plans-v1_4/q98/simplified.txt | 54 +- .../approved-plans-v1_4/q99/explain.txt | 213 +--- .../approved-plans-v1_4/q99/simplified.txt | 64 +- 204 files changed, 10505 insertions(+), 36395 deletions(-) diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/explain.txt index 8b11a662f..1e2caffbe 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/explain.txt @@ -1,255 +1,43 @@ == Physical Plan == -TakeOrderedAndProject (44) -+- * Project (43) - +- * BroadcastHashJoin Inner BuildRight (42) - :- * Project (37) - : +- * BroadcastHashJoin Inner BuildRight (36) - : :- * Project (30) - : : +- * BroadcastHashJoin Inner BuildRight (29) - : : :- * Filter (14) - : : : +- * HashAggregate (13) - : : : +- Exchange (12) - : : : +- * HashAggregate (11) - : : : +- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_returns (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (28) - : : +- * Filter (27) - : : +- * HashAggregate (26) - : : +- Exchange (25) - : : +- * HashAggregate (24) - : : +- * HashAggregate (23) - : : +- Exchange (22) - : : +- * HashAggregate (21) - : : +- * Project (20) - : : +- * BroadcastHashJoin Inner BuildRight (19) - : : :- * Filter (17) - : : : +- * ColumnarToRow (16) - : : : +- Scan parquet default.store_returns (15) - : : +- ReusedExchange (18) - : +- BroadcastExchange (35) - : +- * Project (34) - : +- * Filter (33) - : +- * ColumnarToRow (32) - : +- Scan parquet default.store (31) - +- BroadcastExchange (41) - +- * Filter (40) - +- * ColumnarToRow (39) - +- Scan parquet default.customer (38) - - -(1) Scan parquet default.store_returns -Output [4]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk), IsNotNull(sr_customer_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 2] -Input [4]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] - -(3) Filter [codegen id : 2] -Input [4]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] -Condition : ((isnotnull(sr_returned_date_sk#1) AND isnotnull(sr_store_sk#3)) AND isnotnull(sr_customer_sk#2)) - -(4) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_year#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#5, d_year#6] - -(6) Filter [codegen id : 1] -Input [2]: [d_date_sk#5, d_year#6] -Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND isnotnull(d_date_sk#5)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_year#6] - -(8) BroadcastExchange -Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] - -(9) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [sr_returned_date_sk#1] -Right keys [1]: [cast(d_date_sk#5 as bigint)] -Join condition: None - -(10) Project [codegen id : 2] -Output [3]: [sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] -Input [5]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4, d_date_sk#5] - -(11) HashAggregate [codegen id : 2] -Input [3]: [sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] -Keys [2]: [sr_customer_sk#2, sr_store_sk#3] -Functions [1]: [partial_sum(UnscaledValue(sr_return_amt#4))] -Aggregate Attributes [1]: [sum#8] -Results [3]: [sr_customer_sk#2, sr_store_sk#3, sum#9] - -(12) Exchange -Input [3]: [sr_customer_sk#2, sr_store_sk#3, sum#9] -Arguments: hashpartitioning(sr_customer_sk#2, sr_store_sk#3, 5), true, [id=#10] - -(13) HashAggregate [codegen id : 9] -Input [3]: [sr_customer_sk#2, sr_store_sk#3, sum#9] -Keys [2]: [sr_customer_sk#2, sr_store_sk#3] -Functions [1]: [sum(UnscaledValue(sr_return_amt#4))] -Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#4))#11] -Results [3]: [sr_customer_sk#2 AS ctr_customer_sk#12, sr_store_sk#3 AS ctr_store_sk#13, MakeDecimal(sum(UnscaledValue(sr_return_amt#4))#11,17,2) AS ctr_total_return#14] - -(14) Filter [codegen id : 9] -Input [3]: [ctr_customer_sk#12, ctr_store_sk#13, ctr_total_return#14] -Condition : isnotnull(ctr_total_return#14) - -(15) Scan parquet default.store_returns -Output [4]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)] -ReadSchema: struct - -(16) ColumnarToRow [codegen id : 4] -Input [4]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] - -(17) Filter [codegen id : 4] -Input [4]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] -Condition : (isnotnull(sr_returned_date_sk#1) AND isnotnull(sr_store_sk#3)) - -(18) ReusedExchange [Reuses operator id: 8] -Output [1]: [d_date_sk#5] - -(19) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [sr_returned_date_sk#1] -Right keys [1]: [cast(d_date_sk#5 as bigint)] -Join condition: None - -(20) Project [codegen id : 4] -Output [3]: [sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] -Input [5]: [sr_returned_date_sk#1, sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4, d_date_sk#5] - -(21) HashAggregate [codegen id : 4] -Input [3]: [sr_customer_sk#2, sr_store_sk#3, sr_return_amt#4] -Keys [2]: [sr_customer_sk#2, sr_store_sk#3] -Functions [1]: [partial_sum(UnscaledValue(sr_return_amt#4))] -Aggregate Attributes [1]: [sum#15] -Results [3]: [sr_customer_sk#2, sr_store_sk#3, sum#16] - -(22) Exchange -Input [3]: [sr_customer_sk#2, sr_store_sk#3, sum#16] -Arguments: hashpartitioning(sr_customer_sk#2, sr_store_sk#3, 5), true, [id=#17] - -(23) HashAggregate [codegen id : 5] -Input [3]: [sr_customer_sk#2, sr_store_sk#3, sum#16] -Keys [2]: [sr_customer_sk#2, sr_store_sk#3] -Functions [1]: [sum(UnscaledValue(sr_return_amt#4))] -Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#4))#18] -Results [2]: [sr_store_sk#3 AS ctr_store_sk#13, MakeDecimal(sum(UnscaledValue(sr_return_amt#4))#18,17,2) AS ctr_total_return#14] - -(24) HashAggregate [codegen id : 5] -Input [2]: [ctr_store_sk#13, ctr_total_return#14] -Keys [1]: [ctr_store_sk#13] -Functions [1]: [partial_avg(ctr_total_return#14)] -Aggregate Attributes [2]: [sum#19, count#20] -Results [3]: [ctr_store_sk#13, sum#21, count#22] - -(25) Exchange -Input [3]: [ctr_store_sk#13, sum#21, count#22] -Arguments: hashpartitioning(ctr_store_sk#13, 5), true, [id=#23] - -(26) HashAggregate [codegen id : 6] -Input [3]: [ctr_store_sk#13, sum#21, count#22] -Keys [1]: [ctr_store_sk#13] -Functions [1]: [avg(ctr_total_return#14)] -Aggregate Attributes [1]: [avg(ctr_total_return#14)#24] -Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#14)#24) * 1.200000), DecimalType(24,7), true) AS (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#25, ctr_store_sk#13 AS ctr_store_sk#13#26] - -(27) Filter [codegen id : 6] -Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#25, ctr_store_sk#13#26] -Condition : isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#25) - -(28) BroadcastExchange -Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#25, ctr_store_sk#13#26] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [id=#27] - -(29) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ctr_store_sk#13] -Right keys [1]: [ctr_store_sk#13#26] -Join condition: (cast(ctr_total_return#14 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#25) - -(30) Project [codegen id : 9] -Output [2]: [ctr_customer_sk#12, ctr_store_sk#13] -Input [5]: [ctr_customer_sk#12, ctr_store_sk#13, ctr_total_return#14, (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#25, ctr_store_sk#13#26] - -(31) Scan parquet default.store -Output [2]: [s_store_sk#28, s_state#29] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] -ReadSchema: struct - -(32) ColumnarToRow [codegen id : 7] -Input [2]: [s_store_sk#28, s_state#29] - -(33) Filter [codegen id : 7] -Input [2]: [s_store_sk#28, s_state#29] -Condition : ((isnotnull(s_state#29) AND (s_state#29 = TN)) AND isnotnull(s_store_sk#28)) - -(34) Project [codegen id : 7] -Output [1]: [s_store_sk#28] -Input [2]: [s_store_sk#28, s_state#29] - -(35) BroadcastExchange -Input [1]: [s_store_sk#28] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] - -(36) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ctr_store_sk#13] -Right keys [1]: [cast(s_store_sk#28 as bigint)] -Join condition: None - -(37) Project [codegen id : 9] -Output [1]: [ctr_customer_sk#12] -Input [3]: [ctr_customer_sk#12, ctr_store_sk#13, s_store_sk#28] - -(38) Scan parquet default.customer -Output [2]: [c_customer_sk#31, c_customer_id#32] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk)] -ReadSchema: struct - -(39) ColumnarToRow [codegen id : 8] -Input [2]: [c_customer_sk#31, c_customer_id#32] - -(40) Filter [codegen id : 8] -Input [2]: [c_customer_sk#31, c_customer_id#32] -Condition : isnotnull(c_customer_sk#31) - -(41) BroadcastExchange -Input [2]: [c_customer_sk#31, c_customer_id#32] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] - -(42) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ctr_customer_sk#12] -Right keys [1]: [cast(c_customer_sk#31 as bigint)] -Join condition: None - -(43) Project [codegen id : 9] -Output [1]: [c_customer_id#32] -Input [3]: [ctr_customer_sk#12, c_customer_sk#31, c_customer_id#32] - -(44) TakeOrderedAndProject -Input [1]: [c_customer_id#32] -Arguments: 100, [c_customer_id#32 ASC NULLS FIRST], [c_customer_id#32] - +TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST], output=[c_customer_id#1]) ++- *(9) Project [c_customer_id#1] + +- *(9) BroadcastHashJoin [ctr_customer_sk#2], [cast(c_customer_sk#3 as bigint)], Inner, BuildRight + :- *(9) Project [ctr_customer_sk#2] + : +- *(9) BroadcastHashJoin [ctr_store_sk#4], [cast(s_store_sk#5 as bigint)], Inner, BuildRight + : :- *(9) Project [ctr_customer_sk#2, ctr_store_sk#4] + : : +- *(9) BroadcastHashJoin [ctr_store_sk#4], [ctr_store_sk#4#6], Inner, BuildRight, (cast(ctr_total_return#7 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#8) + : : :- *(9) Filter isnotnull(ctr_total_return#7) + : : : +- *(9) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[sum(UnscaledValue(sr_return_amt#11))]) + : : : +- Exchange hashpartitioning(sr_customer_sk#9, sr_store_sk#10, 5) + : : : +- *(2) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[partial_sum(UnscaledValue(sr_return_amt#11))]) + : : : +- *(2) Project [sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] + : : : +- *(2) BroadcastHashJoin [sr_returned_date_sk#12], [cast(d_date_sk#13 as bigint)], Inner, BuildRight + : : : :- *(2) Project [sr_returned_date_sk#12, sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] + : : : : +- *(2) Filter ((isnotnull(sr_returned_date_sk#12) && isnotnull(sr_store_sk#10)) && isnotnull(sr_customer_sk#9)) + : : : : +- *(2) FileScan parquet default.store_returns[sr_returned_date_sk#12,sr_customer_sk#9,sr_store_sk#10,sr_return_amt#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk), IsNotNull(sr_customer_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true])) + : : +- *(6) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#8) + : : +- *(6) HashAggregate(keys=[ctr_store_sk#4], functions=[avg(ctr_total_return#7)]) + : : +- Exchange hashpartitioning(ctr_store_sk#4, 5) + : : +- *(5) HashAggregate(keys=[ctr_store_sk#4], functions=[partial_avg(ctr_total_return#7)]) + : : +- *(5) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[sum(UnscaledValue(sr_return_amt#11))]) + : : +- Exchange hashpartitioning(sr_customer_sk#9, sr_store_sk#10, 5) + : : +- *(4) HashAggregate(keys=[sr_customer_sk#9, sr_store_sk#10], functions=[partial_sum(UnscaledValue(sr_return_amt#11))]) + : : +- *(4) Project [sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] + : : +- *(4) BroadcastHashJoin [sr_returned_date_sk#12], [cast(d_date_sk#13 as bigint)], Inner, BuildRight + : : :- *(4) Project [sr_returned_date_sk#12, sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] + : : : +- *(4) Filter (isnotnull(sr_returned_date_sk#12) && isnotnull(sr_store_sk#10)) + : : : +- *(4) FileScan parquet default.store_returns[sr_returned_date_sk#12,sr_customer_sk#9,sr_store_sk#10,sr_return_amt#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [c_customer_sk#3, c_customer_id#1] + +- *(8) Filter isnotnull(c_customer_sk#3) + +- *(8) FileScan parquet default.customer[c_customer_sk#3,c_customer_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/simplified.txt index 6868eb766..4f838850c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/simplified.txt @@ -1,65 +1,58 @@ TakeOrderedAndProject [c_customer_id] - WholeStageCodegen (9) + WholeStageCodegen Project [c_customer_id] - BroadcastHashJoin [ctr_customer_sk,c_customer_sk] + BroadcastHashJoin [c_customer_sk,ctr_customer_sk] Project [ctr_customer_sk] BroadcastHashJoin [ctr_store_sk,s_store_sk] Project [ctr_customer_sk,ctr_store_sk] - BroadcastHashJoin [ctr_store_sk,ctr_store_skL,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + BroadcastHashJoin [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_store_sk,ctr_store_skL,ctr_total_return] Filter [ctr_total_return] - HashAggregate [sr_customer_sk,sr_store_sk,sum] [sum(UnscaledValue(sr_return_amt)),ctr_customer_sk,ctr_store_sk,ctr_total_return,sum] + HashAggregate [sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] [ctr_customer_sk,ctr_store_sk,ctr_total_return,sum,sum(UnscaledValue(sr_return_amt))] InputAdapter Exchange [sr_customer_sk,sr_store_sk] #1 - WholeStageCodegen (2) - HashAggregate [sr_customer_sk,sr_store_sk,sr_return_amt] [sum,sum] - Project [sr_customer_sk,sr_store_sk,sr_return_amt] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Filter [sr_returned_date_sk,sr_store_sk,sr_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] + WholeStageCodegen + HashAggregate [sr_customer_sk,sr_return_amt,sr_store_sk,sum,sum] [sum,sum] + Project [sr_customer_sk,sr_return_amt,sr_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] + Filter [sr_customer_sk,sr_returned_date_sk,sr_store_sk] + Scan parquet default.store_returns [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #3 - WholeStageCodegen (6) + WholeStageCodegen Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] - HashAggregate [ctr_store_sk,sum,count] [avg(ctr_total_return),(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_store_skL,sum,count] + HashAggregate [avg(ctr_total_return),count,ctr_store_sk,sum] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),avg(ctr_total_return),count,ctr_store_skL,sum] InputAdapter Exchange [ctr_store_sk] #4 - WholeStageCodegen (5) - HashAggregate [ctr_store_sk,ctr_total_return] [sum,count,sum,count] - HashAggregate [sr_customer_sk,sr_store_sk,sum] [sum(UnscaledValue(sr_return_amt)),ctr_store_sk,ctr_total_return,sum] + WholeStageCodegen + HashAggregate [count,count,ctr_store_sk,ctr_total_return,sum,sum] [count,count,sum,sum] + HashAggregate [sr_customer_sk,sr_store_sk,sum,sum(UnscaledValue(sr_return_amt))] [ctr_store_sk,ctr_total_return,sum,sum(UnscaledValue(sr_return_amt))] InputAdapter Exchange [sr_customer_sk,sr_store_sk] #5 - WholeStageCodegen (4) - HashAggregate [sr_customer_sk,sr_store_sk,sr_return_amt] [sum,sum] - Project [sr_customer_sk,sr_store_sk,sr_return_amt] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Filter [sr_returned_date_sk,sr_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_customer_sk,sr_store_sk,sr_return_amt] + WholeStageCodegen + HashAggregate [sr_customer_sk,sr_return_amt,sr_store_sk,sum,sum] [sum,sum] + Project [sr_customer_sk,sr_return_amt,sr_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] + Filter [sr_returned_date_sk,sr_store_sk] + Scan parquet default.store_returns [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] [sr_customer_sk,sr_return_amt,sr_returned_date_sk,sr_store_sk] InputAdapter - ReusedExchange [d_date_sk] #2 + ReusedExchange [d_date_sk] [d_date_sk] #2 InputAdapter BroadcastExchange #6 - WholeStageCodegen (7) + WholeStageCodegen Project [s_store_sk] Filter [s_state,s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_state] + Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] InputAdapter BroadcastExchange #7 - WholeStageCodegen (8) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_customer_id] + WholeStageCodegen + Project [c_customer_id,c_customer_sk] + Filter [c_customer_sk] + Scan parquet default.customer [c_customer_id,c_customer_sk] [c_customer_id,c_customer_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/explain.txt index 509ce3ac4..678ed7254 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/explain.txt @@ -1,279 +1,49 @@ == Physical Plan == -TakeOrderedAndProject (50) -+- * HashAggregate (49) - +- Exchange (48) - +- * HashAggregate (47) - +- * Project (46) - +- * BroadcastHashJoin Inner BuildRight (45) - :- * Project (40) - : +- * BroadcastHashJoin Inner BuildRight (39) - : :- * Project (33) - : : +- * Filter (32) - : : +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (31) - : : :- * BroadcastHashJoin ExistenceJoin(exists#2) BuildRight (23) - : : : :- * BroadcastHashJoin LeftSemi BuildRight (15) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.customer (1) - : : : : +- BroadcastExchange (14) - : : : : +- * Project (13) - : : : : +- * BroadcastHashJoin Inner BuildRight (12) - : : : : :- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.store_sales (4) - : : : : +- BroadcastExchange (11) - : : : : +- * Project (10) - : : : : +- * Filter (9) - : : : : +- * ColumnarToRow (8) - : : : : +- Scan parquet default.date_dim (7) - : : : +- BroadcastExchange (22) - : : : +- * Project (21) - : : : +- * BroadcastHashJoin Inner BuildRight (20) - : : : :- * Filter (18) - : : : : +- * ColumnarToRow (17) - : : : : +- Scan parquet default.web_sales (16) - : : : +- ReusedExchange (19) - : : +- BroadcastExchange (30) - : : +- * Project (29) - : : +- * BroadcastHashJoin Inner BuildRight (28) - : : :- * Filter (26) - : : : +- * ColumnarToRow (25) - : : : +- Scan parquet default.catalog_sales (24) - : : +- ReusedExchange (27) - : +- BroadcastExchange (38) - : +- * Project (37) - : +- * Filter (36) - : +- * ColumnarToRow (35) - : +- Scan parquet default.customer_address (34) - +- BroadcastExchange (44) - +- * Filter (43) - +- * ColumnarToRow (42) - +- Scan parquet default.customer_demographics (41) - - -(1) Scan parquet default.customer -Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 9] -Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] - -(3) Filter [codegen id : 9] -Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] -Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) - -(4) Scan parquet default.store_sales -Output [2]: [ss_sold_date_sk#6, ss_customer_sk#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 2] -Input [2]: [ss_sold_date_sk#6, ss_customer_sk#7] - -(6) Filter [codegen id : 2] -Input [2]: [ss_sold_date_sk#6, ss_customer_sk#7] -Condition : isnotnull(ss_sold_date_sk#6) - -(7) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_year#9, d_moy#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,1), LessThanOrEqual(d_moy,4), IsNotNull(d_date_sk)] -ReadSchema: struct - -(8) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#9, d_moy#10] - -(9) Filter [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#9, d_moy#10] -Condition : (((((isnotnull(d_year#9) AND isnotnull(d_moy#10)) AND (d_year#9 = 2002)) AND (d_moy#10 >= 1)) AND (d_moy#10 <= 4)) AND isnotnull(d_date_sk#8)) - -(10) Project [codegen id : 1] -Output [1]: [d_date_sk#8] -Input [3]: [d_date_sk#8, d_year#9, d_moy#10] - -(11) BroadcastExchange -Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] - -(12) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#6] -Right keys [1]: [d_date_sk#8] -Join condition: None - -(13) Project [codegen id : 2] -Output [1]: [ss_customer_sk#7] -Input [3]: [ss_sold_date_sk#6, ss_customer_sk#7, d_date_sk#8] - -(14) BroadcastExchange -Input [1]: [ss_customer_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(15) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [c_customer_sk#3] -Right keys [1]: [ss_customer_sk#7] -Join condition: None - -(16) Scan parquet default.web_sales -Output [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 4] -Input [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] - -(18) Filter [codegen id : 4] -Input [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] -Condition : isnotnull(ws_sold_date_sk#13) - -(19) ReusedExchange [Reuses operator id: 11] -Output [1]: [d_date_sk#8] - -(20) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ws_sold_date_sk#13] -Right keys [1]: [d_date_sk#8] -Join condition: None - -(21) Project [codegen id : 4] -Output [1]: [ws_bill_customer_sk#14] -Input [3]: [ws_sold_date_sk#13, ws_bill_customer_sk#14, d_date_sk#8] - -(22) BroadcastExchange -Input [1]: [ws_bill_customer_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] - -(23) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [c_customer_sk#3] -Right keys [1]: [ws_bill_customer_sk#14] -Join condition: None - -(24) Scan parquet default.catalog_sales -Output [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(25) ColumnarToRow [codegen id : 6] -Input [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] - -(26) Filter [codegen id : 6] -Input [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] -Condition : isnotnull(cs_sold_date_sk#16) - -(27) ReusedExchange [Reuses operator id: 11] -Output [1]: [d_date_sk#8] - -(28) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_sold_date_sk#16] -Right keys [1]: [d_date_sk#8] -Join condition: None - -(29) Project [codegen id : 6] -Output [1]: [cs_ship_customer_sk#17] -Input [3]: [cs_sold_date_sk#16, cs_ship_customer_sk#17, d_date_sk#8] - -(30) BroadcastExchange -Input [1]: [cs_ship_customer_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] - -(31) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [c_customer_sk#3] -Right keys [1]: [cs_ship_customer_sk#17] -Join condition: None - -(32) Filter [codegen id : 9] -Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] -Condition : (exists#2 OR exists#1) - -(33) Project [codegen id : 9] -Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] -Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] - -(34) Scan parquet default.customer_address -Output [2]: [ca_address_sk#19, ca_county#20] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [In(ca_county, [Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County]), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(35) ColumnarToRow [codegen id : 7] -Input [2]: [ca_address_sk#19, ca_county#20] - -(36) Filter [codegen id : 7] -Input [2]: [ca_address_sk#19, ca_county#20] -Condition : (ca_county#20 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) AND isnotnull(ca_address_sk#19)) - -(37) Project [codegen id : 7] -Output [1]: [ca_address_sk#19] -Input [2]: [ca_address_sk#19, ca_county#20] - -(38) BroadcastExchange -Input [1]: [ca_address_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] - -(39) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [c_current_addr_sk#5] -Right keys [1]: [ca_address_sk#19] -Join condition: None - -(40) Project [codegen id : 9] -Output [1]: [c_current_cdemo_sk#4] -Input [3]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#19] - -(41) Scan parquet default.customer_demographics -Output [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_demo_sk)] -ReadSchema: struct - -(42) ColumnarToRow [codegen id : 8] -Input [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] - -(43) Filter [codegen id : 8] -Input [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Condition : isnotnull(cd_demo_sk#22) - -(44) BroadcastExchange -Input [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] - -(45) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [c_current_cdemo_sk#4] -Right keys [1]: [cd_demo_sk#22] -Join condition: None - -(46) Project [codegen id : 9] -Output [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Input [10]: [c_current_cdemo_sk#4, cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] - -(47) HashAggregate [codegen id : 9] -Input [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Keys [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#32] -Results [9]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#33] - -(48) Exchange -Input [9]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#33] -Arguments: hashpartitioning(cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, 5), true, [id=#34] - -(49) HashAggregate [codegen id : 10] -Input [9]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#33] -Keys [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#35] -Results [14]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, count(1)#35 AS cnt1#36, cd_purchase_estimate#26, count(1)#35 AS cnt2#37, cd_credit_rating#27, count(1)#35 AS cnt3#38, cd_dep_count#28, count(1)#35 AS cnt4#39, cd_dep_employed_count#29, count(1)#35 AS cnt5#40, cd_dep_college_count#30, count(1)#35 AS cnt6#41] - -(50) TakeOrderedAndProject -Input [14]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cnt1#36, cd_purchase_estimate#26, cnt2#37, cd_credit_rating#27, cnt3#38, cd_dep_count#28, cnt4#39, cd_dep_employed_count#29, cnt5#40, cd_dep_college_count#30, cnt6#41] -Arguments: 100, [cd_gender#23 ASC NULLS FIRST, cd_marital_status#24 ASC NULLS FIRST, cd_education_status#25 ASC NULLS FIRST, cd_purchase_estimate#26 ASC NULLS FIRST, cd_credit_rating#27 ASC NULLS FIRST, cd_dep_count#28 ASC NULLS FIRST, cd_dep_employed_count#29 ASC NULLS FIRST, cd_dep_college_count#30 ASC NULLS FIRST], [cd_gender#23, cd_marital_status#24, cd_education_status#25, cnt1#36, cd_purchase_estimate#26, cnt2#37, cd_credit_rating#27, cnt3#38, cd_dep_count#28, cnt4#39, cd_dep_employed_count#29, cnt5#40, cd_dep_college_count#30, cnt6#41] - +TakeOrderedAndProject(limit=100, orderBy=[cd_gender#1 ASC NULLS FIRST,cd_marital_status#2 ASC NULLS FIRST,cd_education_status#3 ASC NULLS FIRST,cd_purchase_estimate#4 ASC NULLS FIRST,cd_credit_rating#5 ASC NULLS FIRST,cd_dep_count#6 ASC NULLS FIRST,cd_dep_employed_count#7 ASC NULLS FIRST,cd_dep_college_count#8 ASC NULLS FIRST], output=[cd_gender#1,cd_marital_status#2,cd_education_status#3,cnt1#9,cd_purchase_estimate#4,cnt2#10,cd_credit_rating#5,cnt3#11,cd_dep_count#6,cnt4#12,cd_dep_employed_count#7,cnt5#13,cd_dep_college_count#8,cnt6#14]) ++- *(10) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8], functions=[count(1)]) + +- Exchange hashpartitioning(cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8, 5) + +- *(9) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8], functions=[partial_count(1)]) + +- *(9) Project [cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8] + +- *(9) BroadcastHashJoin [c_current_cdemo_sk#15], [cd_demo_sk#16], Inner, BuildRight + :- *(9) Project [c_current_cdemo_sk#15] + : +- *(9) BroadcastHashJoin [c_current_addr_sk#17], [ca_address_sk#18], Inner, BuildRight + : :- *(9) Project [c_current_cdemo_sk#15, c_current_addr_sk#17] + : : +- *(9) Filter (exists#19 || exists#20) + : : +- *(9) BroadcastHashJoin [c_customer_sk#21], [cs_ship_customer_sk#22], ExistenceJoin(exists#20), BuildRight + : : :- *(9) BroadcastHashJoin [c_customer_sk#21], [ws_bill_customer_sk#23], ExistenceJoin(exists#19), BuildRight + : : : :- *(9) BroadcastHashJoin [c_customer_sk#21], [ss_customer_sk#24], LeftSemi, BuildRight + : : : : :- *(9) Project [c_customer_sk#21, c_current_cdemo_sk#15, c_current_addr_sk#17] + : : : : : +- *(9) Filter (isnotnull(c_current_addr_sk#17) && isnotnull(c_current_cdemo_sk#15)) + : : : : : +- *(9) FileScan parquet default.customer[c_customer_sk#21,c_current_cdemo_sk#15,c_current_addr_sk#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [ss_customer_sk#24] + : : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#25], [d_date_sk#26], Inner, BuildRight + : : : : :- *(2) Project [ss_sold_date_sk#25, ss_customer_sk#24] + : : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#25) + : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#25,ss_customer_sk#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [d_date_sk#26] + : : : : +- *(1) Filter (((((isnotnull(d_year#27) && isnotnull(d_moy#28)) && (d_year#27 = 2002)) && (d_moy#28 >= 1)) && (d_moy#28 <= 4)) && isnotnull(d_date_sk#26)) + : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#26,d_year#27,d_moy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,1), LessThan..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [ws_bill_customer_sk#23] + : : : +- *(4) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#26], Inner, BuildRight + : : : :- *(4) Project [ws_sold_date_sk#29, ws_bill_customer_sk#23] + : : : : +- *(4) Filter isnotnull(ws_sold_date_sk#29) + : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#29,ws_bill_customer_sk#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [cs_ship_customer_sk#22] + : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#30], [d_date_sk#26], Inner, BuildRight + : : :- *(6) Project [cs_sold_date_sk#30, cs_ship_customer_sk#22] + : : : +- *(6) Filter isnotnull(cs_sold_date_sk#30) + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#30,cs_ship_customer_sk#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [ca_address_sk#18] + : +- *(7) Filter (ca_county#31 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) && isnotnull(ca_address_sk#18)) + : +- *(7) FileScan parquet default.customer_address[ca_address_sk#18,ca_county#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [In(ca_county, [Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County]), IsNo..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [cd_demo_sk#16, cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8] + +- *(8) Filter isnotnull(cd_demo_sk#16) + +- *(8) FileScan parquet default.customer_demographics[cd_demo_sk#16,cd_gender#1,cd_marital_status#2,cd_education_status#3,cd_purchase_estimate#4,cd_credit_rating#5,cd_dep_count#6,cd_dep_employed_count#7,cd_dep_college_count#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] - -(3) Filter [codegen id : 3] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) - -(4) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] - -(6) Filter [codegen id : 1] -Input [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] -Condition : (isnotnull(ss_customer_sk#10) AND isnotnull(ss_sold_date_sk#9)) - -(7) BroadcastExchange -Input [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#13] - -(8) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ss_customer_sk#10] -Join condition: None - -(9) Project [codegen id : 3] -Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_list_price#12] -Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] - -(10) Scan parquet default.date_dim -Output [2]: [d_date_sk#14, d_year#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#14, d_year#15] - -(12) Filter [codegen id : 2] -Input [2]: [d_date_sk#14, d_year#15] -Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2001)) AND isnotnull(d_date_sk#14)) - -(13) BroadcastExchange -Input [2]: [d_date_sk#14, d_year#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] - -(14) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#9] -Right keys [1]: [d_date_sk#14] -Join condition: None - -(15) Project [codegen id : 3] -Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_list_price#12, d_year#15] -Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_list_price#12, d_date_sk#14, d_year#15] - -(16) HashAggregate [codegen id : 3] -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_list_price#12, d_year#15] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum#17] -Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] - -(17) Exchange -Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] -Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), true, [id=#19] - -(18) HashAggregate [codegen id : 16] -Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))#20] -Results [2]: [c_customer_id#2 AS customer_id#21, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))#20,18,2) AS year_total#22] - -(19) Filter [codegen id : 16] -Input [2]: [customer_id#21, year_total#22] -Condition : (isnotnull(year_total#22) AND (year_total#22 > 0.00)) - -(20) Scan parquet default.customer -Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] -ReadSchema: struct - -(21) ColumnarToRow [codegen id : 6] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] - -(22) Filter [codegen id : 6] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) - -(23) ReusedExchange [Reuses operator id: 7] -Output [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] - -(24) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ss_customer_sk#10] -Join condition: None - -(25) Project [codegen id : 6] -Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_list_price#12] -Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] - -(26) Scan parquet default.date_dim -Output [2]: [d_date_sk#14, d_year#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] -ReadSchema: struct - -(27) ColumnarToRow [codegen id : 5] -Input [2]: [d_date_sk#14, d_year#15] - -(28) Filter [codegen id : 5] -Input [2]: [d_date_sk#14, d_year#15] -Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2002)) AND isnotnull(d_date_sk#14)) - -(29) BroadcastExchange -Input [2]: [d_date_sk#14, d_year#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] - -(30) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#9] -Right keys [1]: [d_date_sk#14] -Join condition: None - -(31) Project [codegen id : 6] -Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_list_price#12, d_year#15] -Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_list_price#12, d_date_sk#14, d_year#15] - -(32) HashAggregate [codegen id : 6] -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_list_price#12, d_year#15] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum#24] -Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#25] - -(33) Exchange -Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#25] -Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), true, [id=#26] - -(34) HashAggregate [codegen id : 7] -Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#25] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))#27] -Results [3]: [c_customer_id#2 AS customer_id#28, c_preferred_cust_flag#5 AS customer_preferred_cust_flag#29, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))#27,18,2) AS year_total#30] - -(35) BroadcastExchange -Input [3]: [customer_id#28, customer_preferred_cust_flag#29, year_total#30] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#31] - -(36) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [customer_id#21] -Right keys [1]: [customer_id#28] -Join condition: None - -(37) Project [codegen id : 16] -Output [4]: [customer_id#21, year_total#22, customer_preferred_cust_flag#29, year_total#30] -Input [5]: [customer_id#21, year_total#22, customer_id#28, customer_preferred_cust_flag#29, year_total#30] - -(38) Scan parquet default.customer -Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] -ReadSchema: struct - -(39) ColumnarToRow [codegen id : 10] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] - -(40) Filter [codegen id : 10] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) - -(41) Scan parquet default.web_sales -Output [4]: [ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(42) ColumnarToRow [codegen id : 8] -Input [4]: [ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] - -(43) Filter [codegen id : 8] -Input [4]: [ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] -Condition : (isnotnull(ws_bill_customer_sk#33) AND isnotnull(ws_sold_date_sk#32)) - -(44) BroadcastExchange -Input [4]: [ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#36] - -(45) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ws_bill_customer_sk#33] -Join condition: None - -(46) Project [codegen id : 10] -Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#32, ws_ext_discount_amt#34, ws_ext_list_price#35] -Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] - -(47) ReusedExchange [Reuses operator id: 13] -Output [2]: [d_date_sk#14, d_year#15] - -(48) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ws_sold_date_sk#32] -Right keys [1]: [d_date_sk#14] -Join condition: None - -(49) Project [codegen id : 10] -Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#34, ws_ext_list_price#35, d_year#15] -Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#32, ws_ext_discount_amt#34, ws_ext_list_price#35, d_date_sk#14, d_year#15] - -(50) HashAggregate [codegen id : 10] -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#34, ws_ext_list_price#35, d_year#15] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum#37] -Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#38] - -(51) Exchange -Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#38] -Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, 5), true, [id=#39] - -(52) HashAggregate [codegen id : 11] -Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#38] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))#40] -Results [2]: [c_customer_id#2 AS customer_id#41, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))#40,18,2) AS year_total#42] - -(53) Filter [codegen id : 11] -Input [2]: [customer_id#41, year_total#42] -Condition : (isnotnull(year_total#42) AND (year_total#42 > 0.00)) - -(54) Project [codegen id : 11] -Output [2]: [customer_id#41 AS customer_id#43, year_total#42 AS year_total#44] -Input [2]: [customer_id#41, year_total#42] - -(55) BroadcastExchange -Input [2]: [customer_id#43, year_total#44] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#45] - -(56) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [customer_id#21] -Right keys [1]: [customer_id#43] -Join condition: None - -(57) Project [codegen id : 16] -Output [5]: [customer_id#21, year_total#22, customer_preferred_cust_flag#29, year_total#30, year_total#44] -Input [6]: [customer_id#21, year_total#22, customer_preferred_cust_flag#29, year_total#30, customer_id#43, year_total#44] - -(58) Scan parquet default.customer -Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] -ReadSchema: struct - -(59) ColumnarToRow [codegen id : 14] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] - -(60) Filter [codegen id : 14] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) - -(61) ReusedExchange [Reuses operator id: 44] -Output [4]: [ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] - -(62) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ws_bill_customer_sk#33] -Join condition: None - -(63) Project [codegen id : 14] -Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#32, ws_ext_discount_amt#34, ws_ext_list_price#35] -Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] - -(64) ReusedExchange [Reuses operator id: 29] -Output [2]: [d_date_sk#14, d_year#15] - -(65) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_sold_date_sk#32] -Right keys [1]: [d_date_sk#14] -Join condition: None - -(66) Project [codegen id : 14] -Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#34, ws_ext_list_price#35, d_year#15] -Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#32, ws_ext_discount_amt#34, ws_ext_list_price#35, d_date_sk#14, d_year#15] - -(67) HashAggregate [codegen id : 14] -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#34, ws_ext_list_price#35, d_year#15] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum#46] -Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#47] - -(68) Exchange -Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#47] -Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, 5), true, [id=#48] - -(69) HashAggregate [codegen id : 15] -Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#47] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))#49] -Results [2]: [c_customer_id#2 AS customer_id#50, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))#49,18,2) AS year_total#51] - -(70) BroadcastExchange -Input [2]: [customer_id#50, year_total#51] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#52] - -(71) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [customer_id#21] -Right keys [1]: [customer_id#50] -Join condition: (CASE WHEN (year_total#44 > 0.00) THEN CheckOverflow((promote_precision(year_total#51) / promote_precision(year_total#44)), DecimalType(38,20), true) ELSE null END > CASE WHEN (year_total#22 > 0.00) THEN CheckOverflow((promote_precision(year_total#30) / promote_precision(year_total#22)), DecimalType(38,20), true) ELSE null END) - -(72) Project [codegen id : 16] -Output [1]: [customer_preferred_cust_flag#29] -Input [7]: [customer_id#21, year_total#22, customer_preferred_cust_flag#29, year_total#30, year_total#44, customer_id#50, year_total#51] - -(73) TakeOrderedAndProject -Input [1]: [customer_preferred_cust_flag#29] -Arguments: 100, [customer_preferred_cust_flag#29 ASC NULLS FIRST], [customer_preferred_cust_flag#29] - +TakeOrderedAndProject(limit=100, orderBy=[customer_preferred_cust_flag#1 ASC NULLS FIRST], output=[customer_preferred_cust_flag#1]) ++- *(17) Project [customer_preferred_cust_flag#1] + +- *(17) BroadcastHashJoin [customer_id#2], [customer_id#3], Inner, BuildRight, (CASE WHEN (year_total#4 > 0.00) THEN CheckOverflow((promote_precision(year_total#5) / promote_precision(year_total#4)), DecimalType(38,20)) ELSE null END > CASE WHEN (year_total#6 > 0.00) THEN CheckOverflow((promote_precision(year_total#7) / promote_precision(year_total#6)), DecimalType(38,20)) ELSE null END) + :- *(17) Project [customer_id#2, year_total#6, customer_preferred_cust_flag#1, year_total#7, year_total#4] + : +- *(17) BroadcastHashJoin [customer_id#2], [customer_id#8], Inner, BuildRight + : :- *(17) Project [customer_id#2, year_total#6, customer_preferred_cust_flag#1, year_total#7] + : : +- *(17) BroadcastHashJoin [customer_id#2], [customer_id#9], Inner, BuildRight + : : :- Union + : : : :- *(4) Filter (isnotnull(year_total#6) && (year_total#6 > 0.00)) + : : : : +- *(4) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) + : : : : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, 5) + : : : : +- *(3) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) + : : : : +- *(3) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_ext_discount_amt#19, ss_ext_list_price#18, d_year#13] + : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight + : : : : :- *(3) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_sold_date_sk#20, ss_ext_discount_amt#19, ss_ext_list_price#18] + : : : : : +- *(3) BroadcastHashJoin [c_customer_sk#22], [ss_customer_sk#23], Inner, BuildRight + : : : : : :- *(3) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] + : : : : : : +- *(3) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) + : : : : : : +- *(3) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : +- LocalTableScan , [customer_id#24, year_total#25] + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : +- Union + : : :- *(8) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) + : : : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, 5) + : : : +- *(7) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) + : : : +- *(7) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_ext_discount_amt#19, ss_ext_list_price#18, d_year#13] + : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight + : : : :- *(7) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_sold_date_sk#20, ss_ext_discount_amt#19, ss_ext_list_price#18] + : : : : +- *(7) BroadcastHashJoin [c_customer_sk#22], [ss_customer_sk#23], Inner, BuildRight + : : : : :- *(7) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] + : : : : : +- *(7) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) + : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : +- LocalTableScan , [customer_id#24, customer_preferred_cust_flag#26, year_total#25] + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- Union + : :- LocalTableScan , [customer_id#8, year_total#4] + : +- *(12) Filter (isnotnull(year_total#25) && (year_total#25 > 0.00)) + : +- *(12) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) + : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13, 5) + : +- *(11) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) + : +- *(11) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_ext_discount_amt#28, ws_ext_list_price#27, d_year#13] + : +- *(11) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#21], Inner, BuildRight + : :- *(11) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_sold_date_sk#29, ws_ext_discount_amt#28, ws_ext_list_price#27] + : : +- *(11) BroadcastHashJoin [c_customer_sk#22], [ws_bill_customer_sk#30], Inner, BuildRight + : : :- *(11) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] + : : : +- *(11) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) + : : : +- *(11) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#3, year_total#5] + +- *(16) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) + +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13, 5) + +- *(15) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) + +- *(15) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_ext_discount_amt#28, ws_ext_list_price#27, d_year#13] + +- *(15) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#21], Inner, BuildRight + :- *(15) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_sold_date_sk#29, ws_ext_discount_amt#28, ws_ext_list_price#27] + : +- *(15) BroadcastHashJoin [c_customer_sk#22], [ws_bill_customer_sk#30], Inner, BuildRight + : :- *(15) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] + : : +- *(15) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) + : : +- *(15) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3] - -(3) Filter [codegen id : 3] -Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3] -Condition : (isnotnull(ws_item_sk#2) AND isnotnull(ws_sold_date_sk#1)) - -(4) Scan parquet default.item -Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] - -(6) Filter [codegen id : 1] -Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] -Condition : (i_category#9 IN (Sports,Books,Home) AND isnotnull(i_item_sk#4)) - -(7) BroadcastExchange -Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] - -(8) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ws_item_sk#2] -Right keys [1]: [i_item_sk#4] -Join condition: None - -(9) Project [codegen id : 3] -Output [7]: [ws_sold_date_sk#1, ws_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] -Input [9]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] - -(10) Scan parquet default.date_dim -Output [2]: [d_date_sk#11, d_date#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#11, d_date#12] - -(12) Filter [codegen id : 2] -Input [2]: [d_date_sk#11, d_date#12] -Condition : (((isnotnull(d_date#12) AND (d_date#12 >= 10644)) AND (d_date#12 <= 10674)) AND isnotnull(d_date_sk#11)) - -(13) Project [codegen id : 2] -Output [1]: [d_date_sk#11] -Input [2]: [d_date_sk#11, d_date#12] - -(14) BroadcastExchange -Input [1]: [d_date_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] - -(15) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ws_sold_date_sk#1] -Right keys [1]: [d_date_sk#11] -Join condition: None - -(16) Project [codegen id : 3] -Output [6]: [ws_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] -Input [8]: [ws_sold_date_sk#1, ws_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#11] - -(17) HashAggregate [codegen id : 3] -Input [6]: [ws_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] -Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#3))] -Aggregate Attributes [1]: [sum#14] -Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] - -(18) Exchange -Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] -Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), true, [id=#16] - -(19) HashAggregate [codegen id : 4] -Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] -Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#3))#17] -Results [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#3))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#3))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#3))#17,17,2) AS _w1#20, i_item_id#5] - -(20) Exchange -Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] -Arguments: hashpartitioning(i_class#8, 5), true, [id=#21] - -(21) Sort [codegen id : 5] -Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] -Arguments: [i_class#8 ASC NULLS FIRST], false, 0 - -(22) Window -Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] -Arguments: [sum(_w1#20) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#8] - -(23) Project [codegen id : 6] -Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23, i_item_id#5] -Input [9]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5, _we0#22] - -(24) TakeOrderedAndProject -Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23, i_item_id#5] -Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23] - +TakeOrderedAndProject(limit=100, orderBy=[i_category#1 ASC NULLS FIRST,i_class#2 ASC NULLS FIRST,i_item_id#3 ASC NULLS FIRST,i_item_desc#4 ASC NULLS FIRST,revenueratio#5 ASC NULLS FIRST], output=[i_item_desc#4,i_category#1,i_class#2,i_current_price#6,itemrevenue#7,revenueratio#5]) ++- *(6) Project [i_item_desc#4, i_category#1, i_class#2, i_current_price#6, itemrevenue#7, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#8) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#9)), DecimalType(38,17)) AS revenueratio#5, i_item_id#3] + +- Window [sum(_w1#10) windowspecdefinition(i_class#2, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#9], [i_class#2] + +- *(5) Sort [i_class#2 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(i_class#2, 5) + +- *(4) HashAggregate(keys=[i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6], functions=[sum(UnscaledValue(ws_ext_sales_price#11))]) + +- Exchange hashpartitioning(i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6, 5) + +- *(3) HashAggregate(keys=[i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#11))]) + +- *(3) Project [ws_ext_sales_price#11, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] + +- *(3) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + :- *(3) Project [ws_sold_date_sk#12, ws_ext_sales_price#11, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] + : +- *(3) BroadcastHashJoin [ws_item_sk#14], [i_item_sk#15], Inner, BuildRight + : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#14, ws_ext_sales_price#11] + : : +- *(3) Filter (isnotnull(ws_item_sk#14) && isnotnull(ws_sold_date_sk#12)) + : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#14,ws_ext_sales_price#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [i_item_sk#15, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] + : +- *(1) Filter (i_category#1 IN (Sports,Books,Home) && isnotnull(i_item_sk#15)) + : +- *(1) FileScan parquet default.item[i_item_sk#15,i_item_id#3,i_item_desc#4,i_current_price#6,i_class#2,i_category#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)], ReadSchema: struct= 10644)) && (d_date#16 <= 10674)) && isnotnull(d_date_sk#13)) + +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/simplified.txt index f67bfe65f..b815e3d91 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/simplified.txt @@ -1,38 +1,34 @@ -TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] - WholeStageCodegen (6) - Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] +TakeOrderedAndProject [i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue,revenueratio] + WholeStageCodegen + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] InputAdapter Window [_w1,i_class] - WholeStageCodegen (5) + WholeStageCodegen Sort [i_class] InputAdapter Exchange [i_class] #1 - WholeStageCodegen (4) - HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(ws_ext_sales_price)),itemrevenue,_w0,_w1,sum] + WholeStageCodegen + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(ws_ext_sales_price))] InputAdapter - Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 - WholeStageCodegen (3) - HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,ws_ext_sales_price] [sum,sum] - Project [ws_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Filter [ws_item_sk,ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #2 + WholeStageCodegen + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum,ws_ext_sales_price] [sum,sum] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ws_ext_sales_price,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Filter [i_category,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + WholeStageCodegen + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] + Filter [i_category,i_item_sk] + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/explain.txt index d0c4e0e49..1e61c9ee5 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/explain.txt @@ -1,216 +1,37 @@ == Physical Plan == -* HashAggregate (38) -+- Exchange (37) - +- * HashAggregate (36) - +- * Project (35) - +- * BroadcastHashJoin Inner BuildRight (34) - :- * Project (29) - : +- * BroadcastHashJoin Inner BuildRight (28) - : :- * Project (23) - : : +- * BroadcastHashJoin Inner BuildRight (22) - : : :- * Project (16) - : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : :- * Project (9) - : : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- BroadcastExchange (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.store (4) - : : : +- BroadcastExchange (14) - : : : +- * Project (13) - : : : +- * Filter (12) - : : : +- * ColumnarToRow (11) - : : : +- Scan parquet default.customer_address (10) - : : +- BroadcastExchange (21) - : : +- * Project (20) - : : +- * Filter (19) - : : +- * ColumnarToRow (18) - : : +- Scan parquet default.date_dim (17) - : +- BroadcastExchange (27) - : +- * Filter (26) - : +- * ColumnarToRow (25) - : +- Scan parquet default.customer_demographics (24) - +- BroadcastExchange (33) - +- * Filter (32) - +- * ColumnarToRow (31) - +- Scan parquet default.household_demographics (30) - - -(1) Scan parquet default.store_sales -Output [10]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_hdemo_sk), Or(Or(And(GreaterThanOrEqual(ss_net_profit,100.00),LessThanOrEqual(ss_net_profit,200.00)),And(GreaterThanOrEqual(ss_net_profit,150.00),LessThanOrEqual(ss_net_profit,300.00))),And(GreaterThanOrEqual(ss_net_profit,50.00),LessThanOrEqual(ss_net_profit,250.00))), Or(Or(And(GreaterThanOrEqual(ss_sales_price,100.00),LessThanOrEqual(ss_sales_price,150.00)),And(GreaterThanOrEqual(ss_sales_price,50.00),LessThanOrEqual(ss_sales_price,100.00))),And(GreaterThanOrEqual(ss_sales_price,150.00),LessThanOrEqual(ss_sales_price,200.00)))] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 6] -Input [10]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] - -(3) Filter [codegen id : 6] -Input [10]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] -Condition : ((((((isnotnull(ss_store_sk#5) AND isnotnull(ss_addr_sk#4)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_cdemo_sk#2)) AND isnotnull(ss_hdemo_sk#3)) AND ((((ss_net_profit#10 >= 100.00) AND (ss_net_profit#10 <= 200.00)) OR ((ss_net_profit#10 >= 150.00) AND (ss_net_profit#10 <= 300.00))) OR ((ss_net_profit#10 >= 50.00) AND (ss_net_profit#10 <= 250.00)))) AND ((((ss_sales_price#7 >= 100.00) AND (ss_sales_price#7 <= 150.00)) OR ((ss_sales_price#7 >= 50.00) AND (ss_sales_price#7 <= 100.00))) OR ((ss_sales_price#7 >= 150.00) AND (ss_sales_price#7 <= 200.00)))) - -(4) Scan parquet default.store -Output [1]: [s_store_sk#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [1]: [s_store_sk#11] - -(6) Filter [codegen id : 1] -Input [1]: [s_store_sk#11] -Condition : isnotnull(s_store_sk#11) - -(7) BroadcastExchange -Input [1]: [s_store_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] - -(8) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_store_sk#5] -Right keys [1]: [s_store_sk#11] -Join condition: None - -(9) Project [codegen id : 6] -Output [9]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] -Input [11]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, s_store_sk#11] - -(10) Scan parquet default.customer_address -Output [3]: [ca_address_sk#13, ca_state#14, ca_country#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [TX,OH]),In(ca_state, [OR,NM,KY])),In(ca_state, [VA,TX,MS]))] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [3]: [ca_address_sk#13, ca_state#14, ca_country#15] - -(12) Filter [codegen id : 2] -Input [3]: [ca_address_sk#13, ca_state#14, ca_country#15] -Condition : (((isnotnull(ca_country#15) AND (ca_country#15 = United States)) AND isnotnull(ca_address_sk#13)) AND ((ca_state#14 IN (TX,OH) OR ca_state#14 IN (OR,NM,KY)) OR ca_state#14 IN (VA,TX,MS))) - -(13) Project [codegen id : 2] -Output [2]: [ca_address_sk#13, ca_state#14] -Input [3]: [ca_address_sk#13, ca_state#14, ca_country#15] - -(14) BroadcastExchange -Input [2]: [ca_address_sk#13, ca_state#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] - -(15) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_addr_sk#4] -Right keys [1]: [ca_address_sk#13] -Join condition: ((((ca_state#14 IN (TX,OH) AND (ss_net_profit#10 >= 100.00)) AND (ss_net_profit#10 <= 200.00)) OR ((ca_state#14 IN (OR,NM,KY) AND (ss_net_profit#10 >= 150.00)) AND (ss_net_profit#10 <= 300.00))) OR ((ca_state#14 IN (VA,TX,MS) AND (ss_net_profit#10 >= 50.00)) AND (ss_net_profit#10 <= 250.00))) - -(16) Project [codegen id : 6] -Output [7]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] -Input [11]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, ca_address_sk#13, ca_state#14] - -(17) Scan parquet default.date_dim -Output [2]: [d_date_sk#17, d_year#18] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct - -(18) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#17, d_year#18] - -(19) Filter [codegen id : 3] -Input [2]: [d_date_sk#17, d_year#18] -Condition : ((isnotnull(d_year#18) AND (d_year#18 = 2001)) AND isnotnull(d_date_sk#17)) - -(20) Project [codegen id : 3] -Output [1]: [d_date_sk#17] -Input [2]: [d_date_sk#17, d_year#18] - -(21) BroadcastExchange -Input [1]: [d_date_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] - -(22) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#17] -Join condition: None - -(23) Project [codegen id : 6] -Output [6]: [ss_cdemo_sk#2, ss_hdemo_sk#3, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] -Input [8]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, d_date_sk#17] - -(24) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree)),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree)))] -ReadSchema: struct - -(25) ColumnarToRow [codegen id : 4] -Input [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] - -(26) Filter [codegen id : 4] -Input [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] -Condition : (isnotnull(cd_demo_sk#20) AND ((((cd_marital_status#21 = M) AND (cd_education_status#22 = Advanced Degree)) OR ((cd_marital_status#21 = S) AND (cd_education_status#22 = College))) OR ((cd_marital_status#21 = W) AND (cd_education_status#22 = 2 yr Degree)))) - -(27) BroadcastExchange -Input [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] - -(28) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#20] -Join condition: ((((((cd_marital_status#21 = M) AND (cd_education_status#22 = Advanced Degree)) AND (ss_sales_price#7 >= 100.00)) AND (ss_sales_price#7 <= 150.00)) OR ((((cd_marital_status#21 = S) AND (cd_education_status#22 = College)) AND (ss_sales_price#7 >= 50.00)) AND (ss_sales_price#7 <= 100.00))) OR ((((cd_marital_status#21 = W) AND (cd_education_status#22 = 2 yr Degree)) AND (ss_sales_price#7 >= 150.00)) AND (ss_sales_price#7 <= 200.00))) - -(29) Project [codegen id : 6] -Output [7]: [ss_hdemo_sk#3, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, cd_marital_status#21, cd_education_status#22] -Input [9]: [ss_cdemo_sk#2, ss_hdemo_sk#3, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] - -(30) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#24, hd_dep_count#25] -Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_demo_sk), Or(Or(EqualTo(hd_dep_count,3),EqualTo(hd_dep_count,1)),EqualTo(hd_dep_count,1))] -ReadSchema: struct - -(31) ColumnarToRow [codegen id : 5] -Input [2]: [hd_demo_sk#24, hd_dep_count#25] - -(32) Filter [codegen id : 5] -Input [2]: [hd_demo_sk#24, hd_dep_count#25] -Condition : (isnotnull(hd_demo_sk#24) AND (((hd_dep_count#25 = 3) OR (hd_dep_count#25 = 1)) OR (hd_dep_count#25 = 1))) - -(33) BroadcastExchange -Input [2]: [hd_demo_sk#24, hd_dep_count#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] - -(34) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_hdemo_sk#3] -Right keys [1]: [hd_demo_sk#24] -Join condition: (((((((cd_marital_status#21 = M) AND (cd_education_status#22 = Advanced Degree)) AND (ss_sales_price#7 >= 100.00)) AND (ss_sales_price#7 <= 150.00)) AND (hd_dep_count#25 = 3)) OR (((((cd_marital_status#21 = S) AND (cd_education_status#22 = College)) AND (ss_sales_price#7 >= 50.00)) AND (ss_sales_price#7 <= 100.00)) AND (hd_dep_count#25 = 1))) OR (((((cd_marital_status#21 = W) AND (cd_education_status#22 = 2 yr Degree)) AND (ss_sales_price#7 >= 150.00)) AND (ss_sales_price#7 <= 200.00)) AND (hd_dep_count#25 = 1))) - -(35) Project [codegen id : 6] -Output [3]: [ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] -Input [9]: [ss_hdemo_sk#3, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, cd_marital_status#21, cd_education_status#22, hd_demo_sk#24, hd_dep_count#25] - -(36) HashAggregate [codegen id : 6] -Input [3]: [ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] -Keys: [] -Functions [4]: [partial_avg(cast(ss_quantity#6 as bigint)), partial_avg(UnscaledValue(ss_ext_sales_price#8)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#9)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#9))] -Aggregate Attributes [7]: [sum#27, count#28, sum#29, count#30, sum#31, count#32, sum#33] -Results [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] - -(37) Exchange -Input [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] -Arguments: SinglePartition, true, [id=#41] - -(38) HashAggregate [codegen id : 7] -Input [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] -Keys: [] -Functions [4]: [avg(cast(ss_quantity#6 as bigint)), avg(UnscaledValue(ss_ext_sales_price#8)), avg(UnscaledValue(ss_ext_wholesale_cost#9)), sum(UnscaledValue(ss_ext_wholesale_cost#9))] -Aggregate Attributes [4]: [avg(cast(ss_quantity#6 as bigint))#42, avg(UnscaledValue(ss_ext_sales_price#8))#43, avg(UnscaledValue(ss_ext_wholesale_cost#9))#44, sum(UnscaledValue(ss_ext_wholesale_cost#9))#45] -Results [4]: [avg(cast(ss_quantity#6 as bigint))#42 AS avg(ss_quantity)#46, cast((avg(UnscaledValue(ss_ext_sales_price#8))#43 / 100.0) as decimal(11,6)) AS avg(ss_ext_sales_price)#47, cast((avg(UnscaledValue(ss_ext_wholesale_cost#9))#44 / 100.0) as decimal(11,6)) AS avg(ss_ext_wholesale_cost)#48, MakeDecimal(sum(UnscaledValue(ss_ext_wholesale_cost#9))#45,17,2) AS sum(ss_ext_wholesale_cost)#49] - +*(7) HashAggregate(keys=[], functions=[avg(cast(ss_quantity#1 as bigint)), avg(UnscaledValue(ss_ext_sales_price#2)), avg(UnscaledValue(ss_ext_wholesale_cost#3)), sum(UnscaledValue(ss_ext_wholesale_cost#3))]) ++- Exchange SinglePartition + +- *(6) HashAggregate(keys=[], functions=[partial_avg(cast(ss_quantity#1 as bigint)), partial_avg(UnscaledValue(ss_ext_sales_price#2)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#3)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#3))]) + +- *(6) Project [ss_quantity#1, ss_ext_sales_price#2, ss_ext_wholesale_cost#3] + +- *(6) BroadcastHashJoin [ss_hdemo_sk#4], [hd_demo_sk#5], Inner, BuildRight, (((((((cd_marital_status#6 = M) && (cd_education_status#7 = Advanced Degree)) && (ss_sales_price#8 >= 100.00)) && (ss_sales_price#8 <= 150.00)) && (hd_dep_count#9 = 3)) || (((((cd_marital_status#6 = S) && (cd_education_status#7 = College)) && (ss_sales_price#8 >= 50.00)) && (ss_sales_price#8 <= 100.00)) && (hd_dep_count#9 = 1))) || (((((cd_marital_status#6 = W) && (cd_education_status#7 = 2 yr Degree)) && (ss_sales_price#8 >= 150.00)) && (ss_sales_price#8 <= 200.00)) && (hd_dep_count#9 = 1))) + :- *(6) Project [ss_hdemo_sk#4, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3, cd_marital_status#6, cd_education_status#7] + : +- *(6) BroadcastHashJoin [ss_cdemo_sk#10], [cd_demo_sk#11], Inner, BuildRight + : :- *(6) Project [ss_cdemo_sk#10, ss_hdemo_sk#4, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3] + : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + : : :- *(6) Project [ss_sold_date_sk#12, ss_cdemo_sk#10, ss_hdemo_sk#4, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3] + : : : +- *(6) BroadcastHashJoin [ss_addr_sk#14], [ca_address_sk#15], Inner, BuildRight, ((((ca_state#16 IN (TX,OH) && (ss_net_profit#17 >= 100.00)) && (ss_net_profit#17 <= 200.00)) || ((ca_state#16 IN (OR,NM,KY) && (ss_net_profit#17 >= 150.00)) && (ss_net_profit#17 <= 300.00))) || ((ca_state#16 IN (VA,TX,MS) && (ss_net_profit#17 >= 50.00)) && (ss_net_profit#17 <= 250.00))) + : : : :- *(6) Project [ss_sold_date_sk#12, ss_cdemo_sk#10, ss_hdemo_sk#4, ss_addr_sk#14, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3, ss_net_profit#17] + : : : : +- *(6) BroadcastHashJoin [ss_store_sk#18], [s_store_sk#19], Inner, BuildRight + : : : : :- *(6) Project [ss_sold_date_sk#12, ss_cdemo_sk#10, ss_hdemo_sk#4, ss_addr_sk#14, ss_store_sk#18, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3, ss_net_profit#17] + : : : : : +- *(6) Filter ((((isnotnull(ss_store_sk#18) && isnotnull(ss_addr_sk#14)) && isnotnull(ss_sold_date_sk#12)) && isnotnull(ss_cdemo_sk#10)) && isnotnull(ss_hdemo_sk#4)) + : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_cdemo_sk#10,ss_hdemo_sk#4,ss_addr_sk#14,ss_store_sk#18,ss_quantity#1,ss_sales_price#8,ss_ext_sales_price#2,ss_ext_wholesale_cost#3,ss_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_cdemo_sk..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [ca_address_sk#15, ca_state#16] + : : : +- *(2) Filter ((isnotnull(ca_country#20) && (ca_country#20 = United States)) && isnotnull(ca_address_sk#15)) + : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#16,ca_country#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [d_date_sk#13] + : : +- *(3) Filter ((isnotnull(d_year#21) && (d_year#21 = 2001)) && isnotnull(d_date_sk#13)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#13,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [cd_demo_sk#11, cd_marital_status#6, cd_education_status#7] + : +- *(4) Filter isnotnull(cd_demo_sk#11) + : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#11,cd_marital_status#6,cd_education_status#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [hd_demo_sk#5, hd_dep_count#9] + +- *(5) Filter isnotnull(hd_demo_sk#5) + +- *(5) FileScan parquet default.household_demographics[hd_demo_sk#5,hd_dep_count#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/simplified.txt index 6e3a322d8..cd146d06c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/simplified.txt @@ -1,57 +1,49 @@ -WholeStageCodegen (7) - HashAggregate [sum,count,sum,count,sum,count,sum] [avg(cast(ss_quantity as bigint)),avg(UnscaledValue(ss_ext_sales_price)),avg(UnscaledValue(ss_ext_wholesale_cost)),sum(UnscaledValue(ss_ext_wholesale_cost)),avg(ss_quantity),avg(ss_ext_sales_price),avg(ss_ext_wholesale_cost),sum(ss_ext_wholesale_cost),sum,count,sum,count,sum,count,sum] +WholeStageCodegen + HashAggregate [avg(UnscaledValue(ss_ext_sales_price)),avg(UnscaledValue(ss_ext_wholesale_cost)),avg(cast(ss_quantity as bigint)),count,count,count,sum,sum,sum,sum,sum(UnscaledValue(ss_ext_wholesale_cost))] [avg(UnscaledValue(ss_ext_sales_price)),avg(UnscaledValue(ss_ext_wholesale_cost)),avg(cast(ss_quantity as bigint)),avg(ss_ext_sales_price),avg(ss_ext_wholesale_cost),avg(ss_quantity),count,count,count,sum,sum,sum,sum,sum(UnscaledValue(ss_ext_wholesale_cost)),sum(ss_ext_wholesale_cost)] InputAdapter Exchange #1 - WholeStageCodegen (6) - HashAggregate [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] [sum,count,sum,count,sum,count,sum,sum,count,sum,count,sum,count,sum] - Project [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk,cd_marital_status,cd_education_status,ss_sales_price,hd_dep_count] - Project [ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,cd_marital_status,cd_education_status] - BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk,cd_marital_status,cd_education_status,ss_sales_price] - Project [ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost] - BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] - Project [ss_sold_date_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Filter [ss_store_sk,ss_addr_sk,ss_sold_date_sk,ss_cdemo_sk,ss_hdemo_sk,ss_net_profit,ss_sales_price] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit] + WholeStageCodegen + HashAggregate [count,count,count,count,count,count,ss_ext_sales_price,ss_ext_wholesale_cost,ss_quantity,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_ext_wholesale_cost,ss_quantity] + BroadcastHashJoin [cd_education_status,cd_marital_status,hd_demo_sk,hd_dep_count,ss_hdemo_sk,ss_sales_price] + Project [cd_education_status,cd_marital_status,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Project [ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [ca_address_sk,ca_state,ss_addr_sk,ss_net_profit] + Project [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_addr_sk,ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk] + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk] [s_store_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) + WholeStageCodegen Project [ca_address_sk,ca_state] - Filter [ca_country,ca_address_sk,ca_state] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] + Filter [ca_address_sk,ca_country] + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] [ca_address_sk,ca_country,ca_state] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) - Filter [cd_demo_sk,cd_marital_status,cd_education_status] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + WholeStageCodegen + Project [cd_demo_sk,cd_education_status,cd_marital_status] + Filter [cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] InputAdapter BroadcastExchange #6 - WholeStageCodegen (5) - Filter [hd_demo_sk,hd_dep_count] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] + WholeStageCodegen + Project [hd_demo_sk,hd_dep_count] + Filter [hd_demo_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] [hd_demo_sk,hd_dep_count] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/explain.txt index 3f0cc9e7a..0acde64df 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/explain.txt @@ -1,798 +1,192 @@ == Physical Plan == -TakeOrderedAndProject (115) -+- * HashAggregate (114) - +- Exchange (113) - +- * HashAggregate (112) - +- * Expand (111) - +- Union (110) - :- * Project (77) - : +- * Filter (76) - : +- * HashAggregate (75) - : +- Exchange (74) - : +- * HashAggregate (73) - : +- * Project (72) - : +- * BroadcastHashJoin Inner BuildRight (71) - : :- * Project (65) - : : +- * BroadcastHashJoin Inner BuildRight (64) - : : :- * BroadcastHashJoin LeftSemi BuildRight (57) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (56) - : : : +- * Project (55) - : : : +- * BroadcastHashJoin Inner BuildRight (54) - : : : :- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.item (4) - : : : +- BroadcastExchange (53) - : : : +- * HashAggregate (52) - : : : +- * HashAggregate (51) - : : : +- * HashAggregate (50) - : : : +- Exchange (49) - : : : +- * HashAggregate (48) - : : : +- * BroadcastHashJoin LeftSemi BuildRight (47) - : : : :- * BroadcastHashJoin LeftSemi BuildRight (36) - : : : : :- * Project (22) - : : : : : +- * BroadcastHashJoin Inner BuildRight (21) - : : : : : :- * Project (15) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : : : : :- * Filter (9) - : : : : : : : +- * ColumnarToRow (8) - : : : : : : : +- Scan parquet default.store_sales (7) - : : : : : : +- BroadcastExchange (13) - : : : : : : +- * Filter (12) - : : : : : : +- * ColumnarToRow (11) - : : : : : : +- Scan parquet default.item (10) - : : : : : +- BroadcastExchange (20) - : : : : : +- * Project (19) - : : : : : +- * Filter (18) - : : : : : +- * ColumnarToRow (17) - : : : : : +- Scan parquet default.date_dim (16) - : : : : +- BroadcastExchange (35) - : : : : +- * Project (34) - : : : : +- * BroadcastHashJoin Inner BuildRight (33) - : : : : :- * Project (31) - : : : : : +- * BroadcastHashJoin Inner BuildRight (30) - : : : : : :- * Filter (25) - : : : : : : +- * ColumnarToRow (24) - : : : : : : +- Scan parquet default.catalog_sales (23) - : : : : : +- BroadcastExchange (29) - : : : : : +- * Filter (28) - : : : : : +- * ColumnarToRow (27) - : : : : : +- Scan parquet default.item (26) - : : : : +- ReusedExchange (32) - : : : +- BroadcastExchange (46) - : : : +- * Project (45) - : : : +- * BroadcastHashJoin Inner BuildRight (44) - : : : :- * Project (42) - : : : : +- * BroadcastHashJoin Inner BuildRight (41) - : : : : :- * Filter (39) - : : : : : +- * ColumnarToRow (38) - : : : : : +- Scan parquet default.web_sales (37) - : : : : +- ReusedExchange (40) - : : : +- ReusedExchange (43) - : : +- BroadcastExchange (63) - : : +- * BroadcastHashJoin LeftSemi BuildRight (62) - : : :- * Filter (60) - : : : +- * ColumnarToRow (59) - : : : +- Scan parquet default.item (58) - : : +- ReusedExchange (61) - : +- BroadcastExchange (70) - : +- * Project (69) - : +- * Filter (68) - : +- * ColumnarToRow (67) - : +- Scan parquet default.date_dim (66) - :- * Project (93) - : +- * Filter (92) - : +- * HashAggregate (91) - : +- Exchange (90) - : +- * HashAggregate (89) - : +- * Project (88) - : +- * BroadcastHashJoin Inner BuildRight (87) - : :- * Project (85) - : : +- * BroadcastHashJoin Inner BuildRight (84) - : : :- * BroadcastHashJoin LeftSemi BuildRight (82) - : : : :- * Filter (80) - : : : : +- * ColumnarToRow (79) - : : : : +- Scan parquet default.catalog_sales (78) - : : : +- ReusedExchange (81) - : : +- ReusedExchange (83) - : +- ReusedExchange (86) - +- * Project (109) - +- * Filter (108) - +- * HashAggregate (107) - +- Exchange (106) - +- * HashAggregate (105) - +- * Project (104) - +- * BroadcastHashJoin Inner BuildRight (103) - :- * Project (101) - : +- * BroadcastHashJoin Inner BuildRight (100) - : :- * BroadcastHashJoin LeftSemi BuildRight (98) - : : :- * Filter (96) - : : : +- * ColumnarToRow (95) - : : : +- Scan parquet default.web_sales (94) - : : +- ReusedExchange (97) - : +- ReusedExchange (99) - +- ReusedExchange (102) - - -(1) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 25] -Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] - -(3) Filter [codegen id : 25] -Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] -Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) - -(4) Scan parquet default.item -Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 11] -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(6) Filter [codegen id : 11] -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) - -(7) Scan parquet default.store_sales -Output [2]: [ss_sold_date_sk#1, ss_item_sk#2] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(8) ColumnarToRow [codegen id : 9] -Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] - -(9) Filter [codegen id : 9] -Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] -Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) - -(10) Scan parquet default.item -Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 1] -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(12) Filter [codegen id : 1] -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Condition : (((isnotnull(i_item_sk#5) AND isnotnull(i_brand_id#6)) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) - -(13) BroadcastExchange -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] - -(14) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#5] -Join condition: None - -(15) Project [codegen id : 9] -Output [4]: [ss_sold_date_sk#1, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(16) Scan parquet default.date_dim -Output [2]: [d_date_sk#10, d_year#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#10, d_year#11] - -(18) Filter [codegen id : 2] -Input [2]: [d_date_sk#10, d_year#11] -Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) - -(19) Project [codegen id : 2] -Output [1]: [d_date_sk#10] -Input [2]: [d_date_sk#10, d_year#11] - -(20) BroadcastExchange -Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(21) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(22) Project [codegen id : 9] -Output [3]: [i_brand_id#6 AS brand_id#13, i_class_id#7 AS class_id#14, i_category_id#8 AS category_id#15] -Input [5]: [ss_sold_date_sk#1, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] - -(23) Scan parquet default.catalog_sales -Output [2]: [cs_sold_date_sk#16, cs_item_sk#17] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 5] -Input [2]: [cs_sold_date_sk#16, cs_item_sk#17] - -(25) Filter [codegen id : 5] -Input [2]: [cs_sold_date_sk#16, cs_item_sk#17] -Condition : (isnotnull(cs_item_sk#17) AND isnotnull(cs_sold_date_sk#16)) - -(26) Scan parquet default.item -Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(27) ColumnarToRow [codegen id : 3] -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(28) Filter [codegen id : 3] -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Condition : isnotnull(i_item_sk#5) - -(29) BroadcastExchange -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] - -(30) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_item_sk#17] -Right keys [1]: [i_item_sk#5] -Join condition: None - -(31) Project [codegen id : 5] -Output [4]: [cs_sold_date_sk#16, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [6]: [cs_sold_date_sk#16, cs_item_sk#17, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(32) ReusedExchange [Reuses operator id: 20] -Output [1]: [d_date_sk#10] - -(33) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_sold_date_sk#16] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(34) Project [codegen id : 5] -Output [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Input [5]: [cs_sold_date_sk#16, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] - -(35) BroadcastExchange -Input [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#19] - -(36) BroadcastHashJoin [codegen id : 9] -Left keys [6]: [coalesce(brand_id#13, 0), isnull(brand_id#13), coalesce(class_id#14, 0), isnull(class_id#14), coalesce(category_id#15, 0), isnull(category_id#15)] -Right keys [6]: [coalesce(i_brand_id#6, 0), isnull(i_brand_id#6), coalesce(i_class_id#7, 0), isnull(i_class_id#7), coalesce(i_category_id#8, 0), isnull(i_category_id#8)] -Join condition: None - -(37) Scan parquet default.web_sales -Output [2]: [ws_sold_date_sk#20, ws_item_sk#21] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(38) ColumnarToRow [codegen id : 8] -Input [2]: [ws_sold_date_sk#20, ws_item_sk#21] - -(39) Filter [codegen id : 8] -Input [2]: [ws_sold_date_sk#20, ws_item_sk#21] -Condition : (isnotnull(ws_item_sk#21) AND isnotnull(ws_sold_date_sk#20)) - -(40) ReusedExchange [Reuses operator id: 29] -Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(41) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ws_item_sk#21] -Right keys [1]: [i_item_sk#5] -Join condition: None - -(42) Project [codegen id : 8] -Output [4]: [ws_sold_date_sk#20, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [6]: [ws_sold_date_sk#20, ws_item_sk#21, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(43) ReusedExchange [Reuses operator id: 20] -Output [1]: [d_date_sk#10] - -(44) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ws_sold_date_sk#20] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(45) Project [codegen id : 8] -Output [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Input [5]: [ws_sold_date_sk#20, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] - -(46) BroadcastExchange -Input [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#22] - -(47) BroadcastHashJoin [codegen id : 9] -Left keys [6]: [coalesce(brand_id#13, 0), isnull(brand_id#13), coalesce(class_id#14, 0), isnull(class_id#14), coalesce(category_id#15, 0), isnull(category_id#15)] -Right keys [6]: [coalesce(i_brand_id#6, 0), isnull(i_brand_id#6), coalesce(i_class_id#7, 0), isnull(i_class_id#7), coalesce(i_category_id#8, 0), isnull(i_category_id#8)] -Join condition: None - -(48) HashAggregate [codegen id : 9] -Input [3]: [brand_id#13, class_id#14, category_id#15] -Keys [3]: [brand_id#13, class_id#14, category_id#15] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#13, class_id#14, category_id#15] - -(49) Exchange -Input [3]: [brand_id#13, class_id#14, category_id#15] -Arguments: hashpartitioning(brand_id#13, class_id#14, category_id#15, 5), true, [id=#23] - -(50) HashAggregate [codegen id : 10] -Input [3]: [brand_id#13, class_id#14, category_id#15] -Keys [3]: [brand_id#13, class_id#14, category_id#15] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#13, class_id#14, category_id#15] - -(51) HashAggregate [codegen id : 10] -Input [3]: [brand_id#13, class_id#14, category_id#15] -Keys [3]: [brand_id#13, class_id#14, category_id#15] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#13, class_id#14, category_id#15] - -(52) HashAggregate [codegen id : 10] -Input [3]: [brand_id#13, class_id#14, category_id#15] -Keys [3]: [brand_id#13, class_id#14, category_id#15] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#13, class_id#14, category_id#15] - -(53) BroadcastExchange -Input [3]: [brand_id#13, class_id#14, category_id#15] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#24] - -(54) BroadcastHashJoin [codegen id : 11] -Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Right keys [3]: [brand_id#13, class_id#14, category_id#15] -Join condition: None - -(55) Project [codegen id : 11] -Output [1]: [i_item_sk#5 AS ss_item_sk#25] -Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#13, class_id#14, category_id#15] - -(56) BroadcastExchange -Input [1]: [ss_item_sk#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] - -(57) BroadcastHashJoin [codegen id : 25] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [ss_item_sk#25] -Join condition: None - -(58) Scan parquet default.item -Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(59) ColumnarToRow [codegen id : 23] -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(60) Filter [codegen id : 23] -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Condition : isnotnull(i_item_sk#5) - -(61) ReusedExchange [Reuses operator id: 56] -Output [1]: [ss_item_sk#25] - -(62) BroadcastHashJoin [codegen id : 23] -Left keys [1]: [i_item_sk#5] -Right keys [1]: [ss_item_sk#25] -Join condition: None - -(63) BroadcastExchange -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] - -(64) BroadcastHashJoin [codegen id : 25] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#5] -Join condition: None - -(65) Project [codegen id : 25] -Output [6]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(66) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#11, d_moy#28] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)] -ReadSchema: struct - -(67) ColumnarToRow [codegen id : 24] -Input [3]: [d_date_sk#10, d_year#11, d_moy#28] - -(68) Filter [codegen id : 24] -Input [3]: [d_date_sk#10, d_year#11, d_moy#28] -Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#28)) AND (d_year#11 = 2001)) AND (d_moy#28 = 11)) AND isnotnull(d_date_sk#10)) - -(69) Project [codegen id : 24] -Output [1]: [d_date_sk#10] -Input [3]: [d_date_sk#10, d_year#11, d_moy#28] - -(70) BroadcastExchange -Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] - -(71) BroadcastHashJoin [codegen id : 25] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(72) Project [codegen id : 25] -Output [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [7]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] - -(73) HashAggregate [codegen id : 25] -Input [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] -Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] -Aggregate Attributes [3]: [sum#30, isEmpty#31, count#32] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, count#35] - -(74) Exchange -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, count#35] -Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#36] - -(75) HashAggregate [codegen id : 26] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, count#35] -Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37, count(1)#38] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sales#39, count(1)#38 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41] - -(76) Filter [codegen id : 26] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41 as decimal(32,6)) > cast(Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) - -(77) Project [codegen id : 26] -Output [6]: [sales#39, number_sales#40, store AS channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41] - -(78) Scan parquet default.catalog_sales -Output [4]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(79) ColumnarToRow [codegen id : 51] -Input [4]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46] - -(80) Filter [codegen id : 51] -Input [4]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46] -Condition : (isnotnull(cs_item_sk#17) AND isnotnull(cs_sold_date_sk#16)) - -(81) ReusedExchange [Reuses operator id: 56] -Output [1]: [ss_item_sk#25] - -(82) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [cs_item_sk#17] -Right keys [1]: [ss_item_sk#25] -Join condition: None - -(83) ReusedExchange [Reuses operator id: 63] -Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(84) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [cs_item_sk#17] -Right keys [1]: [i_item_sk#5] -Join condition: None - -(85) Project [codegen id : 51] -Output [6]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [8]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(86) ReusedExchange [Reuses operator id: 70] -Output [1]: [d_date_sk#10] - -(87) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [cs_sold_date_sk#16] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(88) Project [codegen id : 51] -Output [5]: [cs_quantity#45, cs_list_price#46, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [7]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] - -(89) HashAggregate [codegen id : 51] -Input [5]: [cs_quantity#45, cs_list_price#46, i_brand_id#6, i_class_id#7, i_category_id#8] -Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] -Aggregate Attributes [3]: [sum#47, isEmpty#48, count#49] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, count#52] - -(90) Exchange -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, count#52] -Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#53] - -(91) HashAggregate [codegen id : 52] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, count#52] -Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54, count(1)#55] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sales#56, count(1)#55 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58] - -(92) Filter [codegen id : 52] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) - -(93) Project [codegen id : 52] -Output [6]: [sales#56, number_sales#57, catalog AS channel#59, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58] - -(94) Scan parquet default.web_sales -Output [4]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(95) ColumnarToRow [codegen id : 77] -Input [4]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61] - -(96) Filter [codegen id : 77] -Input [4]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61] -Condition : (isnotnull(ws_item_sk#21) AND isnotnull(ws_sold_date_sk#20)) - -(97) ReusedExchange [Reuses operator id: 56] -Output [1]: [ss_item_sk#25] - -(98) BroadcastHashJoin [codegen id : 77] -Left keys [1]: [ws_item_sk#21] -Right keys [1]: [ss_item_sk#25] -Join condition: None - -(99) ReusedExchange [Reuses operator id: 63] -Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(100) BroadcastHashJoin [codegen id : 77] -Left keys [1]: [ws_item_sk#21] -Right keys [1]: [i_item_sk#5] -Join condition: None - -(101) Project [codegen id : 77] -Output [6]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [8]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(102) ReusedExchange [Reuses operator id: 70] -Output [1]: [d_date_sk#10] - -(103) BroadcastHashJoin [codegen id : 77] -Left keys [1]: [ws_sold_date_sk#20] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(104) Project [codegen id : 77] -Output [5]: [ws_quantity#60, ws_list_price#61, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [7]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] - -(105) HashAggregate [codegen id : 77] -Input [5]: [ws_quantity#60, ws_list_price#61, i_brand_id#6, i_class_id#7, i_category_id#8] -Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] -Aggregate Attributes [3]: [sum#62, isEmpty#63, count#64] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, count#67] - -(106) Exchange -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, count#67] -Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#68] - -(107) HashAggregate [codegen id : 78] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, count#67] -Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69, count(1)#70] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sales#71, count(1)#70 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73] - -(108) Filter [codegen id : 78] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) - -(109) Project [codegen id : 78] -Output [6]: [sales#71, number_sales#72, web AS channel#74, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73] - -(110) Union - -(111) Expand [codegen id : 79] -Input [6]: [sales#39, number_sales#40, channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] -Arguments: [List(sales#39, number_sales#40, channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 0), List(sales#39, number_sales#40, channel#44, i_brand_id#6, i_class_id#7, null, 1), List(sales#39, number_sales#40, channel#44, i_brand_id#6, null, null, 3), List(sales#39, number_sales#40, channel#44, null, null, null, 7), List(sales#39, number_sales#40, null, null, null, null, 15)], [sales#39, number_sales#40, channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] - -(112) HashAggregate [codegen id : 79] -Input [7]: [sales#39, number_sales#40, channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] -Keys [5]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] -Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] -Aggregate Attributes [3]: [sum#80, isEmpty#81, sum#82] -Results [8]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79, sum#83, isEmpty#84, sum#85] - -(113) Exchange -Input [8]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79, sum#83, isEmpty#84, sum#85] -Arguments: hashpartitioning(channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79, 5), true, [id=#86] - -(114) HashAggregate [codegen id : 80] -Input [8]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79, sum#83, isEmpty#84, sum#85] -Keys [5]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] -Functions [2]: [sum(sales#39), sum(number_sales#40)] -Aggregate Attributes [2]: [sum(sales#39)#87, sum(number_sales#40)#88] -Results [6]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, sum(sales#39)#87 AS sum(sales)#89, sum(number_sales#40)#88 AS sum(number_sales)#90] - -(115) TakeOrderedAndProject -Input [6]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, sum(sales)#89, sum(number_sales)#90] -Arguments: 100, [channel#75 ASC NULLS FIRST, i_brand_id#76 ASC NULLS FIRST, i_class_id#77 ASC NULLS FIRST, i_category_id#78 ASC NULLS FIRST], [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, sum(sales)#89, sum(number_sales)#90] - -===== Subqueries ===== - -Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#42, [id=#43] -* HashAggregate (141) -+- Exchange (140) - +- * HashAggregate (139) - +- Union (138) - :- * Project (125) - : +- * BroadcastHashJoin Inner BuildRight (124) - : :- * Filter (118) - : : +- * ColumnarToRow (117) - : : +- Scan parquet default.store_sales (116) - : +- BroadcastExchange (123) - : +- * Project (122) - : +- * Filter (121) - : +- * ColumnarToRow (120) - : +- Scan parquet default.date_dim (119) - :- * Project (131) - : +- * BroadcastHashJoin Inner BuildRight (130) - : :- * Filter (128) - : : +- * ColumnarToRow (127) - : : +- Scan parquet default.catalog_sales (126) - : +- ReusedExchange (129) - +- * Project (137) - +- * BroadcastHashJoin Inner BuildRight (136) - :- * Filter (134) - : +- * ColumnarToRow (133) - : +- Scan parquet default.web_sales (132) - +- ReusedExchange (135) - - -(116) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(117) ColumnarToRow [codegen id : 2] -Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] - -(118) Filter [codegen id : 2] -Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] -Condition : isnotnull(ss_sold_date_sk#1) - -(119) Scan parquet default.date_dim -Output [2]: [d_date_sk#10, d_year#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct - -(120) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#10, d_year#11] - -(121) Filter [codegen id : 1] -Input [2]: [d_date_sk#10, d_year#11] -Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) - -(122) Project [codegen id : 1] -Output [1]: [d_date_sk#10] -Input [2]: [d_date_sk#10, d_year#11] - -(123) BroadcastExchange -Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#91] - -(124) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(125) Project [codegen id : 2] -Output [2]: [ss_quantity#3 AS quantity#92, ss_list_price#4 AS list_price#93] -Input [4]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, d_date_sk#10] - -(126) Scan parquet default.catalog_sales -Output [3]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(127) ColumnarToRow [codegen id : 4] -Input [3]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46] - -(128) Filter [codegen id : 4] -Input [3]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46] -Condition : isnotnull(cs_sold_date_sk#16) - -(129) ReusedExchange [Reuses operator id: 123] -Output [1]: [d_date_sk#10] - -(130) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#16] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(131) Project [codegen id : 4] -Output [2]: [cs_quantity#45 AS quantity#94, cs_list_price#46 AS list_price#95] -Input [4]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46, d_date_sk#10] - -(132) Scan parquet default.web_sales -Output [3]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(133) ColumnarToRow [codegen id : 6] -Input [3]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61] - -(134) Filter [codegen id : 6] -Input [3]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61] -Condition : isnotnull(ws_sold_date_sk#20) - -(135) ReusedExchange [Reuses operator id: 123] -Output [1]: [d_date_sk#10] - -(136) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#20] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(137) Project [codegen id : 6] -Output [2]: [ws_quantity#60 AS quantity#96, ws_list_price#61 AS list_price#97] -Input [4]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61, d_date_sk#10] - -(138) Union - -(139) HashAggregate [codegen id : 7] -Input [2]: [quantity#92, list_price#93] -Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#92 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#93 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#98, count#99] -Results [2]: [sum#100, count#101] - -(140) Exchange -Input [2]: [sum#100, count#101] -Arguments: SinglePartition, true, [id=#102] - -(141) HashAggregate [codegen id : 8] -Input [2]: [sum#100, count#101] -Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#92 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#93 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#92 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#93 as decimal(12,2)))), DecimalType(18,2), true))#103] -Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#92 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#93 as decimal(12,2)))), DecimalType(18,2), true))#103 AS average_sales#104] - -Subquery:2 Hosting operator id = 92 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] - -Subquery:3 Hosting operator id = 108 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] - - +TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 ASC NULLS FIRST,i_class_id#3 ASC NULLS FIRST,i_category_id#4 ASC NULLS FIRST], output=[channel#1,i_brand_id#2,i_class_id#3,i_category_id#4,sum(sales)#5,sum(number_sales)#6]) ++- *(80) HashAggregate(keys=[channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7], functions=[sum(sales#8), sum(number_sales#9)]) + +- Exchange hashpartitioning(channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7, 5) + +- *(79) HashAggregate(keys=[channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7], functions=[partial_sum(sales#8), partial_sum(number_sales#9)]) + +- *(79) Expand [List(sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, i_category_id#13, 0), List(sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, null, 1), List(sales#8, number_sales#9, channel#10, i_brand_id#11, null, null, 3), List(sales#8, number_sales#9, channel#10, null, null, null, 7), List(sales#8, number_sales#9, null, null, null, null, 15)], [sales#8, number_sales#9, channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7] + +- Union + :- *(26) Project [sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, i_category_id#13] + : +- *(26) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2)))#16) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2)))#16 as decimal(32,6)) > cast(Subquery subquery1662 as decimal(32,6)))) + : : +- Subquery subquery1662 + : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Exchange SinglePartition + : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Union + : : :- *(2) Project [ss_quantity#14 AS quantity#17, ss_list_price#15 AS list_price#18] + : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : : :- *(2) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15] + : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#19) + : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#20] + : : : +- *(1) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : :- *(4) Project [cs_quantity#22 AS quantity#23, cs_list_price#24 AS list_price#25] + : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight + : : : :- *(4) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24] + : : : : +- *(4) Filter isnotnull(cs_sold_date_sk#26) + : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [ws_quantity#27 AS quantity#28, ws_list_price#29 AS list_price#30] + : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight + : : :- *(6) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29] + : : : +- *(6) Filter isnotnull(ws_sold_date_sk#31) + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(26) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2))), count(1)]) + : +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 5) + : +- *(25) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) + : +- *(25) Project [ss_quantity#14, ss_list_price#15, i_brand_id#11, i_class_id#12, i_category_id#13] + : +- *(25) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : :- *(25) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15, i_brand_id#11, i_class_id#12, i_category_id#13] + : : +- *(25) BroadcastHashJoin [ss_item_sk#32], [i_item_sk#33], Inner, BuildRight + : : :- *(25) BroadcastHashJoin [ss_item_sk#32], [ss_item_sk#34], LeftSemi, BuildRight + : : : :- *(25) Project [ss_sold_date_sk#19, ss_item_sk#32, ss_quantity#14, ss_list_price#15] + : : : : +- *(25) Filter (isnotnull(ss_item_sk#32) && isnotnull(ss_sold_date_sk#19)) + : : : : +- *(25) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#32,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(11) Project [i_item_sk#33 AS ss_item_sk#34] + : : : +- *(11) BroadcastHashJoin [i_brand_id#11, i_class_id#12, i_category_id#13], [brand_id#35, class_id#36, category_id#37], Inner, BuildRight + : : : :- *(11) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : +- *(11) Filter ((isnotnull(i_class_id#12) && isnotnull(i_brand_id#11)) && isnotnull(i_category_id#13)) + : : : : +- *(11) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) + : : : +- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) + : : : +- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) + : : : +- *(10) BroadcastHashJoin [coalesce(brand_id#35, 0), coalesce(class_id#36, 0), coalesce(category_id#37, 0)], [coalesce(i_brand_id#11, 0), coalesce(i_class_id#12, 0), coalesce(i_category_id#13, 0)], LeftSemi, BuildRight, (((brand_id#35 <=> i_brand_id#11) && (class_id#36 <=> i_class_id#12)) && (category_id#37 <=> i_category_id#13)) + : : : :- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) + : : : : +- Exchange hashpartitioning(brand_id#35, class_id#36, category_id#37, 5) + : : : : +- *(6) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) + : : : : +- *(6) BroadcastHashJoin [coalesce(brand_id#35, 0), coalesce(class_id#36, 0), coalesce(category_id#37, 0)], [coalesce(i_brand_id#11, 0), coalesce(i_class_id#12, 0), coalesce(i_category_id#13, 0)], LeftSemi, BuildRight, (((brand_id#35 <=> i_brand_id#11) && (class_id#36 <=> i_class_id#12)) && (category_id#37 <=> i_category_id#13)) + : : : : :- *(6) Project [i_brand_id#11 AS brand_id#35, i_class_id#12 AS class_id#36, i_category_id#13 AS category_id#37] + : : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : : : : :- *(6) Project [ss_sold_date_sk#19, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : : : +- *(6) BroadcastHashJoin [ss_item_sk#32], [i_item_sk#33], Inner, BuildRight + : : : : : : :- *(6) Project [ss_sold_date_sk#19, ss_item_sk#32] + : : : : : : : +- *(6) Filter (isnotnull(ss_item_sk#32) && isnotnull(ss_sold_date_sk#19)) + : : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : +- *(1) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#33) && isnotnull(i_class_id#12)) && isnotnull(i_brand_id#11)) && isnotnull(i_category_id#13)) + : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(2) Project [d_date_sk#20] + : : : : : +- *(2) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) + : : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) + : : : : +- *(5) Project [i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight + : : : : :- *(5) Project [cs_sold_date_sk#26, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : : +- *(5) BroadcastHashJoin [cs_item_sk#38], [i_item_sk#33], Inner, BuildRight + : : : : : :- *(5) Project [cs_sold_date_sk#26, cs_item_sk#38] + : : : : : : +- *(5) Filter (isnotnull(cs_item_sk#38) && isnotnull(cs_sold_date_sk#26)) + : : : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_item_sk#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(3) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : : +- *(3) Filter isnotnull(i_item_sk#33) + : : : : : +- *(3) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) + : : : +- *(9) Project [i_brand_id#11, i_class_id#12, i_category_id#13] + : : : +- *(9) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight + : : : :- *(9) Project [ws_sold_date_sk#31, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : : +- *(9) BroadcastHashJoin [ws_item_sk#39], [i_item_sk#33], Inner, BuildRight + : : : : :- *(9) Project [ws_sold_date_sk#31, ws_item_sk#39] + : : : : : +- *(9) Filter (isnotnull(ws_item_sk#39) && isnotnull(ws_sold_date_sk#31)) + : : : : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_item_sk#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(23) BroadcastHashJoin [i_item_sk#33], [ss_item_sk#34], LeftSemi, BuildRight + : : :- *(23) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] + : : : +- *(23) Filter isnotnull(i_item_sk#33) + : : : +- *(23) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : +- ReusedExchange [ss_item_sk#34], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(24) Project [d_date_sk#20] + : +- *(24) Filter ((((isnotnull(d_year#21) && isnotnull(d_moy#40)) && (d_year#21 = 2001)) && (d_moy#40 = 11)) && isnotnull(d_date_sk#20)) + : +- *(24) FileScan parquet default.date_dim[d_date_sk#20,d_year#21,d_moy#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct + :- *(52) Project [sales#41, number_sales#42, channel#43, i_brand_id#11, i_class_id#12, i_category_id#13] + : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44) && (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44 as decimal(32,6)) > cast(Subquery subquery1667 as decimal(32,6)))) + : : +- Subquery subquery1667 + : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Exchange SinglePartition + : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Union + : : :- *(2) Project [ss_quantity#14 AS quantity#17, ss_list_price#15 AS list_price#18] + : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : : :- *(2) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15] + : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#19) + : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#20] + : : : +- *(1) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : :- *(4) Project [cs_quantity#22 AS quantity#23, cs_list_price#24 AS list_price#25] + : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight + : : : :- *(4) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24] + : : : : +- *(4) Filter isnotnull(cs_sold_date_sk#26) + : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [ws_quantity#27 AS quantity#28, ws_list_price#29 AS list_price#30] + : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight + : : :- *(6) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29] + : : : +- *(6) Filter isnotnull(ws_sold_date_sk#31) + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(52) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2))), count(1)]) + : +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 5) + : +- *(51) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) + : +- *(51) Project [cs_quantity#22, cs_list_price#24, i_brand_id#11, i_class_id#12, i_category_id#13] + : +- *(51) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight + : :- *(51) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24, i_brand_id#11, i_class_id#12, i_category_id#13] + : : +- *(51) BroadcastHashJoin [cs_item_sk#38], [i_item_sk#33], Inner, BuildRight + : : :- *(51) BroadcastHashJoin [cs_item_sk#38], [ss_item_sk#34], LeftSemi, BuildRight + : : : :- *(51) Project [cs_sold_date_sk#26, cs_item_sk#38, cs_quantity#22, cs_list_price#24] + : : : : +- *(51) Filter (isnotnull(cs_item_sk#38) && isnotnull(cs_sold_date_sk#26)) + : : : : +- *(51) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_item_sk#38,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [ss_item_sk#34], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(78) Project [sales#45, number_sales#46, channel#47, i_brand_id#11, i_class_id#12, i_category_id#13] + +- *(78) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2)))#48) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2)))#48 as decimal(32,6)) > cast(Subquery subquery1672 as decimal(32,6)))) + : +- Subquery subquery1672 + : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) + : +- Exchange SinglePartition + : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) + : +- Union + : :- *(2) Project [ss_quantity#14 AS quantity#17, ss_list_price#15 AS list_price#18] + : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : :- *(2) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15] + : : : +- *(2) Filter isnotnull(ss_sold_date_sk#19) + : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [d_date_sk#20] + : : +- *(1) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : :- *(4) Project [cs_quantity#22 AS quantity#23, cs_list_price#24 AS list_price#25] + : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight + : : :- *(4) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24] + : : : +- *(4) Filter isnotnull(cs_sold_date_sk#26) + : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [ws_quantity#27 AS quantity#28, ws_list_price#29 AS list_price#30] + : +- *(6) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight + : :- *(6) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29] + : : +- *(6) Filter isnotnull(ws_sold_date_sk#31) + : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(78) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2))), count(1)]) + +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 5) + +- *(77) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) + +- *(77) Project [ws_quantity#27, ws_list_price#29, i_brand_id#11, i_class_id#12, i_category_id#13] + +- *(77) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight + :- *(77) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29, i_brand_id#11, i_class_id#12, i_category_id#13] + : +- *(77) BroadcastHashJoin [ws_item_sk#39], [i_item_sk#33], Inner, BuildRight + : :- *(77) BroadcastHashJoin [ws_item_sk#39], [ss_item_sk#34], LeftSemi, BuildRight + : : :- *(77) Project [ws_sold_date_sk#31, ws_item_sk#39, ws_quantity#27, ws_list_price#29] + : : : +- *(77) Filter (isnotnull(ws_item_sk#39) && isnotnull(ws_sold_date_sk#31)) + : : : +- *(77) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_item_sk#39,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [ss_item_sk#34], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/simplified.txt index dfa8c1bcc..837757bfe 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/simplified.txt @@ -1,214 +1,267 @@ -TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),sum(number_sales)] - WholeStageCodegen (80) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum(sales),sum(number_sales),sum,isEmpty,sum] +TakeOrderedAndProject [channel,i_brand_id,i_category_id,i_class_id,sum(number_sales),sum(sales)] + WholeStageCodegen + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,spark_grouping_id,sum,sum,sum(number_salesL),sum(sales)] [sum,sum,sum(number_salesL),sum(number_sales),sum(sales),sum(sales)] InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id] #1 - WholeStageCodegen (79) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - Expand [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + Exchange [channel,i_brand_id,i_category_id,i_class_id,spark_grouping_id] #1 + WholeStageCodegen + HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales,spark_grouping_id,sum,sum,sum,sum] [sum,sum,sum,sum] + Expand [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] InputAdapter Union - WholeStageCodegen (26) - Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + WholeStageCodegen + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #1 - WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] + WholeStageCodegen + HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] InputAdapter Exchange #13 - WholeStageCodegen (7) - HashAggregate [quantity,list_price] [sum,count,sum,count] + WholeStageCodegen + HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] InputAdapter Union - WholeStageCodegen (2) - Project [ss_quantity,ss_list_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] + WholeStageCodegen + Project [ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_list_price,ss_quantity,ss_sold_date_sk] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] InputAdapter BroadcastExchange #14 - WholeStageCodegen (1) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - WholeStageCodegen (4) - Project [cs_quantity,cs_list_price] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen + Project [cs_list_price,cs_quantity] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] + Project [cs_list_price,cs_quantity,cs_sold_date_sk] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk] #14 - WholeStageCodegen (6) - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] + ReusedExchange [d_date_sk] [d_date_sk] #14 + WholeStageCodegen + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_list_price,ws_quantity,ws_sold_date_sk] + Filter [ws_sold_date_sk] + Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk] #14 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + ReusedExchange [d_date_sk] [d_date_sk] #14 + HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #2 - WholeStageCodegen (25) - HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] - Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] + Exchange [i_brand_id,i_category_id,i_class_id] #2 + WholeStageCodegen + HashAggregate [count,count,i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,sum,sum] [count,count,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] BroadcastHashJoin [ss_item_sk,ss_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + Project [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (11) + WholeStageCodegen Project [i_item_sk] - BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] - Filter [i_brand_id,i_class_id,i_category_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Project [i_brand_id,i_category_id,i_class_id,i_item_sk] + Filter [i_brand_id,i_category_id,i_class_id] + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (10) - HashAggregate [brand_id,class_id,category_id] - HashAggregate [brand_id,class_id,category_id] - HashAggregate [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #5 - WholeStageCodegen (9) - HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] + WholeStageCodegen + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + HashAggregate [brand_id,category_id,class_id] + InputAdapter + Exchange [brand_id,category_id,class_id] #5 + WholeStageCodegen + HashAggregate [brand_id,category_id,class_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_item_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] [ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #6 - WholeStageCodegen (1) - Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + WholeStageCodegen + Project [i_brand_id,i_category_id,i_class_id,i_item_sk] + Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] InputAdapter BroadcastExchange #7 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #8 - WholeStageCodegen (5) - Project [i_brand_id,i_class_id,i_category_id] + WholeStageCodegen + Project [i_brand_id,i_category_id,i_class_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + Project [cs_sold_date_sk,i_brand_id,i_category_id,i_class_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk] + Project [cs_item_sk,cs_sold_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] [cs_item_sk,cs_sold_date_sk] InputAdapter BroadcastExchange #9 - WholeStageCodegen (3) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + WholeStageCodegen + Project [i_brand_id,i_category_id,i_class_id,i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] InputAdapter - ReusedExchange [d_date_sk] #7 + ReusedExchange [d_date_sk] [d_date_sk] #7 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_item_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] [ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #9 InputAdapter - BroadcastExchange #10 - WholeStageCodegen (8) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Filter [ws_item_sk,ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #9 - InputAdapter - ReusedExchange [d_date_sk] #7 + ReusedExchange [d_date_sk] [d_date_sk] #7 InputAdapter BroadcastExchange #11 - WholeStageCodegen (23) + WholeStageCodegen BroadcastHashJoin [i_item_sk,ss_item_sk] - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + Project [i_brand_id,i_category_id,i_class_id,i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] InputAdapter - ReusedExchange [ss_item_sk] #3 + ReusedExchange [ss_item_sk] [ss_item_sk] #3 InputAdapter BroadcastExchange #12 - WholeStageCodegen (24) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - WholeStageCodegen (52) - Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + WholeStageCodegen + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] + Subquery #2 + WholeStageCodegen + HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] + InputAdapter + Exchange #13 + WholeStageCodegen + HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] + InputAdapter + Union + WholeStageCodegen + Project [ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_list_price,ss_quantity,ss_sold_date_sk] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen + Project [cs_list_price,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_list_price,cs_quantity,cs_sold_date_sk] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #14 + WholeStageCodegen + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_list_price,ws_quantity,ws_sold_date_sk] + Filter [ws_sold_date_sk] + Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #14 + HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #15 - WholeStageCodegen (51) - HashAggregate [i_brand_id,i_class_id,i_category_id,cs_quantity,cs_list_price] [sum,isEmpty,count,sum,isEmpty,count] - Project [cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] + Exchange [i_brand_id,i_category_id,i_class_id] #15 + WholeStageCodegen + HashAggregate [count,count,cs_list_price,cs_quantity,i_brand_id,i_category_id,i_class_id,sum,sum] [count,count,sum,sum] + Project [cs_list_price,cs_quantity,i_brand_id,i_category_id,i_class_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] + Project [cs_list_price,cs_quantity,cs_sold_date_sk,i_brand_id,i_category_id,i_class_id] BroadcastHashJoin [cs_item_sk,i_item_sk] BroadcastHashJoin [cs_item_sk,ss_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] + Project [cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] [cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] InputAdapter - ReusedExchange [ss_item_sk] #3 + ReusedExchange [ss_item_sk] [ss_item_sk] #3 InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #11 InputAdapter - ReusedExchange [d_date_sk] #12 - WholeStageCodegen (78) - Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #16 - WholeStageCodegen (77) - HashAggregate [i_brand_id,i_class_id,i_category_id,ws_quantity,ws_list_price] [sum,isEmpty,count,sum,isEmpty,count] - Project [ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - BroadcastHashJoin [ws_item_sk,ss_item_sk] - Filter [ws_item_sk,ws_sold_date_sk] - ColumnarToRow + ReusedExchange [d_date_sk] [d_date_sk] #12 + WholeStageCodegen + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] + Subquery #3 + WholeStageCodegen + HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] + InputAdapter + Exchange #13 + WholeStageCodegen + HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] + InputAdapter + Union + WholeStageCodegen + Project [ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_list_price,ss_quantity,ss_sold_date_sk] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] + BroadcastExchange #14 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen + Project [cs_list_price,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_list_price,cs_quantity,cs_sold_date_sk] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #14 + WholeStageCodegen + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_list_price,ws_quantity,ws_sold_date_sk] + Filter [ws_sold_date_sk] + Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #14 + HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] + InputAdapter + Exchange [i_brand_id,i_category_id,i_class_id] #16 + WholeStageCodegen + HashAggregate [count,count,i_brand_id,i_category_id,i_class_id,sum,sum,ws_list_price,ws_quantity] [count,count,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ws_list_price,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + BroadcastHashJoin [ss_item_sk,ws_item_sk] + Project [ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] [ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] InputAdapter - ReusedExchange [ss_item_sk] #3 + ReusedExchange [ss_item_sk] [ss_item_sk] #3 InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #11 InputAdapter - ReusedExchange [d_date_sk] #12 + ReusedExchange [d_date_sk] [d_date_sk] #12 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/explain.txt index 1f31ded51..858eda8d6 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/explain.txt @@ -1,763 +1,166 @@ == Physical Plan == -TakeOrderedAndProject (100) -+- * BroadcastHashJoin Inner BuildRight (99) - :- * Project (77) - : +- * Filter (76) - : +- * HashAggregate (75) - : +- Exchange (74) - : +- * HashAggregate (73) - : +- * Project (72) - : +- * BroadcastHashJoin Inner BuildRight (71) - : :- * Project (65) - : : +- * BroadcastHashJoin Inner BuildRight (64) - : : :- * BroadcastHashJoin LeftSemi BuildRight (57) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (56) - : : : +- * Project (55) - : : : +- * BroadcastHashJoin Inner BuildRight (54) - : : : :- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.item (4) - : : : +- BroadcastExchange (53) - : : : +- * HashAggregate (52) - : : : +- * HashAggregate (51) - : : : +- * HashAggregate (50) - : : : +- Exchange (49) - : : : +- * HashAggregate (48) - : : : +- * BroadcastHashJoin LeftSemi BuildRight (47) - : : : :- * BroadcastHashJoin LeftSemi BuildRight (36) - : : : : :- * Project (22) - : : : : : +- * BroadcastHashJoin Inner BuildRight (21) - : : : : : :- * Project (15) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : : : : :- * Filter (9) - : : : : : : : +- * ColumnarToRow (8) - : : : : : : : +- Scan parquet default.store_sales (7) - : : : : : : +- BroadcastExchange (13) - : : : : : : +- * Filter (12) - : : : : : : +- * ColumnarToRow (11) - : : : : : : +- Scan parquet default.item (10) - : : : : : +- BroadcastExchange (20) - : : : : : +- * Project (19) - : : : : : +- * Filter (18) - : : : : : +- * ColumnarToRow (17) - : : : : : +- Scan parquet default.date_dim (16) - : : : : +- BroadcastExchange (35) - : : : : +- * Project (34) - : : : : +- * BroadcastHashJoin Inner BuildRight (33) - : : : : :- * Project (31) - : : : : : +- * BroadcastHashJoin Inner BuildRight (30) - : : : : : :- * Filter (25) - : : : : : : +- * ColumnarToRow (24) - : : : : : : +- Scan parquet default.catalog_sales (23) - : : : : : +- BroadcastExchange (29) - : : : : : +- * Filter (28) - : : : : : +- * ColumnarToRow (27) - : : : : : +- Scan parquet default.item (26) - : : : : +- ReusedExchange (32) - : : : +- BroadcastExchange (46) - : : : +- * Project (45) - : : : +- * BroadcastHashJoin Inner BuildRight (44) - : : : :- * Project (42) - : : : : +- * BroadcastHashJoin Inner BuildRight (41) - : : : : :- * Filter (39) - : : : : : +- * ColumnarToRow (38) - : : : : : +- Scan parquet default.web_sales (37) - : : : : +- ReusedExchange (40) - : : : +- ReusedExchange (43) - : : +- BroadcastExchange (63) - : : +- * BroadcastHashJoin LeftSemi BuildRight (62) - : : :- * Filter (60) - : : : +- * ColumnarToRow (59) - : : : +- Scan parquet default.item (58) - : : +- ReusedExchange (61) - : +- BroadcastExchange (70) - : +- * Project (69) - : +- * Filter (68) - : +- * ColumnarToRow (67) - : +- Scan parquet default.date_dim (66) - +- BroadcastExchange (98) - +- * Project (97) - +- * Filter (96) - +- * HashAggregate (95) - +- Exchange (94) - +- * HashAggregate (93) - +- * Project (92) - +- * BroadcastHashJoin Inner BuildRight (91) - :- * Project (85) - : +- * BroadcastHashJoin Inner BuildRight (84) - : :- * BroadcastHashJoin LeftSemi BuildRight (82) - : : :- * Filter (80) - : : : +- * ColumnarToRow (79) - : : : +- Scan parquet default.store_sales (78) - : : +- ReusedExchange (81) - : +- ReusedExchange (83) - +- BroadcastExchange (90) - +- * Project (89) - +- * Filter (88) - +- * ColumnarToRow (87) - +- Scan parquet default.date_dim (86) - - -(1) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 25] -Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] - -(3) Filter [codegen id : 25] -Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] -Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) - -(4) Scan parquet default.item -Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 11] -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(6) Filter [codegen id : 11] -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) - -(7) Scan parquet default.store_sales -Output [2]: [ss_sold_date_sk#1, ss_item_sk#2] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(8) ColumnarToRow [codegen id : 9] -Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] - -(9) Filter [codegen id : 9] -Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] -Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) - -(10) Scan parquet default.item -Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 1] -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(12) Filter [codegen id : 1] -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Condition : (((isnotnull(i_item_sk#5) AND isnotnull(i_brand_id#6)) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) - -(13) BroadcastExchange -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] - -(14) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#5] -Join condition: None - -(15) Project [codegen id : 9] -Output [4]: [ss_sold_date_sk#1, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(16) Scan parquet default.date_dim -Output [2]: [d_date_sk#10, d_year#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#10, d_year#11] - -(18) Filter [codegen id : 2] -Input [2]: [d_date_sk#10, d_year#11] -Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) - -(19) Project [codegen id : 2] -Output [1]: [d_date_sk#10] -Input [2]: [d_date_sk#10, d_year#11] - -(20) BroadcastExchange -Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(21) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(22) Project [codegen id : 9] -Output [3]: [i_brand_id#6 AS brand_id#13, i_class_id#7 AS class_id#14, i_category_id#8 AS category_id#15] -Input [5]: [ss_sold_date_sk#1, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] - -(23) Scan parquet default.catalog_sales -Output [2]: [cs_sold_date_sk#16, cs_item_sk#17] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 5] -Input [2]: [cs_sold_date_sk#16, cs_item_sk#17] - -(25) Filter [codegen id : 5] -Input [2]: [cs_sold_date_sk#16, cs_item_sk#17] -Condition : (isnotnull(cs_item_sk#17) AND isnotnull(cs_sold_date_sk#16)) - -(26) Scan parquet default.item -Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(27) ColumnarToRow [codegen id : 3] -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(28) Filter [codegen id : 3] -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Condition : isnotnull(i_item_sk#5) - -(29) BroadcastExchange -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] - -(30) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_item_sk#17] -Right keys [1]: [i_item_sk#5] -Join condition: None - -(31) Project [codegen id : 5] -Output [4]: [cs_sold_date_sk#16, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [6]: [cs_sold_date_sk#16, cs_item_sk#17, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(32) ReusedExchange [Reuses operator id: 20] -Output [1]: [d_date_sk#10] - -(33) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_sold_date_sk#16] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(34) Project [codegen id : 5] -Output [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Input [5]: [cs_sold_date_sk#16, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] - -(35) BroadcastExchange -Input [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#19] - -(36) BroadcastHashJoin [codegen id : 9] -Left keys [6]: [coalesce(brand_id#13, 0), isnull(brand_id#13), coalesce(class_id#14, 0), isnull(class_id#14), coalesce(category_id#15, 0), isnull(category_id#15)] -Right keys [6]: [coalesce(i_brand_id#6, 0), isnull(i_brand_id#6), coalesce(i_class_id#7, 0), isnull(i_class_id#7), coalesce(i_category_id#8, 0), isnull(i_category_id#8)] -Join condition: None - -(37) Scan parquet default.web_sales -Output [2]: [ws_sold_date_sk#20, ws_item_sk#21] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(38) ColumnarToRow [codegen id : 8] -Input [2]: [ws_sold_date_sk#20, ws_item_sk#21] - -(39) Filter [codegen id : 8] -Input [2]: [ws_sold_date_sk#20, ws_item_sk#21] -Condition : (isnotnull(ws_item_sk#21) AND isnotnull(ws_sold_date_sk#20)) - -(40) ReusedExchange [Reuses operator id: 29] -Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(41) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ws_item_sk#21] -Right keys [1]: [i_item_sk#5] -Join condition: None - -(42) Project [codegen id : 8] -Output [4]: [ws_sold_date_sk#20, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [6]: [ws_sold_date_sk#20, ws_item_sk#21, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(43) ReusedExchange [Reuses operator id: 20] -Output [1]: [d_date_sk#10] - -(44) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ws_sold_date_sk#20] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(45) Project [codegen id : 8] -Output [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Input [5]: [ws_sold_date_sk#20, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] - -(46) BroadcastExchange -Input [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#22] - -(47) BroadcastHashJoin [codegen id : 9] -Left keys [6]: [coalesce(brand_id#13, 0), isnull(brand_id#13), coalesce(class_id#14, 0), isnull(class_id#14), coalesce(category_id#15, 0), isnull(category_id#15)] -Right keys [6]: [coalesce(i_brand_id#6, 0), isnull(i_brand_id#6), coalesce(i_class_id#7, 0), isnull(i_class_id#7), coalesce(i_category_id#8, 0), isnull(i_category_id#8)] -Join condition: None - -(48) HashAggregate [codegen id : 9] -Input [3]: [brand_id#13, class_id#14, category_id#15] -Keys [3]: [brand_id#13, class_id#14, category_id#15] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#13, class_id#14, category_id#15] - -(49) Exchange -Input [3]: [brand_id#13, class_id#14, category_id#15] -Arguments: hashpartitioning(brand_id#13, class_id#14, category_id#15, 5), true, [id=#23] - -(50) HashAggregate [codegen id : 10] -Input [3]: [brand_id#13, class_id#14, category_id#15] -Keys [3]: [brand_id#13, class_id#14, category_id#15] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#13, class_id#14, category_id#15] - -(51) HashAggregate [codegen id : 10] -Input [3]: [brand_id#13, class_id#14, category_id#15] -Keys [3]: [brand_id#13, class_id#14, category_id#15] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#13, class_id#14, category_id#15] - -(52) HashAggregate [codegen id : 10] -Input [3]: [brand_id#13, class_id#14, category_id#15] -Keys [3]: [brand_id#13, class_id#14, category_id#15] -Functions: [] -Aggregate Attributes: [] -Results [3]: [brand_id#13, class_id#14, category_id#15] - -(53) BroadcastExchange -Input [3]: [brand_id#13, class_id#14, category_id#15] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#24] - -(54) BroadcastHashJoin [codegen id : 11] -Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Right keys [3]: [brand_id#13, class_id#14, category_id#15] -Join condition: None - -(55) Project [codegen id : 11] -Output [1]: [i_item_sk#5 AS ss_item_sk#25] -Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#13, class_id#14, category_id#15] - -(56) BroadcastExchange -Input [1]: [ss_item_sk#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] - -(57) BroadcastHashJoin [codegen id : 25] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [ss_item_sk#25] -Join condition: None - -(58) Scan parquet default.item -Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] -ReadSchema: struct - -(59) ColumnarToRow [codegen id : 23] -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(60) Filter [codegen id : 23] -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Condition : (((isnotnull(i_item_sk#5) AND isnotnull(i_brand_id#6)) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) - -(61) ReusedExchange [Reuses operator id: 56] -Output [1]: [ss_item_sk#25] - -(62) BroadcastHashJoin [codegen id : 23] -Left keys [1]: [i_item_sk#5] -Right keys [1]: [ss_item_sk#25] -Join condition: None - -(63) BroadcastExchange -Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] - -(64) BroadcastHashJoin [codegen id : 25] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#5] -Join condition: None - -(65) Project [codegen id : 25] -Output [6]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] - -(66) Scan parquet default.date_dim -Output [2]: [d_date_sk#10, d_week_seq#28] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] -ReadSchema: struct - -(67) ColumnarToRow [codegen id : 24] -Input [2]: [d_date_sk#10, d_week_seq#28] - -(68) Filter [codegen id : 24] -Input [2]: [d_date_sk#10, d_week_seq#28] -Condition : ((isnotnull(d_week_seq#28) AND (d_week_seq#28 = Subquery scalar-subquery#29, [id=#30])) AND isnotnull(d_date_sk#10)) - -(69) Project [codegen id : 24] -Output [1]: [d_date_sk#10] -Input [2]: [d_date_sk#10, d_week_seq#28] - -(70) BroadcastExchange -Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#31] - -(71) BroadcastHashJoin [codegen id : 25] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(72) Project [codegen id : 25] -Output [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [7]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] - -(73) HashAggregate [codegen id : 25] -Input [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] -Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] -Aggregate Attributes [3]: [sum#32, isEmpty#33, count#34] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#35, isEmpty#36, count#37] - -(74) Exchange -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#35, isEmpty#36, count#37] -Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#38] - -(75) HashAggregate [codegen id : 52] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#35, isEmpty#36, count#37] -Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39, count(1)#40] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sales#41, count(1)#40 AS number_sales#42, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43] - -(76) Filter [codegen id : 52] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43 as decimal(32,6)) > cast(Subquery scalar-subquery#44, [id=#45] as decimal(32,6)))) - -(77) Project [codegen id : 52] -Output [6]: [store AS channel#46, i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43] - -(78) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(79) ColumnarToRow [codegen id : 50] -Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] - -(80) Filter [codegen id : 50] -Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] -Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) - -(81) ReusedExchange [Reuses operator id: 56] -Output [1]: [ss_item_sk#25] - -(82) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [ss_item_sk#25] -Join condition: None - -(83) ReusedExchange [Reuses operator id: 63] -Output [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] - -(84) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#47] -Join condition: None - -(85) Project [codegen id : 50] -Output [6]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#48, i_class_id#49, i_category_id#50] -Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] - -(86) Scan parquet default.date_dim -Output [2]: [d_date_sk#10, d_week_seq#28] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] -ReadSchema: struct - -(87) ColumnarToRow [codegen id : 49] -Input [2]: [d_date_sk#10, d_week_seq#28] - -(88) Filter [codegen id : 49] -Input [2]: [d_date_sk#10, d_week_seq#28] -Condition : ((isnotnull(d_week_seq#28) AND (d_week_seq#28 = Subquery scalar-subquery#51, [id=#52])) AND isnotnull(d_date_sk#10)) - -(89) Project [codegen id : 49] -Output [1]: [d_date_sk#10] -Input [2]: [d_date_sk#10, d_week_seq#28] - -(90) BroadcastExchange -Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#53] - -(91) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(92) Project [codegen id : 50] -Output [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#48, i_class_id#49, i_category_id#50] -Input [7]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#48, i_class_id#49, i_category_id#50, d_date_sk#10] - -(93) HashAggregate [codegen id : 50] -Input [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#48, i_class_id#49, i_category_id#50] -Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] -Aggregate Attributes [3]: [sum#54, isEmpty#55, count#56] -Results [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#57, isEmpty#58, count#59] - -(94) Exchange -Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#57, isEmpty#58, count#59] -Arguments: hashpartitioning(i_brand_id#48, i_class_id#49, i_category_id#50, 5), true, [id=#60] - -(95) HashAggregate [codegen id : 51] -Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#57, isEmpty#58, count#59] -Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61, count(1)#62] -Results [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#63, count(1)#62 AS number_sales#64, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65] - -(96) Filter [codegen id : 51] -Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#44, [id=#45] as decimal(32,6)))) - -(97) Project [codegen id : 51] -Output [6]: [store AS channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] -Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65] - -(98) BroadcastExchange -Input [6]: [channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] -Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#67] - -(99) BroadcastHashJoin [codegen id : 52] -Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] -Right keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] -Join condition: None - -(100) TakeOrderedAndProject -Input [12]: [channel#46, i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] -Arguments: 100, [i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#46, i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] - -===== Subqueries ===== - -Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#44, [id=#45] -* HashAggregate (126) -+- Exchange (125) - +- * HashAggregate (124) - +- Union (123) - :- * Project (110) - : +- * BroadcastHashJoin Inner BuildRight (109) - : :- * Filter (103) - : : +- * ColumnarToRow (102) - : : +- Scan parquet default.store_sales (101) - : +- BroadcastExchange (108) - : +- * Project (107) - : +- * Filter (106) - : +- * ColumnarToRow (105) - : +- Scan parquet default.date_dim (104) - :- * Project (116) - : +- * BroadcastHashJoin Inner BuildRight (115) - : :- * Filter (113) - : : +- * ColumnarToRow (112) - : : +- Scan parquet default.catalog_sales (111) - : +- ReusedExchange (114) - +- * Project (122) - +- * BroadcastHashJoin Inner BuildRight (121) - :- * Filter (119) - : +- * ColumnarToRow (118) - : +- Scan parquet default.web_sales (117) - +- ReusedExchange (120) - - -(101) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(102) ColumnarToRow [codegen id : 2] -Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] - -(103) Filter [codegen id : 2] -Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] -Condition : isnotnull(ss_sold_date_sk#1) - -(104) Scan parquet default.date_dim -Output [2]: [d_date_sk#10, d_year#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct - -(105) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#10, d_year#11] - -(106) Filter [codegen id : 1] -Input [2]: [d_date_sk#10, d_year#11] -Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) - -(107) Project [codegen id : 1] -Output [1]: [d_date_sk#10] -Input [2]: [d_date_sk#10, d_year#11] - -(108) BroadcastExchange -Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#68] - -(109) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(110) Project [codegen id : 2] -Output [2]: [ss_quantity#3 AS quantity#69, ss_list_price#4 AS list_price#70] -Input [4]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, d_date_sk#10] - -(111) Scan parquet default.catalog_sales -Output [3]: [cs_sold_date_sk#16, cs_quantity#71, cs_list_price#72] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(112) ColumnarToRow [codegen id : 4] -Input [3]: [cs_sold_date_sk#16, cs_quantity#71, cs_list_price#72] - -(113) Filter [codegen id : 4] -Input [3]: [cs_sold_date_sk#16, cs_quantity#71, cs_list_price#72] -Condition : isnotnull(cs_sold_date_sk#16) - -(114) ReusedExchange [Reuses operator id: 108] -Output [1]: [d_date_sk#10] - -(115) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#16] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(116) Project [codegen id : 4] -Output [2]: [cs_quantity#71 AS quantity#73, cs_list_price#72 AS list_price#74] -Input [4]: [cs_sold_date_sk#16, cs_quantity#71, cs_list_price#72, d_date_sk#10] - -(117) Scan parquet default.web_sales -Output [3]: [ws_sold_date_sk#20, ws_quantity#75, ws_list_price#76] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(118) ColumnarToRow [codegen id : 6] -Input [3]: [ws_sold_date_sk#20, ws_quantity#75, ws_list_price#76] - -(119) Filter [codegen id : 6] -Input [3]: [ws_sold_date_sk#20, ws_quantity#75, ws_list_price#76] -Condition : isnotnull(ws_sold_date_sk#20) - -(120) ReusedExchange [Reuses operator id: 108] -Output [1]: [d_date_sk#10] - -(121) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#20] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(122) Project [codegen id : 6] -Output [2]: [ws_quantity#75 AS quantity#77, ws_list_price#76 AS list_price#78] -Input [4]: [ws_sold_date_sk#20, ws_quantity#75, ws_list_price#76, d_date_sk#10] - -(123) Union - -(124) HashAggregate [codegen id : 7] -Input [2]: [quantity#69, list_price#70] -Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#70 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#79, count#80] -Results [2]: [sum#81, count#82] - -(125) Exchange -Input [2]: [sum#81, count#82] -Arguments: SinglePartition, true, [id=#83] - -(126) HashAggregate [codegen id : 8] -Input [2]: [sum#81, count#82] -Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#70 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#84] -Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#84 AS average_sales#85] - -Subquery:2 Hosting operator id = 68 Hosting Expression = Subquery scalar-subquery#29, [id=#30] -* Project (130) -+- * Filter (129) - +- * ColumnarToRow (128) - +- Scan parquet default.date_dim (127) - - -(127) Scan parquet default.date_dim -Output [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,2000), EqualTo(d_moy,12), EqualTo(d_dom,11)] -ReadSchema: struct - -(128) ColumnarToRow [codegen id : 1] -Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] - -(129) Filter [codegen id : 1] -Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] -Condition : (((((isnotnull(d_year#11) AND isnotnull(d_moy#86)) AND isnotnull(d_dom#87)) AND (d_year#11 = 2000)) AND (d_moy#86 = 12)) AND (d_dom#87 = 11)) - -(130) Project [codegen id : 1] -Output [1]: [d_week_seq#28] -Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] - -Subquery:3 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#44, [id=#45] - -Subquery:4 Hosting operator id = 88 Hosting Expression = Subquery scalar-subquery#51, [id=#52] -* Project (134) -+- * Filter (133) - +- * ColumnarToRow (132) - +- Scan parquet default.date_dim (131) - - -(131) Scan parquet default.date_dim -Output [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,11)] -ReadSchema: struct - -(132) ColumnarToRow [codegen id : 1] -Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] - -(133) Filter [codegen id : 1] -Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] -Condition : (((((isnotnull(d_year#11) AND isnotnull(d_moy#86)) AND isnotnull(d_dom#87)) AND (d_year#11 = 1999)) AND (d_moy#86 = 12)) AND (d_dom#87 = 11)) - -(134) Project [codegen id : 1] -Output [1]: [d_week_seq#28] -Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] - - +TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 ASC NULLS FIRST,i_class_id#3 ASC NULLS FIRST,i_category_id#4 ASC NULLS FIRST], output=[channel#1,i_brand_id#2,i_class_id#3,i_category_id#4,sales#5,number_sales#6,channel#7,i_brand_id#8,i_class_id#9,i_category_id#10,sales#11,number_sales#12]) ++- *(52) BroadcastHashJoin [i_brand_id#2, i_class_id#3, i_category_id#4], [i_brand_id#8, i_class_id#9, i_category_id#10], Inner, BuildRight + :- *(52) Project [channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, sales#5, number_sales#6] + : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#15) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#15 as decimal(32,6)) > cast(Subquery subquery1884 as decimal(32,6)))) + : : +- Subquery subquery1884 + : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Exchange SinglePartition + : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Union + : : :- *(2) Project [ss_quantity#13 AS quantity#16, ss_list_price#14 AS list_price#17] + : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + : : : :- *(2) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14] + : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#18) + : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#19] + : : : +- *(1) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : :- *(4) Project [cs_quantity#21 AS quantity#22, cs_list_price#23 AS list_price#24] + : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#19], Inner, BuildRight + : : : :- *(4) Project [cs_sold_date_sk#25, cs_quantity#21, cs_list_price#23] + : : : : +- *(4) Filter isnotnull(cs_sold_date_sk#25) + : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_quantity#21,cs_list_price#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [ws_quantity#26 AS quantity#27, ws_list_price#28 AS list_price#29] + : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#30], [d_date_sk#19], Inner, BuildRight + : : :- *(6) Project [ws_sold_date_sk#30, ws_quantity#26, ws_list_price#28] + : : : +- *(6) Filter isnotnull(ws_sold_date_sk#30) + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_quantity#26,ws_list_price#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(52) HashAggregate(keys=[i_brand_id#2, i_class_id#3, i_category_id#4], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), count(1)]) + : +- Exchange hashpartitioning(i_brand_id#2, i_class_id#3, i_category_id#4, 5) + : +- *(25) HashAggregate(keys=[i_brand_id#2, i_class_id#3, i_category_id#4], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) + : +- *(25) Project [ss_quantity#13, ss_list_price#14, i_brand_id#2, i_class_id#3, i_category_id#4] + : +- *(25) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + : :- *(25) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14, i_brand_id#2, i_class_id#3, i_category_id#4] + : : +- *(25) BroadcastHashJoin [ss_item_sk#31], [i_item_sk#32], Inner, BuildRight + : : :- *(25) BroadcastHashJoin [ss_item_sk#31], [ss_item_sk#33], LeftSemi, BuildRight + : : : :- *(25) Project [ss_sold_date_sk#18, ss_item_sk#31, ss_quantity#13, ss_list_price#14] + : : : : +- *(25) Filter (isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#18)) + : : : : +- *(25) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(11) Project [i_item_sk#32 AS ss_item_sk#33] + : : : +- *(11) BroadcastHashJoin [i_brand_id#2, i_class_id#3, i_category_id#4], [brand_id#34, class_id#35, category_id#36], Inner, BuildRight + : : : :- *(11) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : +- *(11) Filter ((isnotnull(i_class_id#3) && isnotnull(i_brand_id#2)) && isnotnull(i_category_id#4)) + : : : : +- *(11) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) + : : : +- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) + : : : +- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) + : : : +- *(10) BroadcastHashJoin [coalesce(brand_id#34, 0), coalesce(class_id#35, 0), coalesce(category_id#36, 0)], [coalesce(i_brand_id#2, 0), coalesce(i_class_id#3, 0), coalesce(i_category_id#4, 0)], LeftSemi, BuildRight, (((brand_id#34 <=> i_brand_id#2) && (class_id#35 <=> i_class_id#3)) && (category_id#36 <=> i_category_id#4)) + : : : :- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) + : : : : +- Exchange hashpartitioning(brand_id#34, class_id#35, category_id#36, 5) + : : : : +- *(6) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) + : : : : +- *(6) BroadcastHashJoin [coalesce(brand_id#34, 0), coalesce(class_id#35, 0), coalesce(category_id#36, 0)], [coalesce(i_brand_id#2, 0), coalesce(i_class_id#3, 0), coalesce(i_category_id#4, 0)], LeftSemi, BuildRight, (((brand_id#34 <=> i_brand_id#2) && (class_id#35 <=> i_class_id#3)) && (category_id#36 <=> i_category_id#4)) + : : : : :- *(6) Project [i_brand_id#2 AS brand_id#34, i_class_id#3 AS class_id#35, i_category_id#4 AS category_id#36] + : : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + : : : : : :- *(6) Project [ss_sold_date_sk#18, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : : : +- *(6) BroadcastHashJoin [ss_item_sk#31], [i_item_sk#32], Inner, BuildRight + : : : : : : :- *(6) Project [ss_sold_date_sk#18, ss_item_sk#31] + : : : : : : : +- *(6) Filter (isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#18)) + : : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : +- *(1) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_class_id#3)) && isnotnull(i_brand_id#2)) && isnotnull(i_category_id#4)) + : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(2) Project [d_date_sk#19] + : : : : : +- *(2) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) + : : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) + : : : : +- *(5) Project [i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#19], Inner, BuildRight + : : : : :- *(5) Project [cs_sold_date_sk#25, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : : +- *(5) BroadcastHashJoin [cs_item_sk#37], [i_item_sk#32], Inner, BuildRight + : : : : : :- *(5) Project [cs_sold_date_sk#25, cs_item_sk#37] + : : : : : : +- *(5) Filter (isnotnull(cs_item_sk#37) && isnotnull(cs_sold_date_sk#25)) + : : : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_item_sk#37] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(3) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : : +- *(3) Filter isnotnull(i_item_sk#32) + : : : : : +- *(3) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) + : : : +- *(9) Project [i_brand_id#2, i_class_id#3, i_category_id#4] + : : : +- *(9) BroadcastHashJoin [ws_sold_date_sk#30], [d_date_sk#19], Inner, BuildRight + : : : :- *(9) Project [ws_sold_date_sk#30, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : : +- *(9) BroadcastHashJoin [ws_item_sk#38], [i_item_sk#32], Inner, BuildRight + : : : : :- *(9) Project [ws_sold_date_sk#30, ws_item_sk#38] + : : : : : +- *(9) Filter (isnotnull(ws_item_sk#38) && isnotnull(ws_sold_date_sk#30)) + : : : : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_item_sk#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : : +- ReusedExchange [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(23) BroadcastHashJoin [i_item_sk#32], [ss_item_sk#33], LeftSemi, BuildRight + : : :- *(23) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] + : : : +- *(23) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_class_id#3)) && isnotnull(i_category_id#4)) && isnotnull(i_brand_id#2)) + : : : +- *(23) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_brand_id)], ReadSchema: struct + : : +- ReusedExchange [ss_item_sk#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(24) Project [d_date_sk#19] + : +- *(24) Filter ((isnotnull(d_week_seq#39) && (d_week_seq#39 = Subquery subquery1883)) && isnotnull(d_date_sk#19)) + : : +- Subquery subquery1883 + : : +- *(1) Project [d_week_seq#39] + : : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + : : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct + : +- *(24) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct + : +- Subquery subquery1883 + : +- *(1) Project [d_week_seq#39] + : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true])) + +- *(51) Project [channel#7, i_brand_id#8, i_class_id#9, i_category_id#10, sales#11, number_sales#12] + +- *(51) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42 as decimal(32,6)) > cast(Subquery subquery1890 as decimal(32,6)))) + : +- Subquery subquery1890 + : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) + : +- Exchange SinglePartition + : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) + : +- Union + : :- *(2) Project [ss_quantity#13 AS quantity#16, ss_list_price#14 AS list_price#17] + : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + : : :- *(2) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14] + : : : +- *(2) Filter isnotnull(ss_sold_date_sk#18) + : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [d_date_sk#19] + : : +- *(1) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : :- *(4) Project [cs_quantity#21 AS quantity#22, cs_list_price#23 AS list_price#24] + : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#19], Inner, BuildRight + : : :- *(4) Project [cs_sold_date_sk#25, cs_quantity#21, cs_list_price#23] + : : : +- *(4) Filter isnotnull(cs_sold_date_sk#25) + : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_quantity#21,cs_list_price#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [ws_quantity#26 AS quantity#27, ws_list_price#28 AS list_price#29] + : +- *(6) BroadcastHashJoin [ws_sold_date_sk#30], [d_date_sk#19], Inner, BuildRight + : :- *(6) Project [ws_sold_date_sk#30, ws_quantity#26, ws_list_price#28] + : : +- *(6) Filter isnotnull(ws_sold_date_sk#30) + : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_quantity#26,ws_list_price#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(51) HashAggregate(keys=[i_brand_id#8, i_class_id#9, i_category_id#10], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), count(1)]) + +- Exchange hashpartitioning(i_brand_id#8, i_class_id#9, i_category_id#10, 5) + +- *(50) HashAggregate(keys=[i_brand_id#8, i_class_id#9, i_category_id#10], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) + +- *(50) Project [ss_quantity#13, ss_list_price#14, i_brand_id#8, i_class_id#9, i_category_id#10] + +- *(50) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight + :- *(50) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14, i_brand_id#8, i_class_id#9, i_category_id#10] + : +- *(50) BroadcastHashJoin [ss_item_sk#31], [i_item_sk#43], Inner, BuildRight + : :- *(50) BroadcastHashJoin [ss_item_sk#31], [ss_item_sk#33], LeftSemi, BuildRight + : : :- *(50) Project [ss_sold_date_sk#18, ss_item_sk#31, ss_quantity#13, ss_list_price#14] + : : : +- *(50) Filter (isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#18)) + : : : +- *(50) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [ss_item_sk#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [i_item_sk#43, i_brand_id#8, i_class_id#9, i_category_id#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(49) Project [d_date_sk#19] + +- *(49) Filter ((isnotnull(d_week_seq#39) && (d_week_seq#39 = Subquery subquery1889)) && isnotnull(d_date_sk#19)) + : +- Subquery subquery1889 + : +- *(1) Project [d_week_seq#39] + : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct + +- *(49) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct + +- Subquery subquery1889 + +- *(1) Project [d_week_seq#39] + +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) + +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/simplified.txt index 7bbf83e3d..ee18cd267 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/simplified.txt @@ -1,204 +1,226 @@ -TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] - WholeStageCodegen (52) - BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] +TakeOrderedAndProject [channel,channel,i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id,number_sales,number_sales,sales,sales] + WholeStageCodegen + BroadcastHashJoin [i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id] + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] Subquery #2 - WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] + WholeStageCodegen + HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] InputAdapter Exchange #12 - WholeStageCodegen (7) - HashAggregate [quantity,list_price] [sum,count,sum,count] + WholeStageCodegen + HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] InputAdapter Union - WholeStageCodegen (2) - Project [ss_quantity,ss_list_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] + WholeStageCodegen + Project [ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_list_price,ss_quantity,ss_sold_date_sk] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] InputAdapter BroadcastExchange #13 - WholeStageCodegen (1) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - WholeStageCodegen (4) - Project [cs_quantity,cs_list_price] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen + Project [cs_list_price,cs_quantity] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] + Project [cs_list_price,cs_quantity,cs_sold_date_sk] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk] #13 - WholeStageCodegen (6) - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] + ReusedExchange [d_date_sk] [d_date_sk] #13 + WholeStageCodegen + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_list_price,ws_quantity,ws_sold_date_sk] + Filter [ws_sold_date_sk] + Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk] #13 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + ReusedExchange [d_date_sk] [d_date_sk] #13 + HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #1 - WholeStageCodegen (25) - HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] - Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] + Exchange [i_brand_id,i_category_id,i_class_id] #1 + WholeStageCodegen + HashAggregate [count,count,i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,sum,sum] [count,count,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] BroadcastHashJoin [ss_item_sk,ss_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + Project [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (11) + WholeStageCodegen Project [i_item_sk] - BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] - Filter [i_brand_id,i_class_id,i_category_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Project [i_brand_id,i_category_id,i_class_id,i_item_sk] + Filter [i_brand_id,i_category_id,i_class_id] + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (10) - HashAggregate [brand_id,class_id,category_id] - HashAggregate [brand_id,class_id,category_id] - HashAggregate [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #4 - WholeStageCodegen (9) - HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] + WholeStageCodegen + HashAggregate [brand_id,category_id,class_id] + HashAggregate [brand_id,category_id,class_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + HashAggregate [brand_id,category_id,class_id] + InputAdapter + Exchange [brand_id,category_id,class_id] #4 + WholeStageCodegen + HashAggregate [brand_id,category_id,class_id] + BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_item_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] [ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (1) - Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + WholeStageCodegen + Project [i_brand_id,i_category_id,i_class_id,i_item_sk] + Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] InputAdapter BroadcastExchange #6 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #7 - WholeStageCodegen (5) - Project [i_brand_id,i_class_id,i_category_id] + WholeStageCodegen + Project [i_brand_id,i_category_id,i_class_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + Project [cs_sold_date_sk,i_brand_id,i_category_id,i_class_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk] + Project [cs_item_sk,cs_sold_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] [cs_item_sk,cs_sold_date_sk] InputAdapter BroadcastExchange #8 - WholeStageCodegen (3) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + WholeStageCodegen + Project [i_brand_id,i_category_id,i_class_id,i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] InputAdapter - ReusedExchange [d_date_sk] #6 + ReusedExchange [d_date_sk] [d_date_sk] #6 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen + Project [i_brand_id,i_category_id,i_class_id] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_item_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] [ws_item_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #8 InputAdapter - BroadcastExchange #9 - WholeStageCodegen (8) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Filter [ws_item_sk,ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #8 - InputAdapter - ReusedExchange [d_date_sk] #6 + ReusedExchange [d_date_sk] [d_date_sk] #6 InputAdapter BroadcastExchange #10 - WholeStageCodegen (23) + WholeStageCodegen BroadcastHashJoin [i_item_sk,ss_item_sk] - Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + Project [i_brand_id,i_category_id,i_class_id,i_item_sk] + Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] + Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] InputAdapter - ReusedExchange [ss_item_sk] #2 + ReusedExchange [ss_item_sk] [ss_item_sk] #2 InputAdapter BroadcastExchange #11 - WholeStageCodegen (24) + WholeStageCodegen Project [d_date_sk] - Filter [d_week_seq,d_date_sk] + Filter [d_date_sk,d_week_seq] Subquery #1 - WholeStageCodegen (1) + WholeStageCodegen Project [d_week_seq] - Filter [d_year,d_moy,d_dom] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_week_seq] + Filter [d_dom,d_moy,d_year] + Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] [d_dom,d_moy,d_week_seq,d_year] + Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] + Subquery #1 + WholeStageCodegen + Project [d_week_seq] + Filter [d_dom,d_moy,d_year] + Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] [d_dom,d_moy,d_week_seq,d_year] InputAdapter BroadcastExchange #14 - WholeStageCodegen (51) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #2 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + WholeStageCodegen + Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] + Subquery #4 + WholeStageCodegen + HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] + InputAdapter + Exchange #12 + WholeStageCodegen + HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] + InputAdapter + Union + WholeStageCodegen + Project [ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_list_price,ss_quantity,ss_sold_date_sk] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen + Project [cs_list_price,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_list_price,cs_quantity,cs_sold_date_sk] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #13 + WholeStageCodegen + Project [ws_list_price,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_list_price,ws_quantity,ws_sold_date_sk] + Filter [ws_sold_date_sk] + Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #13 + HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #15 - WholeStageCodegen (50) - HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] - Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] + Exchange [i_brand_id,i_category_id,i_class_id] #15 + WholeStageCodegen + HashAggregate [count,count,i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,sum,sum] [count,count,sum,sum] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] BroadcastHashJoin [ss_item_sk,ss_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + Project [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] InputAdapter - ReusedExchange [ss_item_sk] #2 + ReusedExchange [ss_item_sk] [ss_item_sk] #2 InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 + ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #10 InputAdapter BroadcastExchange #16 - WholeStageCodegen (49) + WholeStageCodegen Project [d_date_sk] - Filter [d_week_seq,d_date_sk] + Filter [d_date_sk,d_week_seq] Subquery #3 - WholeStageCodegen (1) + WholeStageCodegen Project [d_week_seq] - Filter [d_year,d_moy,d_dom] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_week_seq] + Filter [d_dom,d_moy,d_year] + Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] [d_dom,d_moy,d_week_seq,d_year] + Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] + Subquery #3 + WholeStageCodegen + Project [d_week_seq] + Filter [d_dom,d_moy,d_year] + Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] [d_dom,d_moy,d_week_seq,d_year] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/explain.txt index 4dc0abf9a..d70d7905c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/explain.txt @@ -1,150 +1,26 @@ == Physical Plan == -TakeOrderedAndProject (26) -+- * HashAggregate (25) - +- Exchange (24) - +- * HashAggregate (23) - +- * Project (22) - +- * BroadcastHashJoin Inner BuildRight (21) - :- * Project (15) - : +- * BroadcastHashJoin Inner BuildRight (14) - : :- * Project (9) - : : +- * BroadcastHashJoin Inner BuildRight (8) - : : :- * Filter (3) - : : : +- * ColumnarToRow (2) - : : : +- Scan parquet default.catalog_sales (1) - : : +- BroadcastExchange (7) - : : +- * Filter (6) - : : +- * ColumnarToRow (5) - : : +- Scan parquet default.customer (4) - : +- BroadcastExchange (13) - : +- * Filter (12) - : +- * ColumnarToRow (11) - : +- Scan parquet default.customer_address (10) - +- BroadcastExchange (20) - +- * Project (19) - +- * Filter (18) - +- * ColumnarToRow (17) - +- Scan parquet default.date_dim (16) - - -(1) Scan parquet default.catalog_sales -Output [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_sales_price#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_sales_price#3] - -(3) Filter [codegen id : 4] -Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_sales_price#3] -Condition : (isnotnull(cs_bill_customer_sk#2) AND isnotnull(cs_sold_date_sk#1)) - -(4) Scan parquet default.customer -Output [2]: [c_customer_sk#4, c_current_addr_sk#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [c_customer_sk#4, c_current_addr_sk#5] - -(6) Filter [codegen id : 1] -Input [2]: [c_customer_sk#4, c_current_addr_sk#5] -Condition : (isnotnull(c_customer_sk#4) AND isnotnull(c_current_addr_sk#5)) - -(7) BroadcastExchange -Input [2]: [c_customer_sk#4, c_current_addr_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] - -(8) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_bill_customer_sk#2] -Right keys [1]: [c_customer_sk#4] -Join condition: None - -(9) Project [codegen id : 4] -Output [3]: [cs_sold_date_sk#1, cs_sales_price#3, c_current_addr_sk#5] -Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_sales_price#3, c_customer_sk#4, c_current_addr_sk#5] - -(10) Scan parquet default.customer_address -Output [3]: [ca_address_sk#7, ca_state#8, ca_zip#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [3]: [ca_address_sk#7, ca_state#8, ca_zip#9] - -(12) Filter [codegen id : 2] -Input [3]: [ca_address_sk#7, ca_state#8, ca_zip#9] -Condition : isnotnull(ca_address_sk#7) - -(13) BroadcastExchange -Input [3]: [ca_address_sk#7, ca_state#8, ca_zip#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] - -(14) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [c_current_addr_sk#5] -Right keys [1]: [ca_address_sk#7] -Join condition: ((substr(ca_zip#9, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR ca_state#8 IN (CA,WA,GA)) OR (cs_sales_price#3 > 500.00)) - -(15) Project [codegen id : 4] -Output [3]: [cs_sold_date_sk#1, cs_sales_price#3, ca_zip#9] -Input [6]: [cs_sold_date_sk#1, cs_sales_price#3, c_current_addr_sk#5, ca_address_sk#7, ca_state#8, ca_zip#9] - -(16) Scan parquet default.date_dim -Output [3]: [d_date_sk#11, d_year#12, d_qoy#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 3] -Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] - -(18) Filter [codegen id : 3] -Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] -Condition : ((((isnotnull(d_qoy#13) AND isnotnull(d_year#12)) AND (d_qoy#13 = 2)) AND (d_year#12 = 2001)) AND isnotnull(d_date_sk#11)) - -(19) Project [codegen id : 3] -Output [1]: [d_date_sk#11] -Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] - -(20) BroadcastExchange -Input [1]: [d_date_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] - -(21) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#11] -Join condition: None - -(22) Project [codegen id : 4] -Output [2]: [cs_sales_price#3, ca_zip#9] -Input [4]: [cs_sold_date_sk#1, cs_sales_price#3, ca_zip#9, d_date_sk#11] - -(23) HashAggregate [codegen id : 4] -Input [2]: [cs_sales_price#3, ca_zip#9] -Keys [1]: [ca_zip#9] -Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#3))] -Aggregate Attributes [1]: [sum#15] -Results [2]: [ca_zip#9, sum#16] - -(24) Exchange -Input [2]: [ca_zip#9, sum#16] -Arguments: hashpartitioning(ca_zip#9, 5), true, [id=#17] - -(25) HashAggregate [codegen id : 5] -Input [2]: [ca_zip#9, sum#16] -Keys [1]: [ca_zip#9] -Functions [1]: [sum(UnscaledValue(cs_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#3))#18] -Results [2]: [ca_zip#9, MakeDecimal(sum(UnscaledValue(cs_sales_price#3))#18,17,2) AS sum(cs_sales_price)#19] - -(26) TakeOrderedAndProject -Input [2]: [ca_zip#9, sum(cs_sales_price)#19] -Arguments: 100, [ca_zip#9 ASC NULLS FIRST], [ca_zip#9, sum(cs_sales_price)#19] - +TakeOrderedAndProject(limit=100, orderBy=[ca_zip#1 ASC NULLS FIRST], output=[ca_zip#1,sum(cs_sales_price)#2]) ++- *(5) HashAggregate(keys=[ca_zip#1], functions=[sum(UnscaledValue(cs_sales_price#3))]) + +- Exchange hashpartitioning(ca_zip#1, 5) + +- *(4) HashAggregate(keys=[ca_zip#1], functions=[partial_sum(UnscaledValue(cs_sales_price#3))]) + +- *(4) Project [cs_sales_price#3, ca_zip#1] + +- *(4) BroadcastHashJoin [cs_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + :- *(4) Project [cs_sold_date_sk#4, cs_sales_price#3, ca_zip#1] + : +- *(4) BroadcastHashJoin [c_current_addr_sk#6], [ca_address_sk#7], Inner, BuildRight, ((substring(ca_zip#1, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) || ca_state#8 IN (CA,WA,GA)) || (cs_sales_price#3 > 500.00)) + : :- *(4) Project [cs_sold_date_sk#4, cs_sales_price#3, c_current_addr_sk#6] + : : +- *(4) BroadcastHashJoin [cs_bill_customer_sk#9], [c_customer_sk#10], Inner, BuildRight + : : :- *(4) Project [cs_sold_date_sk#4, cs_bill_customer_sk#9, cs_sales_price#3] + : : : +- *(4) Filter (isnotnull(cs_bill_customer_sk#9) && isnotnull(cs_sold_date_sk#4)) + : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#4,cs_bill_customer_sk#9,cs_sales_price#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [c_customer_sk#10, c_current_addr_sk#6] + : : +- *(1) Filter (isnotnull(c_customer_sk#10) && isnotnull(c_current_addr_sk#6)) + : : +- *(1) FileScan parquet default.customer[c_customer_sk#10,c_current_addr_sk#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [ca_address_sk#7, ca_state#8, ca_zip#1] + : +- *(2) Filter isnotnull(ca_address_sk#7) + : +- *(2) FileScan parquet default.customer_address[ca_address_sk#7,ca_state#8,ca_zip#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [d_date_sk#5] + +- *(3) Filter ((((isnotnull(d_qoy#11) && isnotnull(d_year#12)) && (d_qoy#11 = 2)) && (d_year#12 = 2001)) && isnotnull(d_date_sk#5)) + +- *(3) FileScan parquet default.date_dim[d_date_sk#5,d_year#12,d_qoy#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/simplified.txt index 9cac0b46f..aa1157759 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/simplified.txt @@ -1,39 +1,34 @@ TakeOrderedAndProject [ca_zip,sum(cs_sales_price)] - WholeStageCodegen (5) - HashAggregate [ca_zip,sum] [sum(UnscaledValue(cs_sales_price)),sum(cs_sales_price),sum] + WholeStageCodegen + HashAggregate [ca_zip,sum,sum(UnscaledValue(cs_sales_price))] [sum,sum(UnscaledValue(cs_sales_price)),sum(cs_sales_price)] InputAdapter Exchange [ca_zip] #1 - WholeStageCodegen (4) - HashAggregate [ca_zip,cs_sales_price] [sum,sum] - Project [cs_sales_price,ca_zip] + WholeStageCodegen + HashAggregate [ca_zip,cs_sales_price,sum,sum] [sum,sum] + Project [ca_zip,cs_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_sales_price,ca_zip] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_zip,ca_state,cs_sales_price] - Project [cs_sold_date_sk,cs_sales_price,c_current_addr_sk] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Filter [cs_bill_customer_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_sales_price] + Project [ca_zip,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_state,ca_zip,cs_sales_price] + Project [c_current_addr_sk,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] + Filter [cs_bill_customer_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] + WholeStageCodegen + Project [c_current_addr_sk,c_customer_sk] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Filter [ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_state,ca_zip] + WholeStageCodegen + Project [ca_address_sk,ca_state,ca_zip] + Filter [ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state,ca_zip] [ca_address_sk,ca_state,ca_zip] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) + WholeStageCodegen Project [d_date_sk] - Filter [d_qoy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] + Filter [d_date_sk,d_qoy,d_year] + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/explain.txt index ea7e29839..589464c44 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/explain.txt @@ -1,235 +1,37 @@ == Physical Plan == -* Sort (41) -+- * HashAggregate (40) - +- Exchange (39) - +- * HashAggregate (38) - +- * HashAggregate (37) - +- Exchange (36) - +- * HashAggregate (35) - +- * Project (34) - +- * BroadcastHashJoin Inner BuildRight (33) - :- * Project (27) - : +- * BroadcastHashJoin Inner BuildRight (26) - : :- * Project (20) - : : +- * BroadcastHashJoin Inner BuildRight (19) - : : :- * BroadcastHashJoin LeftAnti BuildRight (13) - : : : :- * Project (9) - : : : : +- * BroadcastHashJoin LeftSemi BuildRight (8) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.catalog_sales (1) - : : : : +- BroadcastExchange (7) - : : : : +- * Project (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.catalog_sales (4) - : : : +- BroadcastExchange (12) - : : : +- * ColumnarToRow (11) - : : : +- Scan parquet default.catalog_returns (10) - : : +- BroadcastExchange (18) - : : +- * Project (17) - : : +- * Filter (16) - : : +- * ColumnarToRow (15) - : : +- Scan parquet default.date_dim (14) - : +- BroadcastExchange (25) - : +- * Project (24) - : +- * Filter (23) - : +- * ColumnarToRow (22) - : +- Scan parquet default.customer_address (21) - +- BroadcastExchange (32) - +- * Project (31) - +- * Filter (30) - +- * ColumnarToRow (29) - +- Scan parquet default.call_center (28) - - -(1) Scan parquet default.catalog_sales -Output [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_ship_date_sk), IsNotNull(cs_ship_addr_sk), IsNotNull(cs_call_center_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 6] -Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] - -(3) Filter [codegen id : 6] -Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] -Condition : ((isnotnull(cs_ship_date_sk#1) AND isnotnull(cs_ship_addr_sk#2)) AND isnotnull(cs_call_center_sk#3)) - -(4) Scan parquet default.catalog_sales -Output [2]: [cs_warehouse_sk#4, cs_order_number#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [cs_warehouse_sk#4, cs_order_number#5] - -(6) Project [codegen id : 1] -Output [2]: [cs_warehouse_sk#4 AS cs_warehouse_sk#4#8, cs_order_number#5 AS cs_order_number#5#9] -Input [2]: [cs_warehouse_sk#4, cs_order_number#5] - -(7) BroadcastExchange -Input [2]: [cs_warehouse_sk#4#8, cs_order_number#5#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#10] - -(8) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_order_number#5] -Right keys [1]: [cs_order_number#5#9] -Join condition: NOT (cs_warehouse_sk#4 = cs_warehouse_sk#4#8) - -(9) Project [codegen id : 6] -Output [6]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] -Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] - -(10) Scan parquet default.catalog_returns -Output [1]: [cr_order_number#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [1]: [cr_order_number#11] - -(12) BroadcastExchange -Input [1]: [cr_order_number#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(13) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_order_number#5] -Right keys [1]: [cr_order_number#11] -Join condition: None - -(14) Scan parquet default.date_dim -Output [2]: [d_date_sk#13, d_date#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2002-02-01), LessThanOrEqual(d_date,2002-04-02), IsNotNull(d_date_sk)] -ReadSchema: struct - -(15) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#13, d_date#14] - -(16) Filter [codegen id : 3] -Input [2]: [d_date_sk#13, d_date#14] -Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 11719)) AND (d_date#14 <= 11779)) AND isnotnull(d_date_sk#13)) - -(17) Project [codegen id : 3] -Output [1]: [d_date_sk#13] -Input [2]: [d_date_sk#13, d_date#14] - -(18) BroadcastExchange -Input [1]: [d_date_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] - -(19) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_ship_date_sk#1] -Right keys [1]: [d_date_sk#13] -Join condition: None - -(20) Project [codegen id : 6] -Output [5]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] -Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, d_date_sk#13] - -(21) Scan parquet default.customer_address -Output [2]: [ca_address_sk#16, ca_state#17] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(22) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#16, ca_state#17] - -(23) Filter [codegen id : 4] -Input [2]: [ca_address_sk#16, ca_state#17] -Condition : ((isnotnull(ca_state#17) AND (ca_state#17 = GA)) AND isnotnull(ca_address_sk#16)) - -(24) Project [codegen id : 4] -Output [1]: [ca_address_sk#16] -Input [2]: [ca_address_sk#16, ca_state#17] - -(25) BroadcastExchange -Input [1]: [ca_address_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] - -(26) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_ship_addr_sk#2] -Right keys [1]: [ca_address_sk#16] -Join condition: None - -(27) Project [codegen id : 6] -Output [4]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] -Input [6]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, ca_address_sk#16] - -(28) Scan parquet default.call_center -Output [2]: [cc_call_center_sk#19, cc_county#20] -Batched: true -Location [not included in comparison]/{warehouse_dir}/call_center] -PushedFilters: [IsNotNull(cc_county), EqualTo(cc_county,Williamson County), IsNotNull(cc_call_center_sk)] -ReadSchema: struct - -(29) ColumnarToRow [codegen id : 5] -Input [2]: [cc_call_center_sk#19, cc_county#20] - -(30) Filter [codegen id : 5] -Input [2]: [cc_call_center_sk#19, cc_county#20] -Condition : ((isnotnull(cc_county#20) AND (cc_county#20 = Williamson County)) AND isnotnull(cc_call_center_sk#19)) - -(31) Project [codegen id : 5] -Output [1]: [cc_call_center_sk#19] -Input [2]: [cc_call_center_sk#19, cc_county#20] - -(32) BroadcastExchange -Input [1]: [cc_call_center_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] - -(33) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_call_center_sk#3] -Right keys [1]: [cc_call_center_sk#19] -Join condition: None - -(34) Project [codegen id : 6] -Output [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] -Input [5]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cc_call_center_sk#19] - -(35) HashAggregate [codegen id : 6] -Input [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] -Keys [1]: [cs_order_number#5] -Functions [2]: [partial_sum(UnscaledValue(cs_ext_ship_cost#6)), partial_sum(UnscaledValue(cs_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23] -Results [3]: [cs_order_number#5, sum#24, sum#25] - -(36) Exchange -Input [3]: [cs_order_number#5, sum#24, sum#25] -Arguments: hashpartitioning(cs_order_number#5, 5), true, [id=#26] - -(37) HashAggregate [codegen id : 7] -Input [3]: [cs_order_number#5, sum#24, sum#25] -Keys [1]: [cs_order_number#5] -Functions [2]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23] -Results [3]: [cs_order_number#5, sum#24, sum#25] - -(38) HashAggregate [codegen id : 7] -Input [3]: [cs_order_number#5, sum#24, sum#25] -Keys: [] -Functions [3]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7)), partial_count(distinct cs_order_number#5)] -Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#27] -Results [3]: [sum#24, sum#25, count#28] - -(39) Exchange -Input [3]: [sum#24, sum#25, count#28] -Arguments: SinglePartition, true, [id=#29] - -(40) HashAggregate [codegen id : 8] -Input [3]: [sum#24, sum#25, count#28] -Keys: [] -Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net_profit#7)), count(distinct cs_order_number#5)] -Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#27] -Results [3]: [count(cs_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#23,17,2) AS total net profit #32] - -(41) Sort [codegen id : 8] -Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: [order count #30 ASC NULLS FIRST], true, 0 - +TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], output=[order count #1,total shipping cost #2,total net profit #3]) ++- *(8) HashAggregate(keys=[], functions=[sum(UnscaledValue(cs_ext_ship_cost#4)), sum(UnscaledValue(cs_net_profit#5)), count(distinct cs_order_number#6)]) + +- Exchange SinglePartition + +- *(7) HashAggregate(keys=[], functions=[merge_sum(UnscaledValue(cs_ext_ship_cost#4)), merge_sum(UnscaledValue(cs_net_profit#5)), partial_count(distinct cs_order_number#6)]) + +- *(7) HashAggregate(keys=[cs_order_number#6], functions=[merge_sum(UnscaledValue(cs_ext_ship_cost#4)), merge_sum(UnscaledValue(cs_net_profit#5))]) + +- Exchange hashpartitioning(cs_order_number#6, 5) + +- *(6) HashAggregate(keys=[cs_order_number#6], functions=[partial_sum(UnscaledValue(cs_ext_ship_cost#4)), partial_sum(UnscaledValue(cs_net_profit#5))]) + +- *(6) Project [cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] + +- *(6) BroadcastHashJoin [cs_call_center_sk#7], [cc_call_center_sk#8], Inner, BuildRight + :- *(6) Project [cs_call_center_sk#7, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] + : +- *(6) BroadcastHashJoin [cs_ship_addr_sk#9], [ca_address_sk#10], Inner, BuildRight + : :- *(6) Project [cs_ship_addr_sk#9, cs_call_center_sk#7, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] + : : +- *(6) BroadcastHashJoin [cs_ship_date_sk#11], [d_date_sk#12], Inner, BuildRight + : : :- *(6) BroadcastHashJoin [cs_order_number#6], [cr_order_number#13], LeftAnti, BuildRight + : : : :- *(6) Project [cs_ship_date_sk#11, cs_ship_addr_sk#9, cs_call_center_sk#7, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] + : : : : +- *(6) BroadcastHashJoin [cs_order_number#6], [cs_order_number#6#14], LeftSemi, BuildRight, NOT (cs_warehouse_sk#15 = cs_warehouse_sk#15#16) + : : : : :- *(6) Project [cs_ship_date_sk#11, cs_ship_addr_sk#9, cs_call_center_sk#7, cs_warehouse_sk#15, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] + : : : : : +- *(6) Filter ((isnotnull(cs_ship_date_sk#11) && isnotnull(cs_ship_addr_sk#9)) && isnotnull(cs_call_center_sk#7)) + : : : : : +- *(6) FileScan parquet default.catalog_sales[cs_ship_date_sk#11,cs_ship_addr_sk#9,cs_call_center_sk#7,cs_warehouse_sk#15,cs_order_number#6,cs_ext_ship_cost#4,cs_net_profit#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_ship_date_sk), IsNotNull(cs_ship_addr_sk), IsNotNull(cs_call_center_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) FileScan parquet default.catalog_returns[cr_order_number#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [d_date_sk#12] + : : +- *(3) Filter (((isnotnull(d_date#17) && (cast(d_date#17 as string) >= 2002-02-01)) && (d_date#17 <= 11779)) && isnotnull(d_date_sk#12)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2002-04-02), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [ca_address_sk#10] + : +- *(4) Filter ((isnotnull(ca_state#18) && (ca_state#18 = GA)) && isnotnull(ca_address_sk#10)) + : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [cc_call_center_sk#8] + +- *(5) Filter ((isnotnull(cc_county#19) && (cc_county#19 = Williamson County)) && isnotnull(cc_call_center_sk#8)) + +- *(5) FileScan parquet default.call_center[cc_call_center_sk#8,cc_county#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_county), EqualTo(cc_county,Williamson County), IsNotNull(cc_call_center_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/simplified.txt index 169f07c2d..fe14e6333 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/simplified.txt @@ -1,62 +1,51 @@ -WholeStageCodegen (8) - Sort [order count ] - HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] +TakeOrderedAndProject [order count ,total net profit ,total shipping cost ] + WholeStageCodegen + HashAggregate [count,count(cs_order_number),sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] [count,count(cs_order_number),order count ,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),total net profit ,total shipping cost ] InputAdapter Exchange #1 - WholeStageCodegen (7) - HashAggregate [cs_order_number] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,sum,sum,count] - HashAggregate [cs_order_number] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum] + WholeStageCodegen + HashAggregate [count,count(cs_order_number),cs_order_number,sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] [count,count,count(cs_order_number),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] + HashAggregate [cs_order_number,sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] [sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] InputAdapter Exchange [cs_order_number] #2 - WholeStageCodegen (6) - HashAggregate [cs_order_number,cs_ext_ship_cost,cs_net_profit] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum] - Project [cs_order_number,cs_ext_ship_cost,cs_net_profit] - BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] - Project [cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] - BroadcastHashJoin [cs_ship_addr_sk,ca_address_sk] - Project [cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + WholeStageCodegen + HashAggregate [cs_ext_ship_cost,cs_net_profit,cs_order_number,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] [sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] + Project [cs_ext_ship_cost,cs_net_profit,cs_order_number] + BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] + Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number] + BroadcastHashJoin [ca_address_sk,cs_ship_addr_sk] + Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk] BroadcastHashJoin [cs_ship_date_sk,d_date_sk] - BroadcastHashJoin [cs_order_number,cr_order_number] - Project [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + BroadcastHashJoin [cr_order_number,cs_order_number] + Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk] BroadcastHashJoin [cs_order_number,cs_order_number,cs_warehouse_sk,cs_warehouse_sk] - Filter [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk] + Filter [cs_call_center_sk,cs_ship_addr_sk,cs_ship_date_sk] + Scan parquet default.catalog_sales [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk] [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Project [cs_warehouse_sk,cs_order_number] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_warehouse_sk,cs_order_number] + WholeStageCodegen + Project [cs_order_number,cs_warehouse_sk] + Scan parquet default.catalog_sales [cs_order_number,cs_warehouse_sk] [cs_order_number,cs_warehouse_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_order_number] + WholeStageCodegen + Scan parquet default.catalog_returns [cr_order_number] [cr_order_number] InputAdapter BroadcastExchange #5 - WholeStageCodegen (3) + WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #6 - WholeStageCodegen (4) + WholeStageCodegen Project [ca_address_sk] - Filter [ca_state,ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_state] + Filter [ca_address_sk,ca_state] + Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] InputAdapter BroadcastExchange #7 - WholeStageCodegen (5) + WholeStageCodegen Project [cc_call_center_sk] - Filter [cc_county,cc_call_center_sk] - ColumnarToRow - InputAdapter - Scan parquet default.call_center [cc_call_center_sk,cc_county] + Filter [cc_call_center_sk,cc_county] + Scan parquet default.call_center [cc_call_center_sk,cc_county] [cc_call_center_sk,cc_county] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/explain.txt index 4085b4ab9..5d0ff39f9 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/explain.txt @@ -1,269 +1,50 @@ == Physical Plan == -TakeOrderedAndProject (48) -+- * HashAggregate (47) - +- Exchange (46) - +- * HashAggregate (45) - +- * Project (44) - +- * BroadcastHashJoin Inner BuildRight (43) - :- * Project (38) - : +- * BroadcastHashJoin Inner BuildRight (37) - : :- * Project (32) - : : +- * BroadcastHashJoin Inner BuildRight (31) - : : :- * Project (29) - : : : +- * BroadcastHashJoin Inner BuildRight (28) - : : : :- * Project (22) - : : : : +- * BroadcastHashJoin Inner BuildRight (21) - : : : : :- * Project (15) - : : : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : : : :- * Project (9) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : : : : :- * Filter (3) - : : : : : : : +- * ColumnarToRow (2) - : : : : : : : +- Scan parquet default.store_sales (1) - : : : : : : +- BroadcastExchange (7) - : : : : : : +- * Filter (6) - : : : : : : +- * ColumnarToRow (5) - : : : : : : +- Scan parquet default.store_returns (4) - : : : : : +- BroadcastExchange (13) - : : : : : +- * Filter (12) - : : : : : +- * ColumnarToRow (11) - : : : : : +- Scan parquet default.catalog_sales (10) - : : : : +- BroadcastExchange (20) - : : : : +- * Project (19) - : : : : +- * Filter (18) - : : : : +- * ColumnarToRow (17) - : : : : +- Scan parquet default.date_dim (16) - : : : +- BroadcastExchange (27) - : : : +- * Project (26) - : : : +- * Filter (25) - : : : +- * ColumnarToRow (24) - : : : +- Scan parquet default.date_dim (23) - : : +- ReusedExchange (30) - : +- BroadcastExchange (36) - : +- * Filter (35) - : +- * ColumnarToRow (34) - : +- Scan parquet default.store (33) - +- BroadcastExchange (42) - +- * Filter (41) - +- * ColumnarToRow (40) - +- Scan parquet default.item (39) - - -(1) Scan parquet default.store_sales -Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 8] -Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] - -(3) Filter [codegen id : 8] -Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] -Condition : ((((isnotnull(ss_customer_sk#3) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_ticket_number#5)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) - -(4) Scan parquet default.store_returns -Output [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_returned_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] - -(6) Filter [codegen id : 1] -Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] -Condition : (((isnotnull(sr_customer_sk#9) AND isnotnull(sr_item_sk#8)) AND isnotnull(sr_ticket_number#10)) AND isnotnull(sr_returned_date_sk#7)) - -(7) BroadcastExchange -Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] -Arguments: HashedRelationBroadcastMode(List(input[2, bigint, false], input[1, bigint, false], input[3, bigint, false]),false), [id=#12] - -(8) BroadcastHashJoin [codegen id : 8] -Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] -Right keys [3]: [sr_customer_sk#9, sr_item_sk#8, sr_ticket_number#10] -Join condition: None - -(9) Project [codegen id : 8] -Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_return_quantity#11] -Input [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] - -(10) Scan parquet default.catalog_sales -Output [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] - -(12) Filter [codegen id : 2] -Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] -Condition : ((isnotnull(cs_bill_customer_sk#14) AND isnotnull(cs_item_sk#15)) AND isnotnull(cs_sold_date_sk#13)) - -(13) BroadcastExchange -Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint), cast(input[2, int, false] as bigint)),false), [id=#17] - -(14) BroadcastHashJoin [codegen id : 8] -Left keys [2]: [sr_customer_sk#9, sr_item_sk#8] -Right keys [2]: [cast(cs_bill_customer_sk#14 as bigint), cast(cs_item_sk#15 as bigint)] -Join condition: None - -(15) Project [codegen id : 8] -Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16] -Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_return_quantity#11, cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] - -(16) Scan parquet default.date_dim -Output [2]: [d_date_sk#18, d_quarter_name#19] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_quarter_name), EqualTo(d_quarter_name,2001Q1), IsNotNull(d_date_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#18, d_quarter_name#19] - -(18) Filter [codegen id : 3] -Input [2]: [d_date_sk#18, d_quarter_name#19] -Condition : ((isnotnull(d_quarter_name#19) AND (d_quarter_name#19 = 2001Q1)) AND isnotnull(d_date_sk#18)) - -(19) Project [codegen id : 3] -Output [1]: [d_date_sk#18] -Input [2]: [d_date_sk#18, d_quarter_name#19] - -(20) BroadcastExchange -Input [1]: [d_date_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] - -(21) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#18] -Join condition: None - -(22) Project [codegen id : 8] -Output [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16] -Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16, d_date_sk#18] - -(23) Scan parquet default.date_dim -Output [2]: [d_date_sk#21, d_quarter_name#22] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 4] -Input [2]: [d_date_sk#21, d_quarter_name#22] - -(25) Filter [codegen id : 4] -Input [2]: [d_date_sk#21, d_quarter_name#22] -Condition : (d_quarter_name#22 IN (2001Q1,2001Q2,2001Q3) AND isnotnull(d_date_sk#21)) - -(26) Project [codegen id : 4] -Output [1]: [d_date_sk#21] -Input [2]: [d_date_sk#21, d_quarter_name#22] - -(27) BroadcastExchange -Input [1]: [d_date_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] - -(28) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [sr_returned_date_sk#7] -Right keys [1]: [cast(d_date_sk#21 as bigint)] -Join condition: None - -(29) Project [codegen id : 8] -Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16] -Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16, d_date_sk#21] - -(30) ReusedExchange [Reuses operator id: 27] -Output [1]: [d_date_sk#24] - -(31) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_sold_date_sk#13] -Right keys [1]: [d_date_sk#24] -Join condition: None - -(32) Project [codegen id : 8] -Output [5]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_quantity#16] -Input [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16, d_date_sk#24] - -(33) Scan parquet default.store -Output [2]: [s_store_sk#25, s_state#26] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct - -(34) ColumnarToRow [codegen id : 6] -Input [2]: [s_store_sk#25, s_state#26] - -(35) Filter [codegen id : 6] -Input [2]: [s_store_sk#25, s_state#26] -Condition : isnotnull(s_store_sk#25) - -(36) BroadcastExchange -Input [2]: [s_store_sk#25, s_state#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] - -(37) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_store_sk#4] -Right keys [1]: [s_store_sk#25] -Join condition: None - -(38) Project [codegen id : 8] -Output [5]: [ss_item_sk#2, ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_state#26] -Input [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_store_sk#25, s_state#26] - -(39) Scan parquet default.item -Output [3]: [i_item_sk#28, i_item_id#29, i_item_desc#30] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(40) ColumnarToRow [codegen id : 7] -Input [3]: [i_item_sk#28, i_item_id#29, i_item_desc#30] - -(41) Filter [codegen id : 7] -Input [3]: [i_item_sk#28, i_item_id#29, i_item_desc#30] -Condition : isnotnull(i_item_sk#28) - -(42) BroadcastExchange -Input [3]: [i_item_sk#28, i_item_id#29, i_item_desc#30] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] - -(43) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#28] -Join condition: None - -(44) Project [codegen id : 8] -Output [6]: [ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_state#26, i_item_id#29, i_item_desc#30] -Input [8]: [ss_item_sk#2, ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_state#26, i_item_sk#28, i_item_id#29, i_item_desc#30] - -(45) HashAggregate [codegen id : 8] -Input [6]: [ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_state#26, i_item_id#29, i_item_desc#30] -Keys [3]: [i_item_id#29, i_item_desc#30, s_state#26] -Functions [9]: [partial_count(ss_quantity#6), partial_avg(cast(ss_quantity#6 as bigint)), partial_stddev_samp(cast(ss_quantity#6 as double)), partial_count(sr_return_quantity#11), partial_avg(cast(sr_return_quantity#11 as bigint)), partial_stddev_samp(cast(sr_return_quantity#11 as double)), partial_count(cs_quantity#16), partial_avg(cast(cs_quantity#16 as bigint)), partial_stddev_samp(cast(cs_quantity#16 as double))] -Aggregate Attributes [18]: [count#32, sum#33, count#34, n#35, avg#36, m2#37, count#38, sum#39, count#40, n#41, avg#42, m2#43, count#44, sum#45, count#46, n#47, avg#48, m2#49] -Results [21]: [i_item_id#29, i_item_desc#30, s_state#26, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61, count#62, sum#63, count#64, n#65, avg#66, m2#67] - -(46) Exchange -Input [21]: [i_item_id#29, i_item_desc#30, s_state#26, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61, count#62, sum#63, count#64, n#65, avg#66, m2#67] -Arguments: hashpartitioning(i_item_id#29, i_item_desc#30, s_state#26, 5), true, [id=#68] - -(47) HashAggregate [codegen id : 9] -Input [21]: [i_item_id#29, i_item_desc#30, s_state#26, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61, count#62, sum#63, count#64, n#65, avg#66, m2#67] -Keys [3]: [i_item_id#29, i_item_desc#30, s_state#26] -Functions [9]: [count(ss_quantity#6), avg(cast(ss_quantity#6 as bigint)), stddev_samp(cast(ss_quantity#6 as double)), count(sr_return_quantity#11), avg(cast(sr_return_quantity#11 as bigint)), stddev_samp(cast(sr_return_quantity#11 as double)), count(cs_quantity#16), avg(cast(cs_quantity#16 as bigint)), stddev_samp(cast(cs_quantity#16 as double))] -Aggregate Attributes [9]: [count(ss_quantity#6)#69, avg(cast(ss_quantity#6 as bigint))#70, stddev_samp(cast(ss_quantity#6 as double))#71, count(sr_return_quantity#11)#72, avg(cast(sr_return_quantity#11 as bigint))#73, stddev_samp(cast(sr_return_quantity#11 as double))#74, count(cs_quantity#16)#75, avg(cast(cs_quantity#16 as bigint))#76, stddev_samp(cast(cs_quantity#16 as double))#77] -Results [15]: [i_item_id#29, i_item_desc#30, s_state#26, count(ss_quantity#6)#69 AS store_sales_quantitycount#78, avg(cast(ss_quantity#6 as bigint))#70 AS store_sales_quantityave#79, stddev_samp(cast(ss_quantity#6 as double))#71 AS store_sales_quantitystdev#80, (stddev_samp(cast(ss_quantity#6 as double))#71 / avg(cast(ss_quantity#6 as bigint))#70) AS store_sales_quantitycov#81, count(sr_return_quantity#11)#72 AS as_store_returns_quantitycount#82, avg(cast(sr_return_quantity#11 as bigint))#73 AS as_store_returns_quantityave#83, stddev_samp(cast(sr_return_quantity#11 as double))#74 AS as_store_returns_quantitystdev#84, (stddev_samp(cast(sr_return_quantity#11 as double))#74 / avg(cast(sr_return_quantity#11 as bigint))#73) AS store_returns_quantitycov#85, count(cs_quantity#16)#75 AS catalog_sales_quantitycount#86, avg(cast(cs_quantity#16 as bigint))#76 AS catalog_sales_quantityave#87, (stddev_samp(cast(cs_quantity#16 as double))#77 / avg(cast(cs_quantity#16 as bigint))#76) AS catalog_sales_quantitystdev#88, (stddev_samp(cast(cs_quantity#16 as double))#77 / avg(cast(cs_quantity#16 as bigint))#76) AS catalog_sales_quantitycov#89] - -(48) TakeOrderedAndProject -Input [15]: [i_item_id#29, i_item_desc#30, s_state#26, store_sales_quantitycount#78, store_sales_quantityave#79, store_sales_quantitystdev#80, store_sales_quantitycov#81, as_store_returns_quantitycount#82, as_store_returns_quantityave#83, as_store_returns_quantitystdev#84, store_returns_quantitycov#85, catalog_sales_quantitycount#86, catalog_sales_quantityave#87, catalog_sales_quantitystdev#88, catalog_sales_quantitycov#89] -Arguments: 100, [i_item_id#29 ASC NULLS FIRST, i_item_desc#30 ASC NULLS FIRST, s_state#26 ASC NULLS FIRST], [i_item_id#29, i_item_desc#30, s_state#26, store_sales_quantitycount#78, store_sales_quantityave#79, store_sales_quantitystdev#80, store_sales_quantitycov#81, as_store_returns_quantitycount#82, as_store_returns_quantityave#83, as_store_returns_quantitystdev#84, store_returns_quantitycov#85, catalog_sales_quantitycount#86, catalog_sales_quantityave#87, catalog_sales_quantitystdev#88, catalog_sales_quantitycov#89] - +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST,s_state#3 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,s_state#3,store_sales_quantitycount#4,store_sales_quantityave#5,store_sales_quantitystdev#6,store_sales_quantitycov#7,as_store_returns_quantitycount#8,as_store_returns_quantityave#9,as_store_returns_quantitystdev#10,store_returns_quantitycov#11,catalog_sales_quantitycount#12,catalog_sales_quantityave#13,catalog_sales_quantitystdev#14,catalog_sales_quantitycov#15]) ++- *(9) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_state#3], functions=[count(ss_quantity#16), avg(cast(ss_quantity#16 as bigint)), stddev_samp(cast(ss_quantity#16 as double)), count(sr_return_quantity#17), avg(cast(sr_return_quantity#17 as bigint)), stddev_samp(cast(sr_return_quantity#17 as double)), count(cs_quantity#18), avg(cast(cs_quantity#18 as bigint)), stddev_samp(cast(cs_quantity#18 as double))]) + +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, s_state#3, 5) + +- *(8) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_state#3], functions=[partial_count(ss_quantity#16), partial_avg(cast(ss_quantity#16 as bigint)), partial_stddev_samp(cast(ss_quantity#16 as double)), partial_count(sr_return_quantity#17), partial_avg(cast(sr_return_quantity#17 as bigint)), partial_stddev_samp(cast(sr_return_quantity#17 as double)), partial_count(cs_quantity#18), partial_avg(cast(cs_quantity#18 as bigint)), partial_stddev_samp(cast(cs_quantity#18 as double))]) + +- *(8) Project [ss_quantity#16, sr_return_quantity#17, cs_quantity#18, s_state#3, i_item_id#1, i_item_desc#2] + +- *(8) BroadcastHashJoin [ss_item_sk#19], [i_item_sk#20], Inner, BuildRight + :- *(8) Project [ss_item_sk#19, ss_quantity#16, sr_return_quantity#17, cs_quantity#18, s_state#3] + : +- *(8) BroadcastHashJoin [ss_store_sk#21], [s_store_sk#22], Inner, BuildRight + : :- *(8) Project [ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_return_quantity#17, cs_quantity#18] + : : +- *(8) BroadcastHashJoin [cs_sold_date_sk#23], [d_date_sk#24], Inner, BuildRight + : : :- *(8) Project [ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_return_quantity#17, cs_sold_date_sk#23, cs_quantity#18] + : : : +- *(8) BroadcastHashJoin [sr_returned_date_sk#25], [cast(d_date_sk#26 as bigint)], Inner, BuildRight + : : : :- *(8) Project [ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_returned_date_sk#25, sr_return_quantity#17, cs_sold_date_sk#23, cs_quantity#18] + : : : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#27], [d_date_sk#28], Inner, BuildRight + : : : : :- *(8) Project [ss_sold_date_sk#27, ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_returned_date_sk#25, sr_return_quantity#17, cs_sold_date_sk#23, cs_quantity#18] + : : : : : +- *(8) BroadcastHashJoin [sr_customer_sk#29, sr_item_sk#30], [cast(cs_bill_customer_sk#31 as bigint), cast(cs_item_sk#32 as bigint)], Inner, BuildRight + : : : : : :- *(8) Project [ss_sold_date_sk#27, ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_returned_date_sk#25, sr_item_sk#30, sr_customer_sk#29, sr_return_quantity#17] + : : : : : : +- *(8) BroadcastHashJoin [cast(ss_customer_sk#33 as bigint), cast(ss_item_sk#19 as bigint), cast(ss_ticket_number#34 as bigint)], [sr_customer_sk#29, sr_item_sk#30, sr_ticket_number#35], Inner, BuildRight + : : : : : : :- *(8) Project [ss_sold_date_sk#27, ss_item_sk#19, ss_customer_sk#33, ss_store_sk#21, ss_ticket_number#34, ss_quantity#16] + : : : : : : : +- *(8) Filter ((((isnotnull(ss_ticket_number#34) && isnotnull(ss_item_sk#19)) && isnotnull(ss_customer_sk#33)) && isnotnull(ss_sold_date_sk#27)) && isnotnull(ss_store_sk#21)) + : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#27,ss_item_sk#19,ss_customer_sk#33,ss_store_sk#21,ss_ticket_number#34,ss_quantity#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(3) Project [d_date_sk#28] + : : : : +- *(3) Filter ((isnotnull(d_quarter_name#36) && (d_quarter_name#36 = 2001Q1)) && isnotnull(d_date_sk#28)) + : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#28,d_quarter_name#36] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_quarter_name), EqualTo(d_quarter_name,2001Q1), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [d_date_sk#26] + : : : +- *(4) Filter (d_quarter_name#37 IN (2001Q1,2001Q2,2001Q3) && isnotnull(d_date_sk#26)) + : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#26,d_quarter_name#37] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(5) Project [d_date_sk#24] + : : +- *(5) Filter (d_quarter_name#38 IN (2001Q1,2001Q2,2001Q3) && isnotnull(d_date_sk#24)) + : : +- *(5) FileScan parquet default.date_dim[d_date_sk#24,d_quarter_name#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [s_store_sk#22, s_state#3] + : +- *(6) Filter isnotnull(s_store_sk#22) + : +- *(6) FileScan parquet default.store[s_store_sk#22,s_state#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) Project [i_item_sk#20, i_item_id#1, i_item_desc#2] + +- *(7) Filter isnotnull(i_item_sk#20) + +- *(7) FileScan parquet default.item[i_item_sk#20,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/simplified.txt index e9b95747c..780e633e2 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/simplified.txt @@ -1,71 +1,66 @@ -TakeOrderedAndProject [i_item_id,i_item_desc,s_state,store_sales_quantitycount,store_sales_quantityave,store_sales_quantitystdev,store_sales_quantitycov,as_store_returns_quantitycount,as_store_returns_quantityave,as_store_returns_quantitystdev,store_returns_quantitycov,catalog_sales_quantitycount,catalog_sales_quantityave,catalog_sales_quantitystdev,catalog_sales_quantitycov] - WholeStageCodegen (9) - HashAggregate [i_item_id,i_item_desc,s_state,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] [count(ss_quantity),avg(cast(ss_quantity as bigint)),stddev_samp(cast(ss_quantity as double)),count(sr_return_quantity),avg(cast(sr_return_quantity as bigint)),stddev_samp(cast(sr_return_quantity as double)),count(cs_quantity),avg(cast(cs_quantity as bigint)),stddev_samp(cast(cs_quantity as double)),store_sales_quantitycount,store_sales_quantityave,store_sales_quantitystdev,store_sales_quantitycov,as_store_returns_quantitycount,as_store_returns_quantityave,as_store_returns_quantitystdev,store_returns_quantitycov,catalog_sales_quantitycount,catalog_sales_quantityave,catalog_sales_quantitystdev,catalog_sales_quantitycov,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] +TakeOrderedAndProject [as_store_returns_quantityave,as_store_returns_quantitycount,as_store_returns_quantitystdev,catalog_sales_quantityave,catalog_sales_quantitycount,catalog_sales_quantitycov,catalog_sales_quantitystdev,i_item_desc,i_item_id,s_state,store_returns_quantitycov,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitycov,store_sales_quantitystdev] + WholeStageCodegen + HashAggregate [avg,avg,avg,avg(cast(cs_quantity as bigint)),avg(cast(sr_return_quantity as bigint)),avg(cast(ss_quantity as bigint)),count,count,count,count,count,count,count(cs_quantity),count(sr_return_quantity),count(ss_quantity),i_item_desc,i_item_id,m2,m2,m2,n,n,n,s_state,stddev_samp(cast(cs_quantity as double)),stddev_samp(cast(sr_return_quantity as double)),stddev_samp(cast(ss_quantity as double)),sum,sum,sum] [as_store_returns_quantityave,as_store_returns_quantitycount,as_store_returns_quantitystdev,avg,avg,avg,avg(cast(cs_quantity as bigint)),avg(cast(sr_return_quantity as bigint)),avg(cast(ss_quantity as bigint)),catalog_sales_quantityave,catalog_sales_quantitycount,catalog_sales_quantitycov,catalog_sales_quantitystdev,count,count,count,count,count,count,count(cs_quantity),count(sr_return_quantity),count(ss_quantity),m2,m2,m2,n,n,n,stddev_samp(cast(cs_quantity as double)),stddev_samp(cast(sr_return_quantity as double)),stddev_samp(cast(ss_quantity as double)),store_returns_quantitycov,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitycov,store_sales_quantitystdev,sum,sum,sum] InputAdapter - Exchange [i_item_id,i_item_desc,s_state] #1 - WholeStageCodegen (8) - HashAggregate [i_item_id,i_item_desc,s_state,ss_quantity,sr_return_quantity,cs_quantity] [count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] - Project [ss_quantity,sr_return_quantity,cs_quantity,s_state,i_item_id,i_item_desc] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity] + Exchange [i_item_desc,i_item_id,s_state] #1 + WholeStageCodegen + HashAggregate [avg,avg,avg,avg,avg,avg,count,count,count,count,count,count,count,count,count,count,count,count,cs_quantity,i_item_desc,i_item_id,m2,m2,m2,m2,m2,m2,n,n,n,n,n,n,s_state,sr_return_quantity,ss_quantity,sum,sum,sum,sum,sum,sum] [avg,avg,avg,avg,avg,avg,count,count,count,count,count,count,count,count,count,count,count,count,m2,m2,m2,m2,m2,m2,n,n,n,n,n,n,sum,sum,sum,sum,sum,sum] + Project [cs_quantity,i_item_desc,i_item_id,s_state,sr_return_quantity,ss_quantity] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [cs_quantity,s_state,sr_return_quantity,ss_item_sk,ss_quantity] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [cs_quantity,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_sold_date_sk,cs_quantity] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [ss_item_sk,ss_store_sk,ss_quantity,sr_returned_date_sk,sr_return_quantity,cs_sold_date_sk,cs_quantity] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_quantity,sr_returned_date_sk,sr_return_quantity,cs_sold_date_sk,cs_quantity] - BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_quantity,sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity] - BroadcastHashJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity] + Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,sr_customer_sk,sr_item_sk] + Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] + Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [sr_customer_sk,sr_item_sk,sr_ticket_number,sr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] + WholeStageCodegen + Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] + Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] + WholeStageCodegen + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) + WholeStageCodegen Project [d_date_sk] - Filter [d_quarter_name,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_quarter_name] + Filter [d_date_sk,d_quarter_name] + Scan parquet default.date_dim [d_date_sk,d_quarter_name] [d_date_sk,d_quarter_name] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) + WholeStageCodegen Project [d_date_sk] - Filter [d_quarter_name,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_quarter_name] + Filter [d_date_sk,d_quarter_name] + Scan parquet default.date_dim [d_date_sk,d_quarter_name] [d_date_sk,d_quarter_name] InputAdapter - ReusedExchange [d_date_sk] #5 + BroadcastExchange #6 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date_sk,d_quarter_name] + Scan parquet default.date_dim [d_date_sk,d_quarter_name] [d_date_sk,d_quarter_name] InputAdapter - BroadcastExchange #6 - WholeStageCodegen (6) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_state] + BroadcastExchange #7 + WholeStageCodegen + Project [s_state,s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (7) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] + BroadcastExchange #8 + WholeStageCodegen + Project [i_item_desc,i_item_id,i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_item_desc,i_item_id,i_item_sk] [i_item_desc,i_item_id,i_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/explain.txt index 3b213efa6..fd9786c06 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/explain.txt @@ -1,264 +1,45 @@ == Physical Plan == -TakeOrderedAndProject (47) -+- * HashAggregate (46) - +- Exchange (45) - +- * HashAggregate (44) - +- * Expand (43) - +- * Project (42) - +- * BroadcastHashJoin Inner BuildRight (41) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) - : :- * Project (29) - : : +- * BroadcastHashJoin Inner BuildRight (28) - : : :- * Project (23) - : : : +- * BroadcastHashJoin Inner BuildRight (22) - : : : :- * Project (17) - : : : : +- * BroadcastHashJoin Inner BuildRight (16) - : : : : :- * Project (10) - : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : +- BroadcastExchange (8) - : : : : : +- * Project (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.customer_demographics (4) - : : : : +- BroadcastExchange (15) - : : : : +- * Project (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.customer (11) - : : : +- BroadcastExchange (21) - : : : +- * Filter (20) - : : : +- * ColumnarToRow (19) - : : : +- Scan parquet default.customer_demographics (18) - : : +- BroadcastExchange (27) - : : +- * Filter (26) - : : +- * ColumnarToRow (25) - : : +- Scan parquet default.customer_address (24) - : +- BroadcastExchange (34) - : +- * Project (33) - : +- * Filter (32) - : +- * ColumnarToRow (31) - : +- Scan parquet default.date_dim (30) - +- BroadcastExchange (40) - +- * Filter (39) - +- * ColumnarToRow (38) - +- Scan parquet default.item (37) - - -(1) Scan parquet default.catalog_sales -Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 7] -Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] - -(3) Filter [codegen id : 7] -Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] -Condition : (((isnotnull(cs_bill_cdemo_sk#3) AND isnotnull(cs_bill_customer_sk#2)) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_item_sk#4)) - -(4) Scan parquet default.customer_demographics -Output [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_education_status), EqualTo(cd_gender,F), EqualTo(cd_education_status,Unknown), IsNotNull(cd_demo_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] - -(6) Filter [codegen id : 1] -Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] -Condition : ((((isnotnull(cd_gender#11) AND isnotnull(cd_education_status#12)) AND (cd_gender#11 = F)) AND (cd_education_status#12 = Unknown)) AND isnotnull(cd_demo_sk#10)) - -(7) Project [codegen id : 1] -Output [2]: [cd_demo_sk#10, cd_dep_count#13] -Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] - -(8) BroadcastExchange -Input [2]: [cd_demo_sk#10, cd_dep_count#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] - -(9) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [cs_bill_cdemo_sk#3] -Right keys [1]: [cd_demo_sk#10] -Join condition: None - -(10) Project [codegen id : 7] -Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13] -Input [11]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_demo_sk#10, cd_dep_count#13] - -(11) Scan parquet default.customer -Output [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [In(c_birth_month, [1,6,8,9,12,2]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] - -(13) Filter [codegen id : 2] -Input [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] -Condition : (((c_birth_month#18 IN (1,6,8,9,12,2) AND isnotnull(c_customer_sk#15)) AND isnotnull(c_current_cdemo_sk#16)) AND isnotnull(c_current_addr_sk#17)) - -(14) Project [codegen id : 2] -Output [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] -Input [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] - -(15) BroadcastExchange -Input [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] - -(16) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [cs_bill_customer_sk#2] -Right keys [1]: [c_customer_sk#15] -Join condition: None - -(17) Project [codegen id : 7] -Output [11]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] -Input [13]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] - -(18) Scan parquet default.customer_demographics -Output [1]: [cd_demo_sk#21] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_demo_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 3] -Input [1]: [cd_demo_sk#21] - -(20) Filter [codegen id : 3] -Input [1]: [cd_demo_sk#21] -Condition : isnotnull(cd_demo_sk#21) - -(21) BroadcastExchange -Input [1]: [cd_demo_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] - -(22) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_cdemo_sk#16] -Right keys [1]: [cd_demo_sk#21] -Join condition: None - -(23) Project [codegen id : 7] -Output [10]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_addr_sk#17, c_birth_year#19] -Input [12]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19, cd_demo_sk#21] - -(24) Scan parquet default.customer_address -Output [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [In(ca_state, [MS,IN,ND,OK,NM,VA]), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(25) ColumnarToRow [codegen id : 4] -Input [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] - -(26) Filter [codegen id : 4] -Input [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] -Condition : (ca_state#25 IN (MS,IN,ND,OK,NM,VA) AND isnotnull(ca_address_sk#23)) - -(27) BroadcastExchange -Input [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] - -(28) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_addr_sk#17] -Right keys [1]: [ca_address_sk#23] -Join condition: None - -(29) Project [codegen id : 7] -Output [12]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_county#24, ca_state#25, ca_country#26] -Input [14]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_addr_sk#17, c_birth_year#19, ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] - -(30) Scan parquet default.date_dim -Output [2]: [d_date_sk#28, d_year#29] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)] -ReadSchema: struct - -(31) ColumnarToRow [codegen id : 5] -Input [2]: [d_date_sk#28, d_year#29] - -(32) Filter [codegen id : 5] -Input [2]: [d_date_sk#28, d_year#29] -Condition : ((isnotnull(d_year#29) AND (d_year#29 = 1998)) AND isnotnull(d_date_sk#28)) - -(33) Project [codegen id : 5] -Output [1]: [d_date_sk#28] -Input [2]: [d_date_sk#28, d_year#29] - -(34) BroadcastExchange -Input [1]: [d_date_sk#28] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] - -(35) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#28] -Join condition: None - -(36) Project [codegen id : 7] -Output [11]: [cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_county#24, ca_state#25, ca_country#26] -Input [13]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_county#24, ca_state#25, ca_country#26, d_date_sk#28] - -(37) Scan parquet default.item -Output [2]: [i_item_sk#31, i_item_id#32] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(38) ColumnarToRow [codegen id : 6] -Input [2]: [i_item_sk#31, i_item_id#32] - -(39) Filter [codegen id : 6] -Input [2]: [i_item_sk#31, i_item_id#32] -Condition : isnotnull(i_item_sk#31) - -(40) BroadcastExchange -Input [2]: [i_item_sk#31, i_item_id#32] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] - -(41) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [cs_item_sk#4] -Right keys [1]: [i_item_sk#31] -Join condition: None - -(42) Project [codegen id : 7] -Output [11]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#32, ca_country#26, ca_state#25, ca_county#24] -Input [13]: [cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_county#24, ca_state#25, ca_country#26, i_item_sk#31, i_item_id#32] - -(43) Expand [codegen id : 7] -Input [11]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#32, ca_country#26, ca_state#25, ca_county#24] -Arguments: [List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#32, ca_country#26, ca_state#25, ca_county#24, 0), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#32, ca_country#26, ca_state#25, null, 1), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#32, ca_country#26, null, null, 3), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#32, null, null, null, 7), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, null, null, null, null, 15)], [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38] - -(44) HashAggregate [codegen id : 7] -Input [12]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38] -Keys [5]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38] -Functions [7]: [partial_avg(cast(cs_quantity#5 as decimal(12,2))), partial_avg(cast(cs_list_price#6 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#8 as decimal(12,2))), partial_avg(cast(cs_sales_price#7 as decimal(12,2))), partial_avg(cast(cs_net_profit#9 as decimal(12,2))), partial_avg(cast(c_birth_year#19 as decimal(12,2))), partial_avg(cast(cd_dep_count#13 as decimal(12,2)))] -Aggregate Attributes [14]: [sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46, sum#47, count#48, sum#49, count#50, sum#51, count#52] -Results [19]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66] - -(45) Exchange -Input [19]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66] -Arguments: hashpartitioning(i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38, 5), true, [id=#67] - -(46) HashAggregate [codegen id : 8] -Input [19]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66] -Keys [5]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38] -Functions [7]: [avg(cast(cs_quantity#5 as decimal(12,2))), avg(cast(cs_list_price#6 as decimal(12,2))), avg(cast(cs_coupon_amt#8 as decimal(12,2))), avg(cast(cs_sales_price#7 as decimal(12,2))), avg(cast(cs_net_profit#9 as decimal(12,2))), avg(cast(c_birth_year#19 as decimal(12,2))), avg(cast(cd_dep_count#13 as decimal(12,2)))] -Aggregate Attributes [7]: [avg(cast(cs_quantity#5 as decimal(12,2)))#68, avg(cast(cs_list_price#6 as decimal(12,2)))#69, avg(cast(cs_coupon_amt#8 as decimal(12,2)))#70, avg(cast(cs_sales_price#7 as decimal(12,2)))#71, avg(cast(cs_net_profit#9 as decimal(12,2)))#72, avg(cast(c_birth_year#19 as decimal(12,2)))#73, avg(cast(cd_dep_count#13 as decimal(12,2)))#74] -Results [11]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, avg(cast(cs_quantity#5 as decimal(12,2)))#68 AS agg1#75, avg(cast(cs_list_price#6 as decimal(12,2)))#69 AS agg2#76, avg(cast(cs_coupon_amt#8 as decimal(12,2)))#70 AS agg3#77, avg(cast(cs_sales_price#7 as decimal(12,2)))#71 AS agg4#78, avg(cast(cs_net_profit#9 as decimal(12,2)))#72 AS agg5#79, avg(cast(c_birth_year#19 as decimal(12,2)))#73 AS agg6#80, avg(cast(cd_dep_count#13 as decimal(12,2)))#74 AS agg7#81] - -(47) TakeOrderedAndProject -Input [11]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, agg1#75, agg2#76, agg3#77, agg4#78, agg5#79, agg6#80, agg7#81] -Arguments: 100, [ca_country#35 ASC NULLS FIRST, ca_state#36 ASC NULLS FIRST, ca_county#37 ASC NULLS FIRST, i_item_id#34 ASC NULLS FIRST], [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, agg1#75, agg2#76, agg3#77, agg4#78, agg5#79, agg6#80, agg7#81] - +TakeOrderedAndProject(limit=100, orderBy=[ca_country#1 ASC NULLS FIRST,ca_state#2 ASC NULLS FIRST,ca_county#3 ASC NULLS FIRST,i_item_id#4 ASC NULLS FIRST], output=[i_item_id#4,ca_country#1,ca_state#2,ca_county#3,agg1#5,agg2#6,agg3#7,agg4#8,agg5#9,agg6#10,agg7#11]) ++- *(8) HashAggregate(keys=[i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12], functions=[avg(cast(cs_quantity#13 as decimal(12,2))), avg(cast(cs_list_price#14 as decimal(12,2))), avg(cast(cs_coupon_amt#15 as decimal(12,2))), avg(cast(cs_sales_price#16 as decimal(12,2))), avg(cast(cs_net_profit#17 as decimal(12,2))), avg(cast(c_birth_year#18 as decimal(12,2))), avg(cast(cd_dep_count#19 as decimal(12,2)))]) + +- Exchange hashpartitioning(i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12, 5) + +- *(7) HashAggregate(keys=[i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12], functions=[partial_avg(cast(cs_quantity#13 as decimal(12,2))), partial_avg(cast(cs_list_price#14 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#15 as decimal(12,2))), partial_avg(cast(cs_sales_price#16 as decimal(12,2))), partial_avg(cast(cs_net_profit#17 as decimal(12,2))), partial_avg(cast(c_birth_year#18 as decimal(12,2))), partial_avg(cast(cd_dep_count#19 as decimal(12,2)))]) + +- *(7) Expand [List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, ca_country#21, ca_state#22, ca_county#23, 0), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, ca_country#21, ca_state#22, null, 1), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, ca_country#21, null, null, 3), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, null, null, null, 7), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, null, null, null, null, 15)], [cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12] + +- *(7) Project [cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#24 AS i_item_id#20, ca_country#25 AS ca_country#21, ca_state#26 AS ca_state#22, ca_county#27 AS ca_county#23] + +- *(7) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#29], Inner, BuildRight + :- *(7) Project [cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, ca_county#27, ca_state#26, ca_country#25] + : +- *(7) BroadcastHashJoin [cs_sold_date_sk#30], [d_date_sk#31], Inner, BuildRight + : :- *(7) Project [cs_sold_date_sk#30, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, ca_county#27, ca_state#26, ca_country#25] + : : +- *(7) BroadcastHashJoin [c_current_addr_sk#32], [ca_address_sk#33], Inner, BuildRight + : : :- *(7) Project [cs_sold_date_sk#30, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_current_addr_sk#32, c_birth_year#18] + : : : +- *(7) BroadcastHashJoin [c_current_cdemo_sk#34], [cd_demo_sk#35], Inner, BuildRight + : : : :- *(7) Project [cs_sold_date_sk#30, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_current_cdemo_sk#34, c_current_addr_sk#32, c_birth_year#18] + : : : : +- *(7) BroadcastHashJoin [cs_bill_customer_sk#36], [c_customer_sk#37], Inner, BuildRight + : : : : :- *(7) Project [cs_sold_date_sk#30, cs_bill_customer_sk#36, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19] + : : : : : +- *(7) BroadcastHashJoin [cs_bill_cdemo_sk#38], [cd_demo_sk#39], Inner, BuildRight + : : : : : :- *(7) Project [cs_sold_date_sk#30, cs_bill_customer_sk#36, cs_bill_cdemo_sk#38, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17] + : : : : : : +- *(7) Filter (((isnotnull(cs_bill_cdemo_sk#38) && isnotnull(cs_bill_customer_sk#36)) && isnotnull(cs_sold_date_sk#30)) && isnotnull(cs_item_sk#28)) + : : : : : : +- *(7) FileScan parquet default.catalog_sales[cs_sold_date_sk#30,cs_bill_customer_sk#36,cs_bill_cdemo_sk#38,cs_item_sk#28,cs_quantity#13,cs_list_price#14,cs_sales_price#16,cs_coupon_amt#15,cs_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk), IsNotNu..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [c_customer_sk#37, c_current_cdemo_sk#34, c_current_addr_sk#32, c_birth_year#18] + : : : : +- *(2) Filter (((c_birth_month#42 IN (1,6,8,9,12,2) && isnotnull(c_customer_sk#37)) && isnotnull(c_current_cdemo_sk#34)) && isnotnull(c_current_addr_sk#32)) + : : : : +- *(2) FileScan parquet default.customer[c_customer_sk#37,c_current_cdemo_sk#34,c_current_addr_sk#32,c_birth_month#42,c_birth_year#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [In(c_birth_month, [1,6,8,9,12,2]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNo..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(4) Project [ca_address_sk#33, ca_county#27, ca_state#26, ca_country#25] + : : +- *(4) Filter (ca_state#26 IN (MS,IN,ND,OK,NM,VA) && isnotnull(ca_address_sk#33)) + : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#33,ca_county#27,ca_state#26,ca_country#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [In(ca_state, [MS,IN,ND,OK,NM,VA]), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(5) Project [d_date_sk#31] + : +- *(5) Filter ((isnotnull(d_year#43) && (d_year#43 = 1998)) && isnotnull(d_date_sk#31)) + : +- *(5) FileScan parquet default.date_dim[d_date_sk#31,d_year#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(6) Project [i_item_sk#29, i_item_id#24] + +- *(6) Filter isnotnull(i_item_sk#29) + +- *(6) FileScan parquet default.item[i_item_sk#29,i_item_id#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/simplified.txt index 0d4c8cb7f..5a4e9d503 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/simplified.txt @@ -1,69 +1,59 @@ -TakeOrderedAndProject [ca_country,ca_state,ca_county,i_item_id,agg1,agg2,agg3,agg4,agg5,agg6,agg7] - WholeStageCodegen (8) - HashAggregate [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(cast(cs_quantity as decimal(12,2))),avg(cast(cs_list_price as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),avg(cast(cs_sales_price as decimal(12,2))),avg(cast(cs_net_profit as decimal(12,2))),avg(cast(c_birth_year as decimal(12,2))),avg(cast(cd_dep_count as decimal(12,2))),agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] +TakeOrderedAndProject [agg1,agg2,agg3,agg4,agg5,agg6,agg7,ca_country,ca_county,ca_state,i_item_id] + WholeStageCodegen + HashAggregate [avg(cast(c_birth_year as decimal(12,2))),avg(cast(cd_dep_count as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),avg(cast(cs_list_price as decimal(12,2))),avg(cast(cs_net_profit as decimal(12,2))),avg(cast(cs_quantity as decimal(12,2))),avg(cast(cs_sales_price as decimal(12,2))),ca_country,ca_county,ca_state,count,count,count,count,count,count,count,i_item_id,spark_grouping_id,sum,sum,sum,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,agg5,agg6,agg7,avg(cast(c_birth_year as decimal(12,2))),avg(cast(cd_dep_count as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),avg(cast(cs_list_price as decimal(12,2))),avg(cast(cs_net_profit as decimal(12,2))),avg(cast(cs_quantity as decimal(12,2))),avg(cast(cs_sales_price as decimal(12,2))),count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum] InputAdapter - Exchange [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id] #1 - WholeStageCodegen (7) - HashAggregate [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] - Expand [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,i_item_id,ca_country,ca_state,ca_county] - Project [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,i_item_id,ca_country,ca_state,ca_county] + Exchange [ca_country,ca_county,ca_state,i_item_id,spark_grouping_id] #1 + WholeStageCodegen + HashAggregate [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id,spark_grouping_id,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Expand [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] + Project [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + Project [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] + Project [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_current_addr_sk,c_birth_year] + Project [c_birth_year,c_current_addr_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count] - BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] - Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit] + Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Project [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) + WholeStageCodegen Project [cd_demo_sk,cd_dep_count] - Filter [cd_gender,cd_education_status,cd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] + Filter [cd_demo_sk,cd_education_status,cd_gender] + Scan parquet default.customer_demographics [cd_demo_sk,cd_dep_count,cd_education_status,cd_gender] [cd_demo_sk,cd_dep_count,cd_education_status,cd_gender] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Project [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] - Filter [c_birth_month,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] + WholeStageCodegen + Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + Filter [c_birth_month,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + Scan parquet default.customer [c_birth_month,c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] [c_birth_month,c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) - Filter [cd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk] + WholeStageCodegen + Project [cd_demo_sk] + Filter [cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk] [cd_demo_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) - Filter [ca_state,ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] + WholeStageCodegen + Project [ca_address_sk,ca_country,ca_county,ca_state] + Filter [ca_address_sk,ca_state] + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_county,ca_state] [ca_address_sk,ca_country,ca_county,ca_state] InputAdapter BroadcastExchange #6 - WholeStageCodegen (5) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #7 - WholeStageCodegen (6) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id] + WholeStageCodegen + Project [i_item_id,i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/explain.txt index cb75374a8..a3cb20be7 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/explain.txt @@ -1,221 +1,38 @@ == Physical Plan == -TakeOrderedAndProject (39) -+- * HashAggregate (38) - +- Exchange (37) - +- * HashAggregate (36) - +- * Project (35) - +- * BroadcastHashJoin Inner BuildRight (34) - :- * Project (29) - : +- * BroadcastHashJoin Inner BuildRight (28) - : :- * Project (23) - : : +- * BroadcastHashJoin Inner BuildRight (22) - : : :- * Project (17) - : : : +- * BroadcastHashJoin Inner BuildRight (16) - : : : :- * Project (10) - : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : :- * Project (4) - : : : : : +- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.date_dim (1) - : : : : +- BroadcastExchange (8) - : : : : +- * Filter (7) - : : : : +- * ColumnarToRow (6) - : : : : +- Scan parquet default.store_sales (5) - : : : +- BroadcastExchange (15) - : : : +- * Project (14) - : : : +- * Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.item (11) - : : +- BroadcastExchange (21) - : : +- * Filter (20) - : : +- * ColumnarToRow (19) - : : +- Scan parquet default.customer (18) - : +- BroadcastExchange (27) - : +- * Filter (26) - : +- * ColumnarToRow (25) - : +- Scan parquet default.customer_address (24) - +- BroadcastExchange (33) - +- * Filter (32) - +- * ColumnarToRow (31) - +- Scan parquet default.store (30) - - -(1) Scan parquet default.date_dim -Output [3]: [d_date_sk#1, d_year#2, d_moy#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 6] -Input [3]: [d_date_sk#1, d_year#2, d_moy#3] - -(3) Filter [codegen id : 6] -Input [3]: [d_date_sk#1, d_year#2, d_moy#3] -Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1998)) AND isnotnull(d_date_sk#1)) - -(4) Project [codegen id : 6] -Output [1]: [d_date_sk#1] -Input [3]: [d_date_sk#1, d_year#2, d_moy#3] - -(5) Scan parquet default.store_sales -Output [5]: [ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(6) ColumnarToRow [codegen id : 1] -Input [5]: [ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] - -(7) Filter [codegen id : 1] -Input [5]: [ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] -Condition : (((isnotnull(ss_sold_date_sk#4) AND isnotnull(ss_item_sk#5)) AND isnotnull(ss_customer_sk#6)) AND isnotnull(ss_store_sk#7)) - -(8) BroadcastExchange -Input [5]: [ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] - -(9) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#4] -Join condition: None - -(10) Project [codegen id : 6] -Output [4]: [ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] -Input [6]: [d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] - -(11) Scan parquet default.item -Output [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,8), IsNotNull(i_item_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] - -(13) Filter [codegen id : 2] -Input [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] -Condition : ((isnotnull(i_manager_id#15) AND (i_manager_id#15 = 8)) AND isnotnull(i_item_sk#10)) - -(14) Project [codegen id : 2] -Output [5]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] -Input [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] - -(15) BroadcastExchange -Input [5]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] - -(16) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_item_sk#5] -Right keys [1]: [i_item_sk#10] -Join condition: None - -(17) Project [codegen id : 6] -Output [7]: [ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] -Input [9]: [ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8, i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] - -(18) Scan parquet default.customer -Output [2]: [c_customer_sk#17, c_current_addr_sk#18] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 3] -Input [2]: [c_customer_sk#17, c_current_addr_sk#18] - -(20) Filter [codegen id : 3] -Input [2]: [c_customer_sk#17, c_current_addr_sk#18] -Condition : (isnotnull(c_customer_sk#17) AND isnotnull(c_current_addr_sk#18)) - -(21) BroadcastExchange -Input [2]: [c_customer_sk#17, c_current_addr_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] - -(22) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_customer_sk#6] -Right keys [1]: [c_customer_sk#17] -Join condition: None - -(23) Project [codegen id : 6] -Output [7]: [ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, c_current_addr_sk#18] -Input [9]: [ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, c_customer_sk#17, c_current_addr_sk#18] - -(24) Scan parquet default.customer_address -Output [2]: [ca_address_sk#20, ca_zip#21] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] -ReadSchema: struct - -(25) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#20, ca_zip#21] - -(26) Filter [codegen id : 4] -Input [2]: [ca_address_sk#20, ca_zip#21] -Condition : (isnotnull(ca_address_sk#20) AND isnotnull(ca_zip#21)) - -(27) BroadcastExchange -Input [2]: [ca_address_sk#20, ca_zip#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] - -(28) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [c_current_addr_sk#18] -Right keys [1]: [ca_address_sk#20] -Join condition: None - -(29) Project [codegen id : 6] -Output [7]: [ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, ca_zip#21] -Input [9]: [ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, c_current_addr_sk#18, ca_address_sk#20, ca_zip#21] - -(30) Scan parquet default.store -Output [2]: [s_store_sk#23, s_zip#24] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)] -ReadSchema: struct - -(31) ColumnarToRow [codegen id : 5] -Input [2]: [s_store_sk#23, s_zip#24] - -(32) Filter [codegen id : 5] -Input [2]: [s_store_sk#23, s_zip#24] -Condition : (isnotnull(s_zip#24) AND isnotnull(s_store_sk#23)) - -(33) BroadcastExchange -Input [2]: [s_store_sk#23, s_zip#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] - -(34) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_store_sk#7] -Right keys [1]: [s_store_sk#23] -Join condition: NOT (substr(ca_zip#21, 1, 5) = substr(s_zip#24, 1, 5)) - -(35) Project [codegen id : 6] -Output [5]: [ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] -Input [9]: [ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, ca_zip#21, s_store_sk#23, s_zip#24] - -(36) HashAggregate [codegen id : 6] -Input [5]: [ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] -Keys [4]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#8))] -Aggregate Attributes [1]: [sum#26] -Results [5]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, sum#27] - -(37) Exchange -Input [5]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, sum#27] -Arguments: hashpartitioning(i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, 5), true, [id=#28] - -(38) HashAggregate [codegen id : 7] -Input [5]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, sum#27] -Keys [4]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#8))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#8))#29] -Results [5]: [i_brand_id#11 AS brand_id#30, i_brand#12 AS brand#31, i_manufact_id#13, i_manufact#14, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#8))#29,17,2) AS ext_price#32] - -(39) TakeOrderedAndProject -Input [5]: [brand_id#30, brand#31, i_manufact_id#13, i_manufact#14, ext_price#32] -Arguments: 100, [ext_price#32 DESC NULLS LAST, brand#31 ASC NULLS FIRST, brand_id#30 ASC NULLS FIRST, i_manufact_id#13 ASC NULLS FIRST, i_manufact#14 ASC NULLS FIRST], [brand_id#30, brand#31, i_manufact_id#13, i_manufact#14, ext_price#32] - +TakeOrderedAndProject(limit=100, orderBy=[ext_price#1 DESC NULLS LAST,brand#2 ASC NULLS FIRST,brand_id#3 ASC NULLS FIRST,i_manufact_id#4 ASC NULLS FIRST,i_manufact#5 ASC NULLS FIRST], output=[brand_id#3,brand#2,i_manufact_id#4,i_manufact#5,ext_price#1]) ++- *(7) HashAggregate(keys=[i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5], functions=[sum(UnscaledValue(ss_ext_sales_price#8))]) + +- Exchange hashpartitioning(i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5, 5) + +- *(6) HashAggregate(keys=[i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#8))]) + +- *(6) Project [ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5] + +- *(6) BroadcastHashJoin [ss_store_sk#9], [s_store_sk#10], Inner, BuildRight, NOT (substring(ca_zip#11, 1, 5) = substring(s_zip#12, 1, 5)) + :- *(6) Project [ss_store_sk#9, ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5, ca_zip#11] + : +- *(6) BroadcastHashJoin [c_current_addr_sk#13], [ca_address_sk#14], Inner, BuildRight + : :- *(6) Project [ss_store_sk#9, ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5, c_current_addr_sk#13] + : : +- *(6) BroadcastHashJoin [ss_customer_sk#15], [c_customer_sk#16], Inner, BuildRight + : : :- *(6) Project [ss_customer_sk#15, ss_store_sk#9, ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5] + : : : +- *(6) BroadcastHashJoin [ss_item_sk#17], [i_item_sk#18], Inner, BuildRight + : : : :- *(6) Project [ss_item_sk#17, ss_customer_sk#15, ss_store_sk#9, ss_ext_sales_price#8] + : : : : +- *(6) BroadcastHashJoin [d_date_sk#19], [ss_sold_date_sk#20], Inner, BuildRight + : : : : :- *(6) Project [d_date_sk#19] + : : : : : +- *(6) Filter ((((isnotnull(d_moy#21) && isnotnull(d_year#22)) && (d_moy#21 = 11)) && (d_year#22 = 1998)) && isnotnull(d_date_sk#19)) + : : : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#19,d_year#22,d_moy#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [ss_sold_date_sk#20, ss_item_sk#17, ss_customer_sk#15, ss_store_sk#9, ss_ext_sales_price#8] + : : : : +- *(1) Filter (((isnotnull(ss_sold_date_sk#20) && isnotnull(ss_item_sk#17)) && isnotnull(ss_customer_sk#15)) && isnotnull(ss_store_sk#9)) + : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#20,ss_item_sk#17,ss_customer_sk#15,ss_store_sk#9,ss_ext_sales_price#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [ca_address_sk#14, ca_zip#11] + : +- *(4) Filter (isnotnull(ca_address_sk#14) && isnotnull(ca_zip#11)) + : +- *(4) FileScan parquet default.customer_address[ca_address_sk#14,ca_zip#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [s_store_sk#10, s_zip#12] + +- *(5) Filter (isnotnull(s_zip#12) && isnotnull(s_store_sk#10)) + +- *(5) FileScan parquet default.store[s_store_sk#10,s_zip#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/simplified.txt index 1bbbf35e4..fb69c76b7 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/simplified.txt @@ -1,58 +1,50 @@ -TakeOrderedAndProject [ext_price,brand,brand_id,i_manufact_id,i_manufact] - WholeStageCodegen (7) - HashAggregate [i_brand,i_brand_id,i_manufact_id,i_manufact,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,ext_price,sum] +TakeOrderedAndProject [brand,brand_id,ext_price,i_manufact,i_manufact_id] + WholeStageCodegen + HashAggregate [i_brand,i_brand_id,i_manufact,i_manufact_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter - Exchange [i_brand,i_brand_id,i_manufact_id,i_manufact] #1 - WholeStageCodegen (6) - HashAggregate [i_brand,i_brand_id,i_manufact_id,i_manufact,ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] - BroadcastHashJoin [ss_store_sk,s_store_sk,ca_zip,s_zip] - Project [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,ca_zip] + Exchange [i_brand,i_brand_id,i_manufact,i_manufact_id] #1 + WholeStageCodegen + HashAggregate [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price] + BroadcastHashJoin [ca_zip,s_store_sk,s_zip,ss_store_sk] + Project [ca_zip,i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price,ss_store_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,c_current_addr_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + Project [c_current_addr_sk,i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price,ss_store_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_customer_sk,ss_ext_sales_price,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_store_sk] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + WholeStageCodegen + Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Project [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact] - Filter [i_manager_id,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact,i_manager_id] + WholeStageCodegen + Project [i_brand,i_brand_id,i_item_sk,i_manufact,i_manufact_id] + Filter [i_item_sk,i_manager_id] + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id,i_manufact,i_manufact_id] [i_brand,i_brand_id,i_item_sk,i_manager_id,i_manufact,i_manufact_id] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] + WholeStageCodegen + Project [c_current_addr_sk,c_customer_sk] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) - Filter [ca_address_sk,ca_zip] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_zip] + WholeStageCodegen + Project [ca_address_sk,ca_zip] + Filter [ca_address_sk,ca_zip] + Scan parquet default.customer_address [ca_address_sk,ca_zip] [ca_address_sk,ca_zip] InputAdapter BroadcastExchange #6 - WholeStageCodegen (5) - Filter [s_zip,s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_zip] + WholeStageCodegen + Project [s_store_sk,s_zip] + Filter [s_store_sk,s_zip] + Scan parquet default.store [s_store_sk,s_zip] [s_store_sk,s_zip] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/explain.txt index be37495fd..27134fbb9 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/explain.txt @@ -1,218 +1,36 @@ == Physical Plan == -* Sort (39) -+- Exchange (38) - +- * Project (37) - +- * BroadcastHashJoin Inner BuildRight (36) - :- * Project (25) - : +- * BroadcastHashJoin Inner BuildRight (24) - : :- * HashAggregate (18) - : : +- Exchange (17) - : : +- * HashAggregate (16) - : : +- * Project (15) - : : +- * BroadcastHashJoin Inner BuildRight (14) - : : :- Union (9) - : : : :- * Project (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.web_sales (1) - : : : +- * Project (8) - : : : +- * Filter (7) - : : : +- * ColumnarToRow (6) - : : : +- Scan parquet default.catalog_sales (5) - : : +- BroadcastExchange (13) - : : +- * Filter (12) - : : +- * ColumnarToRow (11) - : : +- Scan parquet default.date_dim (10) - : +- BroadcastExchange (23) - : +- * Project (22) - : +- * Filter (21) - : +- * ColumnarToRow (20) - : +- Scan parquet default.date_dim (19) - +- BroadcastExchange (35) - +- * Project (34) - +- * BroadcastHashJoin Inner BuildRight (33) - :- * HashAggregate (27) - : +- ReusedExchange (26) - +- BroadcastExchange (32) - +- * Project (31) - +- * Filter (30) - +- * ColumnarToRow (29) - +- Scan parquet default.date_dim (28) - - -(1) Scan parquet default.web_sales -Output [2]: [ws_sold_date_sk#1, ws_ext_sales_price#2] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 1] -Input [2]: [ws_sold_date_sk#1, ws_ext_sales_price#2] - -(3) Filter [codegen id : 1] -Input [2]: [ws_sold_date_sk#1, ws_ext_sales_price#2] -Condition : isnotnull(ws_sold_date_sk#1) - -(4) Project [codegen id : 1] -Output [2]: [ws_sold_date_sk#1 AS sold_date_sk#3, ws_ext_sales_price#2 AS sales_price#4] -Input [2]: [ws_sold_date_sk#1, ws_ext_sales_price#2] - -(5) Scan parquet default.catalog_sales -Output [2]: [cs_sold_date_sk#5, cs_ext_sales_price#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(6) ColumnarToRow [codegen id : 2] -Input [2]: [cs_sold_date_sk#5, cs_ext_sales_price#6] - -(7) Filter [codegen id : 2] -Input [2]: [cs_sold_date_sk#5, cs_ext_sales_price#6] -Condition : isnotnull(cs_sold_date_sk#5) - -(8) Project [codegen id : 2] -Output [2]: [cs_sold_date_sk#5 AS sold_date_sk#7, cs_ext_sales_price#6 AS sales_price#8] -Input [2]: [cs_sold_date_sk#5, cs_ext_sales_price#6] - -(9) Union - -(10) Scan parquet default.date_dim -Output [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 3] -Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] - -(12) Filter [codegen id : 3] -Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] -Condition : (isnotnull(d_date_sk#9) AND isnotnull(d_week_seq#10)) - -(13) BroadcastExchange -Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] - -(14) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [sold_date_sk#3] -Right keys [1]: [d_date_sk#9] -Join condition: None - -(15) Project [codegen id : 4] -Output [3]: [sales_price#4, d_week_seq#10, d_day_name#11] -Input [5]: [sold_date_sk#3, sales_price#4, d_date_sk#9, d_week_seq#10, d_day_name#11] - -(16) HashAggregate [codegen id : 4] -Input [3]: [sales_price#4, d_week_seq#10, d_day_name#11] -Keys [1]: [d_week_seq#10] -Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 ELSE null END))] -Aggregate Attributes [7]: [sum#13, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19] -Results [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] - -(17) Exchange -Input [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] -Arguments: hashpartitioning(d_week_seq#10, 5), true, [id=#27] - -(18) HashAggregate [codegen id : 12] -Input [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] -Keys [1]: [d_week_seq#10] -Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 ELSE null END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 ELSE null END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 ELSE null END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 ELSE null END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 ELSE null END))#32, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 ELSE null END))#33, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 ELSE null END))#34] -Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 ELSE null END))#28,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 ELSE null END))#29,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 ELSE null END))#30,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#31,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 ELSE null END))#32,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 ELSE null END))#33,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 ELSE null END))#34,17,2) AS sat_sales#41] - -(19) Scan parquet default.date_dim -Output [2]: [d_week_seq#42, d_year#43] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_week_seq)] -ReadSchema: struct - -(20) ColumnarToRow [codegen id : 5] -Input [2]: [d_week_seq#42, d_year#43] - -(21) Filter [codegen id : 5] -Input [2]: [d_week_seq#42, d_year#43] -Condition : ((isnotnull(d_year#43) AND (d_year#43 = 2001)) AND isnotnull(d_week_seq#42)) - -(22) Project [codegen id : 5] -Output [1]: [d_week_seq#42] -Input [2]: [d_week_seq#42, d_year#43] - -(23) BroadcastExchange -Input [1]: [d_week_seq#42] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#44] - -(24) BroadcastHashJoin [codegen id : 12] -Left keys [1]: [d_week_seq#10] -Right keys [1]: [d_week_seq#42] -Join condition: None - -(25) Project [codegen id : 12] -Output [8]: [d_week_seq#10 AS d_week_seq1#45, sun_sales#35 AS sun_sales1#46, mon_sales#36 AS mon_sales1#47, tue_sales#37 AS tue_sales1#48, wed_sales#38 AS wed_sales1#49, thu_sales#39 AS thu_sales1#50, fri_sales#40 AS fri_sales1#51, sat_sales#41 AS sat_sales1#52] -Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#42] - -(26) ReusedExchange [Reuses operator id: 17] -Output [8]: [d_week_seq#10, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] - -(27) HashAggregate [codegen id : 11] -Input [8]: [d_week_seq#10, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] -Keys [1]: [d_week_seq#10] -Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 ELSE null END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 ELSE null END))#60, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 ELSE null END))#61, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 ELSE null END))#62, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#63, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 ELSE null END))#64, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 ELSE null END))#65, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 ELSE null END))#66] -Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 ELSE null END))#60,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 ELSE null END))#61,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 ELSE null END))#62,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#63,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 ELSE null END))#64,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 ELSE null END))#65,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 ELSE null END))#66,17,2) AS sat_sales#41] - -(28) Scan parquet default.date_dim -Output [2]: [d_week_seq#67, d_year#68] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)] -ReadSchema: struct - -(29) ColumnarToRow [codegen id : 10] -Input [2]: [d_week_seq#67, d_year#68] - -(30) Filter [codegen id : 10] -Input [2]: [d_week_seq#67, d_year#68] -Condition : ((isnotnull(d_year#68) AND (d_year#68 = 2002)) AND isnotnull(d_week_seq#67)) - -(31) Project [codegen id : 10] -Output [1]: [d_week_seq#67] -Input [2]: [d_week_seq#67, d_year#68] - -(32) BroadcastExchange -Input [1]: [d_week_seq#67] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#69] - -(33) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [d_week_seq#10] -Right keys [1]: [d_week_seq#67] -Join condition: None - -(34) Project [codegen id : 11] -Output [8]: [d_week_seq#10 AS d_week_seq2#70, sun_sales#35 AS sun_sales2#71, mon_sales#36 AS mon_sales2#72, tue_sales#37 AS tue_sales2#73, wed_sales#38 AS wed_sales2#74, thu_sales#39 AS thu_sales2#75, fri_sales#40 AS fri_sales2#76, sat_sales#41 AS sat_sales2#77] -Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#67] - -(35) BroadcastExchange -Input [8]: [d_week_seq2#70, sun_sales2#71, mon_sales2#72, tue_sales2#73, wed_sales2#74, thu_sales2#75, fri_sales2#76, sat_sales2#77] -Arguments: HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint)),false), [id=#78] - -(36) BroadcastHashJoin [codegen id : 12] -Left keys [1]: [d_week_seq1#45] -Right keys [1]: [(d_week_seq2#70 - 53)] -Join condition: None - -(37) Project [codegen id : 12] -Output [8]: [d_week_seq1#45, round(CheckOverflow((promote_precision(sun_sales1#46) / promote_precision(sun_sales2#71)), DecimalType(37,20), true), 2) AS round((sun_sales1 / sun_sales2), 2)#79, round(CheckOverflow((promote_precision(mon_sales1#47) / promote_precision(mon_sales2#72)), DecimalType(37,20), true), 2) AS round((mon_sales1 / mon_sales2), 2)#80, round(CheckOverflow((promote_precision(tue_sales1#48) / promote_precision(tue_sales2#73)), DecimalType(37,20), true), 2) AS round((tue_sales1 / tue_sales2), 2)#81, round(CheckOverflow((promote_precision(wed_sales1#49) / promote_precision(wed_sales2#74)), DecimalType(37,20), true), 2) AS round((wed_sales1 / wed_sales2), 2)#82, round(CheckOverflow((promote_precision(thu_sales1#50) / promote_precision(thu_sales2#75)), DecimalType(37,20), true), 2) AS round((thu_sales1 / thu_sales2), 2)#83, round(CheckOverflow((promote_precision(fri_sales1#51) / promote_precision(fri_sales2#76)), DecimalType(37,20), true), 2) AS round((fri_sales1 / fri_sales2), 2)#84, round(CheckOverflow((promote_precision(sat_sales1#52) / promote_precision(sat_sales2#77)), DecimalType(37,20), true), 2) AS round((sat_sales1 / sat_sales2), 2)#85] -Input [16]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52, d_week_seq2#70, sun_sales2#71, mon_sales2#72, tue_sales2#73, wed_sales2#74, thu_sales2#75, fri_sales2#76, sat_sales2#77] - -(38) Exchange -Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#79, round((mon_sales1 / mon_sales2), 2)#80, round((tue_sales1 / tue_sales2), 2)#81, round((wed_sales1 / wed_sales2), 2)#82, round((thu_sales1 / thu_sales2), 2)#83, round((fri_sales1 / fri_sales2), 2)#84, round((sat_sales1 / sat_sales2), 2)#85] -Arguments: rangepartitioning(d_week_seq1#45 ASC NULLS FIRST, 5), true, [id=#86] - -(39) Sort [codegen id : 13] -Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#79, round((mon_sales1 / mon_sales2), 2)#80, round((tue_sales1 / tue_sales2), 2)#81, round((wed_sales1 / wed_sales2), 2)#82, round((thu_sales1 / thu_sales2), 2)#83, round((fri_sales1 / fri_sales2), 2)#84, round((sat_sales1 / sat_sales2), 2)#85] -Arguments: [d_week_seq1#45 ASC NULLS FIRST], true, 0 - +*(13) Sort [d_week_seq1#1 ASC NULLS FIRST], true, 0 ++- Exchange rangepartitioning(d_week_seq1#1 ASC NULLS FIRST, 5) + +- *(12) Project [d_week_seq1#1, round(CheckOverflow((promote_precision(sun_sales1#2) / promote_precision(sun_sales2#3)), DecimalType(37,20)), 2) AS round((sun_sales1 / sun_sales2), 2)#4, round(CheckOverflow((promote_precision(mon_sales1#5) / promote_precision(mon_sales2#6)), DecimalType(37,20)), 2) AS round((mon_sales1 / mon_sales2), 2)#7, round(CheckOverflow((promote_precision(tue_sales1#8) / promote_precision(tue_sales2#9)), DecimalType(37,20)), 2) AS round((tue_sales1 / tue_sales2), 2)#10, round(CheckOverflow((promote_precision(wed_sales1#11) / promote_precision(wed_sales2#12)), DecimalType(37,20)), 2) AS round((wed_sales1 / wed_sales2), 2)#13, round(CheckOverflow((promote_precision(thu_sales1#14) / promote_precision(thu_sales2#15)), DecimalType(37,20)), 2) AS round((thu_sales1 / thu_sales2), 2)#16, round(CheckOverflow((promote_precision(fri_sales1#17) / promote_precision(fri_sales2#18)), DecimalType(37,20)), 2) AS round((fri_sales1 / fri_sales2), 2)#19, round(CheckOverflow((promote_precision(sat_sales1#20) / promote_precision(sat_sales2#21)), DecimalType(37,20)), 2) AS round((sat_sales1 / sat_sales2), 2)#22] + +- *(12) BroadcastHashJoin [d_week_seq1#1], [(d_week_seq2#23 - 53)], Inner, BuildRight + :- *(12) Project [d_week_seq#24 AS d_week_seq1#1, sun_sales#25 AS sun_sales1#2, mon_sales#26 AS mon_sales1#5, tue_sales#27 AS tue_sales1#8, wed_sales#28 AS wed_sales1#11, thu_sales#29 AS thu_sales1#14, fri_sales#30 AS fri_sales1#17, sat_sales#31 AS sat_sales1#20] + : +- *(12) BroadcastHashJoin [d_week_seq#24], [d_week_seq#32], Inner, BuildRight + : :- *(12) HashAggregate(keys=[d_week_seq#24], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#33 = Sunday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Monday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Tuesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Wednesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Thursday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Friday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Saturday) THEN sales_price#34 ELSE null END))]) + : : +- Exchange hashpartitioning(d_week_seq#24, 5) + : : +- *(4) HashAggregate(keys=[d_week_seq#24], functions=[partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Sunday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Monday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Tuesday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Wednesday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Thursday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Friday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Saturday) THEN sales_price#34 ELSE null END))]) + : : +- *(4) Project [sales_price#34, d_week_seq#24, d_day_name#33] + : : +- *(4) BroadcastHashJoin [sold_date_sk#35], [d_date_sk#36], Inner, BuildRight + : : :- Union + : : : :- *(1) Project [ws_sold_date_sk#37 AS sold_date_sk#35, ws_ext_sales_price#38 AS sales_price#34] + : : : : +- *(1) Filter isnotnull(ws_sold_date_sk#37) + : : : : +- *(1) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_ext_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : +- *(2) Project [cs_sold_date_sk#39 AS sold_date_sk#40, cs_ext_sales_price#41 AS sales_price#42] + : : : +- *(2) Filter isnotnull(cs_sold_date_sk#39) + : : : +- *(2) FileScan parquet default.catalog_sales[cs_sold_date_sk#39,cs_ext_sales_price#41] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [d_date_sk#36, d_week_seq#24, d_day_name#33] + : : +- *(3) Filter (isnotnull(d_date_sk#36) && isnotnull(d_week_seq#24)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#36,d_week_seq#24,d_day_name#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(5) Project [d_week_seq#32] + : +- *(5) Filter ((isnotnull(d_year#43) && (d_year#43 = 2001)) && isnotnull(d_week_seq#32)) + : +- *(5) FileScan parquet default.date_dim[d_week_seq#32,d_year#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_week_seq)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint))) + +- *(11) Project [d_week_seq#24 AS d_week_seq2#23, sun_sales#25 AS sun_sales2#3, mon_sales#26 AS mon_sales2#6, tue_sales#27 AS tue_sales2#9, wed_sales#28 AS wed_sales2#12, thu_sales#29 AS thu_sales2#15, fri_sales#30 AS fri_sales2#18, sat_sales#31 AS sat_sales2#21] + +- *(11) BroadcastHashJoin [d_week_seq#24], [d_week_seq#44], Inner, BuildRight + :- *(11) HashAggregate(keys=[d_week_seq#24], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#33 = Sunday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Monday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Tuesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Wednesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Thursday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Friday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Saturday) THEN sales_price#34 ELSE null END))]) + : +- ReusedExchange [d_week_seq#24, sum#45, sum#46, sum#47, sum#48, sum#49, sum#50, sum#51], Exchange hashpartitioning(d_week_seq#24, 5) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(10) Project [d_week_seq#44] + +- *(10) Filter ((isnotnull(d_year#52) && (d_year#52 = 2002)) && isnotnull(d_week_seq#44)) + +- *(10) FileScan parquet default.date_dim[d_week_seq#44,d_year#52] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/simplified.txt index f8028aa54..7e604b292 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/simplified.txt @@ -1,61 +1,52 @@ -WholeStageCodegen (13) +WholeStageCodegen Sort [d_week_seq1] InputAdapter Exchange [d_week_seq1] #1 - WholeStageCodegen (12) - Project [d_week_seq1,sun_sales1,sun_sales2,mon_sales1,mon_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2,thu_sales1,thu_sales2,fri_sales1,fri_sales2,sat_sales1,sat_sales2] + WholeStageCodegen + Project [d_week_seq1,fri_sales1,fri_sales2,mon_sales1,mon_sales2,sat_sales1,sat_sales2,sun_sales1,sun_sales2,thu_sales1,thu_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2] BroadcastHashJoin [d_week_seq1,d_week_seq2] - Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + Project [d_week_seq,fri_sales,mon_sales,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] InputAdapter Exchange [d_week_seq] #2 - WholeStageCodegen (4) - HashAggregate [d_week_seq,d_day_name,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,d_week_seq,d_day_name] - BroadcastHashJoin [sold_date_sk,d_date_sk] + WholeStageCodegen + HashAggregate [d_day_name,d_week_seq,sales_price,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,d_week_seq,sales_price] + BroadcastHashJoin [d_date_sk,sold_date_sk] InputAdapter Union - WholeStageCodegen (1) - Project [ws_sold_date_sk,ws_ext_sales_price] + WholeStageCodegen + Project [ws_ext_sales_price,ws_sold_date_sk] Filter [ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_ext_sales_price] - WholeStageCodegen (2) - Project [cs_sold_date_sk,cs_ext_sales_price] + Scan parquet default.web_sales [ws_ext_sales_price,ws_sold_date_sk] [ws_ext_sales_price,ws_sold_date_sk] + WholeStageCodegen + Project [cs_ext_sales_price,cs_sold_date_sk] Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ext_sales_price] + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_sold_date_sk] [cs_ext_sales_price,cs_sold_date_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (3) - Filter [d_date_sk,d_week_seq] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_week_seq,d_day_name] + WholeStageCodegen + Project [d_date_sk,d_day_name,d_week_seq] + Filter [d_date_sk,d_week_seq] + Scan parquet default.date_dim [d_date_sk,d_day_name,d_week_seq] [d_date_sk,d_day_name,d_week_seq] InputAdapter BroadcastExchange #4 - WholeStageCodegen (5) + WholeStageCodegen Project [d_week_seq] - Filter [d_year,d_week_seq] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_week_seq,d_year] + Filter [d_week_seq,d_year] + Scan parquet default.date_dim [d_week_seq,d_year] [d_week_seq,d_year] InputAdapter BroadcastExchange #5 - WholeStageCodegen (11) - Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + WholeStageCodegen + Project [d_week_seq,fri_sales,mon_sales,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] InputAdapter - ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 + ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 InputAdapter BroadcastExchange #6 - WholeStageCodegen (10) + WholeStageCodegen Project [d_week_seq] - Filter [d_year,d_week_seq] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_week_seq,d_year] + Filter [d_week_seq,d_year] + Scan parquet default.date_dim [d_week_seq,d_year] [d_week_seq,d_year] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/explain.txt index 4234fba2b..f0c1bc89c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/explain.txt @@ -1,137 +1,24 @@ == Physical Plan == -TakeOrderedAndProject (24) -+- * Project (23) - +- Window (22) - +- * Sort (21) - +- Exchange (20) - +- * HashAggregate (19) - +- Exchange (18) - +- * HashAggregate (17) - +- * Project (16) - +- * BroadcastHashJoin Inner BuildRight (15) - :- * Project (9) - : +- * BroadcastHashJoin Inner BuildRight (8) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.catalog_sales (1) - : +- BroadcastExchange (7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.item (4) - +- BroadcastExchange (14) - +- * Project (13) - +- * Filter (12) - +- * ColumnarToRow (11) - +- Scan parquet default.date_dim (10) - - -(1) Scan parquet default.catalog_sales -Output [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3] - -(3) Filter [codegen id : 3] -Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3] -Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) - -(4) Scan parquet default.item -Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] - -(6) Filter [codegen id : 1] -Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] -Condition : (i_category#9 IN (Sports,Books,Home) AND isnotnull(i_item_sk#4)) - -(7) BroadcastExchange -Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] - -(8) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [cs_item_sk#2] -Right keys [1]: [i_item_sk#4] -Join condition: None - -(9) Project [codegen id : 3] -Output [7]: [cs_sold_date_sk#1, cs_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] -Input [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] - -(10) Scan parquet default.date_dim -Output [2]: [d_date_sk#11, d_date#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#11, d_date#12] - -(12) Filter [codegen id : 2] -Input [2]: [d_date_sk#11, d_date#12] -Condition : (((isnotnull(d_date#12) AND (d_date#12 >= 10644)) AND (d_date#12 <= 10674)) AND isnotnull(d_date_sk#11)) - -(13) Project [codegen id : 2] -Output [1]: [d_date_sk#11] -Input [2]: [d_date_sk#11, d_date#12] - -(14) BroadcastExchange -Input [1]: [d_date_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] - -(15) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#11] -Join condition: None - -(16) Project [codegen id : 3] -Output [6]: [cs_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] -Input [8]: [cs_sold_date_sk#1, cs_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#11] - -(17) HashAggregate [codegen id : 3] -Input [6]: [cs_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] -Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] -Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#3))] -Aggregate Attributes [1]: [sum#14] -Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] - -(18) Exchange -Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] -Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), true, [id=#16] - -(19) HashAggregate [codegen id : 4] -Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] -Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] -Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#3))#17] -Results [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#3))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#3))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#3))#17,17,2) AS _w1#20, i_item_id#5] - -(20) Exchange -Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] -Arguments: hashpartitioning(i_class#8, 5), true, [id=#21] - -(21) Sort [codegen id : 5] -Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] -Arguments: [i_class#8 ASC NULLS FIRST], false, 0 - -(22) Window -Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] -Arguments: [sum(_w1#20) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#8] - -(23) Project [codegen id : 6] -Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23, i_item_id#5] -Input [9]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5, _we0#22] - -(24) TakeOrderedAndProject -Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23, i_item_id#5] -Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23] - +TakeOrderedAndProject(limit=100, orderBy=[i_category#1 ASC NULLS FIRST,i_class#2 ASC NULLS FIRST,i_item_id#3 ASC NULLS FIRST,i_item_desc#4 ASC NULLS FIRST,revenueratio#5 ASC NULLS FIRST], output=[i_item_desc#4,i_category#1,i_class#2,i_current_price#6,itemrevenue#7,revenueratio#5]) ++- *(6) Project [i_item_desc#4, i_category#1, i_class#2, i_current_price#6, itemrevenue#7, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#8) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#9)), DecimalType(38,17)) AS revenueratio#5, i_item_id#3] + +- Window [sum(_w1#10) windowspecdefinition(i_class#2, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#9], [i_class#2] + +- *(5) Sort [i_class#2 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(i_class#2, 5) + +- *(4) HashAggregate(keys=[i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6], functions=[sum(UnscaledValue(cs_ext_sales_price#11))]) + +- Exchange hashpartitioning(i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6, 5) + +- *(3) HashAggregate(keys=[i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#11))]) + +- *(3) Project [cs_ext_sales_price#11, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] + +- *(3) BroadcastHashJoin [cs_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + :- *(3) Project [cs_sold_date_sk#12, cs_ext_sales_price#11, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] + : +- *(3) BroadcastHashJoin [cs_item_sk#14], [i_item_sk#15], Inner, BuildRight + : :- *(3) Project [cs_sold_date_sk#12, cs_item_sk#14, cs_ext_sales_price#11] + : : +- *(3) Filter (isnotnull(cs_item_sk#14) && isnotnull(cs_sold_date_sk#12)) + : : +- *(3) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_item_sk#14,cs_ext_sales_price#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [i_item_sk#15, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] + : +- *(1) Filter (i_category#1 IN (Sports,Books,Home) && isnotnull(i_item_sk#15)) + : +- *(1) FileScan parquet default.item[i_item_sk#15,i_item_id#3,i_item_desc#4,i_current_price#6,i_class#2,i_category#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)], ReadSchema: struct= 10644)) && (d_date#16 <= 10674)) && isnotnull(d_date_sk#13)) + +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/simplified.txt index 6259c1d53..a6a8b7457 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/simplified.txt @@ -1,38 +1,34 @@ -TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] - WholeStageCodegen (6) - Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] +TakeOrderedAndProject [i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue,revenueratio] + WholeStageCodegen + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] InputAdapter Window [_w1,i_class] - WholeStageCodegen (5) + WholeStageCodegen Sort [i_class] InputAdapter Exchange [i_class] #1 - WholeStageCodegen (4) - HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(cs_ext_sales_price)),itemrevenue,_w0,_w1,sum] + WholeStageCodegen + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(cs_ext_sales_price))] InputAdapter - Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 - WholeStageCodegen (3) - HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,cs_ext_sales_price] [sum,sum] - Project [cs_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #2 + WholeStageCodegen + HashAggregate [cs_ext_sales_price,i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum] [sum,sum] + Project [cs_ext_sales_price,i_category,i_class,i_current_price,i_item_desc,i_item_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + Project [cs_ext_sales_price,cs_sold_date_sk,i_category,i_class,i_current_price,i_item_desc,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Filter [i_category,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + WholeStageCodegen + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] + Filter [i_category,i_item_sk] + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/explain.txt index 788d1affd..7c1b02f29 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/explain.txt @@ -1,155 +1,27 @@ == Physical Plan == -TakeOrderedAndProject (27) -+- * Filter (26) - +- * HashAggregate (25) - +- Exchange (24) - +- * HashAggregate (23) - +- * Project (22) - +- * BroadcastHashJoin Inner BuildRight (21) - :- * Project (16) - : +- * BroadcastHashJoin Inner BuildRight (15) - : :- * Project (9) - : : +- * BroadcastHashJoin Inner BuildRight (8) - : : :- * Filter (3) - : : : +- * ColumnarToRow (2) - : : : +- Scan parquet default.inventory (1) - : : +- BroadcastExchange (7) - : : +- * Filter (6) - : : +- * ColumnarToRow (5) - : : +- Scan parquet default.warehouse (4) - : +- BroadcastExchange (14) - : +- * Project (13) - : +- * Filter (12) - : +- * ColumnarToRow (11) - : +- Scan parquet default.item (10) - +- BroadcastExchange (20) - +- * Filter (19) - +- * ColumnarToRow (18) - +- Scan parquet default.date_dim (17) - - -(1) Scan parquet default.inventory -Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/inventory] -PushedFilters: [IsNotNull(inv_warehouse_sk), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] - -(3) Filter [codegen id : 4] -Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] -Condition : ((isnotnull(inv_warehouse_sk#3) AND isnotnull(inv_item_sk#2)) AND isnotnull(inv_date_sk#1)) - -(4) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#5, w_warehouse_name#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/warehouse] -PushedFilters: [IsNotNull(w_warehouse_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] - -(6) Filter [codegen id : 1] -Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] -Condition : isnotnull(w_warehouse_sk#5) - -(7) BroadcastExchange -Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] - -(8) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [inv_warehouse_sk#3] -Right keys [1]: [w_warehouse_sk#5] -Join condition: None - -(9) Project [codegen id : 4] -Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_quantity_on_hand#4, w_warehouse_name#6] -Input [6]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, w_warehouse_sk#5, w_warehouse_name#6] - -(10) Scan parquet default.item -Output [3]: [i_item_sk#8, i_item_id#9, i_current_price#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_price,1.49), IsNotNull(i_item_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [3]: [i_item_sk#8, i_item_id#9, i_current_price#10] - -(12) Filter [codegen id : 2] -Input [3]: [i_item_sk#8, i_item_id#9, i_current_price#10] -Condition : (((isnotnull(i_current_price#10) AND (i_current_price#10 >= 0.99)) AND (i_current_price#10 <= 1.49)) AND isnotnull(i_item_sk#8)) - -(13) Project [codegen id : 2] -Output [2]: [i_item_sk#8, i_item_id#9] -Input [3]: [i_item_sk#8, i_item_id#9, i_current_price#10] - -(14) BroadcastExchange -Input [2]: [i_item_sk#8, i_item_id#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] - -(15) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [inv_item_sk#2] -Right keys [1]: [i_item_sk#8] -Join condition: None - -(16) Project [codegen id : 4] -Output [4]: [inv_date_sk#1, inv_quantity_on_hand#4, w_warehouse_name#6, i_item_id#9] -Input [6]: [inv_date_sk#1, inv_item_sk#2, inv_quantity_on_hand#4, w_warehouse_name#6, i_item_sk#8, i_item_id#9] - -(17) Scan parquet default.date_dim -Output [2]: [d_date_sk#12, d_date#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), IsNotNull(d_date_sk)] -ReadSchema: struct - -(18) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#12, d_date#13] - -(19) Filter [codegen id : 3] -Input [2]: [d_date_sk#12, d_date#13] -Condition : (((isnotnull(d_date#13) AND (d_date#13 >= 10997)) AND (d_date#13 <= 11057)) AND isnotnull(d_date_sk#12)) - -(20) BroadcastExchange -Input [2]: [d_date_sk#12, d_date#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] - -(21) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [inv_date_sk#1] -Right keys [1]: [d_date_sk#12] -Join condition: None - -(22) Project [codegen id : 4] -Output [4]: [inv_quantity_on_hand#4, w_warehouse_name#6, i_item_id#9, d_date#13] -Input [6]: [inv_date_sk#1, inv_quantity_on_hand#4, w_warehouse_name#6, i_item_id#9, d_date_sk#12, d_date#13] - -(23) HashAggregate [codegen id : 4] -Input [4]: [inv_quantity_on_hand#4, w_warehouse_name#6, i_item_id#9, d_date#13] -Keys [2]: [w_warehouse_name#6, i_item_id#9] -Functions [2]: [partial_sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))] -Aggregate Attributes [2]: [sum#15, sum#16] -Results [4]: [w_warehouse_name#6, i_item_id#9, sum#17, sum#18] - -(24) Exchange -Input [4]: [w_warehouse_name#6, i_item_id#9, sum#17, sum#18] -Arguments: hashpartitioning(w_warehouse_name#6, i_item_id#9, 5), true, [id=#19] - -(25) HashAggregate [codegen id : 5] -Input [4]: [w_warehouse_name#6, i_item_id#9, sum#17, sum#18] -Keys [2]: [w_warehouse_name#6, i_item_id#9] -Functions [2]: [sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint)), sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))] -Aggregate Attributes [2]: [sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#20, sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#21] -Results [4]: [w_warehouse_name#6, i_item_id#9, sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#20 AS inv_before#22, sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#21 AS inv_after#23] - -(26) Filter [codegen id : 5] -Input [4]: [w_warehouse_name#6, i_item_id#9, inv_before#22, inv_after#23] -Condition : ((CASE WHEN (inv_before#22 > 0) THEN (cast(inv_after#23 as double) / cast(inv_before#22 as double)) ELSE null END >= 0.666667) AND (CASE WHEN (inv_before#22 > 0) THEN (cast(inv_after#23 as double) / cast(inv_before#22 as double)) ELSE null END <= 1.5)) - -(27) TakeOrderedAndProject -Input [4]: [w_warehouse_name#6, i_item_id#9, inv_before#22, inv_after#23] -Arguments: 100, [w_warehouse_name#6 ASC NULLS FIRST, i_item_id#9 ASC NULLS FIRST], [w_warehouse_name#6, i_item_id#9, inv_before#22, inv_after#23] - +TakeOrderedAndProject(limit=100, orderBy=[w_warehouse_name#1 ASC NULLS FIRST,i_item_id#2 ASC NULLS FIRST], output=[w_warehouse_name#1,i_item_id#2,inv_before#3,inv_after#4]) ++- *(5) Filter ((CASE WHEN (inv_before#3 > 0) THEN (cast(inv_after#4 as double) / cast(inv_before#3 as double)) ELSE null END >= 0.666667) && (CASE WHEN (inv_before#3 > 0) THEN (cast(inv_after#4 as double) / cast(inv_before#3 as double)) ELSE null END <= 1.5)) + +- *(5) HashAggregate(keys=[w_warehouse_name#1, i_item_id#2], functions=[sum(cast(CASE WHEN (d_date#5 < 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint)), sum(cast(CASE WHEN (d_date#5 >= 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint))]) + +- Exchange hashpartitioning(w_warehouse_name#1, i_item_id#2, 5) + +- *(4) HashAggregate(keys=[w_warehouse_name#1, i_item_id#2], functions=[partial_sum(cast(CASE WHEN (d_date#5 < 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (d_date#5 >= 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint))]) + +- *(4) Project [inv_quantity_on_hand#6, w_warehouse_name#1, i_item_id#2, d_date#5] + +- *(4) BroadcastHashJoin [inv_date_sk#7], [d_date_sk#8], Inner, BuildRight + :- *(4) Project [inv_date_sk#7, inv_quantity_on_hand#6, w_warehouse_name#1, i_item_id#2] + : +- *(4) BroadcastHashJoin [inv_item_sk#9], [i_item_sk#10], Inner, BuildRight + : :- *(4) Project [inv_date_sk#7, inv_item_sk#9, inv_quantity_on_hand#6, w_warehouse_name#1] + : : +- *(4) BroadcastHashJoin [inv_warehouse_sk#11], [w_warehouse_sk#12], Inner, BuildRight + : : :- *(4) Project [inv_date_sk#7, inv_item_sk#9, inv_warehouse_sk#11, inv_quantity_on_hand#6] + : : : +- *(4) Filter ((isnotnull(inv_warehouse_sk#11) && isnotnull(inv_item_sk#9)) && isnotnull(inv_date_sk#7)) + : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#7,inv_item_sk#9,inv_warehouse_sk#11,inv_quantity_on_hand#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_warehouse_sk), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [w_warehouse_sk#12, w_warehouse_name#1] + : : +- *(1) Filter isnotnull(w_warehouse_sk#12) + : : +- *(1) FileScan parquet default.warehouse[w_warehouse_sk#12,w_warehouse_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [i_item_sk#10, i_item_id#2] + : +- *(2) Filter (((isnotnull(i_current_price#13) && (i_current_price#13 >= 0.99)) && (i_current_price#13 <= 1.49)) && isnotnull(i_item_sk#10)) + : +- *(2) FileScan parquet default.item[i_item_sk#10,i_item_id#2,i_current_price#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [d_date_sk#8, d_date#5] + +- *(3) Filter (((isnotnull(d_date#5) && (d_date#5 >= 10997)) && (d_date#5 <= 11057)) && isnotnull(d_date_sk#8)) + +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_date#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/simplified.txt index 9b5483bd7..472388330 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/simplified.txt @@ -1,40 +1,35 @@ -TakeOrderedAndProject [w_warehouse_name,i_item_id,inv_before,inv_after] - WholeStageCodegen (5) - Filter [inv_before,inv_after] - HashAggregate [w_warehouse_name,i_item_id,sum,sum] [sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),inv_before,inv_after,sum,sum] +TakeOrderedAndProject [i_item_id,inv_after,inv_before,w_warehouse_name] + WholeStageCodegen + Filter [inv_after,inv_before] + HashAggregate [i_item_id,sum,sum,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),w_warehouse_name] [inv_after,inv_before,sum,sum,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint))] InputAdapter - Exchange [w_warehouse_name,i_item_id] #1 - WholeStageCodegen (4) - HashAggregate [w_warehouse_name,i_item_id,d_date,inv_quantity_on_hand] [sum,sum,sum,sum] - Project [inv_quantity_on_hand,w_warehouse_name,i_item_id,d_date] - BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [inv_date_sk,inv_quantity_on_hand,w_warehouse_name,i_item_id] - BroadcastHashJoin [inv_item_sk,i_item_sk] + Exchange [i_item_id,w_warehouse_name] #1 + WholeStageCodegen + HashAggregate [d_date,i_item_id,inv_quantity_on_hand,sum,sum,sum,sum,w_warehouse_name] [sum,sum,sum,sum] + Project [d_date,i_item_id,inv_quantity_on_hand,w_warehouse_name] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_item_id,inv_date_sk,inv_quantity_on_hand,w_warehouse_name] + BroadcastHashJoin [i_item_sk,inv_item_sk] Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Filter [inv_warehouse_sk,inv_item_sk,inv_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [w_warehouse_sk] - ColumnarToRow - InputAdapter - Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] + WholeStageCodegen + Project [w_warehouse_name,w_warehouse_sk] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Project [i_item_sk,i_item_id] + WholeStageCodegen + Project [i_item_id,i_item_sk] Filter [i_current_price,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id,i_current_price] + Scan parquet default.item [i_current_price,i_item_id,i_item_sk] [i_current_price,i_item_id,i_item_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + WholeStageCodegen + Project [d_date,d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/explain.txt index 6aae0b0c8..171688e3a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/explain.txt @@ -1,155 +1,27 @@ == Physical Plan == -TakeOrderedAndProject (27) -+- * HashAggregate (26) - +- Exchange (25) - +- * HashAggregate (24) - +- * Expand (23) - +- * Project (22) - +- * BroadcastHashJoin Inner BuildRight (21) - :- * Project (16) - : +- * BroadcastHashJoin Inner BuildRight (15) - : :- * Project (10) - : : +- * BroadcastHashJoin Inner BuildRight (9) - : : :- * Filter (3) - : : : +- * ColumnarToRow (2) - : : : +- Scan parquet default.inventory (1) - : : +- BroadcastExchange (8) - : : +- * Project (7) - : : +- * Filter (6) - : : +- * ColumnarToRow (5) - : : +- Scan parquet default.date_dim (4) - : +- BroadcastExchange (14) - : +- * Filter (13) - : +- * ColumnarToRow (12) - : +- Scan parquet default.item (11) - +- BroadcastExchange (20) - +- * Filter (19) - +- * ColumnarToRow (18) - +- Scan parquet default.warehouse (17) - - -(1) Scan parquet default.inventory -Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/inventory] -PushedFilters: [IsNotNull(inv_date_sk), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] - -(3) Filter [codegen id : 4] -Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] -Condition : ((isnotnull(inv_date_sk#1) AND isnotnull(inv_item_sk#2)) AND isnotnull(inv_warehouse_sk#3)) - -(4) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_month_seq#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#6] - -(6) Filter [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#6] -Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1200)) AND (d_month_seq#6 <= 1211)) AND isnotnull(d_date_sk#5)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_month_seq#6] - -(8) BroadcastExchange -Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] - -(9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [inv_date_sk#1] -Right keys [1]: [d_date_sk#5] -Join condition: None - -(10) Project [codegen id : 4] -Output [3]: [inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] -Input [5]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, d_date_sk#5] - -(11) Scan parquet default.item -Output [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] - -(13) Filter [codegen id : 2] -Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] -Condition : isnotnull(i_item_sk#8) - -(14) BroadcastExchange -Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] - -(15) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [inv_item_sk#2] -Right keys [1]: [i_item_sk#8] -Join condition: None - -(16) Project [codegen id : 4] -Output [6]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, i_brand#9, i_class#10, i_category#11, i_product_name#12] -Input [8]: [inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] - -(17) Scan parquet default.warehouse -Output [1]: [w_warehouse_sk#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/warehouse] -PushedFilters: [IsNotNull(w_warehouse_sk)] -ReadSchema: struct - -(18) ColumnarToRow [codegen id : 3] -Input [1]: [w_warehouse_sk#14] - -(19) Filter [codegen id : 3] -Input [1]: [w_warehouse_sk#14] -Condition : isnotnull(w_warehouse_sk#14) - -(20) BroadcastExchange -Input [1]: [w_warehouse_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] - -(21) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [inv_warehouse_sk#3] -Right keys [1]: [w_warehouse_sk#14] -Join condition: None - -(22) Project [codegen id : 4] -Output [5]: [inv_quantity_on_hand#4, i_product_name#12, i_brand#9, i_class#10, i_category#11] -Input [7]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, i_brand#9, i_class#10, i_category#11, i_product_name#12, w_warehouse_sk#14] - -(23) Expand [codegen id : 4] -Input [5]: [inv_quantity_on_hand#4, i_product_name#12, i_brand#9, i_class#10, i_category#11] -Arguments: [List(inv_quantity_on_hand#4, i_product_name#12, i_brand#9, i_class#10, i_category#11, 0), List(inv_quantity_on_hand#4, i_product_name#12, i_brand#9, i_class#10, null, 1), List(inv_quantity_on_hand#4, i_product_name#12, i_brand#9, null, null, 3), List(inv_quantity_on_hand#4, i_product_name#12, null, null, null, 7), List(inv_quantity_on_hand#4, null, null, null, null, 15)], [inv_quantity_on_hand#4, i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20] - -(24) HashAggregate [codegen id : 4] -Input [6]: [inv_quantity_on_hand#4, i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20] -Keys [5]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20] -Functions [1]: [partial_avg(cast(inv_quantity_on_hand#4 as bigint))] -Aggregate Attributes [2]: [sum#21, count#22] -Results [7]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20, sum#23, count#24] - -(25) Exchange -Input [7]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20, sum#23, count#24] -Arguments: hashpartitioning(i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20, 5), true, [id=#25] - -(26) HashAggregate [codegen id : 5] -Input [7]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20, sum#23, count#24] -Keys [5]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20] -Functions [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))] -Aggregate Attributes [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))#26] -Results [5]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, avg(cast(inv_quantity_on_hand#4 as bigint))#26 AS qoh#27] - -(27) TakeOrderedAndProject -Input [5]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, qoh#27] -Arguments: 100, [qoh#27 ASC NULLS FIRST, i_product_name#16 ASC NULLS FIRST, i_brand#17 ASC NULLS FIRST, i_class#18 ASC NULLS FIRST, i_category#19 ASC NULLS FIRST], [i_product_name#16, i_brand#17, i_class#18, i_category#19, qoh#27] - +TakeOrderedAndProject(limit=100, orderBy=[qoh#1 ASC NULLS FIRST,i_product_name#2 ASC NULLS FIRST,i_brand#3 ASC NULLS FIRST,i_class#4 ASC NULLS FIRST,i_category#5 ASC NULLS FIRST], output=[i_product_name#2,i_brand#3,i_class#4,i_category#5,qoh#1]) ++- *(5) HashAggregate(keys=[i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6], functions=[avg(cast(inv_quantity_on_hand#7 as bigint))]) + +- Exchange hashpartitioning(i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6, 5) + +- *(4) HashAggregate(keys=[i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6], functions=[partial_avg(cast(inv_quantity_on_hand#7 as bigint))]) + +- *(4) Expand [List(inv_quantity_on_hand#7, i_product_name#8, i_brand#9, i_class#10, i_category#11, 0), List(inv_quantity_on_hand#7, i_product_name#8, i_brand#9, i_class#10, null, 1), List(inv_quantity_on_hand#7, i_product_name#8, i_brand#9, null, null, 3), List(inv_quantity_on_hand#7, i_product_name#8, null, null, null, 7), List(inv_quantity_on_hand#7, null, null, null, null, 15)], [inv_quantity_on_hand#7, i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6] + +- *(4) Project [inv_quantity_on_hand#7, i_product_name#12 AS i_product_name#8, i_brand#13 AS i_brand#9, i_class#14 AS i_class#10, i_category#15 AS i_category#11] + +- *(4) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#17], Inner, BuildRight + :- *(4) Project [inv_warehouse_sk#16, inv_quantity_on_hand#7, i_brand#13, i_class#14, i_category#15, i_product_name#12] + : +- *(4) BroadcastHashJoin [inv_item_sk#18], [i_item_sk#19], Inner, BuildRight + : :- *(4) Project [inv_item_sk#18, inv_warehouse_sk#16, inv_quantity_on_hand#7] + : : +- *(4) BroadcastHashJoin [inv_date_sk#20], [d_date_sk#21], Inner, BuildRight + : : :- *(4) Project [inv_date_sk#20, inv_item_sk#18, inv_warehouse_sk#16, inv_quantity_on_hand#7] + : : : +- *(4) Filter ((isnotnull(inv_date_sk#20) && isnotnull(inv_item_sk#18)) && isnotnull(inv_warehouse_sk#16)) + : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#20,inv_item_sk#18,inv_warehouse_sk#16,inv_quantity_on_hand#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_date_sk), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [d_date_sk#21] + : : +- *(1) Filter (((isnotnull(d_month_seq#22) && (d_month_seq#22 >= 1200)) && (d_month_seq#22 <= 1211)) && isnotnull(d_date_sk#21)) + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#21,d_month_seq#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [i_item_sk#19, i_brand#13, i_class#14, i_category#15, i_product_name#12] + : +- *(2) Filter isnotnull(i_item_sk#19) + : +- *(2) FileScan parquet default.item[i_item_sk#19,i_brand#13,i_class#14,i_category#15,i_product_name#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [w_warehouse_sk#17] + +- *(3) Filter isnotnull(w_warehouse_sk#17) + +- *(3) FileScan parquet default.warehouse[w_warehouse_sk#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/simplified.txt index 233babdf3..d10f9d4ef 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/simplified.txt @@ -1,40 +1,35 @@ -TakeOrderedAndProject [qoh,i_product_name,i_brand,i_class,i_category] - WholeStageCodegen (5) - HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,sum,count] [avg(cast(inv_quantity_on_hand as bigint)),qoh,sum,count] +TakeOrderedAndProject [i_brand,i_category,i_class,i_product_name,qoh] + WholeStageCodegen + HashAggregate [avg(cast(inv_quantity_on_hand as bigint)),count,i_brand,i_category,i_class,i_product_name,spark_grouping_id,sum] [avg(cast(inv_quantity_on_hand as bigint)),count,qoh,sum] InputAdapter - Exchange [i_product_name,i_brand,i_class,i_category,spark_grouping_id] #1 - WholeStageCodegen (4) - HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,inv_quantity_on_hand] [sum,count,sum,count] - Expand [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] - Project [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] + Exchange [i_brand,i_category,i_class,i_product_name,spark_grouping_id] #1 + WholeStageCodegen + HashAggregate [count,count,i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand,spark_grouping_id,sum,sum] [count,count,sum,sum] + Expand [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] + Project [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [inv_warehouse_sk,inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] - BroadcastHashJoin [inv_item_sk,i_item_sk] - Project [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] - BroadcastHashJoin [inv_date_sk,d_date_sk] - Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] - ColumnarToRow - InputAdapter - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Project [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) + WholeStageCodegen Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + Filter [d_date_sk,d_month_seq] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] + WholeStageCodegen + Project [i_brand,i_category,i_class,i_item_sk,i_product_name] + Filter [i_item_sk] + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_product_name] [i_brand,i_category,i_class,i_item_sk,i_product_name] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) - Filter [w_warehouse_sk] - ColumnarToRow - InputAdapter - Scan parquet default.warehouse [w_warehouse_sk] + WholeStageCodegen + Project [w_warehouse_sk] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_sk] [w_warehouse_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/explain.txt index 15ae5bfe2..cd1cae198 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/explain.txt @@ -1,537 +1,89 @@ == Physical Plan == -* HashAggregate (71) -+- Exchange (70) - +- * HashAggregate (69) - +- Union (68) - :- * Project (51) - : +- * BroadcastHashJoin Inner BuildRight (50) - : :- * Project (44) - : : +- * BroadcastHashJoin LeftSemi BuildRight (43) - : : :- * Project (27) - : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.catalog_sales (1) - : : : +- BroadcastExchange (25) - : : : +- * Project (24) - : : : +- * Filter (23) - : : : +- * HashAggregate (22) - : : : +- Exchange (21) - : : : +- * HashAggregate (20) - : : : +- * Project (19) - : : : +- * BroadcastHashJoin Inner BuildRight (18) - : : : :- * Project (13) - : : : : +- * BroadcastHashJoin Inner BuildRight (12) - : : : : :- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.store_sales (4) - : : : : +- BroadcastExchange (11) - : : : : +- * Project (10) - : : : : +- * Filter (9) - : : : : +- * ColumnarToRow (8) - : : : : +- Scan parquet default.date_dim (7) - : : : +- BroadcastExchange (17) - : : : +- * Filter (16) - : : : +- * ColumnarToRow (15) - : : : +- Scan parquet default.item (14) - : : +- BroadcastExchange (42) - : : +- * Project (41) - : : +- * Filter (40) - : : +- * HashAggregate (39) - : : +- Exchange (38) - : : +- * HashAggregate (37) - : : +- * Project (36) - : : +- * BroadcastHashJoin Inner BuildRight (35) - : : :- * Filter (30) - : : : +- * ColumnarToRow (29) - : : : +- Scan parquet default.store_sales (28) - : : +- BroadcastExchange (34) - : : +- * Filter (33) - : : +- * ColumnarToRow (32) - : : +- Scan parquet default.customer (31) - : +- BroadcastExchange (49) - : +- * Project (48) - : +- * Filter (47) - : +- * ColumnarToRow (46) - : +- Scan parquet default.date_dim (45) - +- * Project (67) - +- * BroadcastHashJoin Inner BuildRight (66) - :- * Project (64) - : +- * BroadcastHashJoin LeftSemi BuildRight (63) - : :- * Project (57) - : : +- * BroadcastHashJoin LeftSemi BuildRight (56) - : : :- * Filter (54) - : : : +- * ColumnarToRow (53) - : : : +- Scan parquet default.web_sales (52) - : : +- ReusedExchange (55) - : +- BroadcastExchange (62) - : +- * Project (61) - : +- * Filter (60) - : +- * HashAggregate (59) - : +- ReusedExchange (58) - +- ReusedExchange (65) - - -(1) Scan parquet default.catalog_sales -Output [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 9] -Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] - -(3) Filter [codegen id : 9] -Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] -Condition : isnotnull(cs_sold_date_sk#1) - -(4) Scan parquet default.store_sales -Output [2]: [ss_sold_date_sk#6, ss_item_sk#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 3] -Input [2]: [ss_sold_date_sk#6, ss_item_sk#7] - -(6) Filter [codegen id : 3] -Input [2]: [ss_sold_date_sk#6, ss_item_sk#7] -Condition : (isnotnull(ss_sold_date_sk#6) AND isnotnull(ss_item_sk#7)) - -(7) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_date#9, d_year#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] -ReadSchema: struct - -(8) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#8, d_date#9, d_year#10] - -(9) Filter [codegen id : 1] -Input [3]: [d_date_sk#8, d_date#9, d_year#10] -Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) - -(10) Project [codegen id : 1] -Output [2]: [d_date_sk#8, d_date#9] -Input [3]: [d_date_sk#8, d_date#9, d_year#10] - -(11) BroadcastExchange -Input [2]: [d_date_sk#8, d_date#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] - -(12) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#6] -Right keys [1]: [d_date_sk#8] -Join condition: None - -(13) Project [codegen id : 3] -Output [2]: [ss_item_sk#7, d_date#9] -Input [4]: [ss_sold_date_sk#6, ss_item_sk#7, d_date_sk#8, d_date#9] - -(14) Scan parquet default.item -Output [2]: [i_item_sk#12, i_item_desc#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(15) ColumnarToRow [codegen id : 2] -Input [2]: [i_item_sk#12, i_item_desc#13] - -(16) Filter [codegen id : 2] -Input [2]: [i_item_sk#12, i_item_desc#13] -Condition : isnotnull(i_item_sk#12) - -(17) BroadcastExchange -Input [2]: [i_item_sk#12, i_item_desc#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] - -(18) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_item_sk#7] -Right keys [1]: [i_item_sk#12] -Join condition: None - -(19) Project [codegen id : 3] -Output [3]: [d_date#9, i_item_sk#12, i_item_desc#13] -Input [4]: [ss_item_sk#7, d_date#9, i_item_sk#12, i_item_desc#13] - -(20) HashAggregate [codegen id : 3] -Input [3]: [d_date#9, i_item_sk#12, i_item_desc#13] -Keys [3]: [substr(i_item_desc#13, 1, 30) AS substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#16] -Results [4]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, count#17] - -(21) Exchange -Input [4]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, count#17] -Arguments: hashpartitioning(substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, 5), true, [id=#18] - -(22) HashAggregate [codegen id : 4] -Input [4]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, count#17] -Keys [3]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#19] -Results [2]: [i_item_sk#12 AS item_sk#20, count(1)#19 AS count(1)#21] - -(23) Filter [codegen id : 4] -Input [2]: [item_sk#20, count(1)#21] -Condition : (count(1)#21 > 4) - -(24) Project [codegen id : 4] -Output [1]: [item_sk#20] -Input [2]: [item_sk#20, count(1)#21] - -(25) BroadcastExchange -Input [1]: [item_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] - -(26) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [cs_item_sk#3] -Right keys [1]: [item_sk#20] -Join condition: None - -(27) Project [codegen id : 9] -Output [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] -Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] - -(28) Scan parquet default.store_sales -Output [3]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_customer_sk)] -ReadSchema: struct - -(29) ColumnarToRow [codegen id : 6] -Input [3]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] - -(30) Filter [codegen id : 6] -Input [3]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] -Condition : isnotnull(ss_customer_sk#23) - -(31) Scan parquet default.customer -Output [1]: [c_customer_sk#26] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk)] -ReadSchema: struct - -(32) ColumnarToRow [codegen id : 5] -Input [1]: [c_customer_sk#26] - -(33) Filter [codegen id : 5] -Input [1]: [c_customer_sk#26] -Condition : isnotnull(c_customer_sk#26) - -(34) BroadcastExchange -Input [1]: [c_customer_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] - -(35) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_customer_sk#23] -Right keys [1]: [c_customer_sk#26] -Join condition: None - -(36) Project [codegen id : 6] -Output [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] -Input [4]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] - -(37) HashAggregate [codegen id : 6] -Input [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] -Keys [1]: [c_customer_sk#26] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#28, isEmpty#29] -Results [3]: [c_customer_sk#26, sum#30, isEmpty#31] - -(38) Exchange -Input [3]: [c_customer_sk#26, sum#30, isEmpty#31] -Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#32] - -(39) HashAggregate [codegen id : 7] -Input [3]: [c_customer_sk#26, sum#30, isEmpty#31] -Keys [1]: [c_customer_sk#26] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#33] -Results [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#33 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] - -(40) Filter [codegen id : 7] -Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#35, [id=#36] as decimal(32,6)))), DecimalType(38,8), true))) - -(41) Project [codegen id : 7] -Output [1]: [c_customer_sk#26] -Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] - -(42) BroadcastExchange -Input [1]: [c_customer_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#37] - -(43) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [cs_bill_customer_sk#2] -Right keys [1]: [c_customer_sk#26] -Join condition: None - -(44) Project [codegen id : 9] -Output [3]: [cs_sold_date_sk#1, cs_quantity#4, cs_list_price#5] -Input [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] - -(45) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_year#10, d_moy#38] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] -ReadSchema: struct - -(46) ColumnarToRow [codegen id : 8] -Input [3]: [d_date_sk#8, d_year#10, d_moy#38] - -(47) Filter [codegen id : 8] -Input [3]: [d_date_sk#8, d_year#10, d_moy#38] -Condition : ((((isnotnull(d_year#10) AND isnotnull(d_moy#38)) AND (d_year#10 = 2000)) AND (d_moy#38 = 2)) AND isnotnull(d_date_sk#8)) - -(48) Project [codegen id : 8] -Output [1]: [d_date_sk#8] -Input [3]: [d_date_sk#8, d_year#10, d_moy#38] - -(49) BroadcastExchange -Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#39] - -(50) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#8] -Join condition: None - -(51) Project [codegen id : 9] -Output [1]: [CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true) AS sales#40] -Input [4]: [cs_sold_date_sk#1, cs_quantity#4, cs_list_price#5, d_date_sk#8] - -(52) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#41, ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(53) ColumnarToRow [codegen id : 18] -Input [5]: [ws_sold_date_sk#41, ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] - -(54) Filter [codegen id : 18] -Input [5]: [ws_sold_date_sk#41, ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] -Condition : isnotnull(ws_sold_date_sk#41) - -(55) ReusedExchange [Reuses operator id: 25] -Output [1]: [item_sk#20] - -(56) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ws_item_sk#42] -Right keys [1]: [item_sk#20] -Join condition: None - -(57) Project [codegen id : 18] -Output [4]: [ws_sold_date_sk#41, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] -Input [5]: [ws_sold_date_sk#41, ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] - -(58) ReusedExchange [Reuses operator id: 38] -Output [3]: [c_customer_sk#26, sum#46, isEmpty#47] - -(59) HashAggregate [codegen id : 16] -Input [3]: [c_customer_sk#26, sum#46, isEmpty#47] -Keys [1]: [c_customer_sk#26] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#48] -Results [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#48 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#49] - -(60) Filter [codegen id : 16] -Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#49] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#49) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#49 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#35, [id=#36] as decimal(32,6)))), DecimalType(38,8), true))) - -(61) Project [codegen id : 16] -Output [1]: [c_customer_sk#26] -Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#49] - -(62) BroadcastExchange -Input [1]: [c_customer_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] - -(63) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ws_bill_customer_sk#43] -Right keys [1]: [c_customer_sk#26] -Join condition: None - -(64) Project [codegen id : 18] -Output [3]: [ws_sold_date_sk#41, ws_quantity#44, ws_list_price#45] -Input [4]: [ws_sold_date_sk#41, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] - -(65) ReusedExchange [Reuses operator id: 49] -Output [1]: [d_date_sk#8] - -(66) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ws_sold_date_sk#41] -Right keys [1]: [d_date_sk#8] -Join condition: None - -(67) Project [codegen id : 18] -Output [1]: [CheckOverflow((promote_precision(cast(cast(ws_quantity#44 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#45 as decimal(12,2)))), DecimalType(18,2), true) AS sales#51] -Input [4]: [ws_sold_date_sk#41, ws_quantity#44, ws_list_price#45, d_date_sk#8] - -(68) Union - -(69) HashAggregate [codegen id : 19] -Input [1]: [sales#40] -Keys: [] -Functions [1]: [partial_sum(sales#40)] -Aggregate Attributes [2]: [sum#52, isEmpty#53] -Results [2]: [sum#54, isEmpty#55] - -(70) Exchange -Input [2]: [sum#54, isEmpty#55] -Arguments: SinglePartition, true, [id=#56] - -(71) HashAggregate [codegen id : 20] -Input [2]: [sum#54, isEmpty#55] -Keys: [] -Functions [1]: [sum(sales#40)] -Aggregate Attributes [1]: [sum(sales#40)#57] -Results [1]: [sum(sales#40)#57 AS sum(sales)#58] - -===== Subqueries ===== - -Subquery:1 Hosting operator id = 40 Hosting Expression = Subquery scalar-subquery#35, [id=#36] -* HashAggregate (93) -+- Exchange (92) - +- * HashAggregate (91) - +- * HashAggregate (90) - +- Exchange (89) - +- * HashAggregate (88) - +- * Project (87) - +- * BroadcastHashJoin Inner BuildRight (86) - :- * Project (80) - : +- * BroadcastHashJoin Inner BuildRight (79) - : :- * Filter (74) - : : +- * ColumnarToRow (73) - : : +- Scan parquet default.store_sales (72) - : +- BroadcastExchange (78) - : +- * Filter (77) - : +- * ColumnarToRow (76) - : +- Scan parquet default.customer (75) - +- BroadcastExchange (85) - +- * Project (84) - +- * Filter (83) - +- * ColumnarToRow (82) - +- Scan parquet default.date_dim (81) - - -(72) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(73) ColumnarToRow [codegen id : 3] -Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] - -(74) Filter [codegen id : 3] -Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] -Condition : (isnotnull(ss_customer_sk#23) AND isnotnull(ss_sold_date_sk#6)) - -(75) Scan parquet default.customer -Output [1]: [c_customer_sk#26] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk)] -ReadSchema: struct - -(76) ColumnarToRow [codegen id : 1] -Input [1]: [c_customer_sk#26] - -(77) Filter [codegen id : 1] -Input [1]: [c_customer_sk#26] -Condition : isnotnull(c_customer_sk#26) - -(78) BroadcastExchange -Input [1]: [c_customer_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#59] - -(79) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_customer_sk#23] -Right keys [1]: [c_customer_sk#26] -Join condition: None - -(80) Project [codegen id : 3] -Output [4]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] -Input [5]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] - -(81) Scan parquet default.date_dim -Output [2]: [d_date_sk#8, d_year#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] -ReadSchema: struct - -(82) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#8, d_year#10] - -(83) Filter [codegen id : 2] -Input [2]: [d_date_sk#8, d_year#10] -Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) - -(84) Project [codegen id : 2] -Output [1]: [d_date_sk#8] -Input [2]: [d_date_sk#8, d_year#10] - -(85) BroadcastExchange -Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#60] - -(86) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#6] -Right keys [1]: [d_date_sk#8] -Join condition: None - -(87) Project [codegen id : 3] -Output [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] -Input [5]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26, d_date_sk#8] - -(88) HashAggregate [codegen id : 3] -Input [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] -Keys [1]: [c_customer_sk#26] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#61, isEmpty#62] -Results [3]: [c_customer_sk#26, sum#63, isEmpty#64] - -(89) Exchange -Input [3]: [c_customer_sk#26, sum#63, isEmpty#64] -Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#65] - -(90) HashAggregate [codegen id : 4] -Input [3]: [c_customer_sk#26, sum#63, isEmpty#64] -Keys [1]: [c_customer_sk#26] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#66] -Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#66 AS csales#67] - -(91) HashAggregate [codegen id : 4] -Input [1]: [csales#67] -Keys: [] -Functions [1]: [partial_max(csales#67)] -Aggregate Attributes [1]: [max#68] -Results [1]: [max#69] - -(92) Exchange -Input [1]: [max#69] -Arguments: SinglePartition, true, [id=#70] - -(93) HashAggregate [codegen id : 5] -Input [1]: [max#69] -Keys: [] -Functions [1]: [max(csales#67)] -Aggregate Attributes [1]: [max(csales#67)#71] -Results [1]: [max(csales#67)#71 AS tpcds_cmax#72] - -Subquery:2 Hosting operator id = 60 Hosting Expression = ReusedSubquery Subquery scalar-subquery#35, [id=#36] - - +CollectLimit 100 ++- *(20) HashAggregate(keys=[], functions=[sum(sales#1)]) + +- Exchange SinglePartition + +- *(19) HashAggregate(keys=[], functions=[partial_sum(sales#1)]) + +- Union + :- *(9) Project [CheckOverflow((promote_precision(cast(cast(cs_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#3 as decimal(12,2)))), DecimalType(18,2)) AS sales#1] + : +- *(9) BroadcastHashJoin [cs_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight + : :- *(9) Project [cs_sold_date_sk#4, cs_quantity#2, cs_list_price#3] + : : +- *(9) BroadcastHashJoin [cs_bill_customer_sk#6], [c_customer_sk#7], LeftSemi, BuildRight + : : :- *(9) Project [cs_sold_date_sk#4, cs_bill_customer_sk#6, cs_quantity#2, cs_list_price#3] + : : : +- *(9) BroadcastHashJoin [cs_item_sk#8], [item_sk#9], LeftSemi, BuildRight + : : : :- *(9) Project [cs_sold_date_sk#4, cs_bill_customer_sk#6, cs_item_sk#8, cs_quantity#2, cs_list_price#3] + : : : : +- *(9) Filter isnotnull(cs_sold_date_sk#4) + : : : : +- *(9) FileScan parquet default.catalog_sales[cs_sold_date_sk#4,cs_bill_customer_sk#6,cs_item_sk#8,cs_quantity#2,cs_list_price#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct 4) + : : : +- *(4) HashAggregate(keys=[substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14], functions=[count(1)]) + : : : +- Exchange hashpartitioning(substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14, 5) + : : : +- *(3) HashAggregate(keys=[substring(i_item_desc#11, 1, 30) AS substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14], functions=[partial_count(1)]) + : : : +- *(3) Project [d_date#14, i_item_sk#13, i_item_desc#11] + : : : +- *(3) BroadcastHashJoin [ss_item_sk#15], [i_item_sk#13], Inner, BuildRight + : : : :- *(3) Project [ss_item_sk#15, d_date#14] + : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#5], Inner, BuildRight + : : : : :- *(3) Project [ss_sold_date_sk#16, ss_item_sk#15] + : : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#16) && isnotnull(ss_item_sk#15)) + : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_item_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [d_date_sk#5, d_date#14] + : : : : +- *(1) Filter (d_year#17 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#5)) + : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#5,d_date#14,d_year#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [i_item_sk#13, i_item_desc#11] + : : : +- *(2) Filter isnotnull(i_item_sk#13) + : : : +- *(2) FileScan parquet default.item[i_item_sk#13,i_item_desc#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(7) Project [c_customer_sk#7] + : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery3052 as decimal(32,6)))), DecimalType(38,8)))) + : : : +- Subquery subquery3052 + : : : +- *(5) HashAggregate(keys=[], functions=[max(csales#21)]) + : : : +- Exchange SinglePartition + : : : +- *(4) HashAggregate(keys=[], functions=[partial_max(csales#21)]) + : : : +- *(4) HashAggregate(keys=[c_customer_sk#7], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) + : : : +- Exchange hashpartitioning(c_customer_sk#7, 5) + : : : +- *(3) HashAggregate(keys=[c_customer_sk#7], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) + : : : +- *(3) Project [ss_quantity#18, ss_sales_price#19, c_customer_sk#7] + : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#5], Inner, BuildRight + : : : :- *(3) Project [ss_sold_date_sk#16, ss_quantity#18, ss_sales_price#19, c_customer_sk#7] + : : : : +- *(3) BroadcastHashJoin [ss_customer_sk#22], [c_customer_sk#7], Inner, BuildRight + : : : : :- *(3) Project [ss_sold_date_sk#16, ss_customer_sk#22, ss_quantity#18, ss_sales_price#19] + : : : : : +- *(3) Filter (isnotnull(ss_customer_sk#22) && isnotnull(ss_sold_date_sk#16)) + : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_customer_sk#22,ss_quantity#18,ss_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [c_customer_sk#7] + : : : : +- *(1) Filter isnotnull(c_customer_sk#7) + : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [d_date_sk#5] + : : : +- *(2) Filter (d_year#17 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#5)) + : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#5,d_year#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(7) HashAggregate(keys=[c_customer_sk#7], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- Exchange hashpartitioning(c_customer_sk#7, 5) + : : +- *(6) HashAggregate(keys=[c_customer_sk#7], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) + : : +- *(6) Project [ss_quantity#18, ss_sales_price#19, c_customer_sk#7] + : : +- *(6) BroadcastHashJoin [ss_customer_sk#22], [c_customer_sk#7], Inner, BuildRight + : : :- *(6) Project [ss_customer_sk#22, ss_quantity#18, ss_sales_price#19] + : : : +- *(6) Filter isnotnull(ss_customer_sk#22) + : : : +- *(6) FileScan parquet default.store_sales[ss_customer_sk#22,ss_quantity#18,ss_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(5) Project [c_customer_sk#7] + : : +- *(5) Filter isnotnull(c_customer_sk#7) + : : +- *(5) FileScan parquet default.customer[c_customer_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(8) Project [d_date_sk#5] + : +- *(8) Filter ((((isnotnull(d_year#17) && isnotnull(d_moy#23)) && (d_year#17 = 2000)) && (d_moy#23 = 2)) && isnotnull(d_date_sk#5)) + : +- *(8) FileScan parquet default.date_dim[d_date_sk#5,d_year#17,d_moy#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct + +- *(18) Project [CheckOverflow((promote_precision(cast(cast(ws_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#25 as decimal(12,2)))), DecimalType(18,2)) AS sales#26] + +- *(18) BroadcastHashJoin [ws_sold_date_sk#27], [d_date_sk#5], Inner, BuildRight + :- *(18) Project [ws_sold_date_sk#27, ws_quantity#24, ws_list_price#25] + : +- *(18) BroadcastHashJoin [ws_bill_customer_sk#28], [c_customer_sk#7], LeftSemi, BuildRight + : :- *(18) Project [ws_sold_date_sk#27, ws_bill_customer_sk#28, ws_quantity#24, ws_list_price#25] + : : +- *(18) BroadcastHashJoin [ws_item_sk#29], [item_sk#9], LeftSemi, BuildRight + : : :- *(18) Project [ws_sold_date_sk#27, ws_item_sk#29, ws_bill_customer_sk#28, ws_quantity#24, ws_list_price#25] + : : : +- *(18) Filter isnotnull(ws_sold_date_sk#27) + : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#27,ws_item_sk#29,ws_bill_customer_sk#28,ws_quantity#24,ws_list_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - -(2) ColumnarToRow [codegen id : 13] -Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] - -(3) Filter [codegen id : 13] -Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] -Condition : (isnotnull(cs_bill_customer_sk#2) AND isnotnull(cs_sold_date_sk#1)) - -(4) Scan parquet default.store_sales -Output [2]: [ss_sold_date_sk#6, ss_item_sk#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 3] -Input [2]: [ss_sold_date_sk#6, ss_item_sk#7] - -(6) Filter [codegen id : 3] -Input [2]: [ss_sold_date_sk#6, ss_item_sk#7] -Condition : (isnotnull(ss_sold_date_sk#6) AND isnotnull(ss_item_sk#7)) - -(7) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_date#9, d_year#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] -ReadSchema: struct - -(8) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#8, d_date#9, d_year#10] - -(9) Filter [codegen id : 1] -Input [3]: [d_date_sk#8, d_date#9, d_year#10] -Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) - -(10) Project [codegen id : 1] -Output [2]: [d_date_sk#8, d_date#9] -Input [3]: [d_date_sk#8, d_date#9, d_year#10] - -(11) BroadcastExchange -Input [2]: [d_date_sk#8, d_date#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] - -(12) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#6] -Right keys [1]: [d_date_sk#8] -Join condition: None - -(13) Project [codegen id : 3] -Output [2]: [ss_item_sk#7, d_date#9] -Input [4]: [ss_sold_date_sk#6, ss_item_sk#7, d_date_sk#8, d_date#9] - -(14) Scan parquet default.item -Output [2]: [i_item_sk#12, i_item_desc#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(15) ColumnarToRow [codegen id : 2] -Input [2]: [i_item_sk#12, i_item_desc#13] - -(16) Filter [codegen id : 2] -Input [2]: [i_item_sk#12, i_item_desc#13] -Condition : isnotnull(i_item_sk#12) - -(17) BroadcastExchange -Input [2]: [i_item_sk#12, i_item_desc#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] - -(18) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_item_sk#7] -Right keys [1]: [i_item_sk#12] -Join condition: None - -(19) Project [codegen id : 3] -Output [3]: [d_date#9, i_item_sk#12, i_item_desc#13] -Input [4]: [ss_item_sk#7, d_date#9, i_item_sk#12, i_item_desc#13] - -(20) HashAggregate [codegen id : 3] -Input [3]: [d_date#9, i_item_sk#12, i_item_desc#13] -Keys [3]: [substr(i_item_desc#13, 1, 30) AS substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#16] -Results [4]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, count#17] - -(21) Exchange -Input [4]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, count#17] -Arguments: hashpartitioning(substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, 5), true, [id=#18] - -(22) HashAggregate [codegen id : 4] -Input [4]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, count#17] -Keys [3]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#19] -Results [2]: [i_item_sk#12 AS item_sk#20, count(1)#19 AS count(1)#21] - -(23) Filter [codegen id : 4] -Input [2]: [item_sk#20, count(1)#21] -Condition : (count(1)#21 > 4) - -(24) Project [codegen id : 4] -Output [1]: [item_sk#20] -Input [2]: [item_sk#20, count(1)#21] - -(25) BroadcastExchange -Input [1]: [item_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] - -(26) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [cs_item_sk#3] -Right keys [1]: [item_sk#20] -Join condition: None - -(27) Project [codegen id : 13] -Output [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] -Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] - -(28) Scan parquet default.store_sales -Output [3]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_customer_sk)] -ReadSchema: struct - -(29) ColumnarToRow [codegen id : 6] -Input [3]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] - -(30) Filter [codegen id : 6] -Input [3]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] -Condition : isnotnull(ss_customer_sk#23) - -(31) Scan parquet default.customer -Output [1]: [c_customer_sk#26] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk)] -ReadSchema: struct - -(32) ColumnarToRow [codegen id : 5] -Input [1]: [c_customer_sk#26] - -(33) Filter [codegen id : 5] -Input [1]: [c_customer_sk#26] -Condition : isnotnull(c_customer_sk#26) - -(34) BroadcastExchange -Input [1]: [c_customer_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] - -(35) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_customer_sk#23] -Right keys [1]: [c_customer_sk#26] -Join condition: None - -(36) Project [codegen id : 6] -Output [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] -Input [4]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] - -(37) HashAggregate [codegen id : 6] -Input [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] -Keys [1]: [c_customer_sk#26] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#28, isEmpty#29] -Results [3]: [c_customer_sk#26, sum#30, isEmpty#31] - -(38) Exchange -Input [3]: [c_customer_sk#26, sum#30, isEmpty#31] -Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#32] - -(39) HashAggregate [codegen id : 7] -Input [3]: [c_customer_sk#26, sum#30, isEmpty#31] -Keys [1]: [c_customer_sk#26] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#33] -Results [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#33 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] - -(40) Filter [codegen id : 7] -Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#35, [id=#36] as decimal(32,6)))), DecimalType(38,8), true))) - -(41) Project [codegen id : 7] -Output [1]: [c_customer_sk#26] -Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] - -(42) BroadcastExchange -Input [1]: [c_customer_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#37] - -(43) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [cs_bill_customer_sk#2] -Right keys [1]: [c_customer_sk#26] -Join condition: None - -(44) Scan parquet default.customer -Output [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk)] -ReadSchema: struct - -(45) ColumnarToRow [codegen id : 11] -Input [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] - -(46) Filter [codegen id : 11] -Input [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] -Condition : isnotnull(c_customer_sk#26) - -(47) ReusedExchange [Reuses operator id: 38] -Output [3]: [c_customer_sk#26, sum#30, isEmpty#31] - -(48) HashAggregate [codegen id : 10] -Input [3]: [c_customer_sk#26, sum#30, isEmpty#31] -Keys [1]: [c_customer_sk#26] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#33] -Results [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#33 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] - -(49) Filter [codegen id : 10] -Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#35, [id=#36] as decimal(32,6)))), DecimalType(38,8), true))) - -(50) Project [codegen id : 10] -Output [1]: [c_customer_sk#26 AS c_customer_sk#26#40] -Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] - -(51) BroadcastExchange -Input [1]: [c_customer_sk#26#40] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#41] - -(52) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [c_customer_sk#26] -Right keys [1]: [c_customer_sk#26#40] -Join condition: None - -(53) BroadcastExchange -Input [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#42] - -(54) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [cs_bill_customer_sk#2] -Right keys [1]: [c_customer_sk#26] -Join condition: None - -(55) Project [codegen id : 13] -Output [5]: [cs_sold_date_sk#1, cs_quantity#4, cs_list_price#5, c_first_name#38, c_last_name#39] -Input [7]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5, c_customer_sk#26, c_first_name#38, c_last_name#39] - -(56) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_year#10, d_moy#43] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] -ReadSchema: struct - -(57) ColumnarToRow [codegen id : 12] -Input [3]: [d_date_sk#8, d_year#10, d_moy#43] - -(58) Filter [codegen id : 12] -Input [3]: [d_date_sk#8, d_year#10, d_moy#43] -Condition : ((((isnotnull(d_year#10) AND isnotnull(d_moy#43)) AND (d_year#10 = 2000)) AND (d_moy#43 = 2)) AND isnotnull(d_date_sk#8)) - -(59) Project [codegen id : 12] -Output [1]: [d_date_sk#8] -Input [3]: [d_date_sk#8, d_year#10, d_moy#43] - -(60) BroadcastExchange -Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#44] - -(61) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#8] -Join condition: None - -(62) Project [codegen id : 13] -Output [4]: [cs_quantity#4, cs_list_price#5, c_first_name#38, c_last_name#39] -Input [6]: [cs_sold_date_sk#1, cs_quantity#4, cs_list_price#5, c_first_name#38, c_last_name#39, d_date_sk#8] - -(63) HashAggregate [codegen id : 13] -Input [4]: [cs_quantity#4, cs_list_price#5, c_first_name#38, c_last_name#39] -Keys [2]: [c_last_name#39, c_first_name#38] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#45, isEmpty#46] -Results [4]: [c_last_name#39, c_first_name#38, sum#47, isEmpty#48] - -(64) Exchange -Input [4]: [c_last_name#39, c_first_name#38, sum#47, isEmpty#48] -Arguments: hashpartitioning(c_last_name#39, c_first_name#38, 5), true, [id=#49] - -(65) HashAggregate [codegen id : 14] -Input [4]: [c_last_name#39, c_first_name#38, sum#47, isEmpty#48] -Keys [2]: [c_last_name#39, c_first_name#38] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))#50] -Results [3]: [c_last_name#39, c_first_name#38, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))#50 AS sales#51] - -(66) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#52, ws_item_sk#53, ws_bill_customer_sk#54, ws_quantity#55, ws_list_price#56] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(67) ColumnarToRow [codegen id : 27] -Input [5]: [ws_sold_date_sk#52, ws_item_sk#53, ws_bill_customer_sk#54, ws_quantity#55, ws_list_price#56] - -(68) Filter [codegen id : 27] -Input [5]: [ws_sold_date_sk#52, ws_item_sk#53, ws_bill_customer_sk#54, ws_quantity#55, ws_list_price#56] -Condition : (isnotnull(ws_bill_customer_sk#54) AND isnotnull(ws_sold_date_sk#52)) - -(69) ReusedExchange [Reuses operator id: 25] -Output [1]: [item_sk#20] - -(70) BroadcastHashJoin [codegen id : 27] -Left keys [1]: [ws_item_sk#53] -Right keys [1]: [item_sk#20] -Join condition: None - -(71) Project [codegen id : 27] -Output [4]: [ws_sold_date_sk#52, ws_bill_customer_sk#54, ws_quantity#55, ws_list_price#56] -Input [5]: [ws_sold_date_sk#52, ws_item_sk#53, ws_bill_customer_sk#54, ws_quantity#55, ws_list_price#56] - -(72) ReusedExchange [Reuses operator id: 38] -Output [3]: [c_customer_sk#26, sum#57, isEmpty#58] - -(73) HashAggregate [codegen id : 21] -Input [3]: [c_customer_sk#26, sum#57, isEmpty#58] -Keys [1]: [c_customer_sk#26] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#59] -Results [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#59 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60] - -(74) Filter [codegen id : 21] -Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#35, [id=#36] as decimal(32,6)))), DecimalType(38,8), true))) - -(75) Project [codegen id : 21] -Output [1]: [c_customer_sk#26] -Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60] - -(76) BroadcastExchange -Input [1]: [c_customer_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#61] - -(77) BroadcastHashJoin [codegen id : 27] -Left keys [1]: [ws_bill_customer_sk#54] -Right keys [1]: [c_customer_sk#26] -Join condition: None - -(78) Scan parquet default.customer -Output [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk)] -ReadSchema: struct - -(79) ColumnarToRow [codegen id : 25] -Input [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] - -(80) Filter [codegen id : 25] -Input [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] -Condition : isnotnull(c_customer_sk#26) - -(81) ReusedExchange [Reuses operator id: 38] -Output [3]: [c_customer_sk#26, sum#57, isEmpty#58] - -(82) HashAggregate [codegen id : 24] -Input [3]: [c_customer_sk#26, sum#57, isEmpty#58] -Keys [1]: [c_customer_sk#26] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#59] -Results [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#59 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60] - -(83) Filter [codegen id : 24] -Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#35, [id=#36] as decimal(32,6)))), DecimalType(38,8), true))) - -(84) Project [codegen id : 24] -Output [1]: [c_customer_sk#26 AS c_customer_sk#26#62] -Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60] - -(85) BroadcastExchange -Input [1]: [c_customer_sk#26#62] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#63] - -(86) BroadcastHashJoin [codegen id : 25] -Left keys [1]: [c_customer_sk#26] -Right keys [1]: [c_customer_sk#26#62] -Join condition: None - -(87) BroadcastExchange -Input [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#64] - -(88) BroadcastHashJoin [codegen id : 27] -Left keys [1]: [ws_bill_customer_sk#54] -Right keys [1]: [c_customer_sk#26] -Join condition: None - -(89) Project [codegen id : 27] -Output [5]: [ws_sold_date_sk#52, ws_quantity#55, ws_list_price#56, c_first_name#38, c_last_name#39] -Input [7]: [ws_sold_date_sk#52, ws_bill_customer_sk#54, ws_quantity#55, ws_list_price#56, c_customer_sk#26, c_first_name#38, c_last_name#39] - -(90) ReusedExchange [Reuses operator id: 60] -Output [1]: [d_date_sk#8] - -(91) BroadcastHashJoin [codegen id : 27] -Left keys [1]: [ws_sold_date_sk#52] -Right keys [1]: [d_date_sk#8] -Join condition: None - -(92) Project [codegen id : 27] -Output [4]: [ws_quantity#55, ws_list_price#56, c_first_name#38, c_last_name#39] -Input [6]: [ws_sold_date_sk#52, ws_quantity#55, ws_list_price#56, c_first_name#38, c_last_name#39, d_date_sk#8] - -(93) HashAggregate [codegen id : 27] -Input [4]: [ws_quantity#55, ws_list_price#56, c_first_name#38, c_last_name#39] -Keys [2]: [c_last_name#39, c_first_name#38] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#55 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#56 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#65, isEmpty#66] -Results [4]: [c_last_name#39, c_first_name#38, sum#67, isEmpty#68] - -(94) Exchange -Input [4]: [c_last_name#39, c_first_name#38, sum#67, isEmpty#68] -Arguments: hashpartitioning(c_last_name#39, c_first_name#38, 5), true, [id=#69] - -(95) HashAggregate [codegen id : 28] -Input [4]: [c_last_name#39, c_first_name#38, sum#67, isEmpty#68] -Keys [2]: [c_last_name#39, c_first_name#38] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#55 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#56 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#55 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#56 as decimal(12,2)))), DecimalType(18,2), true))#70] -Results [3]: [c_last_name#39, c_first_name#38, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#55 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#56 as decimal(12,2)))), DecimalType(18,2), true))#70 AS sales#71] - -(96) Union - -(97) TakeOrderedAndProject -Input [3]: [c_last_name#39, c_first_name#38, sales#51] -Arguments: 100, [c_last_name#39 ASC NULLS FIRST, c_first_name#38 ASC NULLS FIRST, sales#51 ASC NULLS FIRST], [c_last_name#39, c_first_name#38, sales#51] - -===== Subqueries ===== - -Subquery:1 Hosting operator id = 40 Hosting Expression = Subquery scalar-subquery#35, [id=#36] -* HashAggregate (119) -+- Exchange (118) - +- * HashAggregate (117) - +- * HashAggregate (116) - +- Exchange (115) - +- * HashAggregate (114) - +- * Project (113) - +- * BroadcastHashJoin Inner BuildRight (112) - :- * Project (106) - : +- * BroadcastHashJoin Inner BuildRight (105) - : :- * Filter (100) - : : +- * ColumnarToRow (99) - : : +- Scan parquet default.store_sales (98) - : +- BroadcastExchange (104) - : +- * Filter (103) - : +- * ColumnarToRow (102) - : +- Scan parquet default.customer (101) - +- BroadcastExchange (111) - +- * Project (110) - +- * Filter (109) - +- * ColumnarToRow (108) - +- Scan parquet default.date_dim (107) - - -(98) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(99) ColumnarToRow [codegen id : 3] -Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] - -(100) Filter [codegen id : 3] -Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] -Condition : (isnotnull(ss_customer_sk#23) AND isnotnull(ss_sold_date_sk#6)) - -(101) Scan parquet default.customer -Output [1]: [c_customer_sk#26] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk)] -ReadSchema: struct - -(102) ColumnarToRow [codegen id : 1] -Input [1]: [c_customer_sk#26] - -(103) Filter [codegen id : 1] -Input [1]: [c_customer_sk#26] -Condition : isnotnull(c_customer_sk#26) - -(104) BroadcastExchange -Input [1]: [c_customer_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#72] - -(105) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_customer_sk#23] -Right keys [1]: [c_customer_sk#26] -Join condition: None - -(106) Project [codegen id : 3] -Output [4]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] -Input [5]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] - -(107) Scan parquet default.date_dim -Output [2]: [d_date_sk#8, d_year#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] -ReadSchema: struct - -(108) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#8, d_year#10] - -(109) Filter [codegen id : 2] -Input [2]: [d_date_sk#8, d_year#10] -Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) - -(110) Project [codegen id : 2] -Output [1]: [d_date_sk#8] -Input [2]: [d_date_sk#8, d_year#10] - -(111) BroadcastExchange -Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#73] - -(112) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#6] -Right keys [1]: [d_date_sk#8] -Join condition: None - -(113) Project [codegen id : 3] -Output [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] -Input [5]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26, d_date_sk#8] - -(114) HashAggregate [codegen id : 3] -Input [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] -Keys [1]: [c_customer_sk#26] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#74, isEmpty#75] -Results [3]: [c_customer_sk#26, sum#76, isEmpty#77] - -(115) Exchange -Input [3]: [c_customer_sk#26, sum#76, isEmpty#77] -Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#78] - -(116) HashAggregate [codegen id : 4] -Input [3]: [c_customer_sk#26, sum#76, isEmpty#77] -Keys [1]: [c_customer_sk#26] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#79] -Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#79 AS csales#80] - -(117) HashAggregate [codegen id : 4] -Input [1]: [csales#80] -Keys: [] -Functions [1]: [partial_max(csales#80)] -Aggregate Attributes [1]: [max#81] -Results [1]: [max#82] - -(118) Exchange -Input [1]: [max#82] -Arguments: SinglePartition, true, [id=#83] - -(119) HashAggregate [codegen id : 5] -Input [1]: [max#82] -Keys: [] -Functions [1]: [max(csales#80)] -Aggregate Attributes [1]: [max(csales#80)#84] -Results [1]: [max(csales#80)#84 AS tpcds_cmax#85] - -Subquery:2 Hosting operator id = 49 Hosting Expression = ReusedSubquery Subquery scalar-subquery#35, [id=#36] - -Subquery:3 Hosting operator id = 74 Hosting Expression = ReusedSubquery Subquery scalar-subquery#35, [id=#36] - -Subquery:4 Hosting operator id = 83 Hosting Expression = ReusedSubquery Subquery scalar-subquery#35, [id=#36] - - +TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_name#2 ASC NULLS FIRST,sales#3 ASC NULLS FIRST], output=[c_last_name#1,c_first_name#2,sales#3]) ++- Union + :- *(14) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2)))]) + : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, 5) + : +- *(13) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2)))]) + : +- *(13) Project [cs_quantity#4, cs_list_price#5, c_first_name#2, c_last_name#1] + : +- *(13) BroadcastHashJoin [cs_sold_date_sk#6], [d_date_sk#7], Inner, BuildRight + : :- *(13) Project [cs_sold_date_sk#6, cs_quantity#4, cs_list_price#5, c_first_name#2, c_last_name#1] + : : +- *(13) BroadcastHashJoin [cs_bill_customer_sk#8], [c_customer_sk#9], Inner, BuildRight + : : :- *(13) BroadcastHashJoin [cs_bill_customer_sk#8], [c_customer_sk#9#10], LeftSemi, BuildRight + : : : :- *(13) Project [cs_sold_date_sk#6, cs_bill_customer_sk#8, cs_quantity#4, cs_list_price#5] + : : : : +- *(13) BroadcastHashJoin [cs_item_sk#11], [item_sk#12], LeftSemi, BuildRight + : : : : :- *(13) Project [cs_sold_date_sk#6, cs_bill_customer_sk#8, cs_item_sk#11, cs_quantity#4, cs_list_price#5] + : : : : : +- *(13) Filter (isnotnull(cs_bill_customer_sk#8) && isnotnull(cs_sold_date_sk#6)) + : : : : : +- *(13) FileScan parquet default.catalog_sales[cs_sold_date_sk#6,cs_bill_customer_sk#8,cs_item_sk#11,cs_quantity#4,cs_list_price#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct 4) + : : : : +- *(4) HashAggregate(keys=[substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17], functions=[count(1)]) + : : : : +- Exchange hashpartitioning(substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17, 5) + : : : : +- *(3) HashAggregate(keys=[substring(i_item_desc#14, 1, 30) AS substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17], functions=[partial_count(1)]) + : : : : +- *(3) Project [d_date#17, i_item_sk#16, i_item_desc#14] + : : : : +- *(3) BroadcastHashJoin [ss_item_sk#18], [i_item_sk#16], Inner, BuildRight + : : : : :- *(3) Project [ss_item_sk#18, d_date#17] + : : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#7], Inner, BuildRight + : : : : : :- *(3) Project [ss_sold_date_sk#19, ss_item_sk#18] + : : : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#19) && isnotnull(ss_item_sk#18)) + : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(1) Project [d_date_sk#7, d_date#17] + : : : : : +- *(1) Filter (d_year#20 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#7)) + : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#7,d_date#17,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [i_item_sk#16, i_item_desc#14] + : : : : +- *(2) Filter isnotnull(i_item_sk#16) + : : : : +- *(2) FileScan parquet default.item[i_item_sk#16,i_item_desc#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(7) Project [c_customer_sk#9 AS c_customer_sk#9#10] + : : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery3144 as decimal(32,6)))), DecimalType(38,8)))) + : : : : +- Subquery subquery3144 + : : : : +- *(5) HashAggregate(keys=[], functions=[max(csales#24)]) + : : : : +- Exchange SinglePartition + : : : : +- *(4) HashAggregate(keys=[], functions=[partial_max(csales#24)]) + : : : : +- *(4) HashAggregate(keys=[c_customer_sk#9], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) + : : : : +- Exchange hashpartitioning(c_customer_sk#9, 5) + : : : : +- *(3) HashAggregate(keys=[c_customer_sk#9], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) + : : : : +- *(3) Project [ss_quantity#21, ss_sales_price#22, c_customer_sk#9] + : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#7], Inner, BuildRight + : : : : :- *(3) Project [ss_sold_date_sk#19, ss_quantity#21, ss_sales_price#22, c_customer_sk#9] + : : : : : +- *(3) BroadcastHashJoin [ss_customer_sk#25], [c_customer_sk#9], Inner, BuildRight + : : : : : :- *(3) Project [ss_sold_date_sk#19, ss_customer_sk#25, ss_quantity#21, ss_sales_price#22] + : : : : : : +- *(3) Filter (isnotnull(ss_customer_sk#25) && isnotnull(ss_sold_date_sk#19)) + : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_customer_sk#25,ss_quantity#21,ss_sales_price#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(1) Project [c_customer_sk#9] + : : : : : +- *(1) Filter isnotnull(c_customer_sk#9) + : : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [d_date_sk#7] + : : : : +- *(2) Filter (d_year#20 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#7)) + : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- *(7) HashAggregate(keys=[c_customer_sk#9], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) + : : : +- Exchange hashpartitioning(c_customer_sk#9, 5) + : : : +- *(6) HashAggregate(keys=[c_customer_sk#9], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) + : : : +- *(6) Project [ss_quantity#21, ss_sales_price#22, c_customer_sk#9] + : : : +- *(6) BroadcastHashJoin [ss_customer_sk#25], [c_customer_sk#9], Inner, BuildRight + : : : :- *(6) Project [ss_customer_sk#25, ss_quantity#21, ss_sales_price#22] + : : : : +- *(6) Filter isnotnull(ss_customer_sk#25) + : : : : +- *(6) FileScan parquet default.store_sales[ss_customer_sk#25,ss_quantity#21,ss_sales_price#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(5) Project [c_customer_sk#9] + : : : +- *(5) Filter isnotnull(c_customer_sk#9) + : : : +- *(5) FileScan parquet default.customer[c_customer_sk#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(11) BroadcastHashJoin [c_customer_sk#9], [c_customer_sk#9#10], LeftSemi, BuildRight + : : :- *(11) Project [c_customer_sk#9, c_first_name#2, c_last_name#1] + : : : +- *(11) Filter isnotnull(c_customer_sk#9) + : : : +- *(11) FileScan parquet default.customer[c_customer_sk#9,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : +- ReusedExchange [c_customer_sk#9#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(12) Project [d_date_sk#7] + : +- *(12) Filter ((((isnotnull(d_year#20) && isnotnull(d_moy#26)) && (d_year#20 = 2000)) && (d_moy#26 = 2)) && isnotnull(d_date_sk#7)) + : +- *(12) FileScan parquet default.date_dim[d_date_sk#7,d_year#20,d_moy#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct + +- *(28) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#28 as decimal(12,2)))), DecimalType(18,2)))]) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, 5) + +- *(27) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#28 as decimal(12,2)))), DecimalType(18,2)))]) + +- *(27) Project [ws_quantity#27, ws_list_price#28, c_first_name#2, c_last_name#1] + +- *(27) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#7], Inner, BuildRight + :- *(27) Project [ws_sold_date_sk#29, ws_quantity#27, ws_list_price#28, c_first_name#2, c_last_name#1] + : +- *(27) BroadcastHashJoin [ws_bill_customer_sk#30], [c_customer_sk#9], Inner, BuildRight + : :- *(27) BroadcastHashJoin [ws_bill_customer_sk#30], [c_customer_sk#9#31], LeftSemi, BuildRight + : : :- *(27) Project [ws_sold_date_sk#29, ws_bill_customer_sk#30, ws_quantity#27, ws_list_price#28] + : : : +- *(27) BroadcastHashJoin [ws_item_sk#32], [item_sk#12], LeftSemi, BuildRight + : : : :- *(27) Project [ws_sold_date_sk#29, ws_item_sk#32, ws_bill_customer_sk#30, ws_quantity#27, ws_list_price#28] + : : : : +- *(27) Filter (isnotnull(ws_bill_customer_sk#30) && isnotnull(ws_sold_date_sk#29)) + : : : : +- *(27) FileScan parquet default.web_sales[ws_sold_date_sk#29,ws_item_sk#32,ws_bill_customer_sk#30,ws_quantity#27,ws_list_price#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - -(2) ColumnarToRow [codegen id : 6] -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] - -(3) Filter [codegen id : 6] -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] -Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) - -(4) Scan parquet default.store_returns -Output [2]: [sr_item_sk#6, sr_ticket_number#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [sr_item_sk#6, sr_ticket_number#7] - -(6) Filter [codegen id : 1] -Input [2]: [sr_item_sk#6, sr_ticket_number#7] -Condition : (isnotnull(sr_ticket_number#7) AND isnotnull(sr_item_sk#6)) - -(7) BroadcastExchange -Input [2]: [sr_item_sk#6, sr_ticket_number#7] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#8] - -(8) BroadcastHashJoin [codegen id : 6] -Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint)] -Right keys [2]: [sr_ticket_number#7, sr_item_sk#6] -Join condition: None - -(9) Project [codegen id : 6] -Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] -Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#6, sr_ticket_number#7] - -(10) Scan parquet default.store -Output [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] - -(12) Filter [codegen id : 2] -Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] -Condition : (((isnotnull(s_market_id#11) AND (s_market_id#11 = 8)) AND isnotnull(s_store_sk#9)) AND isnotnull(s_zip#13)) - -(13) Project [codegen id : 2] -Output [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] -Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] - -(14) BroadcastExchange -Input [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] - -(15) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#9] -Join condition: None - -(16) Project [codegen id : 6] -Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13] -Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] - -(17) Scan parquet default.item -Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale), IsNotNull(i_item_sk)] -ReadSchema: struct - -(18) ColumnarToRow [codegen id : 3] -Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] - -(19) Filter [codegen id : 3] -Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] -Condition : ((isnotnull(i_color#18) AND (i_color#18 = pale)) AND isnotnull(i_item_sk#15)) - -(20) BroadcastExchange -Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] - -(21) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#15] -Join condition: None - -(22) Project [codegen id : 6] -Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] - -(23) Scan parquet default.customer -Output [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 4] -Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] - -(25) Filter [codegen id : 4] -Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] -Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_birth_country#25)) - -(26) BroadcastExchange -Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] - -(27) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#22] -Join condition: None - -(28) Project [codegen id : 6] -Output [12]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25] -Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] - -(29) Scan parquet default.customer_address -Output [3]: [ca_state#27, ca_zip#28, ca_country#29] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] -ReadSchema: struct - -(30) ColumnarToRow [codegen id : 5] -Input [3]: [ca_state#27, ca_zip#28, ca_country#29] - -(31) Filter [codegen id : 5] -Input [3]: [ca_state#27, ca_zip#28, ca_country#29] -Condition : (isnotnull(ca_country#29) AND isnotnull(ca_zip#28)) - -(32) BroadcastExchange -Input [3]: [ca_state#27, ca_zip#28, ca_country#29] -Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [id=#30] - -(33) BroadcastHashJoin [codegen id : 6] -Left keys [2]: [c_birth_country#25, s_zip#13] -Right keys [2]: [upper(ca_country#29), ca_zip#28] -Join condition: None - -(34) Project [codegen id : 6] -Output [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] -Input [15]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25, ca_state#27, ca_zip#28, ca_country#29] - -(35) HashAggregate [codegen id : 6] -Input [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] -Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] -Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum#31] -Results [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#32] - -(36) Exchange -Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#32] -Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), true, [id=#33] - -(37) HashAggregate [codegen id : 7] -Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#32] -Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] -Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#34] -Results [4]: [c_last_name#24, c_first_name#23, s_store_name#10, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#34,17,2) AS netpaid#35] - -(38) HashAggregate [codegen id : 7] -Input [4]: [c_last_name#24, c_first_name#23, s_store_name#10, netpaid#35] -Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#10] -Functions [1]: [partial_sum(netpaid#35)] -Aggregate Attributes [2]: [sum#36, isEmpty#37] -Results [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum#38, isEmpty#39] - -(39) Exchange -Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum#38, isEmpty#39] -Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#10, 5), true, [id=#40] - -(40) HashAggregate [codegen id : 8] -Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum#38, isEmpty#39] -Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#10] -Functions [1]: [sum(netpaid#35)] -Aggregate Attributes [1]: [sum(netpaid#35)#41] -Results [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum(netpaid#35)#41 AS paid#42, sum(netpaid#35)#41 AS sum(netpaid#35)#43] - -(41) Filter [codegen id : 8] -Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, paid#42, sum(netpaid#35)#43] -Condition : (isnotnull(sum(netpaid#35)#43) AND (cast(sum(netpaid#35)#43 as decimal(33,8)) > cast(Subquery scalar-subquery#44, [id=#45] as decimal(33,8)))) - -(42) Project [codegen id : 8] -Output [4]: [c_last_name#24, c_first_name#23, s_store_name#10, paid#42] -Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, paid#42, sum(netpaid#35)#43] - -===== Subqueries ===== - -Subquery:1 Hosting operator id = 41 Hosting Expression = Subquery scalar-subquery#44, [id=#45] -* HashAggregate (82) -+- Exchange (81) - +- * HashAggregate (80) - +- * HashAggregate (79) - +- Exchange (78) - +- * HashAggregate (77) - +- * Project (76) - +- * BroadcastHashJoin Inner BuildRight (75) - :- * Project (70) - : +- * BroadcastHashJoin Inner BuildRight (69) - : :- * Project (64) - : : +- * BroadcastHashJoin Inner BuildRight (63) - : : :- * Project (58) - : : : +- * BroadcastHashJoin Inner BuildRight (57) - : : : :- * Project (51) - : : : : +- * BroadcastHashJoin Inner BuildRight (50) - : : : : :- * Filter (45) - : : : : : +- * ColumnarToRow (44) - : : : : : +- Scan parquet default.store_sales (43) - : : : : +- BroadcastExchange (49) - : : : : +- * Filter (48) - : : : : +- * ColumnarToRow (47) - : : : : +- Scan parquet default.store_returns (46) - : : : +- BroadcastExchange (56) - : : : +- * Project (55) - : : : +- * Filter (54) - : : : +- * ColumnarToRow (53) - : : : +- Scan parquet default.store (52) - : : +- BroadcastExchange (62) - : : +- * Filter (61) - : : +- * ColumnarToRow (60) - : : +- Scan parquet default.item (59) - : +- BroadcastExchange (68) - : +- * Filter (67) - : +- * ColumnarToRow (66) - : +- Scan parquet default.customer (65) - +- BroadcastExchange (74) - +- * Filter (73) - +- * ColumnarToRow (72) - +- Scan parquet default.customer_address (71) - - -(43) Scan parquet default.store_sales -Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] -ReadSchema: struct - -(44) ColumnarToRow [codegen id : 6] -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] - -(45) Filter [codegen id : 6] -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] -Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) - -(46) Scan parquet default.store_returns -Output [2]: [sr_item_sk#6, sr_ticket_number#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] -ReadSchema: struct - -(47) ColumnarToRow [codegen id : 1] -Input [2]: [sr_item_sk#6, sr_ticket_number#7] - -(48) Filter [codegen id : 1] -Input [2]: [sr_item_sk#6, sr_ticket_number#7] -Condition : (isnotnull(sr_ticket_number#7) AND isnotnull(sr_item_sk#6)) - -(49) BroadcastExchange -Input [2]: [sr_item_sk#6, sr_ticket_number#7] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#46] - -(50) BroadcastHashJoin [codegen id : 6] -Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint)] -Right keys [2]: [sr_ticket_number#7, sr_item_sk#6] -Join condition: None - -(51) Project [codegen id : 6] -Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] -Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#6, sr_ticket_number#7] - -(52) Scan parquet default.store -Output [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] -ReadSchema: struct - -(53) ColumnarToRow [codegen id : 2] -Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] - -(54) Filter [codegen id : 2] -Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] -Condition : (((isnotnull(s_market_id#11) AND (s_market_id#11 = 8)) AND isnotnull(s_store_sk#9)) AND isnotnull(s_zip#13)) - -(55) Project [codegen id : 2] -Output [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] -Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] - -(56) BroadcastExchange -Input [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#47] - -(57) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#9] -Join condition: None - -(58) Project [codegen id : 6] -Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13] -Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] - -(59) Scan parquet default.item -Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(60) ColumnarToRow [codegen id : 3] -Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] - -(61) Filter [codegen id : 3] -Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] -Condition : isnotnull(i_item_sk#15) - -(62) BroadcastExchange -Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#48] - -(63) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#15] -Join condition: None - -(64) Project [codegen id : 6] -Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] - -(65) Scan parquet default.customer -Output [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] -ReadSchema: struct - -(66) ColumnarToRow [codegen id : 4] -Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] - -(67) Filter [codegen id : 4] -Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] -Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_birth_country#25)) - -(68) BroadcastExchange -Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#49] - -(69) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#22] -Join condition: None - -(70) Project [codegen id : 6] -Output [12]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25] -Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] - -(71) Scan parquet default.customer_address -Output [3]: [ca_state#27, ca_zip#28, ca_country#29] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] -ReadSchema: struct - -(72) ColumnarToRow [codegen id : 5] -Input [3]: [ca_state#27, ca_zip#28, ca_country#29] - -(73) Filter [codegen id : 5] -Input [3]: [ca_state#27, ca_zip#28, ca_country#29] -Condition : (isnotnull(ca_country#29) AND isnotnull(ca_zip#28)) - -(74) BroadcastExchange -Input [3]: [ca_state#27, ca_zip#28, ca_country#29] -Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [id=#50] - -(75) BroadcastHashJoin [codegen id : 6] -Left keys [2]: [c_birth_country#25, s_zip#13] -Right keys [2]: [upper(ca_country#29), ca_zip#28] -Join condition: None - -(76) Project [codegen id : 6] -Output [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] -Input [15]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25, ca_state#27, ca_zip#28, ca_country#29] - -(77) HashAggregate [codegen id : 6] -Input [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] -Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] -Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum#51] -Results [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#52] - -(78) Exchange -Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#52] -Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), true, [id=#53] - -(79) HashAggregate [codegen id : 7] -Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#52] -Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] -Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#54] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#54,17,2) AS netpaid#35] - -(80) HashAggregate [codegen id : 7] -Input [1]: [netpaid#35] -Keys: [] -Functions [1]: [partial_avg(netpaid#35)] -Aggregate Attributes [2]: [sum#55, count#56] -Results [2]: [sum#57, count#58] - -(81) Exchange -Input [2]: [sum#57, count#58] -Arguments: SinglePartition, true, [id=#59] - -(82) HashAggregate [codegen id : 8] -Input [2]: [sum#57, count#58] -Keys: [] -Functions [1]: [avg(netpaid#35)] -Aggregate Attributes [1]: [avg(netpaid#35)#60] -Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#35)#60)), DecimalType(24,8), true) AS (CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6)))#61] - - +*(8) Project [c_last_name#1, c_first_name#2, s_store_name#3, paid#4] ++- *(8) Filter (isnotnull(sum(netpaid#5)#6) && (cast(sum(netpaid#5)#6 as decimal(33,8)) > cast(Subquery subquery3246 as decimal(33,8)))) + : +- Subquery subquery3246 + : +- *(8) HashAggregate(keys=[], functions=[avg(netpaid#5)]) + : +- Exchange SinglePartition + : +- *(7) HashAggregate(keys=[], functions=[partial_avg(netpaid#5)]) + : +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) + : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 5) + : +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) + : +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] + : +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight + : :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] + : : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + : : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight + : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] + : : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight + : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] + : : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight + : : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] + : : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) + : : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] + : : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) + : : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : : +- *(3) Filter isnotnull(i_item_sk#22) + : : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) + : +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] + : +- *(5) Filter isnotnull(ca_zip#18) + : +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct + +- *(8) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[sum(netpaid#5)]) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, 5) + +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[partial_sum(netpaid#5)]) + +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 5) + +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) + +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] + +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight + :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] + : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight + : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] + : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight + : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] + : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight + : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] + : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) + : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] + : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) + : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : +- *(3) Filter ((isnotnull(i_color#9) && (i_color#9 = pale)) && isnotnull(i_item_sk#22)) + : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale), IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) + +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] + +- *(5) Filter isnotnull(ca_zip#18) + +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/simplified.txt index 58b57dfaa..d8bda2e69 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/simplified.txt @@ -1,125 +1,111 @@ -WholeStageCodegen (8) - Project [c_last_name,c_first_name,s_store_name,paid] +WholeStageCodegen + Project [c_first_name,c_last_name,paid,s_store_name] Filter [sum(netpaid)] Subquery #1 - WholeStageCodegen (8) - HashAggregate [sum,count] [avg(netpaid),(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),sum,count] + WholeStageCodegen + HashAggregate [avg(netpaid),count,sum] [(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),avg(netpaid),count,sum] InputAdapter Exchange #8 - WholeStageCodegen (7) - HashAggregate [netpaid] [sum,count,sum,count] - HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + WholeStageCodegen + HashAggregate [count,count,netpaid,sum,sum] [count,count,sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum,sum(UnscaledValue(ss_net_paid))] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] InputAdapter - Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #9 - WholeStageCodegen (6) - HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] - Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] - BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] - Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #9 + WholeStageCodegen + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid,sum,sum] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] + Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] InputAdapter - BroadcastExchange #10 - WholeStageCodegen (1) - Filter [sr_ticket_number,sr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + BroadcastExchange #3 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number] + Filter [sr_item_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] InputAdapter - BroadcastExchange #11 - WholeStageCodegen (2) - Project [s_store_sk,s_store_name,s_state,s_zip] + BroadcastExchange #4 + WholeStageCodegen + Project [s_state,s_store_name,s_store_sk,s_zip] Filter [s_market_id,s_store_sk,s_zip] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] [s_market_id,s_state,s_store_name,s_store_sk,s_zip] InputAdapter - BroadcastExchange #12 - WholeStageCodegen (3) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + BroadcastExchange #10 + WholeStageCodegen + Project [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + Filter [i_item_sk] + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] InputAdapter - BroadcastExchange #13 - WholeStageCodegen (4) - Filter [c_customer_sk,c_birth_country] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] + BroadcastExchange #6 + WholeStageCodegen + Project [c_birth_country,c_customer_sk,c_first_name,c_last_name] + Filter [c_birth_country,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] [c_birth_country,c_customer_sk,c_first_name,c_last_name] InputAdapter - BroadcastExchange #14 - WholeStageCodegen (5) - Filter [ca_country,ca_zip] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_state,ca_zip,ca_country] - HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum(netpaid),sum,isEmpty] + BroadcastExchange #7 + WholeStageCodegen + Project [ca_country,ca_state,ca_zip] + Filter [ca_zip] + Scan parquet default.customer_address [ca_country,ca_state,ca_zip] [ca_country,ca_state,ca_zip] + HashAggregate [c_first_name,c_last_name,s_store_name,sum,sum(netpaid)] [paid,sum,sum(netpaid),sum(netpaid)] InputAdapter - Exchange [c_last_name,c_first_name,s_store_name] #1 - WholeStageCodegen (7) - HashAggregate [c_last_name,c_first_name,s_store_name,netpaid] [sum,isEmpty,sum,isEmpty] - HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + Exchange [c_first_name,c_last_name,s_store_name] #1 + WholeStageCodegen + HashAggregate [c_first_name,c_last_name,netpaid,s_store_name,sum,sum] [sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum,sum(UnscaledValue(ss_net_paid))] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] InputAdapter - Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #2 - WholeStageCodegen (6) - HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] - Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] - BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] - Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #2 + WholeStageCodegen + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid,sum,sum] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] + Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Filter [sr_ticket_number,sr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number] + Filter [sr_item_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) - Project [s_store_sk,s_store_name,s_state,s_zip] + WholeStageCodegen + Project [s_state,s_store_name,s_store_sk,s_zip] Filter [s_market_id,s_store_sk,s_zip] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] [s_market_id,s_state,s_store_name,s_store_sk,s_zip] InputAdapter BroadcastExchange #5 - WholeStageCodegen (3) - Filter [i_color,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + WholeStageCodegen + Project [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + Filter [i_color,i_item_sk] + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] InputAdapter BroadcastExchange #6 - WholeStageCodegen (4) - Filter [c_customer_sk,c_birth_country] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] + WholeStageCodegen + Project [c_birth_country,c_customer_sk,c_first_name,c_last_name] + Filter [c_birth_country,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] [c_birth_country,c_customer_sk,c_first_name,c_last_name] InputAdapter BroadcastExchange #7 - WholeStageCodegen (5) - Filter [ca_country,ca_zip] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_state,ca_zip,ca_country] + WholeStageCodegen + Project [ca_country,ca_state,ca_zip] + Filter [ca_zip] + Scan parquet default.customer_address [ca_country,ca_state,ca_zip] [ca_country,ca_state,ca_zip] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/explain.txt index 0cf8c16a0..35e443a34 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/explain.txt @@ -1,477 +1,82 @@ == Physical Plan == -* Project (42) -+- * Filter (41) - +- * HashAggregate (40) - +- Exchange (39) - +- * HashAggregate (38) - +- * HashAggregate (37) - +- Exchange (36) - +- * HashAggregate (35) - +- * Project (34) - +- * BroadcastHashJoin Inner BuildRight (33) - :- * Project (28) - : +- * BroadcastHashJoin Inner BuildRight (27) - : :- * Project (22) - : : +- * BroadcastHashJoin Inner BuildRight (21) - : : :- * Project (16) - : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : :- * Project (9) - : : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- BroadcastExchange (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.store_returns (4) - : : : +- BroadcastExchange (14) - : : : +- * Project (13) - : : : +- * Filter (12) - : : : +- * ColumnarToRow (11) - : : : +- Scan parquet default.store (10) - : : +- BroadcastExchange (20) - : : +- * Filter (19) - : : +- * ColumnarToRow (18) - : : +- Scan parquet default.item (17) - : +- BroadcastExchange (26) - : +- * Filter (25) - : +- * ColumnarToRow (24) - : +- Scan parquet default.customer (23) - +- BroadcastExchange (32) - +- * Filter (31) - +- * ColumnarToRow (30) - +- Scan parquet default.customer_address (29) - - -(1) Scan parquet default.store_sales -Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 6] -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] - -(3) Filter [codegen id : 6] -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] -Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) - -(4) Scan parquet default.store_returns -Output [2]: [sr_item_sk#6, sr_ticket_number#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [sr_item_sk#6, sr_ticket_number#7] - -(6) Filter [codegen id : 1] -Input [2]: [sr_item_sk#6, sr_ticket_number#7] -Condition : (isnotnull(sr_ticket_number#7) AND isnotnull(sr_item_sk#6)) - -(7) BroadcastExchange -Input [2]: [sr_item_sk#6, sr_ticket_number#7] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#8] - -(8) BroadcastHashJoin [codegen id : 6] -Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint)] -Right keys [2]: [sr_ticket_number#7, sr_item_sk#6] -Join condition: None - -(9) Project [codegen id : 6] -Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] -Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#6, sr_ticket_number#7] - -(10) Scan parquet default.store -Output [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] - -(12) Filter [codegen id : 2] -Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] -Condition : (((isnotnull(s_market_id#11) AND (s_market_id#11 = 8)) AND isnotnull(s_store_sk#9)) AND isnotnull(s_zip#13)) - -(13) Project [codegen id : 2] -Output [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] -Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] - -(14) BroadcastExchange -Input [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] - -(15) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#9] -Join condition: None - -(16) Project [codegen id : 6] -Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13] -Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] - -(17) Scan parquet default.item -Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_color), EqualTo(i_color,chiffon), IsNotNull(i_item_sk)] -ReadSchema: struct - -(18) ColumnarToRow [codegen id : 3] -Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] - -(19) Filter [codegen id : 3] -Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] -Condition : ((isnotnull(i_color#18) AND (i_color#18 = chiffon)) AND isnotnull(i_item_sk#15)) - -(20) BroadcastExchange -Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] - -(21) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#15] -Join condition: None - -(22) Project [codegen id : 6] -Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] - -(23) Scan parquet default.customer -Output [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 4] -Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] - -(25) Filter [codegen id : 4] -Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] -Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_birth_country#25)) - -(26) BroadcastExchange -Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] - -(27) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#22] -Join condition: None - -(28) Project [codegen id : 6] -Output [12]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25] -Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] - -(29) Scan parquet default.customer_address -Output [3]: [ca_state#27, ca_zip#28, ca_country#29] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] -ReadSchema: struct - -(30) ColumnarToRow [codegen id : 5] -Input [3]: [ca_state#27, ca_zip#28, ca_country#29] - -(31) Filter [codegen id : 5] -Input [3]: [ca_state#27, ca_zip#28, ca_country#29] -Condition : (isnotnull(ca_country#29) AND isnotnull(ca_zip#28)) - -(32) BroadcastExchange -Input [3]: [ca_state#27, ca_zip#28, ca_country#29] -Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [id=#30] - -(33) BroadcastHashJoin [codegen id : 6] -Left keys [2]: [c_birth_country#25, s_zip#13] -Right keys [2]: [upper(ca_country#29), ca_zip#28] -Join condition: None - -(34) Project [codegen id : 6] -Output [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] -Input [15]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25, ca_state#27, ca_zip#28, ca_country#29] - -(35) HashAggregate [codegen id : 6] -Input [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] -Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] -Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum#31] -Results [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#32] - -(36) Exchange -Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#32] -Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), true, [id=#33] - -(37) HashAggregate [codegen id : 7] -Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#32] -Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] -Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#34] -Results [4]: [c_last_name#24, c_first_name#23, s_store_name#10, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#34,17,2) AS netpaid#35] - -(38) HashAggregate [codegen id : 7] -Input [4]: [c_last_name#24, c_first_name#23, s_store_name#10, netpaid#35] -Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#10] -Functions [1]: [partial_sum(netpaid#35)] -Aggregate Attributes [2]: [sum#36, isEmpty#37] -Results [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum#38, isEmpty#39] - -(39) Exchange -Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum#38, isEmpty#39] -Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#10, 5), true, [id=#40] - -(40) HashAggregate [codegen id : 8] -Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum#38, isEmpty#39] -Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#10] -Functions [1]: [sum(netpaid#35)] -Aggregate Attributes [1]: [sum(netpaid#35)#41] -Results [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum(netpaid#35)#41 AS paid#42, sum(netpaid#35)#41 AS sum(netpaid#35)#43] - -(41) Filter [codegen id : 8] -Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, paid#42, sum(netpaid#35)#43] -Condition : (isnotnull(sum(netpaid#35)#43) AND (cast(sum(netpaid#35)#43 as decimal(33,8)) > cast(Subquery scalar-subquery#44, [id=#45] as decimal(33,8)))) - -(42) Project [codegen id : 8] -Output [4]: [c_last_name#24, c_first_name#23, s_store_name#10, paid#42] -Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, paid#42, sum(netpaid#35)#43] - -===== Subqueries ===== - -Subquery:1 Hosting operator id = 41 Hosting Expression = Subquery scalar-subquery#44, [id=#45] -* HashAggregate (82) -+- Exchange (81) - +- * HashAggregate (80) - +- * HashAggregate (79) - +- Exchange (78) - +- * HashAggregate (77) - +- * Project (76) - +- * BroadcastHashJoin Inner BuildRight (75) - :- * Project (70) - : +- * BroadcastHashJoin Inner BuildRight (69) - : :- * Project (64) - : : +- * BroadcastHashJoin Inner BuildRight (63) - : : :- * Project (58) - : : : +- * BroadcastHashJoin Inner BuildRight (57) - : : : :- * Project (51) - : : : : +- * BroadcastHashJoin Inner BuildRight (50) - : : : : :- * Filter (45) - : : : : : +- * ColumnarToRow (44) - : : : : : +- Scan parquet default.store_sales (43) - : : : : +- BroadcastExchange (49) - : : : : +- * Filter (48) - : : : : +- * ColumnarToRow (47) - : : : : +- Scan parquet default.store_returns (46) - : : : +- BroadcastExchange (56) - : : : +- * Project (55) - : : : +- * Filter (54) - : : : +- * ColumnarToRow (53) - : : : +- Scan parquet default.store (52) - : : +- BroadcastExchange (62) - : : +- * Filter (61) - : : +- * ColumnarToRow (60) - : : +- Scan parquet default.item (59) - : +- BroadcastExchange (68) - : +- * Filter (67) - : +- * ColumnarToRow (66) - : +- Scan parquet default.customer (65) - +- BroadcastExchange (74) - +- * Filter (73) - +- * ColumnarToRow (72) - +- Scan parquet default.customer_address (71) - - -(43) Scan parquet default.store_sales -Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] -ReadSchema: struct - -(44) ColumnarToRow [codegen id : 6] -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] - -(45) Filter [codegen id : 6] -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] -Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) - -(46) Scan parquet default.store_returns -Output [2]: [sr_item_sk#6, sr_ticket_number#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] -ReadSchema: struct - -(47) ColumnarToRow [codegen id : 1] -Input [2]: [sr_item_sk#6, sr_ticket_number#7] - -(48) Filter [codegen id : 1] -Input [2]: [sr_item_sk#6, sr_ticket_number#7] -Condition : (isnotnull(sr_ticket_number#7) AND isnotnull(sr_item_sk#6)) - -(49) BroadcastExchange -Input [2]: [sr_item_sk#6, sr_ticket_number#7] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#46] - -(50) BroadcastHashJoin [codegen id : 6] -Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint)] -Right keys [2]: [sr_ticket_number#7, sr_item_sk#6] -Join condition: None - -(51) Project [codegen id : 6] -Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] -Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#6, sr_ticket_number#7] - -(52) Scan parquet default.store -Output [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] -ReadSchema: struct - -(53) ColumnarToRow [codegen id : 2] -Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] - -(54) Filter [codegen id : 2] -Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] -Condition : (((isnotnull(s_market_id#11) AND (s_market_id#11 = 8)) AND isnotnull(s_store_sk#9)) AND isnotnull(s_zip#13)) - -(55) Project [codegen id : 2] -Output [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] -Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] - -(56) BroadcastExchange -Input [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#47] - -(57) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#9] -Join condition: None - -(58) Project [codegen id : 6] -Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13] -Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] - -(59) Scan parquet default.item -Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(60) ColumnarToRow [codegen id : 3] -Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] - -(61) Filter [codegen id : 3] -Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] -Condition : isnotnull(i_item_sk#15) - -(62) BroadcastExchange -Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#48] - -(63) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#15] -Join condition: None - -(64) Project [codegen id : 6] -Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] - -(65) Scan parquet default.customer -Output [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] -ReadSchema: struct - -(66) ColumnarToRow [codegen id : 4] -Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] - -(67) Filter [codegen id : 4] -Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] -Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_birth_country#25)) - -(68) BroadcastExchange -Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#49] - -(69) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#22] -Join condition: None - -(70) Project [codegen id : 6] -Output [12]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25] -Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] - -(71) Scan parquet default.customer_address -Output [3]: [ca_state#27, ca_zip#28, ca_country#29] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] -ReadSchema: struct - -(72) ColumnarToRow [codegen id : 5] -Input [3]: [ca_state#27, ca_zip#28, ca_country#29] - -(73) Filter [codegen id : 5] -Input [3]: [ca_state#27, ca_zip#28, ca_country#29] -Condition : (isnotnull(ca_country#29) AND isnotnull(ca_zip#28)) - -(74) BroadcastExchange -Input [3]: [ca_state#27, ca_zip#28, ca_country#29] -Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [id=#50] - -(75) BroadcastHashJoin [codegen id : 6] -Left keys [2]: [c_birth_country#25, s_zip#13] -Right keys [2]: [upper(ca_country#29), ca_zip#28] -Join condition: None - -(76) Project [codegen id : 6] -Output [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] -Input [15]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25, ca_state#27, ca_zip#28, ca_country#29] - -(77) HashAggregate [codegen id : 6] -Input [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] -Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] -Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum#51] -Results [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#52] - -(78) Exchange -Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#52] -Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), true, [id=#53] - -(79) HashAggregate [codegen id : 7] -Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#52] -Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] -Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#54] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#54,17,2) AS netpaid#35] - -(80) HashAggregate [codegen id : 7] -Input [1]: [netpaid#35] -Keys: [] -Functions [1]: [partial_avg(netpaid#35)] -Aggregate Attributes [2]: [sum#55, count#56] -Results [2]: [sum#57, count#58] - -(81) Exchange -Input [2]: [sum#57, count#58] -Arguments: SinglePartition, true, [id=#59] - -(82) HashAggregate [codegen id : 8] -Input [2]: [sum#57, count#58] -Keys: [] -Functions [1]: [avg(netpaid#35)] -Aggregate Attributes [1]: [avg(netpaid#35)#60] -Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#35)#60)), DecimalType(24,8), true) AS (CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6)))#61] - - +*(8) Project [c_last_name#1, c_first_name#2, s_store_name#3, paid#4] ++- *(8) Filter (isnotnull(sum(netpaid#5)#6) && (cast(sum(netpaid#5)#6 as decimal(33,8)) > cast(Subquery subquery3290 as decimal(33,8)))) + : +- Subquery subquery3290 + : +- *(8) HashAggregate(keys=[], functions=[avg(netpaid#5)]) + : +- Exchange SinglePartition + : +- *(7) HashAggregate(keys=[], functions=[partial_avg(netpaid#5)]) + : +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) + : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 5) + : +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) + : +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] + : +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight + : :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] + : : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + : : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight + : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] + : : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight + : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] + : : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight + : : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] + : : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) + : : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] + : : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) + : : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : : +- *(3) Filter isnotnull(i_item_sk#22) + : : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) + : +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] + : +- *(5) Filter isnotnull(ca_zip#18) + : +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct + +- *(8) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[sum(netpaid#5)]) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, 5) + +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[partial_sum(netpaid#5)]) + +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) + +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 5) + +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) + +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] + +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight + :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] + : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight + : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] + : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight + : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] + : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight + : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] + : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) + : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] + : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) + : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] + : : +- *(3) Filter ((isnotnull(i_color#9) && (i_color#9 = chiffon)) && isnotnull(i_item_sk#22)) + : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_color), EqualTo(i_color,chiffon), IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) + +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] + +- *(5) Filter isnotnull(ca_zip#18) + +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/simplified.txt index 58b57dfaa..d8bda2e69 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/simplified.txt @@ -1,125 +1,111 @@ -WholeStageCodegen (8) - Project [c_last_name,c_first_name,s_store_name,paid] +WholeStageCodegen + Project [c_first_name,c_last_name,paid,s_store_name] Filter [sum(netpaid)] Subquery #1 - WholeStageCodegen (8) - HashAggregate [sum,count] [avg(netpaid),(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),sum,count] + WholeStageCodegen + HashAggregate [avg(netpaid),count,sum] [(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),avg(netpaid),count,sum] InputAdapter Exchange #8 - WholeStageCodegen (7) - HashAggregate [netpaid] [sum,count,sum,count] - HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + WholeStageCodegen + HashAggregate [count,count,netpaid,sum,sum] [count,count,sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum,sum(UnscaledValue(ss_net_paid))] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] InputAdapter - Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #9 - WholeStageCodegen (6) - HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] - Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] - BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] - Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #9 + WholeStageCodegen + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid,sum,sum] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] + Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] InputAdapter - BroadcastExchange #10 - WholeStageCodegen (1) - Filter [sr_ticket_number,sr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + BroadcastExchange #3 + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number] + Filter [sr_item_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] InputAdapter - BroadcastExchange #11 - WholeStageCodegen (2) - Project [s_store_sk,s_store_name,s_state,s_zip] + BroadcastExchange #4 + WholeStageCodegen + Project [s_state,s_store_name,s_store_sk,s_zip] Filter [s_market_id,s_store_sk,s_zip] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] [s_market_id,s_state,s_store_name,s_store_sk,s_zip] InputAdapter - BroadcastExchange #12 - WholeStageCodegen (3) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + BroadcastExchange #10 + WholeStageCodegen + Project [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + Filter [i_item_sk] + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] InputAdapter - BroadcastExchange #13 - WholeStageCodegen (4) - Filter [c_customer_sk,c_birth_country] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] + BroadcastExchange #6 + WholeStageCodegen + Project [c_birth_country,c_customer_sk,c_first_name,c_last_name] + Filter [c_birth_country,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] [c_birth_country,c_customer_sk,c_first_name,c_last_name] InputAdapter - BroadcastExchange #14 - WholeStageCodegen (5) - Filter [ca_country,ca_zip] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_state,ca_zip,ca_country] - HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum(netpaid),sum,isEmpty] + BroadcastExchange #7 + WholeStageCodegen + Project [ca_country,ca_state,ca_zip] + Filter [ca_zip] + Scan parquet default.customer_address [ca_country,ca_state,ca_zip] [ca_country,ca_state,ca_zip] + HashAggregate [c_first_name,c_last_name,s_store_name,sum,sum(netpaid)] [paid,sum,sum(netpaid),sum(netpaid)] InputAdapter - Exchange [c_last_name,c_first_name,s_store_name] #1 - WholeStageCodegen (7) - HashAggregate [c_last_name,c_first_name,s_store_name,netpaid] [sum,isEmpty,sum,isEmpty] - HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] + Exchange [c_first_name,c_last_name,s_store_name] #1 + WholeStageCodegen + HashAggregate [c_first_name,c_last_name,netpaid,s_store_name,sum,sum] [sum,sum] + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum,sum(UnscaledValue(ss_net_paid))] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] InputAdapter - Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #2 - WholeStageCodegen (6) - HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] - Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] - BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] - Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] + Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #2 + WholeStageCodegen + HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid,sum,sum] [sum,sum] + Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] + BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] + Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Filter [sr_ticket_number,sr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number] + Filter [sr_item_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) - Project [s_store_sk,s_store_name,s_state,s_zip] + WholeStageCodegen + Project [s_state,s_store_name,s_store_sk,s_zip] Filter [s_market_id,s_store_sk,s_zip] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] + Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] [s_market_id,s_state,s_store_name,s_store_sk,s_zip] InputAdapter BroadcastExchange #5 - WholeStageCodegen (3) - Filter [i_color,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + WholeStageCodegen + Project [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + Filter [i_color,i_item_sk] + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] InputAdapter BroadcastExchange #6 - WholeStageCodegen (4) - Filter [c_customer_sk,c_birth_country] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] + WholeStageCodegen + Project [c_birth_country,c_customer_sk,c_first_name,c_last_name] + Filter [c_birth_country,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] [c_birth_country,c_customer_sk,c_first_name,c_last_name] InputAdapter BroadcastExchange #7 - WholeStageCodegen (5) - Filter [ca_country,ca_zip] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_state,ca_zip,ca_country] + WholeStageCodegen + Project [ca_country,ca_state,ca_zip] + Filter [ca_zip] + Scan parquet default.customer_address [ca_country,ca_state,ca_zip] [ca_country,ca_state,ca_zip] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/explain.txt index 6bdd709a7..c1f44f753 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/explain.txt @@ -1,269 +1,50 @@ == Physical Plan == -TakeOrderedAndProject (48) -+- * HashAggregate (47) - +- Exchange (46) - +- * HashAggregate (45) - +- * Project (44) - +- * BroadcastHashJoin Inner BuildRight (43) - :- * Project (38) - : +- * BroadcastHashJoin Inner BuildRight (37) - : :- * Project (32) - : : +- * BroadcastHashJoin Inner BuildRight (31) - : : :- * Project (29) - : : : +- * BroadcastHashJoin Inner BuildRight (28) - : : : :- * Project (22) - : : : : +- * BroadcastHashJoin Inner BuildRight (21) - : : : : :- * Project (15) - : : : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : : : :- * Project (9) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : : : : :- * Filter (3) - : : : : : : : +- * ColumnarToRow (2) - : : : : : : : +- Scan parquet default.store_sales (1) - : : : : : : +- BroadcastExchange (7) - : : : : : : +- * Filter (6) - : : : : : : +- * ColumnarToRow (5) - : : : : : : +- Scan parquet default.store_returns (4) - : : : : : +- BroadcastExchange (13) - : : : : : +- * Filter (12) - : : : : : +- * ColumnarToRow (11) - : : : : : +- Scan parquet default.catalog_sales (10) - : : : : +- BroadcastExchange (20) - : : : : +- * Project (19) - : : : : +- * Filter (18) - : : : : +- * ColumnarToRow (17) - : : : : +- Scan parquet default.date_dim (16) - : : : +- BroadcastExchange (27) - : : : +- * Project (26) - : : : +- * Filter (25) - : : : +- * ColumnarToRow (24) - : : : +- Scan parquet default.date_dim (23) - : : +- ReusedExchange (30) - : +- BroadcastExchange (36) - : +- * Filter (35) - : +- * ColumnarToRow (34) - : +- Scan parquet default.store (33) - +- BroadcastExchange (42) - +- * Filter (41) - +- * ColumnarToRow (40) - +- Scan parquet default.item (39) - - -(1) Scan parquet default.store_sales -Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_net_profit#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 8] -Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_net_profit#6] - -(3) Filter [codegen id : 8] -Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_net_profit#6] -Condition : ((((isnotnull(ss_customer_sk#3) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_ticket_number#5)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) - -(4) Scan parquet default.store_returns -Output [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_net_loss#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_returned_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_net_loss#11] - -(6) Filter [codegen id : 1] -Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_net_loss#11] -Condition : (((isnotnull(sr_customer_sk#9) AND isnotnull(sr_item_sk#8)) AND isnotnull(sr_ticket_number#10)) AND isnotnull(sr_returned_date_sk#7)) - -(7) BroadcastExchange -Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_net_loss#11] -Arguments: HashedRelationBroadcastMode(List(input[2, bigint, false], input[1, bigint, false], input[3, bigint, false]),false), [id=#12] - -(8) BroadcastHashJoin [codegen id : 8] -Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] -Right keys [3]: [sr_customer_sk#9, sr_item_sk#8, sr_ticket_number#10] -Join condition: None - -(9) Project [codegen id : 8] -Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_net_loss#11] -Input [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_net_profit#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_net_loss#11] - -(10) Scan parquet default.catalog_sales -Output [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16] - -(12) Filter [codegen id : 2] -Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16] -Condition : ((isnotnull(cs_bill_customer_sk#14) AND isnotnull(cs_item_sk#15)) AND isnotnull(cs_sold_date_sk#13)) - -(13) BroadcastExchange -Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint), cast(input[2, int, false] as bigint)),false), [id=#17] - -(14) BroadcastHashJoin [codegen id : 8] -Left keys [2]: [sr_customer_sk#9, sr_item_sk#8] -Right keys [2]: [cast(cs_bill_customer_sk#14 as bigint), cast(cs_item_sk#15 as bigint)] -Join condition: None - -(15) Project [codegen id : 8] -Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_returned_date_sk#7, sr_net_loss#11, cs_sold_date_sk#13, cs_net_profit#16] -Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_net_loss#11, cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16] - -(16) Scan parquet default.date_dim -Output [3]: [d_date_sk#18, d_year#19, d_moy#20] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,4), EqualTo(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 3] -Input [3]: [d_date_sk#18, d_year#19, d_moy#20] - -(18) Filter [codegen id : 3] -Input [3]: [d_date_sk#18, d_year#19, d_moy#20] -Condition : ((((isnotnull(d_moy#20) AND isnotnull(d_year#19)) AND (d_moy#20 = 4)) AND (d_year#19 = 2001)) AND isnotnull(d_date_sk#18)) - -(19) Project [codegen id : 3] -Output [1]: [d_date_sk#18] -Input [3]: [d_date_sk#18, d_year#19, d_moy#20] - -(20) BroadcastExchange -Input [1]: [d_date_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] - -(21) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#18] -Join condition: None - -(22) Project [codegen id : 8] -Output [7]: [ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_returned_date_sk#7, sr_net_loss#11, cs_sold_date_sk#13, cs_net_profit#16] -Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_returned_date_sk#7, sr_net_loss#11, cs_sold_date_sk#13, cs_net_profit#16, d_date_sk#18] - -(23) Scan parquet default.date_dim -Output [3]: [d_date_sk#22, d_year#23, d_moy#24] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), EqualTo(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 4] -Input [3]: [d_date_sk#22, d_year#23, d_moy#24] - -(25) Filter [codegen id : 4] -Input [3]: [d_date_sk#22, d_year#23, d_moy#24] -Condition : (((((isnotnull(d_moy#24) AND isnotnull(d_year#23)) AND (d_moy#24 >= 4)) AND (d_moy#24 <= 10)) AND (d_year#23 = 2001)) AND isnotnull(d_date_sk#22)) - -(26) Project [codegen id : 4] -Output [1]: [d_date_sk#22] -Input [3]: [d_date_sk#22, d_year#23, d_moy#24] - -(27) BroadcastExchange -Input [1]: [d_date_sk#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] - -(28) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [sr_returned_date_sk#7] -Right keys [1]: [cast(d_date_sk#22 as bigint)] -Join condition: None - -(29) Project [codegen id : 8] -Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_net_loss#11, cs_sold_date_sk#13, cs_net_profit#16] -Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_returned_date_sk#7, sr_net_loss#11, cs_sold_date_sk#13, cs_net_profit#16, d_date_sk#22] - -(30) ReusedExchange [Reuses operator id: 27] -Output [1]: [d_date_sk#26] - -(31) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_sold_date_sk#13] -Right keys [1]: [d_date_sk#26] -Join condition: None - -(32) Project [codegen id : 8] -Output [5]: [ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_net_loss#11, cs_net_profit#16] -Input [7]: [ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_net_loss#11, cs_sold_date_sk#13, cs_net_profit#16, d_date_sk#26] - -(33) Scan parquet default.store -Output [3]: [s_store_sk#27, s_store_id#28, s_store_name#29] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct - -(34) ColumnarToRow [codegen id : 6] -Input [3]: [s_store_sk#27, s_store_id#28, s_store_name#29] - -(35) Filter [codegen id : 6] -Input [3]: [s_store_sk#27, s_store_id#28, s_store_name#29] -Condition : isnotnull(s_store_sk#27) - -(36) BroadcastExchange -Input [3]: [s_store_sk#27, s_store_id#28, s_store_name#29] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#30] - -(37) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_store_sk#4] -Right keys [1]: [s_store_sk#27] -Join condition: None - -(38) Project [codegen id : 8] -Output [6]: [ss_item_sk#2, ss_net_profit#6, sr_net_loss#11, cs_net_profit#16, s_store_id#28, s_store_name#29] -Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_net_loss#11, cs_net_profit#16, s_store_sk#27, s_store_id#28, s_store_name#29] - -(39) Scan parquet default.item -Output [3]: [i_item_sk#31, i_item_id#32, i_item_desc#33] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(40) ColumnarToRow [codegen id : 7] -Input [3]: [i_item_sk#31, i_item_id#32, i_item_desc#33] - -(41) Filter [codegen id : 7] -Input [3]: [i_item_sk#31, i_item_id#32, i_item_desc#33] -Condition : isnotnull(i_item_sk#31) - -(42) BroadcastExchange -Input [3]: [i_item_sk#31, i_item_id#32, i_item_desc#33] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#34] - -(43) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#31] -Join condition: None - -(44) Project [codegen id : 8] -Output [7]: [ss_net_profit#6, sr_net_loss#11, cs_net_profit#16, s_store_id#28, s_store_name#29, i_item_id#32, i_item_desc#33] -Input [9]: [ss_item_sk#2, ss_net_profit#6, sr_net_loss#11, cs_net_profit#16, s_store_id#28, s_store_name#29, i_item_sk#31, i_item_id#32, i_item_desc#33] - -(45) HashAggregate [codegen id : 8] -Input [7]: [ss_net_profit#6, sr_net_loss#11, cs_net_profit#16, s_store_id#28, s_store_name#29, i_item_id#32, i_item_desc#33] -Keys [4]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29] -Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#6)), partial_sum(UnscaledValue(sr_net_loss#11)), partial_sum(UnscaledValue(cs_net_profit#16))] -Aggregate Attributes [3]: [sum#35, sum#36, sum#37] -Results [7]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, sum#38, sum#39, sum#40] - -(46) Exchange -Input [7]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, sum#38, sum#39, sum#40] -Arguments: hashpartitioning(i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, 5), true, [id=#41] - -(47) HashAggregate [codegen id : 9] -Input [7]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, sum#38, sum#39, sum#40] -Keys [4]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29] -Functions [3]: [sum(UnscaledValue(ss_net_profit#6)), sum(UnscaledValue(sr_net_loss#11)), sum(UnscaledValue(cs_net_profit#16))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#6))#42, sum(UnscaledValue(sr_net_loss#11))#43, sum(UnscaledValue(cs_net_profit#16))#44] -Results [7]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, MakeDecimal(sum(UnscaledValue(ss_net_profit#6))#42,17,2) AS store_sales_profit#45, MakeDecimal(sum(UnscaledValue(sr_net_loss#11))#43,17,2) AS store_returns_loss#46, MakeDecimal(sum(UnscaledValue(cs_net_profit#16))#44,17,2) AS catalog_sales_profit#47] - -(48) TakeOrderedAndProject -Input [7]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, store_sales_profit#45, store_returns_loss#46, catalog_sales_profit#47] -Arguments: 100, [i_item_id#32 ASC NULLS FIRST, i_item_desc#33 ASC NULLS FIRST, s_store_id#28 ASC NULLS FIRST, s_store_name#29 ASC NULLS FIRST], [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, store_sales_profit#45, store_returns_loss#46, catalog_sales_profit#47] - +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST,s_store_id#3 ASC NULLS FIRST,s_store_name#4 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,s_store_id#3,s_store_name#4,store_sales_profit#5,store_returns_loss#6,catalog_sales_profit#7]) ++- *(9) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4], functions=[sum(UnscaledValue(ss_net_profit#8)), sum(UnscaledValue(sr_net_loss#9)), sum(UnscaledValue(cs_net_profit#10))]) + +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4, 5) + +- *(8) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4], functions=[partial_sum(UnscaledValue(ss_net_profit#8)), partial_sum(UnscaledValue(sr_net_loss#9)), partial_sum(UnscaledValue(cs_net_profit#10))]) + +- *(8) Project [ss_net_profit#8, sr_net_loss#9, cs_net_profit#10, s_store_id#3, s_store_name#4, i_item_id#1, i_item_desc#2] + +- *(8) BroadcastHashJoin [ss_item_sk#11], [i_item_sk#12], Inner, BuildRight + :- *(8) Project [ss_item_sk#11, ss_net_profit#8, sr_net_loss#9, cs_net_profit#10, s_store_id#3, s_store_name#4] + : +- *(8) BroadcastHashJoin [ss_store_sk#13], [s_store_sk#14], Inner, BuildRight + : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_net_loss#9, cs_net_profit#10] + : : +- *(8) BroadcastHashJoin [cs_sold_date_sk#15], [d_date_sk#16], Inner, BuildRight + : : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_net_loss#9, cs_sold_date_sk#15, cs_net_profit#10] + : : : +- *(8) BroadcastHashJoin [sr_returned_date_sk#17], [cast(d_date_sk#18 as bigint)], Inner, BuildRight + : : : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_returned_date_sk#17, sr_net_loss#9, cs_sold_date_sk#15, cs_net_profit#10] + : : : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_returned_date_sk#17, sr_net_loss#9, cs_sold_date_sk#15, cs_net_profit#10] + : : : : : +- *(8) BroadcastHashJoin [sr_customer_sk#21, sr_item_sk#22], [cast(cs_bill_customer_sk#23 as bigint), cast(cs_item_sk#24 as bigint)], Inner, BuildRight + : : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_returned_date_sk#17, sr_item_sk#22, sr_customer_sk#21, sr_net_loss#9] + : : : : : : +- *(8) BroadcastHashJoin [cast(ss_customer_sk#25 as bigint), cast(ss_item_sk#11 as bigint), cast(ss_ticket_number#26 as bigint)], [sr_customer_sk#21, sr_item_sk#22, sr_ticket_number#27], Inner, BuildRight + : : : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_customer_sk#25, ss_store_sk#13, ss_ticket_number#26, ss_net_profit#8] + : : : : : : : +- *(8) Filter ((((isnotnull(ss_ticket_number#26) && isnotnull(ss_item_sk#11)) && isnotnull(ss_customer_sk#25)) && isnotnull(ss_sold_date_sk#19)) && isnotnull(ss_store_sk#13)) + : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#11,ss_customer_sk#25,ss_store_sk#13,ss_ticket_number#26,ss_net_profit#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(3) Project [d_date_sk#20] + : : : : +- *(3) Filter ((((isnotnull(d_moy#28) && isnotnull(d_year#29)) && (d_moy#28 = 4)) && (d_year#29 = 2001)) && isnotnull(d_date_sk#20)) + : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_year#29,d_moy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,4), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [d_date_sk#18] + : : : +- *(4) Filter (((((isnotnull(d_year#30) && isnotnull(d_moy#31)) && (d_moy#31 >= 4)) && (d_moy#31 <= 10)) && (d_year#30 = 2001)) && isnotnull(d_date_sk#18)) + : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#30,d_moy#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(5) Project [d_date_sk#16] + : : +- *(5) Filter (((((isnotnull(d_year#32) && isnotnull(d_moy#33)) && (d_moy#33 >= 4)) && (d_moy#33 <= 10)) && (d_year#32 = 2001)) && isnotnull(d_date_sk#16)) + : : +- *(5) FileScan parquet default.date_dim[d_date_sk#16,d_year#32,d_moy#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [s_store_sk#14, s_store_id#3, s_store_name#4] + : +- *(6) Filter isnotnull(s_store_sk#14) + : +- *(6) FileScan parquet default.store[s_store_sk#14,s_store_id#3,s_store_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) Project [i_item_sk#12, i_item_id#1, i_item_desc#2] + +- *(7) Filter isnotnull(i_item_sk#12) + +- *(7) FileScan parquet default.item[i_item_sk#12,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/simplified.txt index 4a40bdaff..df34b61ae 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/simplified.txt @@ -1,71 +1,66 @@ -TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_profit,store_returns_loss,catalog_sales_profit] - WholeStageCodegen (9) - HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,sum,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(cs_net_profit)),store_sales_profit,store_returns_loss,catalog_sales_profit,sum,sum,sum] +TakeOrderedAndProject [catalog_sales_profit,i_item_desc,i_item_id,s_store_id,s_store_name,store_returns_loss,store_sales_profit] + WholeStageCodegen + HashAggregate [i_item_desc,i_item_id,s_store_id,s_store_name,sum,sum,sum,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(ss_net_profit))] [catalog_sales_profit,store_returns_loss,store_sales_profit,sum,sum,sum,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(ss_net_profit))] InputAdapter - Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 - WholeStageCodegen (8) - HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,ss_net_profit,sr_net_loss,cs_net_profit] [sum,sum,sum,sum,sum,sum] - Project [ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_net_profit] + Exchange [i_item_desc,i_item_id,s_store_id,s_store_name] #1 + WholeStageCodegen + HashAggregate [cs_net_profit,i_item_desc,i_item_id,s_store_id,s_store_name,sr_net_loss,ss_net_profit,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [cs_net_profit,i_item_desc,i_item_id,s_store_id,s_store_name,sr_net_loss,ss_net_profit] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [cs_net_profit,s_store_id,s_store_name,sr_net_loss,ss_item_sk,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [cs_net_profit,sr_net_loss,ss_item_sk,ss_net_profit,ss_store_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_sold_date_sk,cs_net_profit] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [ss_item_sk,ss_store_sk,ss_net_profit,sr_returned_date_sk,sr_net_loss,cs_sold_date_sk,cs_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_net_profit,sr_returned_date_sk,sr_net_loss,cs_sold_date_sk,cs_net_profit] - BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_net_profit,sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_net_loss] - BroadcastHashJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit] + Project [cs_net_profit,cs_sold_date_sk,sr_net_loss,ss_item_sk,ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [cs_net_profit,cs_sold_date_sk,sr_net_loss,sr_returned_date_sk,ss_item_sk,ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [cs_net_profit,cs_sold_date_sk,sr_net_loss,sr_returned_date_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,sr_customer_sk,sr_item_sk] + Project [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] + Project [ss_customer_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [sr_customer_sk,sr_item_sk,sr_ticket_number,sr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss] + WholeStageCodegen + Project [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,sr_ticket_number] + Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,sr_ticket_number] [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,sr_ticket_number] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] + WholeStageCodegen + Project [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) + WholeStageCodegen Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) + WholeStageCodegen Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter - ReusedExchange [d_date_sk] #5 + BroadcastExchange #6 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter - BroadcastExchange #6 - WholeStageCodegen (6) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id,s_store_name] + BroadcastExchange #7 + WholeStageCodegen + Project [s_store_id,s_store_name,s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_id,s_store_name,s_store_sk] [s_store_id,s_store_name,s_store_sk] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (7) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] + BroadcastExchange #8 + WholeStageCodegen + Project [i_item_desc,i_item_id,i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_item_desc,i_item_id,i_item_sk] [i_item_desc,i_item_id,i_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/explain.txt index 12e953427..c5c7da628 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/explain.txt @@ -1,193 +1,32 @@ == Physical Plan == -TakeOrderedAndProject (34) -+- * HashAggregate (33) - +- Exchange (32) - +- * HashAggregate (31) - +- * Project (30) - +- * BroadcastHashJoin Inner BuildRight (29) - :- * Project (23) - : +- * BroadcastHashJoin Inner BuildRight (22) - : :- * Project (17) - : : +- * BroadcastHashJoin Inner BuildRight (16) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.catalog_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.customer_demographics (4) - : : +- BroadcastExchange (15) - : : +- * Project (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.date_dim (11) - : +- BroadcastExchange (21) - : +- * Filter (20) - : +- * ColumnarToRow (19) - : +- Scan parquet default.item (18) - +- BroadcastExchange (28) - +- * Project (27) - +- * Filter (26) - +- * ColumnarToRow (25) - +- Scan parquet default.promotion (24) - - -(1) Scan parquet default.catalog_sales -Output [8]: [cs_sold_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 5] -Input [8]: [cs_sold_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] - -(3) Filter [codegen id : 5] -Input [8]: [cs_sold_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] -Condition : (((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_item_sk#3)) AND isnotnull(cs_promo_sk#4)) - -(4) Scan parquet default.customer_demographics -Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College), IsNotNull(cd_demo_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] - -(6) Filter [codegen id : 1] -Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] -Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College)) AND isnotnull(cd_demo_sk#9)) - -(7) Project [codegen id : 1] -Output [1]: [cd_demo_sk#9] -Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] - -(8) BroadcastExchange -Input [1]: [cd_demo_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] - -(9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_bill_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#9] -Join condition: None - -(10) Project [codegen id : 5] -Output [7]: [cs_sold_date_sk#1, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] -Input [9]: [cs_sold_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cd_demo_sk#9] - -(11) Scan parquet default.date_dim -Output [2]: [d_date_sk#14, d_year#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#14, d_year#15] - -(13) Filter [codegen id : 2] -Input [2]: [d_date_sk#14, d_year#15] -Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2000)) AND isnotnull(d_date_sk#14)) - -(14) Project [codegen id : 2] -Output [1]: [d_date_sk#14] -Input [2]: [d_date_sk#14, d_year#15] - -(15) BroadcastExchange -Input [1]: [d_date_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] - -(16) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#14] -Join condition: None - -(17) Project [codegen id : 5] -Output [6]: [cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] -Input [8]: [cs_sold_date_sk#1, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, d_date_sk#14] - -(18) Scan parquet default.item -Output [2]: [i_item_sk#17, i_item_id#18] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 3] -Input [2]: [i_item_sk#17, i_item_id#18] - -(20) Filter [codegen id : 3] -Input [2]: [i_item_sk#17, i_item_id#18] -Condition : isnotnull(i_item_sk#17) - -(21) BroadcastExchange -Input [2]: [i_item_sk#17, i_item_id#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] - -(22) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_item_sk#3] -Right keys [1]: [i_item_sk#17] -Join condition: None - -(23) Project [codegen id : 5] -Output [6]: [cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, i_item_id#18] -Input [8]: [cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, i_item_sk#17, i_item_id#18] - -(24) Scan parquet default.promotion -Output [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] -Batched: true -Location [not included in comparison]/{warehouse_dir}/promotion] -PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] -ReadSchema: struct - -(25) ColumnarToRow [codegen id : 4] -Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] - -(26) Filter [codegen id : 4] -Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] -Condition : (((p_channel_email#21 = N) OR (p_channel_event#22 = N)) AND isnotnull(p_promo_sk#20)) - -(27) Project [codegen id : 4] -Output [1]: [p_promo_sk#20] -Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] - -(28) BroadcastExchange -Input [1]: [p_promo_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] - -(29) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_promo_sk#4] -Right keys [1]: [p_promo_sk#20] -Join condition: None - -(30) Project [codegen id : 5] -Output [5]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, i_item_id#18] -Input [7]: [cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, i_item_id#18, p_promo_sk#20] - -(31) HashAggregate [codegen id : 5] -Input [5]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, i_item_id#18] -Keys [1]: [i_item_id#18] -Functions [4]: [partial_avg(cast(cs_quantity#5 as bigint)), partial_avg(UnscaledValue(cs_list_price#6)), partial_avg(UnscaledValue(cs_coupon_amt#8)), partial_avg(UnscaledValue(cs_sales_price#7))] -Aggregate Attributes [8]: [sum#24, count#25, sum#26, count#27, sum#28, count#29, sum#30, count#31] -Results [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] - -(32) Exchange -Input [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] -Arguments: hashpartitioning(i_item_id#18, 5), true, [id=#40] - -(33) HashAggregate [codegen id : 6] -Input [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] -Keys [1]: [i_item_id#18] -Functions [4]: [avg(cast(cs_quantity#5 as bigint)), avg(UnscaledValue(cs_list_price#6)), avg(UnscaledValue(cs_coupon_amt#8)), avg(UnscaledValue(cs_sales_price#7))] -Aggregate Attributes [4]: [avg(cast(cs_quantity#5 as bigint))#41, avg(UnscaledValue(cs_list_price#6))#42, avg(UnscaledValue(cs_coupon_amt#8))#43, avg(UnscaledValue(cs_sales_price#7))#44] -Results [5]: [i_item_id#18, avg(cast(cs_quantity#5 as bigint))#41 AS agg1#45, cast((avg(UnscaledValue(cs_list_price#6))#42 / 100.0) as decimal(11,6)) AS agg2#46, cast((avg(UnscaledValue(cs_coupon_amt#8))#43 / 100.0) as decimal(11,6)) AS agg3#47, cast((avg(UnscaledValue(cs_sales_price#7))#44 / 100.0) as decimal(11,6)) AS agg4#48] - -(34) TakeOrderedAndProject -Input [5]: [i_item_id#18, agg1#45, agg2#46, agg3#47, agg4#48] -Arguments: 100, [i_item_id#18 ASC NULLS FIRST], [i_item_id#18, agg1#45, agg2#46, agg3#47, agg4#48] - +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[i_item_id#1,agg1#2,agg2#3,agg3#4,agg4#5]) ++- *(6) HashAggregate(keys=[i_item_id#1], functions=[avg(cast(cs_quantity#6 as bigint)), avg(UnscaledValue(cs_list_price#7)), avg(UnscaledValue(cs_coupon_amt#8)), avg(UnscaledValue(cs_sales_price#9))]) + +- Exchange hashpartitioning(i_item_id#1, 5) + +- *(5) HashAggregate(keys=[i_item_id#1], functions=[partial_avg(cast(cs_quantity#6 as bigint)), partial_avg(UnscaledValue(cs_list_price#7)), partial_avg(UnscaledValue(cs_coupon_amt#8)), partial_avg(UnscaledValue(cs_sales_price#9))]) + +- *(5) Project [cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8, i_item_id#1] + +- *(5) BroadcastHashJoin [cs_promo_sk#10], [p_promo_sk#11], Inner, BuildRight + :- *(5) Project [cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8, i_item_id#1] + : +- *(5) BroadcastHashJoin [cs_item_sk#12], [i_item_sk#13], Inner, BuildRight + : :- *(5) Project [cs_item_sk#12, cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8] + : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight + : : :- *(5) Project [cs_sold_date_sk#14, cs_item_sk#12, cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8] + : : : +- *(5) BroadcastHashJoin [cs_bill_cdemo_sk#16], [cd_demo_sk#17], Inner, BuildRight + : : : :- *(5) Project [cs_sold_date_sk#14, cs_bill_cdemo_sk#16, cs_item_sk#12, cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8] + : : : : +- *(5) Filter (((isnotnull(cs_bill_cdemo_sk#16) && isnotnull(cs_sold_date_sk#14)) && isnotnull(cs_item_sk#12)) && isnotnull(cs_promo_sk#10)) + : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#14,cs_bill_cdemo_sk#16,cs_item_sk#12,cs_promo_sk#10,cs_quantity#6,cs_list_price#7,cs_sales_price#9,cs_coupon_amt#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_pro..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [d_date_sk#15] + : : +- *(2) Filter ((isnotnull(d_year#21) && (d_year#21 = 2000)) && isnotnull(d_date_sk#15)) + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#15,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [i_item_sk#13, i_item_id#1] + : +- *(3) Filter isnotnull(i_item_sk#13) + : +- *(3) FileScan parquet default.item[i_item_sk#13,i_item_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [p_promo_sk#11] + +- *(4) Filter (((p_channel_email#22 = N) || (p_channel_event#23 = N)) && isnotnull(p_promo_sk#11)) + +- *(4) FileScan parquet default.promotion[p_promo_sk#11,p_channel_email#22,p_channel_event#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/simplified.txt index 94ce760c2..bfb96b05c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/simplified.txt @@ -1,50 +1,42 @@ -TakeOrderedAndProject [i_item_id,agg1,agg2,agg3,agg4] - WholeStageCodegen (6) - HashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count] [avg(cast(cs_quantity as bigint)),avg(UnscaledValue(cs_list_price)),avg(UnscaledValue(cs_coupon_amt)),avg(UnscaledValue(cs_sales_price)),agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] +TakeOrderedAndProject [agg1,agg2,agg3,agg4,i_item_id] + WholeStageCodegen + HashAggregate [avg(UnscaledValue(cs_coupon_amt)),avg(UnscaledValue(cs_list_price)),avg(UnscaledValue(cs_sales_price)),avg(cast(cs_quantity as bigint)),count,count,count,count,i_item_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(cs_coupon_amt)),avg(UnscaledValue(cs_list_price)),avg(UnscaledValue(cs_sales_price)),avg(cast(cs_quantity as bigint)),count,count,count,count,sum,sum,sum,sum] InputAdapter Exchange [i_item_id] #1 - WholeStageCodegen (5) - HashAggregate [i_item_id,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] - Project [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_id] + WholeStageCodegen + HashAggregate [count,count,count,count,count,count,count,count,cs_coupon_amt,cs_list_price,cs_quantity,cs_sales_price,i_item_id,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [cs_coupon_amt,cs_list_price,cs_quantity,cs_sales_price,i_item_id] BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_id] + Project [cs_coupon_amt,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt] + Project [cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt] - BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] - Filter [cs_bill_cdemo_sk,cs_sold_date_sk,cs_item_sk,cs_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt] + Project [cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Project [cs_bill_cdemo_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] + Filter [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] [cs_bill_cdemo_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) + WholeStageCodegen Project [cd_demo_sk] - Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id] + WholeStageCodegen + Project [i_item_id,i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) + WholeStageCodegen Project [p_promo_sk] Filter [p_channel_email,p_channel_event,p_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.promotion [p_promo_sk,p_channel_email,p_channel_event] + Scan parquet default.promotion [p_channel_email,p_channel_event,p_promo_sk] [p_channel_email,p_channel_event,p_promo_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/explain.txt index 16aeff7a2..c166032cd 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/explain.txt @@ -1,193 +1,33 @@ == Physical Plan == -TakeOrderedAndProject (34) -+- * HashAggregate (33) - +- Exchange (32) - +- * HashAggregate (31) - +- * Expand (30) - +- * Project (29) - +- * BroadcastHashJoin Inner BuildRight (28) - :- * Project (23) - : +- * BroadcastHashJoin Inner BuildRight (22) - : :- * Project (17) - : : +- * BroadcastHashJoin Inner BuildRight (16) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.customer_demographics (4) - : : +- BroadcastExchange (15) - : : +- * Project (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.date_dim (11) - : +- BroadcastExchange (21) - : +- * Filter (20) - : +- * ColumnarToRow (19) - : +- Scan parquet default.store (18) - +- BroadcastExchange (27) - +- * Filter (26) - +- * ColumnarToRow (25) - +- Scan parquet default.item (24) - - -(1) Scan parquet default.store_sales -Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 5] -Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] - -(3) Filter [codegen id : 5] -Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] -Condition : (((isnotnull(ss_cdemo_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) - -(4) Scan parquet default.customer_demographics -Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College), IsNotNull(cd_demo_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] - -(6) Filter [codegen id : 1] -Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] -Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College)) AND isnotnull(cd_demo_sk#9)) - -(7) Project [codegen id : 1] -Output [1]: [cd_demo_sk#9] -Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] - -(8) BroadcastExchange -Input [1]: [cd_demo_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] - -(9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_cdemo_sk#3] -Right keys [1]: [cd_demo_sk#9] -Join condition: None - -(10) Project [codegen id : 5] -Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] -Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#9] - -(11) Scan parquet default.date_dim -Output [2]: [d_date_sk#14, d_year#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#14, d_year#15] - -(13) Filter [codegen id : 2] -Input [2]: [d_date_sk#14, d_year#15] -Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2002)) AND isnotnull(d_date_sk#14)) - -(14) Project [codegen id : 2] -Output [1]: [d_date_sk#14] -Input [2]: [d_date_sk#14, d_year#15] - -(15) BroadcastExchange -Input [1]: [d_date_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] - -(16) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#14] -Join condition: None - -(17) Project [codegen id : 5] -Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] -Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#14] - -(18) Scan parquet default.store -Output [2]: [s_store_sk#17, s_state#18] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#17, s_state#18] - -(20) Filter [codegen id : 3] -Input [2]: [s_store_sk#17, s_state#18] -Condition : ((isnotnull(s_state#18) AND (s_state#18 = TN)) AND isnotnull(s_store_sk#17)) - -(21) BroadcastExchange -Input [2]: [s_store_sk#17, s_state#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] - -(22) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_store_sk#4] -Right keys [1]: [s_store_sk#17] -Join condition: None - -(23) Project [codegen id : 5] -Output [6]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_state#18] -Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17, s_state#18] - -(24) Scan parquet default.item -Output [2]: [i_item_sk#20, i_item_id#21] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(25) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#20, i_item_id#21] - -(26) Filter [codegen id : 4] -Input [2]: [i_item_sk#20, i_item_id#21] -Condition : isnotnull(i_item_sk#20) - -(27) BroadcastExchange -Input [2]: [i_item_sk#20, i_item_id#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] - -(28) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#20] -Join condition: None - -(29) Project [codegen id : 5] -Output [6]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#21, s_state#18] -Input [8]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_state#18, i_item_sk#20, i_item_id#21] - -(30) Expand [codegen id : 5] -Input [6]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#21, s_state#18] -Arguments: [List(ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#21, s_state#18, 0), List(ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#21, null, 1), List(ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, null, null, 3)], [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#23, s_state#24, spark_grouping_id#25] - -(31) HashAggregate [codegen id : 5] -Input [7]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#23, s_state#24, spark_grouping_id#25] -Keys [3]: [i_item_id#23, s_state#24, spark_grouping_id#25] -Functions [4]: [partial_avg(cast(ss_quantity#5 as bigint)), partial_avg(UnscaledValue(ss_list_price#6)), partial_avg(UnscaledValue(ss_coupon_amt#8)), partial_avg(UnscaledValue(ss_sales_price#7))] -Aggregate Attributes [8]: [sum#26, count#27, sum#28, count#29, sum#30, count#31, sum#32, count#33] -Results [11]: [i_item_id#23, s_state#24, spark_grouping_id#25, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] - -(32) Exchange -Input [11]: [i_item_id#23, s_state#24, spark_grouping_id#25, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] -Arguments: hashpartitioning(i_item_id#23, s_state#24, spark_grouping_id#25, 5), true, [id=#42] - -(33) HashAggregate [codegen id : 6] -Input [11]: [i_item_id#23, s_state#24, spark_grouping_id#25, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] -Keys [3]: [i_item_id#23, s_state#24, spark_grouping_id#25] -Functions [4]: [avg(cast(ss_quantity#5 as bigint)), avg(UnscaledValue(ss_list_price#6)), avg(UnscaledValue(ss_coupon_amt#8)), avg(UnscaledValue(ss_sales_price#7))] -Aggregate Attributes [4]: [avg(cast(ss_quantity#5 as bigint))#43, avg(UnscaledValue(ss_list_price#6))#44, avg(UnscaledValue(ss_coupon_amt#8))#45, avg(UnscaledValue(ss_sales_price#7))#46] -Results [7]: [i_item_id#23, s_state#24, cast((shiftright(spark_grouping_id#25, 0) & 1) as tinyint) AS g_state#47, avg(cast(ss_quantity#5 as bigint))#43 AS agg1#48, cast((avg(UnscaledValue(ss_list_price#6))#44 / 100.0) as decimal(11,6)) AS agg2#49, cast((avg(UnscaledValue(ss_coupon_amt#8))#45 / 100.0) as decimal(11,6)) AS agg3#50, cast((avg(UnscaledValue(ss_sales_price#7))#46 / 100.0) as decimal(11,6)) AS agg4#51] - -(34) TakeOrderedAndProject -Input [7]: [i_item_id#23, s_state#24, g_state#47, agg1#48, agg2#49, agg3#50, agg4#51] -Arguments: 100, [i_item_id#23 ASC NULLS FIRST, s_state#24 ASC NULLS FIRST], [i_item_id#23, s_state#24, g_state#47, agg1#48, agg2#49, agg3#50, agg4#51] - +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,s_state#2 ASC NULLS FIRST], output=[i_item_id#1,s_state#2,g_state#3,agg1#4,agg2#5,agg3#6,agg4#7]) ++- *(6) HashAggregate(keys=[i_item_id#1, s_state#2, spark_grouping_id#8], functions=[avg(cast(ss_quantity#9 as bigint)), avg(UnscaledValue(ss_list_price#10)), avg(UnscaledValue(ss_coupon_amt#11)), avg(UnscaledValue(ss_sales_price#12))]) + +- Exchange hashpartitioning(i_item_id#1, s_state#2, spark_grouping_id#8, 5) + +- *(5) HashAggregate(keys=[i_item_id#1, s_state#2, spark_grouping_id#8], functions=[partial_avg(cast(ss_quantity#9 as bigint)), partial_avg(UnscaledValue(ss_list_price#10)), partial_avg(UnscaledValue(ss_coupon_amt#11)), partial_avg(UnscaledValue(ss_sales_price#12))]) + +- *(5) Expand [List(ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#13, s_state#14, 0), List(ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#13, null, 1), List(ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, null, null, 3)], [ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#1, s_state#2, spark_grouping_id#8] + +- *(5) Project [ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#15 AS i_item_id#13, s_state#16 AS s_state#14] + +- *(5) BroadcastHashJoin [ss_item_sk#17], [i_item_sk#18], Inner, BuildRight + :- *(5) Project [ss_item_sk#17, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, s_state#16] + : +- *(5) BroadcastHashJoin [ss_store_sk#19], [s_store_sk#20], Inner, BuildRight + : :- *(5) Project [ss_item_sk#17, ss_store_sk#19, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11] + : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight + : : :- *(5) Project [ss_sold_date_sk#21, ss_item_sk#17, ss_store_sk#19, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11] + : : : +- *(5) BroadcastHashJoin [ss_cdemo_sk#23], [cd_demo_sk#24], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#21, ss_item_sk#17, ss_cdemo_sk#23, ss_store_sk#19, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11] + : : : : +- *(5) Filter (((isnotnull(ss_cdemo_sk#23) && isnotnull(ss_sold_date_sk#21)) && isnotnull(ss_store_sk#19)) && isnotnull(ss_item_sk#17)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#21,ss_item_sk#17,ss_cdemo_sk#23,ss_store_sk#19,ss_quantity#9,ss_list_price#10,ss_sales_price#12,ss_coupon_amt#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [d_date_sk#22] + : : +- *(2) Filter ((isnotnull(d_year#28) && (d_year#28 = 2002)) && isnotnull(d_date_sk#22)) + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#22,d_year#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [s_store_sk#20, s_state#16] + : +- *(3) Filter ((isnotnull(s_state#16) && (s_state#16 = TN)) && isnotnull(s_store_sk#20)) + : +- *(3) FileScan parquet default.store[s_store_sk#20,s_state#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [i_item_sk#18, i_item_id#15] + +- *(4) Filter isnotnull(i_item_sk#18) + +- *(4) FileScan parquet default.item[i_item_sk#18,i_item_id#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/simplified.txt index 204094d44..e3a2d7ca1 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/simplified.txt @@ -1,50 +1,43 @@ -TakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] - WholeStageCodegen (6) - HashAggregate [i_item_id,s_state,spark_grouping_id,sum,count,sum,count,sum,count,sum,count] [avg(cast(ss_quantity as bigint)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),g_state,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] +TakeOrderedAndProject [agg1,agg2,agg3,agg4,g_state,i_item_id,s_state] + WholeStageCodegen + HashAggregate [avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,i_item_id,s_state,spark_grouping_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,g_state,sum,sum,sum,sum] InputAdapter Exchange [i_item_id,s_state,spark_grouping_id] #1 - WholeStageCodegen (5) - HashAggregate [i_item_id,s_state,spark_grouping_id,ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] - Expand [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id,s_state] - Project [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id,s_state] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] - BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Filter [ss_cdemo_sk,ss_sold_date_sk,ss_store_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + WholeStageCodegen + HashAggregate [count,count,count,count,count,count,count,count,i_item_id,s_state,spark_grouping_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Expand [i_item_id,s_state,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + Project [i_item_id,s_state,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_state,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Project [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) + WholeStageCodegen Project [cd_demo_sk] - Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) - Filter [s_state,s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_state] + WholeStageCodegen + Project [s_state,s_store_sk] + Filter [s_state,s_store_sk] + Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id] + WholeStageCodegen + Project [i_item_id,i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/explain.txt index 9788040bb..b76609a84 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/explain.txt @@ -1,437 +1,66 @@ == Physical Plan == -CollectLimit (71) -+- BroadcastNestedLoopJoin Inner BuildRight (70) - :- BroadcastNestedLoopJoin Inner BuildRight (58) - : :- BroadcastNestedLoopJoin Inner BuildRight (46) - : : :- BroadcastNestedLoopJoin Inner BuildRight (34) - : : : :- BroadcastNestedLoopJoin Inner BuildRight (22) - : : : : :- * HashAggregate (10) - : : : : : +- Exchange (9) - : : : : : +- * HashAggregate (8) - : : : : : +- * HashAggregate (7) - : : : : : +- Exchange (6) - : : : : : +- * HashAggregate (5) - : : : : : +- * Project (4) - : : : : : +- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- BroadcastExchange (21) - : : : : +- * HashAggregate (20) - : : : : +- Exchange (19) - : : : : +- * HashAggregate (18) - : : : : +- * HashAggregate (17) - : : : : +- Exchange (16) - : : : : +- * HashAggregate (15) - : : : : +- * Project (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.store_sales (11) - : : : +- BroadcastExchange (33) - : : : +- * HashAggregate (32) - : : : +- Exchange (31) - : : : +- * HashAggregate (30) - : : : +- * HashAggregate (29) - : : : +- Exchange (28) - : : : +- * HashAggregate (27) - : : : +- * Project (26) - : : : +- * Filter (25) - : : : +- * ColumnarToRow (24) - : : : +- Scan parquet default.store_sales (23) - : : +- BroadcastExchange (45) - : : +- * HashAggregate (44) - : : +- Exchange (43) - : : +- * HashAggregate (42) - : : +- * HashAggregate (41) - : : +- Exchange (40) - : : +- * HashAggregate (39) - : : +- * Project (38) - : : +- * Filter (37) - : : +- * ColumnarToRow (36) - : : +- Scan parquet default.store_sales (35) - : +- BroadcastExchange (57) - : +- * HashAggregate (56) - : +- Exchange (55) - : +- * HashAggregate (54) - : +- * HashAggregate (53) - : +- Exchange (52) - : +- * HashAggregate (51) - : +- * Project (50) - : +- * Filter (49) - : +- * ColumnarToRow (48) - : +- Scan parquet default.store_sales (47) - +- BroadcastExchange (69) - +- * HashAggregate (68) - +- Exchange (67) - +- * HashAggregate (66) - +- * HashAggregate (65) - +- Exchange (64) - +- * HashAggregate (63) - +- * Project (62) - +- * Filter (61) - +- * ColumnarToRow (60) - +- Scan parquet default.store_sales (59) - - -(1) Scan parquet default.store_sales -Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 1] -Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] - -(3) Filter [codegen id : 1] -Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 0)) AND (ss_quantity#1 <= 5)) AND ((((ss_list_price#3 >= 8.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 18.00)) OR ((ss_coupon_amt#4 >= 459.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 1459.00))) OR ((ss_wholesale_cost#2 >= 57.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 77.00)))) - -(4) Project [codegen id : 1] -Output [1]: [ss_list_price#3] -Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] - -(5) HashAggregate [codegen id : 1] -Input [1]: [ss_list_price#3] -Keys [1]: [ss_list_price#3] -Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#5, count(ss_list_price#3)#6] -Results [4]: [ss_list_price#3, sum#7, count#8, count#9] - -(6) Exchange -Input [4]: [ss_list_price#3, sum#7, count#8, count#9] -Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#10] - -(7) HashAggregate [codegen id : 2] -Input [4]: [ss_list_price#3, sum#7, count#8, count#9] -Keys [1]: [ss_list_price#3] -Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#5, count(ss_list_price#3)#6] -Results [4]: [ss_list_price#3, sum#7, count#8, count#9] - -(8) HashAggregate [codegen id : 2] -Input [4]: [ss_list_price#3, sum#7, count#8, count#9] -Keys: [] -Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#5, count(ss_list_price#3)#6, count(ss_list_price#3)#11] -Results [4]: [sum#7, count#8, count#9, count#12] - -(9) Exchange -Input [4]: [sum#7, count#8, count#9, count#12] -Arguments: SinglePartition, true, [id=#13] - -(10) HashAggregate [codegen id : 3] -Input [4]: [sum#7, count#8, count#9, count#12] -Keys: [] -Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#5, count(ss_list_price#3)#6, count(ss_list_price#3)#11] -Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#5 / 100.0) as decimal(11,6)) AS B1_LP#14, count(ss_list_price#3)#6 AS B1_CNT#15, count(ss_list_price#3)#11 AS B1_CNTD#16] - -(11) Scan parquet default.store_sales -Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 4] -Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] - -(13) Filter [codegen id : 4] -Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 6)) AND (ss_quantity#1 <= 10)) AND ((((ss_list_price#3 >= 90.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 100.00)) OR ((ss_coupon_amt#4 >= 2323.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 3323.00))) OR ((ss_wholesale_cost#2 >= 31.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 51.00)))) - -(14) Project [codegen id : 4] -Output [1]: [ss_list_price#3] -Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] - -(15) HashAggregate [codegen id : 4] -Input [1]: [ss_list_price#3] -Keys [1]: [ss_list_price#3] -Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#17, count(ss_list_price#3)#18] -Results [4]: [ss_list_price#3, sum#19, count#20, count#21] - -(16) Exchange -Input [4]: [ss_list_price#3, sum#19, count#20, count#21] -Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#22] - -(17) HashAggregate [codegen id : 5] -Input [4]: [ss_list_price#3, sum#19, count#20, count#21] -Keys [1]: [ss_list_price#3] -Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#17, count(ss_list_price#3)#18] -Results [4]: [ss_list_price#3, sum#19, count#20, count#21] - -(18) HashAggregate [codegen id : 5] -Input [4]: [ss_list_price#3, sum#19, count#20, count#21] -Keys: [] -Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#17, count(ss_list_price#3)#18, count(ss_list_price#3)#23] -Results [4]: [sum#19, count#20, count#21, count#24] - -(19) Exchange -Input [4]: [sum#19, count#20, count#21, count#24] -Arguments: SinglePartition, true, [id=#25] - -(20) HashAggregate [codegen id : 6] -Input [4]: [sum#19, count#20, count#21, count#24] -Keys: [] -Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#17, count(ss_list_price#3)#18, count(ss_list_price#3)#23] -Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#17 / 100.0) as decimal(11,6)) AS B2_LP#26, count(ss_list_price#3)#18 AS B2_CNT#27, count(ss_list_price#3)#23 AS B2_CNTD#28] - -(21) BroadcastExchange -Input [3]: [B2_LP#26, B2_CNT#27, B2_CNTD#28] -Arguments: IdentityBroadcastMode, [id=#29] - -(22) BroadcastNestedLoopJoin -Join condition: None - -(23) Scan parquet default.store_sales -Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 7] -Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] - -(25) Filter [codegen id : 7] -Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 11)) AND (ss_quantity#1 <= 15)) AND ((((ss_list_price#3 >= 142.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 152.00)) OR ((ss_coupon_amt#4 >= 12214.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 13214.00))) OR ((ss_wholesale_cost#2 >= 79.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 99.00)))) - -(26) Project [codegen id : 7] -Output [1]: [ss_list_price#3] -Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] - -(27) HashAggregate [codegen id : 7] -Input [1]: [ss_list_price#3] -Keys [1]: [ss_list_price#3] -Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#30, count(ss_list_price#3)#31] -Results [4]: [ss_list_price#3, sum#32, count#33, count#34] - -(28) Exchange -Input [4]: [ss_list_price#3, sum#32, count#33, count#34] -Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#35] - -(29) HashAggregate [codegen id : 8] -Input [4]: [ss_list_price#3, sum#32, count#33, count#34] -Keys [1]: [ss_list_price#3] -Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#30, count(ss_list_price#3)#31] -Results [4]: [ss_list_price#3, sum#32, count#33, count#34] - -(30) HashAggregate [codegen id : 8] -Input [4]: [ss_list_price#3, sum#32, count#33, count#34] -Keys: [] -Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#30, count(ss_list_price#3)#31, count(ss_list_price#3)#36] -Results [4]: [sum#32, count#33, count#34, count#37] - -(31) Exchange -Input [4]: [sum#32, count#33, count#34, count#37] -Arguments: SinglePartition, true, [id=#38] - -(32) HashAggregate [codegen id : 9] -Input [4]: [sum#32, count#33, count#34, count#37] -Keys: [] -Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#30, count(ss_list_price#3)#31, count(ss_list_price#3)#36] -Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#30 / 100.0) as decimal(11,6)) AS B3_LP#39, count(ss_list_price#3)#31 AS B3_CNT#40, count(ss_list_price#3)#36 AS B3_CNTD#41] - -(33) BroadcastExchange -Input [3]: [B3_LP#39, B3_CNT#40, B3_CNTD#41] -Arguments: IdentityBroadcastMode, [id=#42] - -(34) BroadcastNestedLoopJoin -Join condition: None - -(35) Scan parquet default.store_sales -Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20)] -ReadSchema: struct - -(36) ColumnarToRow [codegen id : 10] -Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] - -(37) Filter [codegen id : 10] -Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 16)) AND (ss_quantity#1 <= 20)) AND ((((ss_list_price#3 >= 135.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 145.00)) OR ((ss_coupon_amt#4 >= 6071.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 7071.00))) OR ((ss_wholesale_cost#2 >= 38.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 58.00)))) - -(38) Project [codegen id : 10] -Output [1]: [ss_list_price#3] -Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] - -(39) HashAggregate [codegen id : 10] -Input [1]: [ss_list_price#3] -Keys [1]: [ss_list_price#3] -Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#43, count(ss_list_price#3)#44] -Results [4]: [ss_list_price#3, sum#45, count#46, count#47] - -(40) Exchange -Input [4]: [ss_list_price#3, sum#45, count#46, count#47] -Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#48] - -(41) HashAggregate [codegen id : 11] -Input [4]: [ss_list_price#3, sum#45, count#46, count#47] -Keys [1]: [ss_list_price#3] -Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#43, count(ss_list_price#3)#44] -Results [4]: [ss_list_price#3, sum#45, count#46, count#47] - -(42) HashAggregate [codegen id : 11] -Input [4]: [ss_list_price#3, sum#45, count#46, count#47] -Keys: [] -Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#43, count(ss_list_price#3)#44, count(ss_list_price#3)#49] -Results [4]: [sum#45, count#46, count#47, count#50] - -(43) Exchange -Input [4]: [sum#45, count#46, count#47, count#50] -Arguments: SinglePartition, true, [id=#51] - -(44) HashAggregate [codegen id : 12] -Input [4]: [sum#45, count#46, count#47, count#50] -Keys: [] -Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#43, count(ss_list_price#3)#44, count(ss_list_price#3)#49] -Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#43 / 100.0) as decimal(11,6)) AS B4_LP#52, count(ss_list_price#3)#44 AS B4_CNT#53, count(ss_list_price#3)#49 AS B4_CNTD#54] - -(45) BroadcastExchange -Input [3]: [B4_LP#52, B4_CNT#53, B4_CNTD#54] -Arguments: IdentityBroadcastMode, [id=#55] - -(46) BroadcastNestedLoopJoin -Join condition: None - -(47) Scan parquet default.store_sales -Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25)] -ReadSchema: struct - -(48) ColumnarToRow [codegen id : 13] -Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] - -(49) Filter [codegen id : 13] -Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 21)) AND (ss_quantity#1 <= 25)) AND ((((ss_list_price#3 >= 122.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 132.00)) OR ((ss_coupon_amt#4 >= 836.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 1836.00))) OR ((ss_wholesale_cost#2 >= 17.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 37.00)))) - -(50) Project [codegen id : 13] -Output [1]: [ss_list_price#3] -Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] - -(51) HashAggregate [codegen id : 13] -Input [1]: [ss_list_price#3] -Keys [1]: [ss_list_price#3] -Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#56, count(ss_list_price#3)#57] -Results [4]: [ss_list_price#3, sum#58, count#59, count#60] - -(52) Exchange -Input [4]: [ss_list_price#3, sum#58, count#59, count#60] -Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#61] - -(53) HashAggregate [codegen id : 14] -Input [4]: [ss_list_price#3, sum#58, count#59, count#60] -Keys [1]: [ss_list_price#3] -Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#56, count(ss_list_price#3)#57] -Results [4]: [ss_list_price#3, sum#58, count#59, count#60] - -(54) HashAggregate [codegen id : 14] -Input [4]: [ss_list_price#3, sum#58, count#59, count#60] -Keys: [] -Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#56, count(ss_list_price#3)#57, count(ss_list_price#3)#62] -Results [4]: [sum#58, count#59, count#60, count#63] - -(55) Exchange -Input [4]: [sum#58, count#59, count#60, count#63] -Arguments: SinglePartition, true, [id=#64] - -(56) HashAggregate [codegen id : 15] -Input [4]: [sum#58, count#59, count#60, count#63] -Keys: [] -Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#56, count(ss_list_price#3)#57, count(ss_list_price#3)#62] -Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#56 / 100.0) as decimal(11,6)) AS B5_LP#65, count(ss_list_price#3)#57 AS B5_CNT#66, count(ss_list_price#3)#62 AS B5_CNTD#67] - -(57) BroadcastExchange -Input [3]: [B5_LP#65, B5_CNT#66, B5_CNTD#67] -Arguments: IdentityBroadcastMode, [id=#68] - -(58) BroadcastNestedLoopJoin -Join condition: None - -(59) Scan parquet default.store_sales -Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30)] -ReadSchema: struct - -(60) ColumnarToRow [codegen id : 16] -Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] - -(61) Filter [codegen id : 16] -Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 26)) AND (ss_quantity#1 <= 30)) AND ((((ss_list_price#3 >= 154.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 164.00)) OR ((ss_coupon_amt#4 >= 7326.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 8326.00))) OR ((ss_wholesale_cost#2 >= 7.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 27.00)))) - -(62) Project [codegen id : 16] -Output [1]: [ss_list_price#3] -Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] - -(63) HashAggregate [codegen id : 16] -Input [1]: [ss_list_price#3] -Keys [1]: [ss_list_price#3] -Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#69, count(ss_list_price#3)#70] -Results [4]: [ss_list_price#3, sum#71, count#72, count#73] - -(64) Exchange -Input [4]: [ss_list_price#3, sum#71, count#72, count#73] -Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#74] - -(65) HashAggregate [codegen id : 17] -Input [4]: [ss_list_price#3, sum#71, count#72, count#73] -Keys [1]: [ss_list_price#3] -Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#69, count(ss_list_price#3)#70] -Results [4]: [ss_list_price#3, sum#71, count#72, count#73] - -(66) HashAggregate [codegen id : 17] -Input [4]: [ss_list_price#3, sum#71, count#72, count#73] -Keys: [] -Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#69, count(ss_list_price#3)#70, count(ss_list_price#3)#75] -Results [4]: [sum#71, count#72, count#73, count#76] - -(67) Exchange -Input [4]: [sum#71, count#72, count#73, count#76] -Arguments: SinglePartition, true, [id=#77] - -(68) HashAggregate [codegen id : 18] -Input [4]: [sum#71, count#72, count#73, count#76] -Keys: [] -Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#69, count(ss_list_price#3)#70, count(ss_list_price#3)#75] -Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#69 / 100.0) as decimal(11,6)) AS B6_LP#78, count(ss_list_price#3)#70 AS B6_CNT#79, count(ss_list_price#3)#75 AS B6_CNTD#80] - -(69) BroadcastExchange -Input [3]: [B6_LP#78, B6_CNT#79, B6_CNTD#80] -Arguments: IdentityBroadcastMode, [id=#81] - -(70) BroadcastNestedLoopJoin -Join condition: None - -(71) CollectLimit -Input [18]: [B1_LP#14, B1_CNT#15, B1_CNTD#16, B2_LP#26, B2_CNT#27, B2_CNTD#28, B3_LP#39, B3_CNT#40, B3_CNTD#41, B4_LP#52, B4_CNT#53, B4_CNTD#54, B5_LP#65, B5_CNT#66, B5_CNTD#67, B6_LP#78, B6_CNT#79, B6_CNTD#80] -Arguments: 100 - +CollectLimit 100 ++- BroadcastNestedLoopJoin BuildRight, Inner + :- BroadcastNestedLoopJoin BuildRight, Inner + : :- BroadcastNestedLoopJoin BuildRight, Inner + : : :- BroadcastNestedLoopJoin BuildRight, Inner + : : : :- BroadcastNestedLoopJoin BuildRight, Inner + : : : : :- *(3) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_list_price#1)), count(ss_list_price#1), count(distinct ss_list_price#1)]) + : : : : : +- Exchange SinglePartition + : : : : : +- *(2) HashAggregate(keys=[], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1), partial_count(distinct ss_list_price#1)]) + : : : : : +- *(2) HashAggregate(keys=[ss_list_price#1], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1)]) + : : : : : +- Exchange hashpartitioning(ss_list_price#1, 5) + : : : : : +- *(1) HashAggregate(keys=[ss_list_price#1], functions=[partial_avg(UnscaledValue(ss_list_price#1)), partial_count(ss_list_price#1)]) + : : : : : +- *(1) Project [ss_list_price#1] + : : : : : +- *(1) Filter (((isnotnull(ss_quantity#2) && (ss_quantity#2 >= 0)) && (ss_quantity#2 <= 5)) && ((((ss_list_price#1 >= 8.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 18.00)) || ((ss_coupon_amt#3 >= 459.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 1459.00))) || ((ss_wholesale_cost#4 >= 57.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 77.00)))) + : : : : : +- *(1) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5)], ReadSchema: struct= 6)) && (ss_quantity#2 <= 10)) && ((((ss_list_price#1 >= 90.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 100.00)) || ((ss_coupon_amt#3 >= 2323.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 3323.00))) || ((ss_wholesale_cost#4 >= 31.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 51.00)))) + : : : : +- *(4) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10)], ReadSchema: struct= 11)) && (ss_quantity#2 <= 15)) && ((((ss_list_price#1 >= 142.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 152.00)) || ((ss_coupon_amt#3 >= 12214.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 13214.00))) || ((ss_wholesale_cost#4 >= 79.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 99.00)))) + : : : +- *(7) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15)], ReadSchema: struct= 16)) && (ss_quantity#2 <= 20)) && ((((ss_list_price#1 >= 135.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 145.00)) || ((ss_coupon_amt#3 >= 6071.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 7071.00))) || ((ss_wholesale_cost#4 >= 38.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 58.00)))) + : : +- *(10) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct= 21)) && (ss_quantity#2 <= 25)) && ((((ss_list_price#1 >= 122.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 132.00)) || ((ss_coupon_amt#3 >= 836.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 1836.00))) || ((ss_wholesale_cost#4 >= 17.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 37.00)))) + : +- *(13) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25)], ReadSchema: struct= 26)) && (ss_quantity#2 <= 30)) && ((((ss_list_price#1 >= 154.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 164.00)) || ((ss_coupon_amt#3 >= 7326.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 8326.00))) || ((ss_wholesale_cost#4 >= 7.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 27.00)))) + +- *(16) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30)], ReadSchema: struct - -(2) ColumnarToRow [codegen id : 8] -Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] - -(3) Filter [codegen id : 8] -Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] -Condition : ((((isnotnull(ss_customer_sk#3) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_ticket_number#5)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) - -(4) Scan parquet default.store_returns -Output [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_returned_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] - -(6) Filter [codegen id : 1] -Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] -Condition : (((isnotnull(sr_customer_sk#9) AND isnotnull(sr_item_sk#8)) AND isnotnull(sr_ticket_number#10)) AND isnotnull(sr_returned_date_sk#7)) - -(7) BroadcastExchange -Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] -Arguments: HashedRelationBroadcastMode(List(input[2, bigint, false], input[1, bigint, false], input[3, bigint, false]),false), [id=#12] - -(8) BroadcastHashJoin [codegen id : 8] -Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] -Right keys [3]: [sr_customer_sk#9, sr_item_sk#8, sr_ticket_number#10] -Join condition: None - -(9) Project [codegen id : 8] -Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_return_quantity#11] -Input [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] - -(10) Scan parquet default.catalog_sales -Output [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] - -(12) Filter [codegen id : 2] -Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] -Condition : ((isnotnull(cs_bill_customer_sk#14) AND isnotnull(cs_item_sk#15)) AND isnotnull(cs_sold_date_sk#13)) - -(13) BroadcastExchange -Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint), cast(input[2, int, false] as bigint)),false), [id=#17] - -(14) BroadcastHashJoin [codegen id : 8] -Left keys [2]: [sr_customer_sk#9, sr_item_sk#8] -Right keys [2]: [cast(cs_bill_customer_sk#14 as bigint), cast(cs_item_sk#15 as bigint)] -Join condition: None - -(15) Project [codegen id : 8] -Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16] -Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_return_quantity#11, cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] - -(16) Scan parquet default.date_dim -Output [3]: [d_date_sk#18, d_year#19, d_moy#20] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,9), EqualTo(d_year,1999), IsNotNull(d_date_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 3] -Input [3]: [d_date_sk#18, d_year#19, d_moy#20] - -(18) Filter [codegen id : 3] -Input [3]: [d_date_sk#18, d_year#19, d_moy#20] -Condition : ((((isnotnull(d_moy#20) AND isnotnull(d_year#19)) AND (d_moy#20 = 9)) AND (d_year#19 = 1999)) AND isnotnull(d_date_sk#18)) - -(19) Project [codegen id : 3] -Output [1]: [d_date_sk#18] -Input [3]: [d_date_sk#18, d_year#19, d_moy#20] - -(20) BroadcastExchange -Input [1]: [d_date_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] - -(21) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#18] -Join condition: None - -(22) Project [codegen id : 8] -Output [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16] -Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16, d_date_sk#18] - -(23) Scan parquet default.date_dim -Output [3]: [d_date_sk#22, d_year#23, d_moy#24] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), EqualTo(d_year,1999), IsNotNull(d_date_sk)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 4] -Input [3]: [d_date_sk#22, d_year#23, d_moy#24] - -(25) Filter [codegen id : 4] -Input [3]: [d_date_sk#22, d_year#23, d_moy#24] -Condition : (((((isnotnull(d_moy#24) AND isnotnull(d_year#23)) AND (d_moy#24 >= 9)) AND (d_moy#24 <= 12)) AND (d_year#23 = 1999)) AND isnotnull(d_date_sk#22)) - -(26) Project [codegen id : 4] -Output [1]: [d_date_sk#22] -Input [3]: [d_date_sk#22, d_year#23, d_moy#24] - -(27) BroadcastExchange -Input [1]: [d_date_sk#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] - -(28) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [sr_returned_date_sk#7] -Right keys [1]: [cast(d_date_sk#22 as bigint)] -Join condition: None - -(29) Project [codegen id : 8] -Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16] -Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16, d_date_sk#22] - -(30) Scan parquet default.date_dim -Output [2]: [d_date_sk#26, d_year#27] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] -ReadSchema: struct - -(31) ColumnarToRow [codegen id : 5] -Input [2]: [d_date_sk#26, d_year#27] - -(32) Filter [codegen id : 5] -Input [2]: [d_date_sk#26, d_year#27] -Condition : (d_year#27 IN (1999,2000,2001) AND isnotnull(d_date_sk#26)) - -(33) Project [codegen id : 5] -Output [1]: [d_date_sk#26] -Input [2]: [d_date_sk#26, d_year#27] - -(34) BroadcastExchange -Input [1]: [d_date_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] - -(35) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_sold_date_sk#13] -Right keys [1]: [d_date_sk#26] -Join condition: None - -(36) Project [codegen id : 8] -Output [5]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_quantity#16] -Input [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16, d_date_sk#26] - -(37) Scan parquet default.store -Output [3]: [s_store_sk#29, s_store_id#30, s_store_name#31] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct - -(38) ColumnarToRow [codegen id : 6] -Input [3]: [s_store_sk#29, s_store_id#30, s_store_name#31] - -(39) Filter [codegen id : 6] -Input [3]: [s_store_sk#29, s_store_id#30, s_store_name#31] -Condition : isnotnull(s_store_sk#29) - -(40) BroadcastExchange -Input [3]: [s_store_sk#29, s_store_id#30, s_store_name#31] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#32] - -(41) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_store_sk#4] -Right keys [1]: [s_store_sk#29] -Join condition: None - -(42) Project [codegen id : 8] -Output [6]: [ss_item_sk#2, ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_store_id#30, s_store_name#31] -Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_store_sk#29, s_store_id#30, s_store_name#31] - -(43) Scan parquet default.item -Output [3]: [i_item_sk#33, i_item_id#34, i_item_desc#35] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(44) ColumnarToRow [codegen id : 7] -Input [3]: [i_item_sk#33, i_item_id#34, i_item_desc#35] - -(45) Filter [codegen id : 7] -Input [3]: [i_item_sk#33, i_item_id#34, i_item_desc#35] -Condition : isnotnull(i_item_sk#33) - -(46) BroadcastExchange -Input [3]: [i_item_sk#33, i_item_id#34, i_item_desc#35] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#36] - -(47) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#33] -Join condition: None - -(48) Project [codegen id : 8] -Output [7]: [ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_store_id#30, s_store_name#31, i_item_id#34, i_item_desc#35] -Input [9]: [ss_item_sk#2, ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_store_id#30, s_store_name#31, i_item_sk#33, i_item_id#34, i_item_desc#35] - -(49) HashAggregate [codegen id : 8] -Input [7]: [ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_store_id#30, s_store_name#31, i_item_id#34, i_item_desc#35] -Keys [4]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31] -Functions [3]: [partial_sum(cast(ss_quantity#6 as bigint)), partial_sum(cast(sr_return_quantity#11 as bigint)), partial_sum(cast(cs_quantity#16 as bigint))] -Aggregate Attributes [3]: [sum#37, sum#38, sum#39] -Results [7]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, sum#40, sum#41, sum#42] - -(50) Exchange -Input [7]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, sum#40, sum#41, sum#42] -Arguments: hashpartitioning(i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, 5), true, [id=#43] - -(51) HashAggregate [codegen id : 9] -Input [7]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, sum#40, sum#41, sum#42] -Keys [4]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31] -Functions [3]: [sum(cast(ss_quantity#6 as bigint)), sum(cast(sr_return_quantity#11 as bigint)), sum(cast(cs_quantity#16 as bigint))] -Aggregate Attributes [3]: [sum(cast(ss_quantity#6 as bigint))#44, sum(cast(sr_return_quantity#11 as bigint))#45, sum(cast(cs_quantity#16 as bigint))#46] -Results [7]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, sum(cast(ss_quantity#6 as bigint))#44 AS store_sales_quantity#47, sum(cast(sr_return_quantity#11 as bigint))#45 AS store_returns_quantity#48, sum(cast(cs_quantity#16 as bigint))#46 AS catalog_sales_quantity#49] - -(52) TakeOrderedAndProject -Input [7]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, store_sales_quantity#47, store_returns_quantity#48, catalog_sales_quantity#49] -Arguments: 100, [i_item_id#34 ASC NULLS FIRST, i_item_desc#35 ASC NULLS FIRST, s_store_id#30 ASC NULLS FIRST, s_store_name#31 ASC NULLS FIRST], [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, store_sales_quantity#47, store_returns_quantity#48, catalog_sales_quantity#49] - +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST,s_store_id#3 ASC NULLS FIRST,s_store_name#4 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,s_store_id#3,s_store_name#4,store_sales_quantity#5,store_returns_quantity#6,catalog_sales_quantity#7]) ++- *(9) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4], functions=[sum(cast(ss_quantity#8 as bigint)), sum(cast(sr_return_quantity#9 as bigint)), sum(cast(cs_quantity#10 as bigint))]) + +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4, 5) + +- *(8) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4], functions=[partial_sum(cast(ss_quantity#8 as bigint)), partial_sum(cast(sr_return_quantity#9 as bigint)), partial_sum(cast(cs_quantity#10 as bigint))]) + +- *(8) Project [ss_quantity#8, sr_return_quantity#9, cs_quantity#10, s_store_id#3, s_store_name#4, i_item_id#1, i_item_desc#2] + +- *(8) BroadcastHashJoin [ss_item_sk#11], [i_item_sk#12], Inner, BuildRight + :- *(8) Project [ss_item_sk#11, ss_quantity#8, sr_return_quantity#9, cs_quantity#10, s_store_id#3, s_store_name#4] + : +- *(8) BroadcastHashJoin [ss_store_sk#13], [s_store_sk#14], Inner, BuildRight + : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_quantity#8, sr_return_quantity#9, cs_quantity#10] + : : +- *(8) BroadcastHashJoin [cs_sold_date_sk#15], [d_date_sk#16], Inner, BuildRight + : : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_quantity#8, sr_return_quantity#9, cs_sold_date_sk#15, cs_quantity#10] + : : : +- *(8) BroadcastHashJoin [sr_returned_date_sk#17], [cast(d_date_sk#18 as bigint)], Inner, BuildRight + : : : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_quantity#8, sr_returned_date_sk#17, sr_return_quantity#9, cs_sold_date_sk#15, cs_quantity#10] + : : : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_store_sk#13, ss_quantity#8, sr_returned_date_sk#17, sr_return_quantity#9, cs_sold_date_sk#15, cs_quantity#10] + : : : : : +- *(8) BroadcastHashJoin [sr_customer_sk#21, sr_item_sk#22], [cast(cs_bill_customer_sk#23 as bigint), cast(cs_item_sk#24 as bigint)], Inner, BuildRight + : : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_store_sk#13, ss_quantity#8, sr_returned_date_sk#17, sr_item_sk#22, sr_customer_sk#21, sr_return_quantity#9] + : : : : : : +- *(8) BroadcastHashJoin [cast(ss_customer_sk#25 as bigint), cast(ss_item_sk#11 as bigint), cast(ss_ticket_number#26 as bigint)], [sr_customer_sk#21, sr_item_sk#22, sr_ticket_number#27], Inner, BuildRight + : : : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_customer_sk#25, ss_store_sk#13, ss_ticket_number#26, ss_quantity#8] + : : : : : : : +- *(8) Filter ((((isnotnull(ss_ticket_number#26) && isnotnull(ss_item_sk#11)) && isnotnull(ss_customer_sk#25)) && isnotnull(ss_sold_date_sk#19)) && isnotnull(ss_store_sk#13)) + : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#11,ss_customer_sk#25,ss_store_sk#13,ss_ticket_number#26,ss_quantity#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(3) Project [d_date_sk#20] + : : : : +- *(3) Filter ((((isnotnull(d_moy#28) && isnotnull(d_year#29)) && (d_moy#28 = 9)) && (d_year#29 = 1999)) && isnotnull(d_date_sk#20)) + : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_year#29,d_moy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,9), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [d_date_sk#18] + : : : +- *(4) Filter (((((isnotnull(d_year#30) && isnotnull(d_moy#31)) && (d_moy#31 >= 9)) && (d_moy#31 <= 12)) && (d_year#30 = 1999)) && isnotnull(d_date_sk#18)) + : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#30,d_moy#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), Equ..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(5) Project [d_date_sk#16] + : : +- *(5) Filter (d_year#32 IN (1999,2000,2001) && isnotnull(d_date_sk#16)) + : : +- *(5) FileScan parquet default.date_dim[d_date_sk#16,d_year#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [s_store_sk#14, s_store_id#3, s_store_name#4] + : +- *(6) Filter isnotnull(s_store_sk#14) + : +- *(6) FileScan parquet default.store[s_store_sk#14,s_store_id#3,s_store_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) Project [i_item_sk#12, i_item_id#1, i_item_desc#2] + +- *(7) Filter isnotnull(i_item_sk#12) + +- *(7) FileScan parquet default.item[i_item_sk#12,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/simplified.txt index 26e4e3a55..f8c72710f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/simplified.txt @@ -1,77 +1,66 @@ -TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_quantity,store_returns_quantity,catalog_sales_quantity] - WholeStageCodegen (9) - HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,sum,sum,sum] [sum(cast(ss_quantity as bigint)),sum(cast(sr_return_quantity as bigint)),sum(cast(cs_quantity as bigint)),store_sales_quantity,store_returns_quantity,catalog_sales_quantity,sum,sum,sum] +TakeOrderedAndProject [catalog_sales_quantity,i_item_desc,i_item_id,s_store_id,s_store_name,store_returns_quantity,store_sales_quantity] + WholeStageCodegen + HashAggregate [i_item_desc,i_item_id,s_store_id,s_store_name,sum,sum,sum,sum(cast(cs_quantity as bigint)),sum(cast(sr_return_quantity as bigint)),sum(cast(ss_quantity as bigint))] [catalog_sales_quantity,store_returns_quantity,store_sales_quantity,sum,sum,sum,sum(cast(cs_quantity as bigint)),sum(cast(sr_return_quantity as bigint)),sum(cast(ss_quantity as bigint))] InputAdapter - Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 - WholeStageCodegen (8) - HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,ss_quantity,sr_return_quantity,cs_quantity] [sum,sum,sum,sum,sum,sum] - Project [ss_quantity,sr_return_quantity,cs_quantity,s_store_id,s_store_name,i_item_id,i_item_desc] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_store_id,s_store_name] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity] + Exchange [i_item_desc,i_item_id,s_store_id,s_store_name] #1 + WholeStageCodegen + HashAggregate [cs_quantity,i_item_desc,i_item_id,s_store_id,s_store_name,sr_return_quantity,ss_quantity,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [cs_quantity,i_item_desc,i_item_id,s_store_id,s_store_name,sr_return_quantity,ss_quantity] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [cs_quantity,s_store_id,s_store_name,sr_return_quantity,ss_item_sk,ss_quantity] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [cs_quantity,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_sold_date_sk,cs_quantity] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [ss_item_sk,ss_store_sk,ss_quantity,sr_returned_date_sk,sr_return_quantity,cs_sold_date_sk,cs_quantity] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_quantity,sr_returned_date_sk,sr_return_quantity,cs_sold_date_sk,cs_quantity] - BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_quantity,sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity] - BroadcastHashJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity] + Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,sr_customer_sk,sr_item_sk] + Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] + Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [sr_customer_sk,sr_item_sk,sr_ticket_number,sr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] + WholeStageCodegen + Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] + Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] + WholeStageCodegen + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) + WholeStageCodegen Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) + WholeStageCodegen Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #6 - WholeStageCodegen (5) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #7 - WholeStageCodegen (6) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id,s_store_name] + WholeStageCodegen + Project [s_store_id,s_store_name,s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_id,s_store_name,s_store_sk] [s_store_id,s_store_name,s_store_sk] InputAdapter BroadcastExchange #8 - WholeStageCodegen (7) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] + WholeStageCodegen + Project [i_item_desc,i_item_id,i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_item_desc,i_item_id,i_item_sk] [i_item_desc,i_item_id,i_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/explain.txt index bec06d146..55173cb25 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/explain.txt @@ -1,122 +1,20 @@ == Physical Plan == -TakeOrderedAndProject (21) -+- * HashAggregate (20) - +- Exchange (19) - +- * HashAggregate (18) - +- * Project (17) - +- * BroadcastHashJoin Inner BuildRight (16) - :- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Project (4) - : : +- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.date_dim (1) - : +- BroadcastExchange (8) - : +- * Filter (7) - : +- * ColumnarToRow (6) - : +- Scan parquet default.store_sales (5) - +- BroadcastExchange (15) - +- * Project (14) - +- * Filter (13) - +- * ColumnarToRow (12) - +- Scan parquet default.item (11) - - -(1) Scan parquet default.date_dim -Output [3]: [d_date_sk#1, d_year#2, d_moy#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), EqualTo(d_moy,11), IsNotNull(d_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [3]: [d_date_sk#1, d_year#2, d_moy#3] - -(3) Filter [codegen id : 3] -Input [3]: [d_date_sk#1, d_year#2, d_moy#3] -Condition : ((isnotnull(d_moy#3) AND (d_moy#3 = 11)) AND isnotnull(d_date_sk#1)) - -(4) Project [codegen id : 3] -Output [2]: [d_date_sk#1, d_year#2] -Input [3]: [d_date_sk#1, d_year#2, d_moy#3] - -(5) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct - -(6) ColumnarToRow [codegen id : 1] -Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] - -(7) Filter [codegen id : 1] -Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] -Condition : (isnotnull(ss_sold_date_sk#4) AND isnotnull(ss_item_sk#5)) - -(8) BroadcastExchange -Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] - -(9) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#4] -Join condition: None - -(10) Project [codegen id : 3] -Output [3]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6] -Input [5]: [d_date_sk#1, d_year#2, ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] - -(11) Scan parquet default.item -Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,128), IsNotNull(i_item_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] - -(13) Filter [codegen id : 2] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] -Condition : ((isnotnull(i_manufact_id#11) AND (i_manufact_id#11 = 128)) AND isnotnull(i_item_sk#8)) - -(14) Project [codegen id : 2] -Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] - -(15) BroadcastExchange -Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(16) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_item_sk#5] -Right keys [1]: [i_item_sk#8] -Join condition: None - -(17) Project [codegen id : 3] -Output [4]: [d_year#2, ss_ext_sales_price#6, i_brand_id#9, i_brand#10] -Input [6]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6, i_item_sk#8, i_brand_id#9, i_brand#10] - -(18) HashAggregate [codegen id : 3] -Input [4]: [d_year#2, ss_ext_sales_price#6, i_brand_id#9, i_brand#10] -Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum#13] -Results [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] - -(19) Exchange -Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] -Arguments: hashpartitioning(d_year#2, i_brand#10, i_brand_id#9, 5), true, [id=#15] - -(20) HashAggregate [codegen id : 4] -Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] -Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#16] -Results [4]: [d_year#2, i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#16,17,2) AS sum_agg#19] - -(21) TakeOrderedAndProject -Input [4]: [d_year#2, brand_id#17, brand#18, sum_agg#19] -Arguments: 100, [d_year#2 ASC NULLS FIRST, sum_agg#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#2, brand_id#17, brand#18, sum_agg#19] - +TakeOrderedAndProject(limit=100, orderBy=[d_year#1 ASC NULLS FIRST,sum_agg#2 DESC NULLS LAST,brand_id#3 ASC NULLS FIRST], output=[d_year#1,brand_id#3,brand#4,sum_agg#2]) ++- *(4) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[sum(UnscaledValue(ss_ext_sales_price#7))]) + +- Exchange hashpartitioning(d_year#1, i_brand#5, i_brand_id#6, 5) + +- *(3) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#7))]) + +- *(3) Project [d_year#1, ss_ext_sales_price#7, i_brand_id#6, i_brand#5] + +- *(3) BroadcastHashJoin [ss_item_sk#8], [i_item_sk#9], Inner, BuildRight + :- *(3) Project [d_year#1, ss_item_sk#8, ss_ext_sales_price#7] + : +- *(3) BroadcastHashJoin [d_date_sk#10], [ss_sold_date_sk#11], Inner, BuildRight + : :- *(3) Project [d_date_sk#10, d_year#1] + : : +- *(3) Filter ((isnotnull(d_moy#12) && (d_moy#12 = 11)) && isnotnull(d_date_sk#10)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#10,d_year#1,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [ss_sold_date_sk#11, ss_item_sk#8, ss_ext_sales_price#7] + : +- *(1) Filter (isnotnull(ss_sold_date_sk#11) && isnotnull(ss_item_sk#8)) + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#8,ss_ext_sales_price#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [i_item_sk#9, i_brand_id#6, i_brand#5] + +- *(2) Filter ((isnotnull(i_manufact_id#13) && (i_manufact_id#13 = 128)) && isnotnull(i_item_sk#9)) + +- *(2) FileScan parquet default.item[i_item_sk#9,i_brand_id#6,i_brand#5,i_manufact_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,128), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/simplified.txt index ed3a06904..23253bbd2 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/simplified.txt @@ -1,31 +1,26 @@ -TakeOrderedAndProject [d_year,sum_agg,brand_id,brand] - WholeStageCodegen (4) - HashAggregate [d_year,i_brand,i_brand_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,sum_agg,sum] +TakeOrderedAndProject [brand,brand_id,d_year,sum_agg] + WholeStageCodegen + HashAggregate [d_year,i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,brand_id,sum,sum(UnscaledValue(ss_ext_sales_price)),sum_agg] InputAdapter Exchange [d_year,i_brand,i_brand_id] #1 - WholeStageCodegen (3) - HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] - Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [d_year,ss_item_sk,ss_ext_sales_price] + WholeStageCodegen + HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [d_year,i_brand,i_brand_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_year,ss_ext_sales_price,ss_item_sk] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] Project [d_date_sk,d_year] - Filter [d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [ss_sold_date_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + WholeStageCodegen + Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Project [i_item_sk,i_brand_id,i_brand] - Filter [i_manufact_id,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id] + WholeStageCodegen + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manufact_id] + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manufact_id] [i_brand,i_brand_id,i_item_sk,i_manufact_id] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/explain.txt index fffcc5ca3..d96b8f5b0 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/explain.txt @@ -1,303 +1,52 @@ == Physical Plan == -TakeOrderedAndProject (53) -+- * Project (52) - +- * BroadcastHashJoin Inner BuildRight (51) - :- * Project (45) - : +- * BroadcastHashJoin Inner BuildRight (44) - : :- * Project (39) - : : +- * BroadcastHashJoin Inner BuildRight (38) - : : :- * Filter (20) - : : : +- * HashAggregate (19) - : : : +- Exchange (18) - : : : +- * HashAggregate (17) - : : : +- * Project (16) - : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : :- * Project (10) - : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.web_returns (1) - : : : : +- BroadcastExchange (8) - : : : : +- * Project (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.date_dim (4) - : : : +- BroadcastExchange (14) - : : : +- * Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.customer_address (11) - : : +- BroadcastExchange (37) - : : +- * Filter (36) - : : +- * HashAggregate (35) - : : +- Exchange (34) - : : +- * HashAggregate (33) - : : +- * HashAggregate (32) - : : +- Exchange (31) - : : +- * HashAggregate (30) - : : +- * Project (29) - : : +- * BroadcastHashJoin Inner BuildRight (28) - : : :- * Project (26) - : : : +- * BroadcastHashJoin Inner BuildRight (25) - : : : :- * Filter (23) - : : : : +- * ColumnarToRow (22) - : : : : +- Scan parquet default.web_returns (21) - : : : +- ReusedExchange (24) - : : +- ReusedExchange (27) - : +- BroadcastExchange (43) - : +- * Filter (42) - : +- * ColumnarToRow (41) - : +- Scan parquet default.customer (40) - +- BroadcastExchange (50) - +- * Project (49) - +- * Filter (48) - +- * ColumnarToRow (47) - +- Scan parquet default.customer_address (46) - - -(1) Scan parquet default.web_returns -Output [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_returns] -PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk), IsNotNull(wr_returning_customer_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] - -(3) Filter [codegen id : 3] -Input [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] -Condition : ((isnotnull(wr_returned_date_sk#1) AND isnotnull(wr_returning_addr_sk#3)) AND isnotnull(wr_returning_customer_sk#2)) - -(4) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_year#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#5, d_year#6] - -(6) Filter [codegen id : 1] -Input [2]: [d_date_sk#5, d_year#6] -Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2002)) AND isnotnull(d_date_sk#5)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_year#6] - -(8) BroadcastExchange -Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] - -(9) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [wr_returned_date_sk#1] -Right keys [1]: [cast(d_date_sk#5 as bigint)] -Join condition: None - -(10) Project [codegen id : 3] -Output [3]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] -Input [5]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4, d_date_sk#5] - -(11) Scan parquet default.customer_address -Output [2]: [ca_address_sk#8, ca_state#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [2]: [ca_address_sk#8, ca_state#9] - -(13) Filter [codegen id : 2] -Input [2]: [ca_address_sk#8, ca_state#9] -Condition : (isnotnull(ca_address_sk#8) AND isnotnull(ca_state#9)) - -(14) BroadcastExchange -Input [2]: [ca_address_sk#8, ca_state#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] - -(15) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [wr_returning_addr_sk#3] -Right keys [1]: [cast(ca_address_sk#8 as bigint)] -Join condition: None - -(16) Project [codegen id : 3] -Output [3]: [wr_returning_customer_sk#2, wr_return_amt#4, ca_state#9] -Input [5]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4, ca_address_sk#8, ca_state#9] - -(17) HashAggregate [codegen id : 3] -Input [3]: [wr_returning_customer_sk#2, wr_return_amt#4, ca_state#9] -Keys [2]: [wr_returning_customer_sk#2, ca_state#9] -Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#4))] -Aggregate Attributes [1]: [sum#11] -Results [3]: [wr_returning_customer_sk#2, ca_state#9, sum#12] - -(18) Exchange -Input [3]: [wr_returning_customer_sk#2, ca_state#9, sum#12] -Arguments: hashpartitioning(wr_returning_customer_sk#2, ca_state#9, 5), true, [id=#13] - -(19) HashAggregate [codegen id : 11] -Input [3]: [wr_returning_customer_sk#2, ca_state#9, sum#12] -Keys [2]: [wr_returning_customer_sk#2, ca_state#9] -Functions [1]: [sum(UnscaledValue(wr_return_amt#4))] -Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#4))#14] -Results [3]: [wr_returning_customer_sk#2 AS ctr_customer_sk#15, ca_state#9 AS ctr_state#16, MakeDecimal(sum(UnscaledValue(wr_return_amt#4))#14,17,2) AS ctr_total_return#17] - -(20) Filter [codegen id : 11] -Input [3]: [ctr_customer_sk#15, ctr_state#16, ctr_total_return#17] -Condition : isnotnull(ctr_total_return#17) - -(21) Scan parquet default.web_returns -Output [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_returns] -PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk)] -ReadSchema: struct - -(22) ColumnarToRow [codegen id : 6] -Input [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] - -(23) Filter [codegen id : 6] -Input [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] -Condition : (isnotnull(wr_returned_date_sk#1) AND isnotnull(wr_returning_addr_sk#3)) - -(24) ReusedExchange [Reuses operator id: 8] -Output [1]: [d_date_sk#5] - -(25) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [wr_returned_date_sk#1] -Right keys [1]: [cast(d_date_sk#5 as bigint)] -Join condition: None - -(26) Project [codegen id : 6] -Output [3]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] -Input [5]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4, d_date_sk#5] - -(27) ReusedExchange [Reuses operator id: 14] -Output [2]: [ca_address_sk#8, ca_state#9] - -(28) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [wr_returning_addr_sk#3] -Right keys [1]: [cast(ca_address_sk#8 as bigint)] -Join condition: None - -(29) Project [codegen id : 6] -Output [3]: [wr_returning_customer_sk#2, wr_return_amt#4, ca_state#9] -Input [5]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4, ca_address_sk#8, ca_state#9] - -(30) HashAggregate [codegen id : 6] -Input [3]: [wr_returning_customer_sk#2, wr_return_amt#4, ca_state#9] -Keys [2]: [wr_returning_customer_sk#2, ca_state#9] -Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#4))] -Aggregate Attributes [1]: [sum#18] -Results [3]: [wr_returning_customer_sk#2, ca_state#9, sum#19] - -(31) Exchange -Input [3]: [wr_returning_customer_sk#2, ca_state#9, sum#19] -Arguments: hashpartitioning(wr_returning_customer_sk#2, ca_state#9, 5), true, [id=#20] - -(32) HashAggregate [codegen id : 7] -Input [3]: [wr_returning_customer_sk#2, ca_state#9, sum#19] -Keys [2]: [wr_returning_customer_sk#2, ca_state#9] -Functions [1]: [sum(UnscaledValue(wr_return_amt#4))] -Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#4))#21] -Results [2]: [ca_state#9 AS ctr_state#16, MakeDecimal(sum(UnscaledValue(wr_return_amt#4))#21,17,2) AS ctr_total_return#17] - -(33) HashAggregate [codegen id : 7] -Input [2]: [ctr_state#16, ctr_total_return#17] -Keys [1]: [ctr_state#16] -Functions [1]: [partial_avg(ctr_total_return#17)] -Aggregate Attributes [2]: [sum#22, count#23] -Results [3]: [ctr_state#16, sum#24, count#25] - -(34) Exchange -Input [3]: [ctr_state#16, sum#24, count#25] -Arguments: hashpartitioning(ctr_state#16, 5), true, [id=#26] - -(35) HashAggregate [codegen id : 8] -Input [3]: [ctr_state#16, sum#24, count#25] -Keys [1]: [ctr_state#16] -Functions [1]: [avg(ctr_total_return#17)] -Aggregate Attributes [1]: [avg(ctr_total_return#17)#27] -Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#17)#27) * 1.200000), DecimalType(24,7), true) AS (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16 AS ctr_state#16#29] - -(36) Filter [codegen id : 8] -Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16#29] -Condition : isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28) - -(37) BroadcastExchange -Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16#29] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#30] - -(38) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ctr_state#16] -Right keys [1]: [ctr_state#16#29] -Join condition: (cast(ctr_total_return#17 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28) - -(39) Project [codegen id : 11] -Output [2]: [ctr_customer_sk#15, ctr_total_return#17] -Input [5]: [ctr_customer_sk#15, ctr_state#16, ctr_total_return#17, (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16#29] - -(40) Scan parquet default.customer -Output [14]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct - -(41) ColumnarToRow [codegen id : 9] -Input [14]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44] - -(42) Filter [codegen id : 9] -Input [14]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44] -Condition : (isnotnull(c_customer_sk#31) AND isnotnull(c_current_addr_sk#33)) - -(43) BroadcastExchange -Input [14]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#45] - -(44) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ctr_customer_sk#15] -Right keys [1]: [cast(c_customer_sk#31 as bigint)] -Join condition: None - -(45) Project [codegen id : 11] -Output [14]: [ctr_total_return#17, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44] -Input [16]: [ctr_customer_sk#15, ctr_total_return#17, c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44] - -(46) Scan parquet default.customer_address -Output [2]: [ca_address_sk#8, ca_state#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(47) ColumnarToRow [codegen id : 10] -Input [2]: [ca_address_sk#8, ca_state#9] - -(48) Filter [codegen id : 10] -Input [2]: [ca_address_sk#8, ca_state#9] -Condition : ((isnotnull(ca_state#9) AND (ca_state#9 = GA)) AND isnotnull(ca_address_sk#8)) - -(49) Project [codegen id : 10] -Output [1]: [ca_address_sk#8] -Input [2]: [ca_address_sk#8, ca_state#9] - -(50) BroadcastExchange -Input [1]: [ca_address_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#46] - -(51) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [c_current_addr_sk#33] -Right keys [1]: [ca_address_sk#8] -Join condition: None - -(52) Project [codegen id : 11] -Output [13]: [c_customer_id#32, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44, ctr_total_return#17] -Input [15]: [ctr_total_return#17, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44, ca_address_sk#8] - -(53) TakeOrderedAndProject -Input [13]: [c_customer_id#32, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44, ctr_total_return#17] -Arguments: 100, [c_customer_id#32 ASC NULLS FIRST, c_salutation#34 ASC NULLS FIRST, c_first_name#35 ASC NULLS FIRST, c_last_name#36 ASC NULLS FIRST, c_preferred_cust_flag#37 ASC NULLS FIRST, c_birth_day#38 ASC NULLS FIRST, c_birth_month#39 ASC NULLS FIRST, c_birth_year#40 ASC NULLS FIRST, c_birth_country#41 ASC NULLS FIRST, c_login#42 ASC NULLS FIRST, c_email_address#43 ASC NULLS FIRST, c_last_review_date#44 ASC NULLS FIRST, ctr_total_return#17 ASC NULLS FIRST], [c_customer_id#32, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44, ctr_total_return#17] - +TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST,c_salutation#2 ASC NULLS FIRST,c_first_name#3 ASC NULLS FIRST,c_last_name#4 ASC NULLS FIRST,c_preferred_cust_flag#5 ASC NULLS FIRST,c_birth_day#6 ASC NULLS FIRST,c_birth_month#7 ASC NULLS FIRST,c_birth_year#8 ASC NULLS FIRST,c_birth_country#9 ASC NULLS FIRST,c_login#10 ASC NULLS FIRST,c_email_address#11 ASC NULLS FIRST,c_last_review_date#12 ASC NULLS FIRST,ctr_total_return#13 ASC NULLS FIRST], output=[c_customer_id#1,c_salutation#2,c_first_name#3,c_last_name#4,c_preferred_cust_flag#5,c_birth_day#6,c_birth_month#7,c_birth_year#8,c_birth_country#9,c_login#10,c_email_address#11,c_last_review_date#12,ctr_total_return#13]) ++- *(11) Project [c_customer_id#1, c_salutation#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_day#6, c_birth_month#7, c_birth_year#8, c_birth_country#9, c_login#10, c_email_address#11, c_last_review_date#12, ctr_total_return#13] + +- *(11) BroadcastHashJoin [c_current_addr_sk#14], [ca_address_sk#15], Inner, BuildRight + :- *(11) Project [ctr_total_return#13, c_customer_id#1, c_current_addr_sk#14, c_salutation#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_day#6, c_birth_month#7, c_birth_year#8, c_birth_country#9, c_login#10, c_email_address#11, c_last_review_date#12] + : +- *(11) BroadcastHashJoin [ctr_customer_sk#16], [cast(c_customer_sk#17 as bigint)], Inner, BuildRight + : :- *(11) Project [ctr_customer_sk#16, ctr_total_return#13] + : : +- *(11) BroadcastHashJoin [ctr_state#18], [ctr_state#18#19], Inner, BuildRight, (cast(ctr_total_return#13 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#20) + : : :- *(11) Filter isnotnull(ctr_total_return#13) + : : : +- *(11) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[sum(UnscaledValue(wr_return_amt#23))]) + : : : +- Exchange hashpartitioning(wr_returning_customer_sk#21, ca_state#22, 5) + : : : +- *(3) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[partial_sum(UnscaledValue(wr_return_amt#23))]) + : : : +- *(3) Project [wr_returning_customer_sk#21, wr_return_amt#23, ca_state#22] + : : : +- *(3) BroadcastHashJoin [wr_returning_addr_sk#24], [cast(ca_address_sk#15 as bigint)], Inner, BuildRight + : : : :- *(3) Project [wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] + : : : : +- *(3) BroadcastHashJoin [wr_returned_date_sk#25], [cast(d_date_sk#26 as bigint)], Inner, BuildRight + : : : : :- *(3) Project [wr_returned_date_sk#25, wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] + : : : : : +- *(3) Filter ((isnotnull(wr_returned_date_sk#25) && isnotnull(wr_returning_addr_sk#24)) && isnotnull(wr_returning_customer_sk#21)) + : : : : : +- *(3) FileScan parquet default.web_returns[wr_returned_date_sk#25,wr_returning_customer_sk#21,wr_returning_addr_sk#24,wr_return_amt#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk), IsNotNull(wr_returning_customer..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [ca_address_sk#15, ca_state#22] + : : : +- *(2) Filter (isnotnull(ca_address_sk#15) && isnotnull(ca_state#22)) + : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) + : : +- *(8) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#20) + : : +- *(8) HashAggregate(keys=[ctr_state#18], functions=[avg(ctr_total_return#13)]) + : : +- Exchange hashpartitioning(ctr_state#18, 5) + : : +- *(7) HashAggregate(keys=[ctr_state#18], functions=[partial_avg(ctr_total_return#13)]) + : : +- *(7) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[sum(UnscaledValue(wr_return_amt#23))]) + : : +- Exchange hashpartitioning(wr_returning_customer_sk#21, ca_state#22, 5) + : : +- *(6) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[partial_sum(UnscaledValue(wr_return_amt#23))]) + : : +- *(6) Project [wr_returning_customer_sk#21, wr_return_amt#23, ca_state#22] + : : +- *(6) BroadcastHashJoin [wr_returning_addr_sk#24], [cast(ca_address_sk#15 as bigint)], Inner, BuildRight + : : :- *(6) Project [wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] + : : : +- *(6) BroadcastHashJoin [wr_returned_date_sk#25], [cast(d_date_sk#26 as bigint)], Inner, BuildRight + : : : :- *(6) Project [wr_returned_date_sk#25, wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] + : : : : +- *(6) Filter (isnotnull(wr_returned_date_sk#25) && isnotnull(wr_returning_addr_sk#24)) + : : : : +- *(6) FileScan parquet default.web_returns[wr_returned_date_sk#25,wr_returning_customer_sk#21,wr_returning_addr_sk#24,wr_return_amt#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/simplified.txt index f9ea8ad58..a17fc2aea 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/simplified.txt @@ -1,78 +1,70 @@ -TakeOrderedAndProject [c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date,ctr_total_return] - WholeStageCodegen (11) - Project [c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date,ctr_total_return] +TakeOrderedAndProject [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_customer_id,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation,ctr_total_return] + WholeStageCodegen + Project [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_customer_id,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation,ctr_total_return] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [ctr_total_return,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] - BroadcastHashJoin [ctr_customer_sk,c_customer_sk] + Project [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation,ctr_total_return] + BroadcastHashJoin [c_customer_sk,ctr_customer_sk] Project [ctr_customer_sk,ctr_total_return] - BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + BroadcastHashJoin [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_state,ctr_state,ctr_total_return] Filter [ctr_total_return] - HashAggregate [wr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(wr_return_amt)),ctr_customer_sk,ctr_state,ctr_total_return,sum] + HashAggregate [ca_state,sum,sum(UnscaledValue(wr_return_amt)),wr_returning_customer_sk] [ctr_customer_sk,ctr_state,ctr_total_return,sum,sum(UnscaledValue(wr_return_amt))] InputAdapter - Exchange [wr_returning_customer_sk,ca_state] #1 - WholeStageCodegen (3) - HashAggregate [wr_returning_customer_sk,ca_state,wr_return_amt] [sum,sum] - Project [wr_returning_customer_sk,wr_return_amt,ca_state] - BroadcastHashJoin [wr_returning_addr_sk,ca_address_sk] - Project [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] - BroadcastHashJoin [wr_returned_date_sk,d_date_sk] - Filter [wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + Exchange [ca_state,wr_returning_customer_sk] #1 + WholeStageCodegen + HashAggregate [ca_state,sum,sum,wr_return_amt,wr_returning_customer_sk] [sum,sum] + Project [ca_state,wr_return_amt,wr_returning_customer_sk] + BroadcastHashJoin [ca_address_sk,wr_returning_addr_sk] + Project [wr_return_amt,wr_returning_addr_sk,wr_returning_customer_sk] + BroadcastHashJoin [d_date_sk,wr_returned_date_sk] + Project [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] + Filter [wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] + Scan parquet default.web_returns [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Filter [ca_address_sk,ca_state] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_state] + WholeStageCodegen + Project [ca_address_sk,ca_state] + Filter [ca_address_sk,ca_state] + Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] InputAdapter BroadcastExchange #4 - WholeStageCodegen (8) + WholeStageCodegen Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] - HashAggregate [ctr_state,sum,count] [avg(ctr_total_return),(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_state,sum,count] + HashAggregate [avg(ctr_total_return),count,ctr_state,sum] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),avg(ctr_total_return),count,ctr_state,sum] InputAdapter Exchange [ctr_state] #5 - WholeStageCodegen (7) - HashAggregate [ctr_state,ctr_total_return] [sum,count,sum,count] - HashAggregate [wr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(wr_return_amt)),ctr_state,ctr_total_return,sum] + WholeStageCodegen + HashAggregate [count,count,ctr_state,ctr_total_return,sum,sum] [count,count,sum,sum] + HashAggregate [ca_state,sum,sum(UnscaledValue(wr_return_amt)),wr_returning_customer_sk] [ctr_state,ctr_total_return,sum,sum(UnscaledValue(wr_return_amt))] InputAdapter - Exchange [wr_returning_customer_sk,ca_state] #6 - WholeStageCodegen (6) - HashAggregate [wr_returning_customer_sk,ca_state,wr_return_amt] [sum,sum] - Project [wr_returning_customer_sk,wr_return_amt,ca_state] - BroadcastHashJoin [wr_returning_addr_sk,ca_address_sk] - Project [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] - BroadcastHashJoin [wr_returned_date_sk,d_date_sk] - Filter [wr_returned_date_sk,wr_returning_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + Exchange [ca_state,wr_returning_customer_sk] #6 + WholeStageCodegen + HashAggregate [ca_state,sum,sum,wr_return_amt,wr_returning_customer_sk] [sum,sum] + Project [ca_state,wr_return_amt,wr_returning_customer_sk] + BroadcastHashJoin [ca_address_sk,wr_returning_addr_sk] + Project [wr_return_amt,wr_returning_addr_sk,wr_returning_customer_sk] + BroadcastHashJoin [d_date_sk,wr_returned_date_sk] + Project [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] + Filter [wr_returned_date_sk,wr_returning_addr_sk] + Scan parquet default.web_returns [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] InputAdapter - ReusedExchange [d_date_sk] #2 + ReusedExchange [d_date_sk] [d_date_sk] #2 InputAdapter - ReusedExchange [ca_address_sk,ca_state] #3 + ReusedExchange [ca_address_sk,ca_state] [ca_address_sk,ca_state] #3 InputAdapter BroadcastExchange #7 - WholeStageCodegen (9) - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] + WholeStageCodegen + Project [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation] [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation] InputAdapter BroadcastExchange #8 - WholeStageCodegen (10) + WholeStageCodegen Project [ca_address_sk] - Filter [ca_state,ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_state] + Filter [ca_address_sk,ca_state] + Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/explain.txt index 0ed0929fa..991349307 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/explain.txt @@ -1,563 +1,100 @@ == Physical Plan == -* Sort (99) -+- Exchange (98) - +- * Project (97) - +- * BroadcastHashJoin Inner BuildRight (96) - :- * Project (82) - : +- * BroadcastHashJoin Inner BuildRight (81) - : :- * BroadcastHashJoin Inner BuildRight (67) - : : :- * Project (53) - : : : +- * BroadcastHashJoin Inner BuildRight (52) - : : : :- * BroadcastHashJoin Inner BuildRight (35) - : : : : :- * HashAggregate (18) - : : : : : +- Exchange (17) - : : : : : +- * HashAggregate (16) - : : : : : +- * Project (15) - : : : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : : : :- * Project (9) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : : : : :- * Filter (3) - : : : : : : : +- * ColumnarToRow (2) - : : : : : : : +- Scan parquet default.store_sales (1) - : : : : : : +- BroadcastExchange (7) - : : : : : : +- * Filter (6) - : : : : : : +- * ColumnarToRow (5) - : : : : : : +- Scan parquet default.date_dim (4) - : : : : : +- BroadcastExchange (13) - : : : : : +- * Filter (12) - : : : : : +- * ColumnarToRow (11) - : : : : : +- Scan parquet default.customer_address (10) - : : : : +- BroadcastExchange (34) - : : : : +- * HashAggregate (33) - : : : : +- Exchange (32) - : : : : +- * HashAggregate (31) - : : : : +- * Project (30) - : : : : +- * BroadcastHashJoin Inner BuildRight (29) - : : : : :- * Project (27) - : : : : : +- * BroadcastHashJoin Inner BuildRight (26) - : : : : : :- * Filter (21) - : : : : : : +- * ColumnarToRow (20) - : : : : : : +- Scan parquet default.store_sales (19) - : : : : : +- BroadcastExchange (25) - : : : : : +- * Filter (24) - : : : : : +- * ColumnarToRow (23) - : : : : : +- Scan parquet default.date_dim (22) - : : : : +- ReusedExchange (28) - : : : +- BroadcastExchange (51) - : : : +- * HashAggregate (50) - : : : +- Exchange (49) - : : : +- * HashAggregate (48) - : : : +- * Project (47) - : : : +- * BroadcastHashJoin Inner BuildRight (46) - : : : :- * Project (44) - : : : : +- * BroadcastHashJoin Inner BuildRight (43) - : : : : :- * Filter (38) - : : : : : +- * ColumnarToRow (37) - : : : : : +- Scan parquet default.store_sales (36) - : : : : +- BroadcastExchange (42) - : : : : +- * Filter (41) - : : : : +- * ColumnarToRow (40) - : : : : +- Scan parquet default.date_dim (39) - : : : +- ReusedExchange (45) - : : +- BroadcastExchange (66) - : : +- * HashAggregate (65) - : : +- Exchange (64) - : : +- * HashAggregate (63) - : : +- * Project (62) - : : +- * BroadcastHashJoin Inner BuildRight (61) - : : :- * Project (59) - : : : +- * BroadcastHashJoin Inner BuildRight (58) - : : : :- * Filter (56) - : : : : +- * ColumnarToRow (55) - : : : : +- Scan parquet default.web_sales (54) - : : : +- ReusedExchange (57) - : : +- ReusedExchange (60) - : +- BroadcastExchange (80) - : +- * HashAggregate (79) - : +- Exchange (78) - : +- * HashAggregate (77) - : +- * Project (76) - : +- * BroadcastHashJoin Inner BuildRight (75) - : :- * Project (73) - : : +- * BroadcastHashJoin Inner BuildRight (72) - : : :- * Filter (70) - : : : +- * ColumnarToRow (69) - : : : +- Scan parquet default.web_sales (68) - : : +- ReusedExchange (71) - : +- ReusedExchange (74) - +- BroadcastExchange (95) - +- * HashAggregate (94) - +- Exchange (93) - +- * HashAggregate (92) - +- * Project (91) - +- * BroadcastHashJoin Inner BuildRight (90) - :- * Project (88) - : +- * BroadcastHashJoin Inner BuildRight (87) - : :- * Filter (85) - : : +- * ColumnarToRow (84) - : : +- Scan parquet default.web_sales (83) - : +- ReusedExchange (86) - +- ReusedExchange (89) - - -(1) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] - -(3) Filter [codegen id : 3] -Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] -Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#2)) - -(4) Scan parquet default.date_dim -Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,1), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] - -(6) Filter [codegen id : 1] -Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] -Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 1)) AND (d_year#5 = 2000)) AND isnotnull(d_date_sk#4)) - -(7) BroadcastExchange -Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] - -(8) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#4] -Join condition: None - -(9) Project [codegen id : 3] -Output [4]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#5, d_qoy#6] -Input [6]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, d_date_sk#4, d_year#5, d_qoy#6] - -(10) Scan parquet default.customer_address -Output [2]: [ca_address_sk#8, ca_county#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [2]: [ca_address_sk#8, ca_county#9] - -(12) Filter [codegen id : 2] -Input [2]: [ca_address_sk#8, ca_county#9] -Condition : (isnotnull(ca_address_sk#8) AND isnotnull(ca_county#9)) - -(13) BroadcastExchange -Input [2]: [ca_address_sk#8, ca_county#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] - -(14) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_addr_sk#2] -Right keys [1]: [ca_address_sk#8] -Join condition: None - -(15) Project [codegen id : 3] -Output [4]: [ss_ext_sales_price#3, d_year#5, d_qoy#6, ca_county#9] -Input [6]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#5, d_qoy#6, ca_address_sk#8, ca_county#9] - -(16) HashAggregate [codegen id : 3] -Input [4]: [ss_ext_sales_price#3, d_year#5, d_qoy#6, ca_county#9] -Keys [3]: [ca_county#9, d_qoy#6, d_year#5] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum#11] -Results [4]: [ca_county#9, d_qoy#6, d_year#5, sum#12] - -(17) Exchange -Input [4]: [ca_county#9, d_qoy#6, d_year#5, sum#12] -Arguments: hashpartitioning(ca_county#9, d_qoy#6, d_year#5, 5), true, [id=#13] - -(18) HashAggregate [codegen id : 24] -Input [4]: [ca_county#9, d_qoy#6, d_year#5, sum#12] -Keys [3]: [ca_county#9, d_qoy#6, d_year#5] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#14] -Results [3]: [ca_county#9, d_year#5, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#14,17,2) AS store_sales#15] - -(19) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)] -ReadSchema: struct - -(20) ColumnarToRow [codegen id : 6] -Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] - -(21) Filter [codegen id : 6] -Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] -Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#2)) - -(22) Scan parquet default.date_dim -Output [3]: [d_date_sk#16, d_year#17, d_qoy#18] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct - -(23) ColumnarToRow [codegen id : 4] -Input [3]: [d_date_sk#16, d_year#17, d_qoy#18] - -(24) Filter [codegen id : 4] -Input [3]: [d_date_sk#16, d_year#17, d_qoy#18] -Condition : ((((isnotnull(d_qoy#18) AND isnotnull(d_year#17)) AND (d_qoy#18 = 2)) AND (d_year#17 = 2000)) AND isnotnull(d_date_sk#16)) - -(25) BroadcastExchange -Input [3]: [d_date_sk#16, d_year#17, d_qoy#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] - -(26) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#16] -Join condition: None - -(27) Project [codegen id : 6] -Output [4]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#17, d_qoy#18] -Input [6]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, d_date_sk#16, d_year#17, d_qoy#18] - -(28) ReusedExchange [Reuses operator id: 13] -Output [2]: [ca_address_sk#20, ca_county#21] - -(29) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_addr_sk#2] -Right keys [1]: [ca_address_sk#20] -Join condition: None - -(30) Project [codegen id : 6] -Output [4]: [ss_ext_sales_price#3, d_year#17, d_qoy#18, ca_county#21] -Input [6]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#17, d_qoy#18, ca_address_sk#20, ca_county#21] - -(31) HashAggregate [codegen id : 6] -Input [4]: [ss_ext_sales_price#3, d_year#17, d_qoy#18, ca_county#21] -Keys [3]: [ca_county#21, d_qoy#18, d_year#17] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum#22] -Results [4]: [ca_county#21, d_qoy#18, d_year#17, sum#23] - -(32) Exchange -Input [4]: [ca_county#21, d_qoy#18, d_year#17, sum#23] -Arguments: hashpartitioning(ca_county#21, d_qoy#18, d_year#17, 5), true, [id=#24] - -(33) HashAggregate [codegen id : 7] -Input [4]: [ca_county#21, d_qoy#18, d_year#17, sum#23] -Keys [3]: [ca_county#21, d_qoy#18, d_year#17] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#25] -Results [2]: [ca_county#21, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#25,17,2) AS store_sales#26] - -(34) BroadcastExchange -Input [2]: [ca_county#21, store_sales#26] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#27] - -(35) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [ca_county#9] -Right keys [1]: [ca_county#21] -Join condition: None - -(36) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)] -ReadSchema: struct - -(37) ColumnarToRow [codegen id : 10] -Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] - -(38) Filter [codegen id : 10] -Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] -Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#2)) - -(39) Scan parquet default.date_dim -Output [3]: [d_date_sk#28, d_year#29, d_qoy#30] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct - -(40) ColumnarToRow [codegen id : 8] -Input [3]: [d_date_sk#28, d_year#29, d_qoy#30] - -(41) Filter [codegen id : 8] -Input [3]: [d_date_sk#28, d_year#29, d_qoy#30] -Condition : ((((isnotnull(d_qoy#30) AND isnotnull(d_year#29)) AND (d_qoy#30 = 3)) AND (d_year#29 = 2000)) AND isnotnull(d_date_sk#28)) - -(42) BroadcastExchange -Input [3]: [d_date_sk#28, d_year#29, d_qoy#30] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] - -(43) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#28] -Join condition: None - -(44) Project [codegen id : 10] -Output [4]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#29, d_qoy#30] -Input [6]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, d_date_sk#28, d_year#29, d_qoy#30] - -(45) ReusedExchange [Reuses operator id: 13] -Output [2]: [ca_address_sk#32, ca_county#33] - -(46) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_addr_sk#2] -Right keys [1]: [ca_address_sk#32] -Join condition: None - -(47) Project [codegen id : 10] -Output [4]: [ss_ext_sales_price#3, d_year#29, d_qoy#30, ca_county#33] -Input [6]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#29, d_qoy#30, ca_address_sk#32, ca_county#33] - -(48) HashAggregate [codegen id : 10] -Input [4]: [ss_ext_sales_price#3, d_year#29, d_qoy#30, ca_county#33] -Keys [3]: [ca_county#33, d_qoy#30, d_year#29] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum#34] -Results [4]: [ca_county#33, d_qoy#30, d_year#29, sum#35] - -(49) Exchange -Input [4]: [ca_county#33, d_qoy#30, d_year#29, sum#35] -Arguments: hashpartitioning(ca_county#33, d_qoy#30, d_year#29, 5), true, [id=#36] - -(50) HashAggregate [codegen id : 11] -Input [4]: [ca_county#33, d_qoy#30, d_year#29, sum#35] -Keys [3]: [ca_county#33, d_qoy#30, d_year#29] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#37] -Results [2]: [ca_county#33, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#37,17,2) AS store_sales#38] - -(51) BroadcastExchange -Input [2]: [ca_county#33, store_sales#38] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#39] - -(52) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [ca_county#21] -Right keys [1]: [ca_county#33] -Join condition: None - -(53) Project [codegen id : 24] -Output [5]: [ca_county#9, d_year#5, store_sales#15, store_sales#26, store_sales#38] -Input [7]: [ca_county#9, d_year#5, store_sales#15, ca_county#21, store_sales#26, ca_county#33, store_sales#38] - -(54) Scan parquet default.web_sales -Output [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)] -ReadSchema: struct - -(55) ColumnarToRow [codegen id : 14] -Input [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] - -(56) Filter [codegen id : 14] -Input [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] -Condition : (isnotnull(ws_sold_date_sk#40) AND isnotnull(ws_bill_addr_sk#41)) - -(57) ReusedExchange [Reuses operator id: 7] -Output [3]: [d_date_sk#43, d_year#44, d_qoy#45] - -(58) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_sold_date_sk#40] -Right keys [1]: [d_date_sk#43] -Join condition: None - -(59) Project [codegen id : 14] -Output [4]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#44, d_qoy#45] -Input [6]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42, d_date_sk#43, d_year#44, d_qoy#45] - -(60) ReusedExchange [Reuses operator id: 13] -Output [2]: [ca_address_sk#46, ca_county#47] - -(61) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_bill_addr_sk#41] -Right keys [1]: [ca_address_sk#46] -Join condition: None - -(62) Project [codegen id : 14] -Output [4]: [ws_ext_sales_price#42, d_year#44, d_qoy#45, ca_county#47] -Input [6]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#44, d_qoy#45, ca_address_sk#46, ca_county#47] - -(63) HashAggregate [codegen id : 14] -Input [4]: [ws_ext_sales_price#42, d_year#44, d_qoy#45, ca_county#47] -Keys [3]: [ca_county#47, d_qoy#45, d_year#44] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#42))] -Aggregate Attributes [1]: [sum#48] -Results [4]: [ca_county#47, d_qoy#45, d_year#44, sum#49] - -(64) Exchange -Input [4]: [ca_county#47, d_qoy#45, d_year#44, sum#49] -Arguments: hashpartitioning(ca_county#47, d_qoy#45, d_year#44, 5), true, [id=#50] - -(65) HashAggregate [codegen id : 15] -Input [4]: [ca_county#47, d_qoy#45, d_year#44, sum#49] -Keys [3]: [ca_county#47, d_qoy#45, d_year#44] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#42))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#42))#51] -Results [2]: [ca_county#47, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#42))#51,17,2) AS web_sales#52] - -(66) BroadcastExchange -Input [2]: [ca_county#47, web_sales#52] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#53] - -(67) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [ca_county#9] -Right keys [1]: [ca_county#47] -Join condition: None - -(68) Scan parquet default.web_sales -Output [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)] -ReadSchema: struct - -(69) ColumnarToRow [codegen id : 18] -Input [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] - -(70) Filter [codegen id : 18] -Input [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] -Condition : (isnotnull(ws_sold_date_sk#40) AND isnotnull(ws_bill_addr_sk#41)) - -(71) ReusedExchange [Reuses operator id: 25] -Output [3]: [d_date_sk#54, d_year#55, d_qoy#56] - -(72) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ws_sold_date_sk#40] -Right keys [1]: [d_date_sk#54] -Join condition: None - -(73) Project [codegen id : 18] -Output [4]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#55, d_qoy#56] -Input [6]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42, d_date_sk#54, d_year#55, d_qoy#56] - -(74) ReusedExchange [Reuses operator id: 13] -Output [2]: [ca_address_sk#57, ca_county#58] - -(75) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ws_bill_addr_sk#41] -Right keys [1]: [ca_address_sk#57] -Join condition: None - -(76) Project [codegen id : 18] -Output [4]: [ws_ext_sales_price#42, d_year#55, d_qoy#56, ca_county#58] -Input [6]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#55, d_qoy#56, ca_address_sk#57, ca_county#58] - -(77) HashAggregate [codegen id : 18] -Input [4]: [ws_ext_sales_price#42, d_year#55, d_qoy#56, ca_county#58] -Keys [3]: [ca_county#58, d_qoy#56, d_year#55] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#42))] -Aggregate Attributes [1]: [sum#59] -Results [4]: [ca_county#58, d_qoy#56, d_year#55, sum#60] - -(78) Exchange -Input [4]: [ca_county#58, d_qoy#56, d_year#55, sum#60] -Arguments: hashpartitioning(ca_county#58, d_qoy#56, d_year#55, 5), true, [id=#61] - -(79) HashAggregate [codegen id : 19] -Input [4]: [ca_county#58, d_qoy#56, d_year#55, sum#60] -Keys [3]: [ca_county#58, d_qoy#56, d_year#55] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#42))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#42))#62] -Results [2]: [ca_county#58, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#42))#62,17,2) AS web_sales#63] - -(80) BroadcastExchange -Input [2]: [ca_county#58, web_sales#63] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#64] - -(81) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [ca_county#47] -Right keys [1]: [ca_county#58] -Join condition: (CASE WHEN (web_sales#52 > 0.00) THEN CheckOverflow((promote_precision(web_sales#63) / promote_precision(web_sales#52)), DecimalType(37,20), true) ELSE null END > CASE WHEN (store_sales#15 > 0.00) THEN CheckOverflow((promote_precision(store_sales#26) / promote_precision(store_sales#15)), DecimalType(37,20), true) ELSE null END) - -(82) Project [codegen id : 24] -Output [8]: [ca_county#9, d_year#5, store_sales#15, store_sales#26, store_sales#38, ca_county#47, web_sales#52, web_sales#63] -Input [9]: [ca_county#9, d_year#5, store_sales#15, store_sales#26, store_sales#38, ca_county#47, web_sales#52, ca_county#58, web_sales#63] - -(83) Scan parquet default.web_sales -Output [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)] -ReadSchema: struct - -(84) ColumnarToRow [codegen id : 22] -Input [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] - -(85) Filter [codegen id : 22] -Input [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] -Condition : (isnotnull(ws_sold_date_sk#40) AND isnotnull(ws_bill_addr_sk#41)) - -(86) ReusedExchange [Reuses operator id: 42] -Output [3]: [d_date_sk#65, d_year#66, d_qoy#67] - -(87) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [ws_sold_date_sk#40] -Right keys [1]: [d_date_sk#65] -Join condition: None - -(88) Project [codegen id : 22] -Output [4]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#66, d_qoy#67] -Input [6]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42, d_date_sk#65, d_year#66, d_qoy#67] - -(89) ReusedExchange [Reuses operator id: 13] -Output [2]: [ca_address_sk#68, ca_county#69] - -(90) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [ws_bill_addr_sk#41] -Right keys [1]: [ca_address_sk#68] -Join condition: None - -(91) Project [codegen id : 22] -Output [4]: [ws_ext_sales_price#42, d_year#66, d_qoy#67, ca_county#69] -Input [6]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#66, d_qoy#67, ca_address_sk#68, ca_county#69] - -(92) HashAggregate [codegen id : 22] -Input [4]: [ws_ext_sales_price#42, d_year#66, d_qoy#67, ca_county#69] -Keys [3]: [ca_county#69, d_qoy#67, d_year#66] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#42))] -Aggregate Attributes [1]: [sum#70] -Results [4]: [ca_county#69, d_qoy#67, d_year#66, sum#71] - -(93) Exchange -Input [4]: [ca_county#69, d_qoy#67, d_year#66, sum#71] -Arguments: hashpartitioning(ca_county#69, d_qoy#67, d_year#66, 5), true, [id=#72] - -(94) HashAggregate [codegen id : 23] -Input [4]: [ca_county#69, d_qoy#67, d_year#66, sum#71] -Keys [3]: [ca_county#69, d_qoy#67, d_year#66] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#42))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#42))#73] -Results [2]: [ca_county#69, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#42))#73,17,2) AS web_sales#74] - -(95) BroadcastExchange -Input [2]: [ca_county#69, web_sales#74] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#75] - -(96) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [ca_county#47] -Right keys [1]: [ca_county#69] -Join condition: (CASE WHEN (web_sales#63 > 0.00) THEN CheckOverflow((promote_precision(web_sales#74) / promote_precision(web_sales#63)), DecimalType(37,20), true) ELSE null END > CASE WHEN (store_sales#26 > 0.00) THEN CheckOverflow((promote_precision(store_sales#38) / promote_precision(store_sales#26)), DecimalType(37,20), true) ELSE null END) - -(97) Project [codegen id : 24] -Output [6]: [ca_county#9, d_year#5, CheckOverflow((promote_precision(web_sales#63) / promote_precision(web_sales#52)), DecimalType(37,20), true) AS web_q1_q2_increase#76, CheckOverflow((promote_precision(store_sales#26) / promote_precision(store_sales#15)), DecimalType(37,20), true) AS store_q1_q2_increase#77, CheckOverflow((promote_precision(web_sales#74) / promote_precision(web_sales#63)), DecimalType(37,20), true) AS web_q2_q3_increase#78, CheckOverflow((promote_precision(store_sales#38) / promote_precision(store_sales#26)), DecimalType(37,20), true) AS store_q2_q3_increase#79] -Input [10]: [ca_county#9, d_year#5, store_sales#15, store_sales#26, store_sales#38, ca_county#47, web_sales#52, web_sales#63, ca_county#69, web_sales#74] - -(98) Exchange -Input [6]: [ca_county#9, d_year#5, web_q1_q2_increase#76, store_q1_q2_increase#77, web_q2_q3_increase#78, store_q2_q3_increase#79] -Arguments: rangepartitioning(ca_county#9 ASC NULLS FIRST, 5), true, [id=#80] - -(99) Sort [codegen id : 25] -Input [6]: [ca_county#9, d_year#5, web_q1_q2_increase#76, store_q1_q2_increase#77, web_q2_q3_increase#78, store_q2_q3_increase#79] -Arguments: [ca_county#9 ASC NULLS FIRST], true, 0 - +*(25) Sort [ca_county#1 ASC NULLS FIRST], true, 0 ++- Exchange rangepartitioning(ca_county#1 ASC NULLS FIRST, 5) + +- *(24) Project [ca_county#1, d_year#2, CheckOverflow((promote_precision(web_sales#3) / promote_precision(web_sales#4)), DecimalType(37,20)) AS web_q1_q2_increase#5, CheckOverflow((promote_precision(store_sales#6) / promote_precision(store_sales#7)), DecimalType(37,20)) AS store_q1_q2_increase#8, CheckOverflow((promote_precision(web_sales#9) / promote_precision(web_sales#3)), DecimalType(37,20)) AS web_q2_q3_increase#10, CheckOverflow((promote_precision(store_sales#11) / promote_precision(store_sales#6)), DecimalType(37,20)) AS store_q2_q3_increase#12] + +- *(24) BroadcastHashJoin [ca_county#13], [ca_county#14], Inner, BuildRight, (CASE WHEN (web_sales#3 > 0.00) THEN CheckOverflow((promote_precision(web_sales#9) / promote_precision(web_sales#3)), DecimalType(37,20)) ELSE null END > CASE WHEN (store_sales#6 > 0.00) THEN CheckOverflow((promote_precision(store_sales#11) / promote_precision(store_sales#6)), DecimalType(37,20)) ELSE null END) + :- *(24) Project [ca_county#1, d_year#2, store_sales#7, store_sales#6, store_sales#11, ca_county#13, web_sales#4, web_sales#3] + : +- *(24) BroadcastHashJoin [ca_county#13], [ca_county#15], Inner, BuildRight, (CASE WHEN (web_sales#4 > 0.00) THEN CheckOverflow((promote_precision(web_sales#3) / promote_precision(web_sales#4)), DecimalType(37,20)) ELSE null END > CASE WHEN (store_sales#7 > 0.00) THEN CheckOverflow((promote_precision(store_sales#6) / promote_precision(store_sales#7)), DecimalType(37,20)) ELSE null END) + : :- *(24) BroadcastHashJoin [ca_county#1], [ca_county#13], Inner, BuildRight + : : :- *(24) Project [ca_county#1, d_year#2, store_sales#7, store_sales#6, store_sales#11] + : : : +- *(24) BroadcastHashJoin [ca_county#16], [ca_county#17], Inner, BuildRight + : : : :- *(24) BroadcastHashJoin [ca_county#1], [ca_county#16], Inner, BuildRight + : : : : :- *(24) HashAggregate(keys=[ca_county#1, d_qoy#18, d_year#2], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) + : : : : : +- Exchange hashpartitioning(ca_county#1, d_qoy#18, d_year#2, 5) + : : : : : +- *(3) HashAggregate(keys=[ca_county#1, d_qoy#18, d_year#2], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#19))]) + : : : : : +- *(3) Project [ss_ext_sales_price#19, d_year#2, d_qoy#18, ca_county#1] + : : : : : +- *(3) BroadcastHashJoin [ss_addr_sk#20], [ca_address_sk#21], Inner, BuildRight + : : : : : :- *(3) Project [ss_addr_sk#20, ss_ext_sales_price#19, d_year#2, d_qoy#18] + : : : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#23], Inner, BuildRight + : : : : : : :- *(3) Project [ss_sold_date_sk#22, ss_addr_sk#20, ss_ext_sales_price#19] + : : : : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#22) && isnotnull(ss_addr_sk#20)) + : : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct + : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : +- *(1) Project [d_date_sk#23, d_year#2, d_qoy#18] + : : : : : : +- *(1) Filter ((((isnotnull(d_qoy#18) && isnotnull(d_year#2)) && (d_qoy#18 = 1)) && (d_year#2 = 2000)) && isnotnull(d_date_sk#23)) + : : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#23,d_year#2,d_qoy#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,1), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(2) Project [ca_address_sk#21, ca_county#1] + : : : : : +- *(2) Filter (isnotnull(ca_address_sk#21) && isnotnull(ca_county#1)) + : : : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#21,ca_county#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : : : +- *(7) HashAggregate(keys=[ca_county#16, d_qoy#24, d_year#25], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) + : : : : +- Exchange hashpartitioning(ca_county#16, d_qoy#24, d_year#25, 5) + : : : : +- *(6) HashAggregate(keys=[ca_county#16, d_qoy#24, d_year#25], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#19))]) + : : : : +- *(6) Project [ss_ext_sales_price#19, d_year#25, d_qoy#24, ca_county#16] + : : : : +- *(6) BroadcastHashJoin [ss_addr_sk#20], [ca_address_sk#26], Inner, BuildRight + : : : : :- *(6) Project [ss_addr_sk#20, ss_ext_sales_price#19, d_year#25, d_qoy#24] + : : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#27], Inner, BuildRight + : : : : : :- *(6) Project [ss_sold_date_sk#22, ss_addr_sk#20, ss_ext_sales_price#19] + : : : : : : +- *(6) Filter (isnotnull(ss_sold_date_sk#22) && isnotnull(ss_addr_sk#20)) + : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(4) Project [d_date_sk#27, d_year#25, d_qoy#24] + : : : : : +- *(4) Filter ((((isnotnull(d_qoy#24) && isnotnull(d_year#25)) && (d_qoy#24 = 2)) && (d_year#25 = 2000)) && isnotnull(d_date_sk#27)) + : : : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#27,d_year#25,d_qoy#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- ReusedExchange [ca_address_sk#26, ca_county#16], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : : +- *(11) HashAggregate(keys=[ca_county#17, d_qoy#28, d_year#29], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) + : : : +- Exchange hashpartitioning(ca_county#17, d_qoy#28, d_year#29, 5) + : : : +- *(10) HashAggregate(keys=[ca_county#17, d_qoy#28, d_year#29], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#19))]) + : : : +- *(10) Project [ss_ext_sales_price#19, d_year#29, d_qoy#28, ca_county#17] + : : : +- *(10) BroadcastHashJoin [ss_addr_sk#20], [ca_address_sk#30], Inner, BuildRight + : : : :- *(10) Project [ss_addr_sk#20, ss_ext_sales_price#19, d_year#29, d_qoy#28] + : : : : +- *(10) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#31], Inner, BuildRight + : : : : :- *(10) Project [ss_sold_date_sk#22, ss_addr_sk#20, ss_ext_sales_price#19] + : : : : : +- *(10) Filter (isnotnull(ss_sold_date_sk#22) && isnotnull(ss_addr_sk#20)) + : : : : : +- *(10) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(8) Project [d_date_sk#31, d_year#29, d_qoy#28] + : : : : +- *(8) Filter ((((isnotnull(d_qoy#28) && isnotnull(d_year#29)) && (d_qoy#28 = 3)) && (d_year#29 = 2000)) && isnotnull(d_date_sk#31)) + : : : : +- *(8) FileScan parquet default.date_dim[d_date_sk#31,d_year#29,d_qoy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [ca_address_sk#30, ca_county#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : +- *(15) HashAggregate(keys=[ca_county#13, d_qoy#32, d_year#33], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) + : : +- Exchange hashpartitioning(ca_county#13, d_qoy#32, d_year#33, 5) + : : +- *(14) HashAggregate(keys=[ca_county#13, d_qoy#32, d_year#33], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#34))]) + : : +- *(14) Project [ws_ext_sales_price#34, d_year#33, d_qoy#32, ca_county#13] + : : +- *(14) BroadcastHashJoin [ws_bill_addr_sk#35], [ca_address_sk#36], Inner, BuildRight + : : :- *(14) Project [ws_bill_addr_sk#35, ws_ext_sales_price#34, d_year#33, d_qoy#32] + : : : +- *(14) BroadcastHashJoin [ws_sold_date_sk#37], [d_date_sk#38], Inner, BuildRight + : : : :- *(14) Project [ws_sold_date_sk#37, ws_bill_addr_sk#35, ws_ext_sales_price#34] + : : : : +- *(14) Filter (isnotnull(ws_sold_date_sk#37) && isnotnull(ws_bill_addr_sk#35)) + : : : : +- *(14) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#38, d_year#33, d_qoy#32], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- ReusedExchange [ca_address_sk#36, ca_county#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- *(19) HashAggregate(keys=[ca_county#15, d_qoy#39, d_year#40], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) + : +- Exchange hashpartitioning(ca_county#15, d_qoy#39, d_year#40, 5) + : +- *(18) HashAggregate(keys=[ca_county#15, d_qoy#39, d_year#40], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#34))]) + : +- *(18) Project [ws_ext_sales_price#34, d_year#40, d_qoy#39, ca_county#15] + : +- *(18) BroadcastHashJoin [ws_bill_addr_sk#35], [ca_address_sk#41], Inner, BuildRight + : :- *(18) Project [ws_bill_addr_sk#35, ws_ext_sales_price#34, d_year#40, d_qoy#39] + : : +- *(18) BroadcastHashJoin [ws_sold_date_sk#37], [d_date_sk#42], Inner, BuildRight + : : :- *(18) Project [ws_sold_date_sk#37, ws_bill_addr_sk#35, ws_ext_sales_price#34] + : : : +- *(18) Filter (isnotnull(ws_sold_date_sk#37) && isnotnull(ws_bill_addr_sk#35)) + : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#42, d_year#40, d_qoy#39], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [ca_address_sk#41, ca_county#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- *(23) HashAggregate(keys=[ca_county#14, d_qoy#43, d_year#44], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) + +- Exchange hashpartitioning(ca_county#14, d_qoy#43, d_year#44, 5) + +- *(22) HashAggregate(keys=[ca_county#14, d_qoy#43, d_year#44], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#34))]) + +- *(22) Project [ws_ext_sales_price#34, d_year#44, d_qoy#43, ca_county#14] + +- *(22) BroadcastHashJoin [ws_bill_addr_sk#35], [ca_address_sk#45], Inner, BuildRight + :- *(22) Project [ws_bill_addr_sk#35, ws_ext_sales_price#34, d_year#44, d_qoy#43] + : +- *(22) BroadcastHashJoin [ws_sold_date_sk#37], [d_date_sk#46], Inner, BuildRight + : :- *(22) Project [ws_sold_date_sk#37, ws_bill_addr_sk#35, ws_ext_sales_price#34] + : : +- *(22) Filter (isnotnull(ws_sold_date_sk#37) && isnotnull(ws_bill_addr_sk#35)) + : : +- *(22) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#46, d_year#44, d_qoy#43], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [ca_address_sk#45, ca_county#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/simplified.txt index 5a5d61f9d..bf41c1709 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/simplified.txt @@ -1,150 +1,140 @@ -WholeStageCodegen (25) +WholeStageCodegen Sort [ca_county] InputAdapter Exchange [ca_county] #1 - WholeStageCodegen (24) - Project [ca_county,d_year,web_sales,web_sales,store_sales,store_sales,web_sales,store_sales] - BroadcastHashJoin [ca_county,ca_county,web_sales,web_sales,store_sales,store_sales] - Project [ca_county,d_year,store_sales,store_sales,store_sales,ca_county,web_sales,web_sales] - BroadcastHashJoin [ca_county,ca_county,web_sales,web_sales,store_sales,store_sales] + WholeStageCodegen + Project [ca_county,d_year,store_sales,store_sales,store_sales,web_sales,web_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county,store_sales,store_sales,web_sales,web_sales] + Project [ca_county,ca_county,d_year,store_sales,store_sales,store_sales,web_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county,store_sales,store_sales,web_sales,web_sales] BroadcastHashJoin [ca_county,ca_county] Project [ca_county,d_year,store_sales,store_sales,store_sales] BroadcastHashJoin [ca_county,ca_county] BroadcastHashJoin [ca_county,ca_county] - HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ss_ext_sales_price))] [store_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter Exchange [ca_county,d_qoy,d_year] #2 - WholeStageCodegen (3) - HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price,d_year,d_qoy,ca_county] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] + WholeStageCodegen + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price,sum,sum] [sum,sum] + Project [ca_county,d_qoy,d_year,ss_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [d_qoy,d_year,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + Filter [ss_addr_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Filter [d_qoy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] + WholeStageCodegen + Project [d_date_sk,d_qoy,d_year] + Filter [d_date_sk,d_qoy,d_year] + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) - Filter [ca_address_sk,ca_county] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_county] + WholeStageCodegen + Project [ca_address_sk,ca_county] + Filter [ca_address_sk,ca_county] + Scan parquet default.customer_address [ca_address_sk,ca_county] [ca_address_sk,ca_county] InputAdapter BroadcastExchange #5 - WholeStageCodegen (7) - HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + WholeStageCodegen + HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ss_ext_sales_price))] [store_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter Exchange [ca_county,d_qoy,d_year] #6 - WholeStageCodegen (6) - HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price,d_year,d_qoy,ca_county] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] + WholeStageCodegen + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price,sum,sum] [sum,sum] + Project [ca_county,d_qoy,d_year,ss_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [d_qoy,d_year,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + Filter [ss_addr_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] InputAdapter BroadcastExchange #7 - WholeStageCodegen (4) - Filter [d_qoy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] + WholeStageCodegen + Project [d_date_sk,d_qoy,d_year] + Filter [d_date_sk,d_qoy,d_year] + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] InputAdapter - ReusedExchange [ca_address_sk,ca_county] #4 + ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 InputAdapter BroadcastExchange #8 - WholeStageCodegen (11) - HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] + WholeStageCodegen + HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ss_ext_sales_price))] [store_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter Exchange [ca_county,d_qoy,d_year] #9 - WholeStageCodegen (10) - HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price,d_year,d_qoy,ca_county] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] + WholeStageCodegen + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price,sum,sum] [sum,sum] + Project [ca_county,d_qoy,d_year,ss_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [d_qoy,d_year,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + Filter [ss_addr_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] InputAdapter BroadcastExchange #10 - WholeStageCodegen (8) - Filter [d_qoy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] + WholeStageCodegen + Project [d_date_sk,d_qoy,d_year] + Filter [d_date_sk,d_qoy,d_year] + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] InputAdapter - ReusedExchange [ca_address_sk,ca_county] #4 + ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 InputAdapter BroadcastExchange #11 - WholeStageCodegen (15) - HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + WholeStageCodegen + HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),web_sales] InputAdapter Exchange [ca_county,d_qoy,d_year] #12 - WholeStageCodegen (14) - HashAggregate [ca_county,d_qoy,d_year,ws_ext_sales_price] [sum,sum] - Project [ws_ext_sales_price,d_year,d_qoy,ca_county] - BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_bill_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] + WholeStageCodegen + HashAggregate [ca_county,d_qoy,d_year,sum,sum,ws_ext_sales_price] [sum,sum] + Project [ca_county,d_qoy,d_year,ws_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [d_qoy,d_year,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk,d_year,d_qoy] #3 + ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #3 InputAdapter - ReusedExchange [ca_address_sk,ca_county] #4 + ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 InputAdapter BroadcastExchange #13 - WholeStageCodegen (19) - HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + WholeStageCodegen + HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),web_sales] InputAdapter Exchange [ca_county,d_qoy,d_year] #14 - WholeStageCodegen (18) - HashAggregate [ca_county,d_qoy,d_year,ws_ext_sales_price] [sum,sum] - Project [ws_ext_sales_price,d_year,d_qoy,ca_county] - BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_bill_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] + WholeStageCodegen + HashAggregate [ca_county,d_qoy,d_year,sum,sum,ws_ext_sales_price] [sum,sum] + Project [ca_county,d_qoy,d_year,ws_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [d_qoy,d_year,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk,d_year,d_qoy] #7 + ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #7 InputAdapter - ReusedExchange [ca_address_sk,ca_county] #4 + ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 InputAdapter BroadcastExchange #15 - WholeStageCodegen (23) - HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] + WholeStageCodegen + HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),web_sales] InputAdapter Exchange [ca_county,d_qoy,d_year] #16 - WholeStageCodegen (22) - HashAggregate [ca_county,d_qoy,d_year,ws_ext_sales_price] [sum,sum] - Project [ws_ext_sales_price,d_year,d_qoy,ca_county] - BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_bill_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] + WholeStageCodegen + HashAggregate [ca_county,d_qoy,d_year,sum,sum,ws_ext_sales_price] [sum,sum] + Project [ca_county,d_qoy,d_year,ws_ext_sales_price] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [d_qoy,d_year,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk,d_year,d_qoy] #10 + ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #10 InputAdapter - ReusedExchange [ca_address_sk,ca_county] #4 + ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/explain.txt index 27f93fd7a..f677286fe 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/explain.txt @@ -1,175 +1,30 @@ == Physical Plan == -CollectLimit (31) -+- * Project (30) - +- * BroadcastHashJoin Inner BuildRight (29) - :- * Project (27) - : +- * BroadcastHashJoin Inner BuildRight (26) - : :- * Project (10) - : : +- * BroadcastHashJoin Inner BuildRight (9) - : : :- * Filter (3) - : : : +- * ColumnarToRow (2) - : : : +- Scan parquet default.catalog_sales (1) - : : +- BroadcastExchange (8) - : : +- * Project (7) - : : +- * Filter (6) - : : +- * ColumnarToRow (5) - : : +- Scan parquet default.item (4) - : +- BroadcastExchange (25) - : +- * Filter (24) - : +- * HashAggregate (23) - : +- Exchange (22) - : +- * HashAggregate (21) - : +- * Project (20) - : +- * BroadcastHashJoin Inner BuildRight (19) - : :- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.catalog_sales (11) - : +- BroadcastExchange (18) - : +- * Project (17) - : +- * Filter (16) - : +- * ColumnarToRow (15) - : +- Scan parquet default.date_dim (14) - +- ReusedExchange (28) - - -(1) Scan parquet default.catalog_sales -Output [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_ext_discount_amt), IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 6] -Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3] - -(3) Filter [codegen id : 6] -Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3] -Condition : ((isnotnull(cs_item_sk#2) AND isnotnull(cs_ext_discount_amt#3)) AND isnotnull(cs_sold_date_sk#1)) - -(4) Scan parquet default.item -Output [2]: [i_item_sk#4, i_manufact_id#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,977), IsNotNull(i_item_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [i_item_sk#4, i_manufact_id#5] - -(6) Filter [codegen id : 1] -Input [2]: [i_item_sk#4, i_manufact_id#5] -Condition : ((isnotnull(i_manufact_id#5) AND (i_manufact_id#5 = 977)) AND isnotnull(i_item_sk#4)) - -(7) Project [codegen id : 1] -Output [1]: [i_item_sk#4] -Input [2]: [i_item_sk#4, i_manufact_id#5] - -(8) BroadcastExchange -Input [1]: [i_item_sk#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] - -(9) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_item_sk#2] -Right keys [1]: [i_item_sk#4] -Join condition: None - -(10) Project [codegen id : 6] -Output [3]: [cs_sold_date_sk#1, cs_ext_discount_amt#3, i_item_sk#4] -Input [4]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3, i_item_sk#4] - -(11) Scan parquet default.catalog_sales -Output [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 3] -Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3] - -(13) Filter [codegen id : 3] -Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3] -Condition : (isnotnull(cs_sold_date_sk#1) AND isnotnull(cs_item_sk#2)) - -(14) Scan parquet default.date_dim -Output [2]: [d_date_sk#7, d_date#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] -ReadSchema: struct - -(15) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#7, d_date#8] - -(16) Filter [codegen id : 2] -Input [2]: [d_date_sk#7, d_date#8] -Condition : (((isnotnull(d_date#8) AND (d_date#8 >= 10983)) AND (d_date#8 <= 11073)) AND isnotnull(d_date_sk#7)) - -(17) Project [codegen id : 2] -Output [1]: [d_date_sk#7] -Input [2]: [d_date_sk#7, d_date#8] - -(18) BroadcastExchange -Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] - -(19) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#7] -Join condition: None - -(20) Project [codegen id : 3] -Output [2]: [cs_item_sk#2, cs_ext_discount_amt#3] -Input [4]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3, d_date_sk#7] - -(21) HashAggregate [codegen id : 3] -Input [2]: [cs_item_sk#2, cs_ext_discount_amt#3] -Keys [1]: [cs_item_sk#2] -Functions [1]: [partial_avg(UnscaledValue(cs_ext_discount_amt#3))] -Aggregate Attributes [2]: [sum#10, count#11] -Results [3]: [cs_item_sk#2, sum#12, count#13] - -(22) Exchange -Input [3]: [cs_item_sk#2, sum#12, count#13] -Arguments: hashpartitioning(cs_item_sk#2, 5), true, [id=#14] - -(23) HashAggregate [codegen id : 4] -Input [3]: [cs_item_sk#2, sum#12, count#13] -Keys [1]: [cs_item_sk#2] -Functions [1]: [avg(UnscaledValue(cs_ext_discount_amt#3))] -Aggregate Attributes [1]: [avg(UnscaledValue(cs_ext_discount_amt#3))#15] -Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(cs_ext_discount_amt#3))#15 / 100.0) as decimal(11,6)))), DecimalType(14,7), true) AS (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16, cs_item_sk#2 AS cs_item_sk#2#17] - -(24) Filter [codegen id : 4] -Input [2]: [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16, cs_item_sk#2#17] -Condition : isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16) - -(25) BroadcastExchange -Input [2]: [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16, cs_item_sk#2#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#18] - -(26) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [i_item_sk#4] -Right keys [1]: [cs_item_sk#2#17] -Join condition: (cast(cs_ext_discount_amt#3 as decimal(14,7)) > (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16) - -(27) Project [codegen id : 6] -Output [1]: [cs_sold_date_sk#1] -Input [5]: [cs_sold_date_sk#1, cs_ext_discount_amt#3, i_item_sk#4, (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16, cs_item_sk#2#17] - -(28) ReusedExchange [Reuses operator id: 18] -Output [1]: [d_date_sk#7] - -(29) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#7] -Join condition: None - -(30) Project [codegen id : 6] -Output [1]: [1 AS excess discount amount #19] -Input [2]: [cs_sold_date_sk#1, d_date_sk#7] - -(31) CollectLimit -Input [1]: [excess discount amount #19] -Arguments: 100 - +CollectLimit 100 ++- *(6) Project [1 AS excess discount amount #1] + +- *(6) BroadcastHashJoin [cs_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight + :- *(6) Project [cs_sold_date_sk#2] + : +- *(6) BroadcastHashJoin [i_item_sk#4], [cs_item_sk#5#6], Inner, BuildRight, (cast(cs_ext_discount_amt#7 as decimal(14,7)) > (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#8) + : :- *(6) Project [cs_sold_date_sk#2, cs_ext_discount_amt#7, i_item_sk#4] + : : +- *(6) BroadcastHashJoin [cs_item_sk#5], [i_item_sk#4], Inner, BuildRight + : : :- *(6) Project [cs_sold_date_sk#2, cs_item_sk#5, cs_ext_discount_amt#7] + : : : +- *(6) Filter ((isnotnull(cs_item_sk#5) && isnotnull(cs_ext_discount_amt#7)) && isnotnull(cs_sold_date_sk#2)) + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#2,cs_item_sk#5,cs_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_ext_discount_amt), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [i_item_sk#4] + : : +- *(1) Filter ((isnotnull(i_manufact_id#9) && (i_manufact_id#9 = 977)) && isnotnull(i_item_sk#4)) + : : +- *(1) FileScan parquet default.item[i_item_sk#4,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,977), IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : +- *(4) Filter isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#8) + : +- *(4) HashAggregate(keys=[cs_item_sk#5], functions=[avg(UnscaledValue(cs_ext_discount_amt#7))]) + : +- Exchange hashpartitioning(cs_item_sk#5, 5) + : +- *(3) HashAggregate(keys=[cs_item_sk#5], functions=[partial_avg(UnscaledValue(cs_ext_discount_amt#7))]) + : +- *(3) Project [cs_item_sk#5, cs_ext_discount_amt#7] + : +- *(3) BroadcastHashJoin [cs_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight + : :- *(3) Project [cs_sold_date_sk#2, cs_item_sk#5, cs_ext_discount_amt#7] + : : +- *(3) Filter (isnotnull(cs_sold_date_sk#2) && isnotnull(cs_item_sk#5)) + : : +- *(3) FileScan parquet default.catalog_sales[cs_sold_date_sk#2,cs_item_sk#5,cs_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#3] + : +- *(2) Filter (((isnotnull(d_date#10) && (cast(d_date#10 as string) >= 2000-01-27)) && (d_date#10 <= 11073)) && isnotnull(d_date_sk#3)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#3,d_date#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#3], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/simplified.txt index 7e4b826cc..aeba4db83 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/simplified.txt @@ -1,45 +1,39 @@ CollectLimit - WholeStageCodegen (6) + WholeStageCodegen Project BroadcastHashJoin [cs_sold_date_sk,d_date_sk] Project [cs_sold_date_sk] - BroadcastHashJoin [i_item_sk,cs_item_sk,cs_ext_discount_amt,(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))] - Project [cs_sold_date_sk,cs_ext_discount_amt,i_item_sk] + BroadcastHashJoin [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),cs_ext_discount_amt,cs_item_sk,i_item_sk] + Project [cs_ext_discount_amt,cs_sold_date_sk,i_item_sk] BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] + Project [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] + Filter [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] InputAdapter BroadcastExchange #1 - WholeStageCodegen (1) + WholeStageCodegen Project [i_item_sk] - Filter [i_manufact_id,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_manufact_id] + Filter [i_item_sk,i_manufact_id] + Scan parquet default.item [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] InputAdapter BroadcastExchange #2 - WholeStageCodegen (4) + WholeStageCodegen Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))] - HashAggregate [cs_item_sk,sum,count] [avg(UnscaledValue(cs_ext_discount_amt)),(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),cs_item_sk,sum,count] + HashAggregate [avg(UnscaledValue(cs_ext_discount_amt)),count,cs_item_sk,sum] [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),avg(UnscaledValue(cs_ext_discount_amt)),count,cs_item_sk,sum] InputAdapter Exchange [cs_item_sk] #3 - WholeStageCodegen (3) - HashAggregate [cs_item_sk,cs_ext_discount_amt] [sum,count,sum,count] - Project [cs_item_sk,cs_ext_discount_amt] + WholeStageCodegen + HashAggregate [count,count,cs_ext_discount_amt,cs_item_sk,sum,sum] [count,count,sum,sum] + Project [cs_ext_discount_amt,cs_item_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk,cs_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] + Project [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter - ReusedExchange [d_date_sk] #4 + ReusedExchange [d_date_sk] [d_date_sk] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/explain.txt index 8d1558a01..2989a8b23 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/explain.txt @@ -1,378 +1,65 @@ == Physical Plan == -TakeOrderedAndProject (67) -+- * HashAggregate (66) - +- Exchange (65) - +- * HashAggregate (64) - +- Union (63) - :- * HashAggregate (32) - : +- Exchange (31) - : +- * HashAggregate (30) - : +- * Project (29) - : +- * BroadcastHashJoin Inner BuildRight (28) - : :- * Project (17) - : : +- * BroadcastHashJoin Inner BuildRight (16) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (15) - : : +- * Project (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.customer_address (11) - : +- BroadcastExchange (27) - : +- * BroadcastHashJoin LeftSemi BuildRight (26) - : :- * Filter (20) - : : +- * ColumnarToRow (19) - : : +- Scan parquet default.item (18) - : +- BroadcastExchange (25) - : +- * Project (24) - : +- * Filter (23) - : +- * ColumnarToRow (22) - : +- Scan parquet default.item (21) - :- * HashAggregate (47) - : +- Exchange (46) - : +- * HashAggregate (45) - : +- * Project (44) - : +- * BroadcastHashJoin Inner BuildRight (43) - : :- * Project (41) - : : +- * BroadcastHashJoin Inner BuildRight (40) - : : :- * Project (38) - : : : +- * BroadcastHashJoin Inner BuildRight (37) - : : : :- * Filter (35) - : : : : +- * ColumnarToRow (34) - : : : : +- Scan parquet default.catalog_sales (33) - : : : +- ReusedExchange (36) - : : +- ReusedExchange (39) - : +- ReusedExchange (42) - +- * HashAggregate (62) - +- Exchange (61) - +- * HashAggregate (60) - +- * Project (59) - +- * BroadcastHashJoin Inner BuildRight (58) - :- * Project (56) - : +- * BroadcastHashJoin Inner BuildRight (55) - : :- * Project (53) - : : +- * BroadcastHashJoin Inner BuildRight (52) - : : :- * Filter (50) - : : : +- * ColumnarToRow (49) - : : : +- Scan parquet default.web_sales (48) - : : +- ReusedExchange (51) - : +- ReusedExchange (54) - +- ReusedExchange (57) - - -(1) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 5] -Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] - -(3) Filter [codegen id : 5] -Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] -Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_item_sk#2)) - -(4) Scan parquet default.date_dim -Output [3]: [d_date_sk#5, d_year#6, d_moy#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,5), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#5, d_year#6, d_moy#7] - -(6) Filter [codegen id : 1] -Input [3]: [d_date_sk#5, d_year#6, d_moy#7] -Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 5)) AND isnotnull(d_date_sk#5)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#5] -Input [3]: [d_date_sk#5, d_year#6, d_moy#7] - -(8) BroadcastExchange -Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] - -(9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#5] -Join condition: None - -(10) Project [codegen id : 5] -Output [3]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] -Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, d_date_sk#5] - -(11) Scan parquet default.customer_address -Output [2]: [ca_address_sk#9, ca_gmt_offset#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [2]: [ca_address_sk#9, ca_gmt_offset#10] - -(13) Filter [codegen id : 2] -Input [2]: [ca_address_sk#9, ca_gmt_offset#10] -Condition : ((isnotnull(ca_gmt_offset#10) AND (ca_gmt_offset#10 = -5.00)) AND isnotnull(ca_address_sk#9)) - -(14) Project [codegen id : 2] -Output [1]: [ca_address_sk#9] -Input [2]: [ca_address_sk#9, ca_gmt_offset#10] - -(15) BroadcastExchange -Input [1]: [ca_address_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] - -(16) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_addr_sk#3] -Right keys [1]: [ca_address_sk#9] -Join condition: None - -(17) Project [codegen id : 5] -Output [2]: [ss_item_sk#2, ss_ext_sales_price#4] -Input [4]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, ca_address_sk#9] - -(18) Scan parquet default.item -Output [2]: [i_item_sk#12, i_manufact_id#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#12, i_manufact_id#13] - -(20) Filter [codegen id : 4] -Input [2]: [i_item_sk#12, i_manufact_id#13] -Condition : isnotnull(i_item_sk#12) - -(21) Scan parquet default.item -Output [2]: [i_category#14, i_manufact_id#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Electronics)] -ReadSchema: struct - -(22) ColumnarToRow [codegen id : 3] -Input [2]: [i_category#14, i_manufact_id#13] - -(23) Filter [codegen id : 3] -Input [2]: [i_category#14, i_manufact_id#13] -Condition : (isnotnull(i_category#14) AND (i_category#14 = Electronics)) - -(24) Project [codegen id : 3] -Output [1]: [i_manufact_id#13 AS i_manufact_id#13#15] -Input [2]: [i_category#14, i_manufact_id#13] - -(25) BroadcastExchange -Input [1]: [i_manufact_id#13#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] - -(26) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_manufact_id#13] -Right keys [1]: [i_manufact_id#13#15] -Join condition: None - -(27) BroadcastExchange -Input [2]: [i_item_sk#12, i_manufact_id#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] - -(28) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#12] -Join condition: None - -(29) Project [codegen id : 5] -Output [2]: [ss_ext_sales_price#4, i_manufact_id#13] -Input [4]: [ss_item_sk#2, ss_ext_sales_price#4, i_item_sk#12, i_manufact_id#13] - -(30) HashAggregate [codegen id : 5] -Input [2]: [ss_ext_sales_price#4, i_manufact_id#13] -Keys [1]: [i_manufact_id#13] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#4))] -Aggregate Attributes [1]: [sum#18] -Results [2]: [i_manufact_id#13, sum#19] - -(31) Exchange -Input [2]: [i_manufact_id#13, sum#19] -Arguments: hashpartitioning(i_manufact_id#13, 5), true, [id=#20] - -(32) HashAggregate [codegen id : 6] -Input [2]: [i_manufact_id#13, sum#19] -Keys [1]: [i_manufact_id#13] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#4))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#4))#21] -Results [2]: [i_manufact_id#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#21,17,2) AS total_sales#22] - -(33) Scan parquet default.catalog_sales -Output [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] -ReadSchema: struct - -(34) ColumnarToRow [codegen id : 11] -Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] - -(35) Filter [codegen id : 11] -Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] -Condition : ((isnotnull(cs_sold_date_sk#23) AND isnotnull(cs_bill_addr_sk#24)) AND isnotnull(cs_item_sk#25)) - -(36) ReusedExchange [Reuses operator id: 8] -Output [1]: [d_date_sk#5] - -(37) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_sold_date_sk#23] -Right keys [1]: [d_date_sk#5] -Join condition: None - -(38) Project [codegen id : 11] -Output [3]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] -Input [5]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, d_date_sk#5] - -(39) ReusedExchange [Reuses operator id: 15] -Output [1]: [ca_address_sk#9] - -(40) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_bill_addr_sk#24] -Right keys [1]: [ca_address_sk#9] -Join condition: None - -(41) Project [codegen id : 11] -Output [2]: [cs_item_sk#25, cs_ext_sales_price#26] -Input [4]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, ca_address_sk#9] - -(42) ReusedExchange [Reuses operator id: 27] -Output [2]: [i_item_sk#12, i_manufact_id#13] - -(43) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_item_sk#25] -Right keys [1]: [i_item_sk#12] -Join condition: None - -(44) Project [codegen id : 11] -Output [2]: [cs_ext_sales_price#26, i_manufact_id#13] -Input [4]: [cs_item_sk#25, cs_ext_sales_price#26, i_item_sk#12, i_manufact_id#13] - -(45) HashAggregate [codegen id : 11] -Input [2]: [cs_ext_sales_price#26, i_manufact_id#13] -Keys [1]: [i_manufact_id#13] -Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#26))] -Aggregate Attributes [1]: [sum#27] -Results [2]: [i_manufact_id#13, sum#28] - -(46) Exchange -Input [2]: [i_manufact_id#13, sum#28] -Arguments: hashpartitioning(i_manufact_id#13, 5), true, [id=#29] - -(47) HashAggregate [codegen id : 12] -Input [2]: [i_manufact_id#13, sum#28] -Keys [1]: [i_manufact_id#13] -Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#26))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#26))#30] -Results [2]: [i_manufact_id#13, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#26))#30,17,2) AS total_sales#31] - -(48) Scan parquet default.web_sales -Output [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] -ReadSchema: struct - -(49) ColumnarToRow [codegen id : 17] -Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] - -(50) Filter [codegen id : 17] -Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] -Condition : ((isnotnull(ws_sold_date_sk#32) AND isnotnull(ws_bill_addr_sk#34)) AND isnotnull(ws_item_sk#33)) - -(51) ReusedExchange [Reuses operator id: 8] -Output [1]: [d_date_sk#5] - -(52) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_sold_date_sk#32] -Right keys [1]: [d_date_sk#5] -Join condition: None - -(53) Project [codegen id : 17] -Output [3]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] -Input [5]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, d_date_sk#5] - -(54) ReusedExchange [Reuses operator id: 15] -Output [1]: [ca_address_sk#9] - -(55) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_bill_addr_sk#34] -Right keys [1]: [ca_address_sk#9] -Join condition: None - -(56) Project [codegen id : 17] -Output [2]: [ws_item_sk#33, ws_ext_sales_price#35] -Input [4]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, ca_address_sk#9] - -(57) ReusedExchange [Reuses operator id: 27] -Output [2]: [i_item_sk#12, i_manufact_id#13] - -(58) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_item_sk#33] -Right keys [1]: [i_item_sk#12] -Join condition: None - -(59) Project [codegen id : 17] -Output [2]: [ws_ext_sales_price#35, i_manufact_id#13] -Input [4]: [ws_item_sk#33, ws_ext_sales_price#35, i_item_sk#12, i_manufact_id#13] - -(60) HashAggregate [codegen id : 17] -Input [2]: [ws_ext_sales_price#35, i_manufact_id#13] -Keys [1]: [i_manufact_id#13] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#35))] -Aggregate Attributes [1]: [sum#36] -Results [2]: [i_manufact_id#13, sum#37] - -(61) Exchange -Input [2]: [i_manufact_id#13, sum#37] -Arguments: hashpartitioning(i_manufact_id#13, 5), true, [id=#38] - -(62) HashAggregate [codegen id : 18] -Input [2]: [i_manufact_id#13, sum#37] -Keys [1]: [i_manufact_id#13] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#35))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#35))#39] -Results [2]: [i_manufact_id#13, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#35))#39,17,2) AS total_sales#40] - -(63) Union - -(64) HashAggregate [codegen id : 19] -Input [2]: [i_manufact_id#13, total_sales#22] -Keys [1]: [i_manufact_id#13] -Functions [1]: [partial_sum(total_sales#22)] -Aggregate Attributes [2]: [sum#41, isEmpty#42] -Results [3]: [i_manufact_id#13, sum#43, isEmpty#44] - -(65) Exchange -Input [3]: [i_manufact_id#13, sum#43, isEmpty#44] -Arguments: hashpartitioning(i_manufact_id#13, 5), true, [id=#45] - -(66) HashAggregate [codegen id : 20] -Input [3]: [i_manufact_id#13, sum#43, isEmpty#44] -Keys [1]: [i_manufact_id#13] -Functions [1]: [sum(total_sales#22)] -Aggregate Attributes [1]: [sum(total_sales#22)#46] -Results [2]: [i_manufact_id#13, sum(total_sales#22)#46 AS total_sales#47] - -(67) TakeOrderedAndProject -Input [2]: [i_manufact_id#13, total_sales#47] -Arguments: 100, [total_sales#47 ASC NULLS FIRST], [i_manufact_id#13, total_sales#47] - +TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output=[i_manufact_id#2,total_sales#1]) ++- *(20) HashAggregate(keys=[i_manufact_id#2], functions=[sum(total_sales#3)]) + +- Exchange hashpartitioning(i_manufact_id#2, 5) + +- *(19) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(total_sales#3)]) + +- Union + :- *(6) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- Exchange hashpartitioning(i_manufact_id#2, 5) + : +- *(5) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- *(5) Project [ss_ext_sales_price#4, i_manufact_id#2] + : +- *(5) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight + : :- *(5) Project [ss_item_sk#5, ss_ext_sales_price#4] + : : +- *(5) BroadcastHashJoin [ss_addr_sk#7], [ca_address_sk#8], Inner, BuildRight + : : :- *(5) Project [ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] + : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#9, ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] + : : : : +- *(5) Filter ((isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#7)) && isnotnull(ss_item_sk#5)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#10] + : : : +- *(1) Filter ((((isnotnull(d_year#11) && isnotnull(d_moy#12)) && (d_year#11 = 1998)) && (d_moy#12 = 5)) && isnotnull(d_date_sk#10)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,5), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [ca_address_sk#8] + : : +- *(2) Filter ((isnotnull(ca_gmt_offset#13) && (ca_gmt_offset#13 = -5.00)) && isnotnull(ca_address_sk#8)) + : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) BroadcastHashJoin [i_manufact_id#2], [i_manufact_id#2#14], LeftSemi, BuildRight + : :- *(4) Project [i_item_sk#6, i_manufact_id#2] + : : +- *(4) Filter isnotnull(i_item_sk#6) + : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_manufact_id#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [i_manufact_id#2 AS i_manufact_id#2#14] + : +- *(3) Filter (isnotnull(i_category#15) && (i_category#15 = Electronics)) + : +- *(3) FileScan parquet default.item[i_category#15,i_manufact_id#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Electronics)], ReadSchema: struct + :- *(12) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) + : +- Exchange hashpartitioning(i_manufact_id#2, 5) + : +- *(11) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) + : +- *(11) Project [cs_ext_sales_price#16, i_manufact_id#2] + : +- *(11) BroadcastHashJoin [cs_item_sk#17], [i_item_sk#6], Inner, BuildRight + : :- *(11) Project [cs_item_sk#17, cs_ext_sales_price#16] + : : +- *(11) BroadcastHashJoin [cs_bill_addr_sk#18], [ca_address_sk#8], Inner, BuildRight + : : :- *(11) Project [cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] + : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#19], [d_date_sk#10], Inner, BuildRight + : : : :- *(11) Project [cs_sold_date_sk#19, cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] + : : : : +- *(11) Filter ((isnotnull(cs_sold_date_sk#19) && isnotnull(cs_bill_addr_sk#18)) && isnotnull(cs_item_sk#17)) + : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [i_item_sk#6, i_manufact_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(18) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(ws_ext_sales_price#20))]) + +- Exchange hashpartitioning(i_manufact_id#2, 5) + +- *(17) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#20))]) + +- *(17) Project [ws_ext_sales_price#20, i_manufact_id#2] + +- *(17) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#6], Inner, BuildRight + :- *(17) Project [ws_item_sk#21, ws_ext_sales_price#20] + : +- *(17) BroadcastHashJoin [ws_bill_addr_sk#22], [ca_address_sk#8], Inner, BuildRight + : :- *(17) Project [ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] + : : +- *(17) BroadcastHashJoin [ws_sold_date_sk#23], [d_date_sk#10], Inner, BuildRight + : : :- *(17) Project [ws_sold_date_sk#23, ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] + : : : +- *(17) Filter ((isnotnull(ws_sold_date_sk#23) && isnotnull(ws_bill_addr_sk#22)) && isnotnull(ws_item_sk#21)) + : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [i_item_sk#6, i_manufact_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/simplified.txt index 14787f0bb..28a14a1ea 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/simplified.txt @@ -1,101 +1,91 @@ -TakeOrderedAndProject [total_sales,i_manufact_id] - WholeStageCodegen (20) - HashAggregate [i_manufact_id,sum,isEmpty] [sum(total_sales),total_sales,sum,isEmpty] +TakeOrderedAndProject [i_manufact_id,total_sales] + WholeStageCodegen + HashAggregate [i_manufact_id,sum,sum(total_sales)] [sum,sum(total_sales),total_sales] InputAdapter Exchange [i_manufact_id] #1 - WholeStageCodegen (19) - HashAggregate [i_manufact_id,total_sales] [sum,isEmpty,sum,isEmpty] + WholeStageCodegen + HashAggregate [i_manufact_id,sum,sum,total_sales] [sum,sum] InputAdapter Union - WholeStageCodegen (6) - HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + WholeStageCodegen + HashAggregate [i_manufact_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_ext_sales_price)),total_sales] InputAdapter Exchange [i_manufact_id] #2 - WholeStageCodegen (5) - HashAggregate [i_manufact_id,ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_addr_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] + WholeStageCodegen + HashAggregate [i_manufact_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [i_manufact_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) + WholeStageCodegen Project [ca_address_sk] - Filter [ca_gmt_offset,ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + Filter [ca_address_sk,ca_gmt_offset] + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) + WholeStageCodegen BroadcastHashJoin [i_manufact_id,i_manufact_id] - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_manufact_id] + Project [i_item_sk,i_manufact_id] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] InputAdapter BroadcastExchange #6 - WholeStageCodegen (3) + WholeStageCodegen Project [i_manufact_id] Filter [i_category] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_category,i_manufact_id] - WholeStageCodegen (12) - HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + Scan parquet default.item [i_category,i_manufact_id] [i_category,i_manufact_id] + WholeStageCodegen + HashAggregate [i_manufact_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum,sum(UnscaledValue(cs_ext_sales_price)),total_sales] InputAdapter Exchange [i_manufact_id] #7 - WholeStageCodegen (11) - HashAggregate [i_manufact_id,cs_ext_sales_price] [sum,sum] + WholeStageCodegen + HashAggregate [cs_ext_sales_price,i_manufact_id,sum,sum] [sum,sum] Project [cs_ext_sales_price,i_manufact_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk,cs_ext_sales_price] - BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] - Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [ca_address_sk,cs_bill_addr_sk] + Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + Filter [cs_bill_addr_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk] #3 + ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter - ReusedExchange [ca_address_sk] #4 + ReusedExchange [ca_address_sk] [ca_address_sk] #4 InputAdapter - ReusedExchange [i_item_sk,i_manufact_id] #5 - WholeStageCodegen (18) - HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + ReusedExchange [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] #5 + WholeStageCodegen + HashAggregate [i_manufact_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),total_sales] InputAdapter Exchange [i_manufact_id] #8 - WholeStageCodegen (17) - HashAggregate [i_manufact_id,ws_ext_sales_price] [sum,sum] - Project [ws_ext_sales_price,i_manufact_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk,ws_ext_sales_price] - BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] - Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_bill_addr_sk,ws_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + WholeStageCodegen + HashAggregate [i_manufact_id,sum,sum,ws_ext_sales_price] [sum,sum] + Project [i_manufact_id,ws_ext_sales_price] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk] #3 + ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter - ReusedExchange [ca_address_sk] #4 + ReusedExchange [ca_address_sk] [ca_address_sk] #4 InputAdapter - ReusedExchange [i_item_sk,i_manufact_id] #5 + ReusedExchange [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/explain.txt index 18f465cae..30008ef45 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/explain.txt @@ -1,203 +1,34 @@ == Physical Plan == -* Sort (36) -+- Exchange (35) - +- * Project (34) - +- * BroadcastHashJoin Inner BuildRight (33) - :- * Filter (28) - : +- * HashAggregate (27) - : +- Exchange (26) - : +- * HashAggregate (25) - : +- * Project (24) - : +- * BroadcastHashJoin Inner BuildRight (23) - : :- * Project (17) - : : +- * BroadcastHashJoin Inner BuildRight (16) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (15) - : : +- * Project (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.store (11) - : +- BroadcastExchange (22) - : +- * Project (21) - : +- * Filter (20) - : +- * ColumnarToRow (19) - : +- Scan parquet default.household_demographics (18) - +- BroadcastExchange (32) - +- * Filter (31) - +- * ColumnarToRow (30) - +- Scan parquet default.customer (29) - - -(1) Scan parquet default.store_sales -Output [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] - -(3) Filter [codegen id : 4] -Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] -Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) - -(4) Scan parquet default.date_dim -Output [3]: [d_date_sk#6, d_year#7, d_dom#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#7, d_dom#8] - -(6) Filter [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#7, d_dom#8] -Condition : (((((d_dom#8 >= 1) AND (d_dom#8 <= 3)) OR ((d_dom#8 >= 25) AND (d_dom#8 <= 28))) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#6] -Input [3]: [d_date_sk#6, d_year#7, d_dom#8] - -(8) BroadcastExchange -Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] - -(9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#6] -Join condition: None - -(10) Project [codegen id : 4] -Output [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] -Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, d_date_sk#6] - -(11) Scan parquet default.store -Output [2]: [s_store_sk#10, s_county#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [2]: [s_store_sk#10, s_county#11] - -(13) Filter [codegen id : 2] -Input [2]: [s_store_sk#10, s_county#11] -Condition : ((isnotnull(s_county#11) AND (s_county#11 = Williamson County)) AND isnotnull(s_store_sk#10)) - -(14) Project [codegen id : 2] -Output [1]: [s_store_sk#10] -Input [2]: [s_store_sk#10, s_county#11] - -(15) BroadcastExchange -Input [1]: [s_store_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(16) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#4] -Right keys [1]: [s_store_sk#10] -Join condition: None - -(17) Project [codegen id : 4] -Output [3]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5] -Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, s_store_sk#10] - -(18) Scan parquet default.household_demographics -Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 3] -Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] - -(20) Filter [codegen id : 3] -Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) - -(21) Project [codegen id : 3] -Output [1]: [hd_demo_sk#13] -Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] - -(22) BroadcastExchange -Input [1]: [hd_demo_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] - -(23) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_hdemo_sk#3] -Right keys [1]: [hd_demo_sk#13] -Join condition: None - -(24) Project [codegen id : 4] -Output [2]: [ss_customer_sk#2, ss_ticket_number#5] -Input [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5, hd_demo_sk#13] - -(25) HashAggregate [codegen id : 4] -Input [2]: [ss_customer_sk#2, ss_ticket_number#5] -Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#18] -Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] - -(26) Exchange -Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] - -(27) HashAggregate [codegen id : 6] -Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#21] -Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count(1)#21 AS cnt#22] - -(28) Filter [codegen id : 6] -Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] -Condition : ((cnt#22 >= 15) AND (cnt#22 <= 20)) - -(29) Scan parquet default.customer -Output [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk)] -ReadSchema: struct - -(30) ColumnarToRow [codegen id : 5] -Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] - -(31) Filter [codegen id : 5] -Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] -Condition : isnotnull(c_customer_sk#23) - -(32) BroadcastExchange -Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] - -(33) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#23] -Join condition: None - -(34) Project [codegen id : 6] -Output [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] - -(35) Exchange -Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, 5), true, [id=#29] - -(36) Sort [codegen id : 7] -Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Arguments: [c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST], true, 0 - +*(7) Sort [c_last_name#1 ASC NULLS FIRST, c_first_name#2 ASC NULLS FIRST, c_salutation#3 ASC NULLS FIRST, c_preferred_cust_flag#4 DESC NULLS LAST], true, 0 ++- Exchange rangepartitioning(c_last_name#1 ASC NULLS FIRST, c_first_name#2 ASC NULLS FIRST, c_salutation#3 ASC NULLS FIRST, c_preferred_cust_flag#4 DESC NULLS LAST, 5) + +- *(6) Project [c_last_name#1, c_first_name#2, c_salutation#3, c_preferred_cust_flag#4, ss_ticket_number#5, cnt#6] + +- *(6) BroadcastHashJoin [ss_customer_sk#7], [c_customer_sk#8], Inner, BuildRight + :- *(6) Filter ((cnt#6 >= 15) && (cnt#6 <= 20)) + : +- *(6) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#7], functions=[count(1)]) + : +- Exchange hashpartitioning(ss_ticket_number#5, ss_customer_sk#7, 5) + : +- *(4) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#7], functions=[partial_count(1)]) + : +- *(4) Project [ss_customer_sk#7, ss_ticket_number#5] + : +- *(4) BroadcastHashJoin [ss_hdemo_sk#9], [hd_demo_sk#10], Inner, BuildRight + : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_ticket_number#5] + : : +- *(4) BroadcastHashJoin [ss_store_sk#11], [s_store_sk#12], Inner, BuildRight + : : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#5] + : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + : : : :- *(4) Project [ss_sold_date_sk#13, ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#5] + : : : : +- *(4) Filter (((isnotnull(ss_sold_date_sk#13) && isnotnull(ss_store_sk#11)) && isnotnull(ss_hdemo_sk#9)) && isnotnull(ss_customer_sk#7)) + : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_customer_sk#7,ss_hdemo_sk#9,ss_store_sk#11,ss_ticket_number#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#14] + : : : +- *(1) Filter (((((d_dom#15 >= 1) && (d_dom#15 <= 3)) || ((d_dom#15 >= 25) && (d_dom#15 <= 28))) && d_year#16 IN (1999,2000,2001)) && isnotnull(d_date_sk#14)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_dom#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),Le..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [s_store_sk#12] + : : +- *(2) Filter ((isnotnull(s_county#17) && (s_county#17 = Williamson County)) && isnotnull(s_store_sk#12)) + : : +- *(2) FileScan parquet default.store[s_store_sk#12,s_county#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [hd_demo_sk#10] + : +- *(3) Filter ((((isnotnull(hd_vehicle_count#18) && ((hd_buy_potential#19 = >10000) || (hd_buy_potential#19 = unknown))) && (hd_vehicle_count#18 > 0)) && (CASE WHEN (hd_vehicle_count#18 > 0) THEN (cast(hd_dep_count#20 as double) / cast(hd_vehicle_count#18 as double)) ELSE null END > 1.2)) && isnotnull(hd_demo_sk#10)) + : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#10,hd_buy_potential#19,hd_dep_count#20,hd_vehicle_count#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknow..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [c_customer_sk#8, c_salutation#3, c_first_name#2, c_last_name#1, c_preferred_cust_flag#4] + +- *(5) Filter isnotnull(c_customer_sk#8) + +- *(5) FileScan parquet default.customer[c_customer_sk#8,c_salutation#3,c_first_name#2,c_last_name#1,c_preferred_cust_flag#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct - -(2) ColumnarToRow [codegen id : 9] -Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] - -(3) Filter [codegen id : 9] -Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] -Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) - -(4) Scan parquet default.store_sales -Output [2]: [ss_sold_date_sk#6, ss_customer_sk#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 2] -Input [2]: [ss_sold_date_sk#6, ss_customer_sk#7] - -(6) Filter [codegen id : 2] -Input [2]: [ss_sold_date_sk#6, ss_customer_sk#7] -Condition : isnotnull(ss_sold_date_sk#6) - -(7) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)] -ReadSchema: struct - -(8) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] - -(9) Filter [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] -Condition : ((((isnotnull(d_year#9) AND isnotnull(d_qoy#10)) AND (d_year#9 = 2002)) AND (d_qoy#10 < 4)) AND isnotnull(d_date_sk#8)) - -(10) Project [codegen id : 1] -Output [1]: [d_date_sk#8] -Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] - -(11) BroadcastExchange -Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] - -(12) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#6] -Right keys [1]: [d_date_sk#8] -Join condition: None - -(13) Project [codegen id : 2] -Output [1]: [ss_customer_sk#7] -Input [3]: [ss_sold_date_sk#6, ss_customer_sk#7, d_date_sk#8] - -(14) BroadcastExchange -Input [1]: [ss_customer_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(15) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [c_customer_sk#3] -Right keys [1]: [ss_customer_sk#7] -Join condition: None - -(16) Scan parquet default.web_sales -Output [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 4] -Input [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] - -(18) Filter [codegen id : 4] -Input [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] -Condition : isnotnull(ws_sold_date_sk#13) - -(19) ReusedExchange [Reuses operator id: 11] -Output [1]: [d_date_sk#8] - -(20) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ws_sold_date_sk#13] -Right keys [1]: [d_date_sk#8] -Join condition: None - -(21) Project [codegen id : 4] -Output [1]: [ws_bill_customer_sk#14] -Input [3]: [ws_sold_date_sk#13, ws_bill_customer_sk#14, d_date_sk#8] - -(22) BroadcastExchange -Input [1]: [ws_bill_customer_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] - -(23) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [c_customer_sk#3] -Right keys [1]: [ws_bill_customer_sk#14] -Join condition: None - -(24) Scan parquet default.catalog_sales -Output [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(25) ColumnarToRow [codegen id : 6] -Input [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] - -(26) Filter [codegen id : 6] -Input [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] -Condition : isnotnull(cs_sold_date_sk#16) - -(27) ReusedExchange [Reuses operator id: 11] -Output [1]: [d_date_sk#8] - -(28) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_sold_date_sk#16] -Right keys [1]: [d_date_sk#8] -Join condition: None - -(29) Project [codegen id : 6] -Output [1]: [cs_ship_customer_sk#17] -Input [3]: [cs_sold_date_sk#16, cs_ship_customer_sk#17, d_date_sk#8] - -(30) BroadcastExchange -Input [1]: [cs_ship_customer_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] - -(31) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [c_customer_sk#3] -Right keys [1]: [cs_ship_customer_sk#17] -Join condition: None - -(32) Filter [codegen id : 9] -Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] -Condition : (exists#2 OR exists#1) - -(33) Project [codegen id : 9] -Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] -Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] - -(34) Scan parquet default.customer_address -Output [2]: [ca_address_sk#19, ca_state#20] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk)] -ReadSchema: struct - -(35) ColumnarToRow [codegen id : 7] -Input [2]: [ca_address_sk#19, ca_state#20] - -(36) Filter [codegen id : 7] -Input [2]: [ca_address_sk#19, ca_state#20] -Condition : isnotnull(ca_address_sk#19) - -(37) BroadcastExchange -Input [2]: [ca_address_sk#19, ca_state#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] - -(38) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [c_current_addr_sk#5] -Right keys [1]: [ca_address_sk#19] -Join condition: None - -(39) Project [codegen id : 9] -Output [2]: [c_current_cdemo_sk#4, ca_state#20] -Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#19, ca_state#20] - -(40) Scan parquet default.customer_demographics -Output [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_demo_sk)] -ReadSchema: struct - -(41) ColumnarToRow [codegen id : 8] -Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] - -(42) Filter [codegen id : 8] -Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Condition : isnotnull(cd_demo_sk#22) - -(43) BroadcastExchange -Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] - -(44) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [c_current_cdemo_sk#4] -Right keys [1]: [cd_demo_sk#22] -Join condition: None - -(45) Project [codegen id : 9] -Output [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Input [8]: [c_current_cdemo_sk#4, ca_state#20, cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] - -(46) HashAggregate [codegen id : 9] -Input [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Keys [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Functions [10]: [partial_count(1), partial_min(cd_dep_count#25), partial_max(cd_dep_count#25), partial_avg(cast(cd_dep_count#25 as bigint)), partial_min(cd_dep_employed_count#26), partial_max(cd_dep_employed_count#26), partial_avg(cast(cd_dep_employed_count#26 as bigint)), partial_min(cd_dep_college_count#27), partial_max(cd_dep_college_count#27), partial_avg(cast(cd_dep_college_count#27 as bigint))] -Aggregate Attributes [13]: [count#29, min#30, max#31, sum#32, count#33, min#34, max#35, sum#36, count#37, min#38, max#39, sum#40, count#41] -Results [19]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50, min#51, max#52, sum#53, count#54] - -(47) Exchange -Input [19]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50, min#51, max#52, sum#53, count#54] -Arguments: hashpartitioning(ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, 5), true, [id=#55] - -(48) HashAggregate [codegen id : 10] -Input [19]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50, min#51, max#52, sum#53, count#54] -Keys [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Functions [10]: [count(1), min(cd_dep_count#25), max(cd_dep_count#25), avg(cast(cd_dep_count#25 as bigint)), min(cd_dep_employed_count#26), max(cd_dep_employed_count#26), avg(cast(cd_dep_employed_count#26 as bigint)), min(cd_dep_college_count#27), max(cd_dep_college_count#27), avg(cast(cd_dep_college_count#27 as bigint))] -Aggregate Attributes [10]: [count(1)#56, min(cd_dep_count#25)#57, max(cd_dep_count#25)#58, avg(cast(cd_dep_count#25 as bigint))#59, min(cd_dep_employed_count#26)#60, max(cd_dep_employed_count#26)#61, avg(cast(cd_dep_employed_count#26 as bigint))#62, min(cd_dep_college_count#27)#63, max(cd_dep_college_count#27)#64, avg(cast(cd_dep_college_count#27 as bigint))#65] -Results [18]: [ca_state#20, cd_gender#23, cd_marital_status#24, count(1)#56 AS cnt1#66, min(cd_dep_count#25)#57 AS min(cd_dep_count)#67, max(cd_dep_count#25)#58 AS max(cd_dep_count)#68, avg(cast(cd_dep_count#25 as bigint))#59 AS avg(cd_dep_count)#69, cd_dep_employed_count#26, count(1)#56 AS cnt2#70, min(cd_dep_employed_count#26)#60 AS min(cd_dep_employed_count)#71, max(cd_dep_employed_count#26)#61 AS max(cd_dep_employed_count)#72, avg(cast(cd_dep_employed_count#26 as bigint))#62 AS avg(cd_dep_employed_count)#73, cd_dep_college_count#27, count(1)#56 AS cnt3#74, min(cd_dep_college_count#27)#63 AS min(cd_dep_college_count)#75, max(cd_dep_college_count#27)#64 AS max(cd_dep_college_count)#76, avg(cast(cd_dep_college_count#27 as bigint))#65 AS avg(cd_dep_college_count)#77, cd_dep_count#25 AS aggOrder#78] - -(49) TakeOrderedAndProject -Input [18]: [ca_state#20, cd_gender#23, cd_marital_status#24, cnt1#66, min(cd_dep_count)#67, max(cd_dep_count)#68, avg(cd_dep_count)#69, cd_dep_employed_count#26, cnt2#70, min(cd_dep_employed_count)#71, max(cd_dep_employed_count)#72, avg(cd_dep_employed_count)#73, cd_dep_college_count#27, cnt3#74, min(cd_dep_college_count)#75, max(cd_dep_college_count)#76, avg(cd_dep_college_count)#77, aggOrder#78] -Arguments: 100, [ca_state#20 ASC NULLS FIRST, cd_gender#23 ASC NULLS FIRST, cd_marital_status#24 ASC NULLS FIRST, aggOrder#78 ASC NULLS FIRST, cd_dep_employed_count#26 ASC NULLS FIRST, cd_dep_college_count#27 ASC NULLS FIRST], [ca_state#20, cd_gender#23, cd_marital_status#24, cnt1#66, min(cd_dep_count)#67, max(cd_dep_count)#68, avg(cd_dep_count)#69, cd_dep_employed_count#26, cnt2#70, min(cd_dep_employed_count)#71, max(cd_dep_employed_count)#72, avg(cd_dep_employed_count)#73, cd_dep_college_count#27, cnt3#74, min(cd_dep_college_count)#75, max(cd_dep_college_count)#76, avg(cd_dep_college_count)#77] - +TakeOrderedAndProject(limit=100, orderBy=[ca_state#1 ASC NULLS FIRST,cd_gender#2 ASC NULLS FIRST,cd_marital_status#3 ASC NULLS FIRST,aggOrder#4 ASC NULLS FIRST,cd_dep_employed_count#5 ASC NULLS FIRST,cd_dep_college_count#6 ASC NULLS FIRST], output=[ca_state#1,cd_gender#2,cd_marital_status#3,cnt1#7,min(cd_dep_count)#8,max(cd_dep_count)#9,avg(cd_dep_count)#10,cd_dep_employed_count#5,cnt2#11,min(cd_dep_employed_count)#12,max(cd_dep_employed_count)#13,avg(cd_dep_employed_count)#14,cd_dep_college_count#6,cnt3#15,min(cd_dep_college_count)#16,max(cd_dep_college_count)#17,avg(cd_dep_college_count)#18]) ++- *(10) HashAggregate(keys=[ca_state#1, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6], functions=[count(1), min(cd_dep_count#19), max(cd_dep_count#19), avg(cast(cd_dep_count#19 as bigint)), min(cd_dep_employed_count#5), max(cd_dep_employed_count#5), avg(cast(cd_dep_employed_count#5 as bigint)), min(cd_dep_college_count#6), max(cd_dep_college_count#6), avg(cast(cd_dep_college_count#6 as bigint))]) + +- Exchange hashpartitioning(ca_state#1, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6, 5) + +- *(9) HashAggregate(keys=[ca_state#1, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6], functions=[partial_count(1), partial_min(cd_dep_count#19), partial_max(cd_dep_count#19), partial_avg(cast(cd_dep_count#19 as bigint)), partial_min(cd_dep_employed_count#5), partial_max(cd_dep_employed_count#5), partial_avg(cast(cd_dep_employed_count#5 as bigint)), partial_min(cd_dep_college_count#6), partial_max(cd_dep_college_count#6), partial_avg(cast(cd_dep_college_count#6 as bigint))]) + +- *(9) Project [ca_state#1, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6] + +- *(9) BroadcastHashJoin [c_current_cdemo_sk#20], [cd_demo_sk#21], Inner, BuildRight + :- *(9) Project [c_current_cdemo_sk#20, ca_state#1] + : +- *(9) BroadcastHashJoin [c_current_addr_sk#22], [ca_address_sk#23], Inner, BuildRight + : :- *(9) Project [c_current_cdemo_sk#20, c_current_addr_sk#22] + : : +- *(9) Filter (exists#24 || exists#25) + : : +- *(9) BroadcastHashJoin [c_customer_sk#26], [cs_ship_customer_sk#27], ExistenceJoin(exists#25), BuildRight + : : :- *(9) BroadcastHashJoin [c_customer_sk#26], [ws_bill_customer_sk#28], ExistenceJoin(exists#24), BuildRight + : : : :- *(9) BroadcastHashJoin [c_customer_sk#26], [ss_customer_sk#29], LeftSemi, BuildRight + : : : : :- *(9) Project [c_customer_sk#26, c_current_cdemo_sk#20, c_current_addr_sk#22] + : : : : : +- *(9) Filter (isnotnull(c_current_addr_sk#22) && isnotnull(c_current_cdemo_sk#20)) + : : : : : +- *(9) FileScan parquet default.customer[c_customer_sk#26,c_current_cdemo_sk#20,c_current_addr_sk#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [ss_customer_sk#29] + : : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#30], [d_date_sk#31], Inner, BuildRight + : : : : :- *(2) Project [ss_sold_date_sk#30, ss_customer_sk#29] + : : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#30) + : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#30,ss_customer_sk#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [d_date_sk#31] + : : : : +- *(1) Filter ((((isnotnull(d_year#32) && isnotnull(d_qoy#33)) && (d_year#32 = 2002)) && (d_qoy#33 < 4)) && isnotnull(d_date_sk#31)) + : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#31,d_year#32,d_qoy#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [ws_bill_customer_sk#28] + : : : +- *(4) BroadcastHashJoin [ws_sold_date_sk#34], [d_date_sk#31], Inner, BuildRight + : : : :- *(4) Project [ws_sold_date_sk#34, ws_bill_customer_sk#28] + : : : : +- *(4) Filter isnotnull(ws_sold_date_sk#34) + : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#34,ws_bill_customer_sk#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#31], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [cs_ship_customer_sk#27] + : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#35], [d_date_sk#31], Inner, BuildRight + : : :- *(6) Project [cs_sold_date_sk#35, cs_ship_customer_sk#27] + : : : +- *(6) Filter isnotnull(cs_sold_date_sk#35) + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#35,cs_ship_customer_sk#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#31], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [ca_address_sk#23, ca_state#1] + : +- *(7) Filter isnotnull(ca_address_sk#23) + : +- *(7) FileScan parquet default.customer_address[ca_address_sk#23,ca_state#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [cd_demo_sk#21, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6] + +- *(8) Filter isnotnull(cd_demo_sk#21) + +- *(8) FileScan parquet default.customer_demographics[cd_demo_sk#21,cd_gender#2,cd_marital_status#3,cd_dep_count#19,cd_dep_employed_count#5,cd_dep_college_count#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] - -(3) Filter [codegen id : 4] -Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] -Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_store_sk#3)) - -(4) Scan parquet default.date_dim -Output [2]: [d_date_sk#6, d_year#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#6, d_year#7] - -(6) Filter [codegen id : 1] -Input [2]: [d_date_sk#6, d_year#7] -Condition : ((isnotnull(d_year#7) AND (d_year#7 = 2001)) AND isnotnull(d_date_sk#6)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#6] -Input [2]: [d_date_sk#6, d_year#7] - -(8) BroadcastExchange -Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] - -(9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#6] -Join condition: None - -(10) Project [codegen id : 4] -Output [4]: [ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] -Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, d_date_sk#6] - -(11) Scan parquet default.item -Output [3]: [i_item_sk#9, i_class#10, i_category#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [3]: [i_item_sk#9, i_class#10, i_category#11] - -(13) Filter [codegen id : 2] -Input [3]: [i_item_sk#9, i_class#10, i_category#11] -Condition : isnotnull(i_item_sk#9) - -(14) BroadcastExchange -Input [3]: [i_item_sk#9, i_class#10, i_category#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] - -(15) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#9] -Join condition: None - -(16) Project [codegen id : 4] -Output [5]: [ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, i_class#10, i_category#11] -Input [7]: [ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, i_item_sk#9, i_class#10, i_category#11] - -(17) Scan parquet default.store -Output [2]: [s_store_sk#13, s_state#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] -ReadSchema: struct - -(18) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#13, s_state#14] - -(19) Filter [codegen id : 3] -Input [2]: [s_store_sk#13, s_state#14] -Condition : ((isnotnull(s_state#14) AND (s_state#14 = TN)) AND isnotnull(s_store_sk#13)) - -(20) Project [codegen id : 3] -Output [1]: [s_store_sk#13] -Input [2]: [s_store_sk#13, s_state#14] - -(21) BroadcastExchange -Input [1]: [s_store_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] - -(22) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#13] -Join condition: None - -(23) Project [codegen id : 4] -Output [4]: [ss_ext_sales_price#4, ss_net_profit#5, i_category#11, i_class#10] -Input [6]: [ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, i_class#10, i_category#11, s_store_sk#13] - -(24) Expand [codegen id : 4] -Input [4]: [ss_ext_sales_price#4, ss_net_profit#5, i_category#11, i_class#10] -Arguments: [List(ss_ext_sales_price#4, ss_net_profit#5, i_category#11, i_class#10, 0), List(ss_ext_sales_price#4, ss_net_profit#5, i_category#11, null, 1), List(ss_ext_sales_price#4, ss_net_profit#5, null, null, 3)], [ss_ext_sales_price#4, ss_net_profit#5, i_category#16, i_class#17, spark_grouping_id#18] - -(25) HashAggregate [codegen id : 4] -Input [5]: [ss_ext_sales_price#4, ss_net_profit#5, i_category#16, i_class#17, spark_grouping_id#18] -Keys [3]: [i_category#16, i_class#17, spark_grouping_id#18] -Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#5)), partial_sum(UnscaledValue(ss_ext_sales_price#4))] -Aggregate Attributes [2]: [sum#19, sum#20] -Results [5]: [i_category#16, i_class#17, spark_grouping_id#18, sum#21, sum#22] - -(26) Exchange -Input [5]: [i_category#16, i_class#17, spark_grouping_id#18, sum#21, sum#22] -Arguments: hashpartitioning(i_category#16, i_class#17, spark_grouping_id#18, 5), true, [id=#23] - -(27) HashAggregate [codegen id : 5] -Input [5]: [i_category#16, i_class#17, spark_grouping_id#18, sum#21, sum#22] -Keys [3]: [i_category#16, i_class#17, spark_grouping_id#18] -Functions [2]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(ss_ext_sales_price#4))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#5))#24, sum(UnscaledValue(ss_ext_sales_price#4))#25] -Results [7]: [CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#24,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#25,17,2))), DecimalType(37,20), true) AS gross_margin#26, i_category#16, i_class#17, (cast((shiftright(spark_grouping_id#18, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#18, 0) & 1) as tinyint)) AS lochierarchy#27, (cast((shiftright(spark_grouping_id#18, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#18, 0) & 1) as tinyint)) AS _w1#28, CASE WHEN (cast(cast((shiftright(spark_grouping_id#18, 0) & 1) as tinyint) as int) = 0) THEN i_category#16 END AS _w2#29, CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#24,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#25,17,2))), DecimalType(37,20), true) AS _w3#30] - -(28) Exchange -Input [7]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, _w1#28, _w2#29, _w3#30] -Arguments: hashpartitioning(_w1#28, _w2#29, 5), true, [id=#31] - -(29) Sort [codegen id : 6] -Input [7]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, _w1#28, _w2#29, _w3#30] -Arguments: [_w1#28 ASC NULLS FIRST, _w2#29 ASC NULLS FIRST, _w3#30 ASC NULLS FIRST], false, 0 - -(30) Window -Input [7]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, _w1#28, _w2#29, _w3#30] -Arguments: [rank(_w3#30) windowspecdefinition(_w1#28, _w2#29, _w3#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#32], [_w1#28, _w2#29], [_w3#30 ASC NULLS FIRST] - -(31) Project [codegen id : 7] -Output [5]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, rank_within_parent#32] -Input [8]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, _w1#28, _w2#29, _w3#30, rank_within_parent#32] - -(32) TakeOrderedAndProject -Input [5]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, rank_within_parent#32] -Arguments: 100, [lochierarchy#27 DESC NULLS LAST, CASE WHEN (cast(lochierarchy#27 as int) = 0) THEN i_category#16 END ASC NULLS FIRST, rank_within_parent#32 ASC NULLS FIRST], [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, rank_within_parent#32] - +TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WHEN (cast(lochierarchy#1 as int) = 0) THEN i_category#2 END ASC NULLS FIRST,rank_within_parent#3 ASC NULLS FIRST], output=[gross_margin#4,i_category#2,i_class#5,lochierarchy#1,rank_within_parent#3]) ++- *(7) Project [gross_margin#4, i_category#2, i_class#5, lochierarchy#1, rank_within_parent#3] + +- Window [rank(_w3#6) windowspecdefinition(_w1#7, _w2#8, _w3#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#3], [_w1#7, _w2#8], [_w3#6 ASC NULLS FIRST] + +- *(6) Sort [_w1#7 ASC NULLS FIRST, _w2#8 ASC NULLS FIRST, _w3#6 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(_w1#7, _w2#8, 5) + +- *(5) HashAggregate(keys=[i_category#2, i_class#5, spark_grouping_id#9], functions=[sum(UnscaledValue(ss_net_profit#10)), sum(UnscaledValue(ss_ext_sales_price#11))]) + +- Exchange hashpartitioning(i_category#2, i_class#5, spark_grouping_id#9, 5) + +- *(4) HashAggregate(keys=[i_category#2, i_class#5, spark_grouping_id#9], functions=[partial_sum(UnscaledValue(ss_net_profit#10)), partial_sum(UnscaledValue(ss_ext_sales_price#11))]) + +- *(4) Expand [List(ss_ext_sales_price#11, ss_net_profit#10, i_category#12, i_class#13, 0), List(ss_ext_sales_price#11, ss_net_profit#10, i_category#12, null, 1), List(ss_ext_sales_price#11, ss_net_profit#10, null, null, 3)], [ss_ext_sales_price#11, ss_net_profit#10, i_category#2, i_class#5, spark_grouping_id#9] + +- *(4) Project [ss_ext_sales_price#11, ss_net_profit#10, i_category#14 AS i_category#12, i_class#15 AS i_class#13] + +- *(4) BroadcastHashJoin [ss_store_sk#16], [s_store_sk#17], Inner, BuildRight + :- *(4) Project [ss_store_sk#16, ss_ext_sales_price#11, ss_net_profit#10, i_class#15, i_category#14] + : +- *(4) BroadcastHashJoin [ss_item_sk#18], [i_item_sk#19], Inner, BuildRight + : :- *(4) Project [ss_item_sk#18, ss_store_sk#16, ss_ext_sales_price#11, ss_net_profit#10] + : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight + : : :- *(4) Project [ss_sold_date_sk#20, ss_item_sk#18, ss_store_sk#16, ss_ext_sales_price#11, ss_net_profit#10] + : : : +- *(4) Filter ((isnotnull(ss_sold_date_sk#20) && isnotnull(ss_item_sk#18)) && isnotnull(ss_store_sk#16)) + : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#20,ss_item_sk#18,ss_store_sk#16,ss_ext_sales_price#11,ss_net_profit#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [i_item_sk#19, i_class#15, i_category#14] + : +- *(2) Filter isnotnull(i_item_sk#19) + : +- *(2) FileScan parquet default.item[i_item_sk#19,i_class#15,i_category#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [s_store_sk#17] + +- *(3) Filter ((isnotnull(s_state#23) && (s_state#23 = TN)) && isnotnull(s_store_sk#17)) + +- *(3) FileScan parquet default.store[s_store_sk#17,s_state#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/simplified.txt index d4a081452..8d87de2ff 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/simplified.txt @@ -1,49 +1,43 @@ -TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,gross_margin,i_class] - WholeStageCodegen (7) +TakeOrderedAndProject [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] + WholeStageCodegen Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] InputAdapter - Window [_w3,_w1,_w2] - WholeStageCodegen (6) + Window [_w1,_w2,_w3] + WholeStageCodegen Sort [_w1,_w2,_w3] InputAdapter Exchange [_w1,_w2] #1 - WholeStageCodegen (5) - HashAggregate [i_category,i_class,spark_grouping_id,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),gross_margin,lochierarchy,_w1,_w2,_w3,sum,sum] + WholeStageCodegen + HashAggregate [i_category,i_class,spark_grouping_id,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] [_w1,_w2,_w3,gross_margin,lochierarchy,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] InputAdapter Exchange [i_category,i_class,spark_grouping_id] #2 - WholeStageCodegen (4) - HashAggregate [i_category,i_class,spark_grouping_id,ss_net_profit,ss_ext_sales_price] [sum,sum,sum,sum] - Expand [ss_ext_sales_price,ss_net_profit,i_category,i_class] - Project [ss_ext_sales_price,ss_net_profit,i_category,i_class] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_item_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + WholeStageCodegen + HashAggregate [i_category,i_class,spark_grouping_id,ss_ext_sales_price,ss_net_profit,sum,sum,sum,sum] [sum,sum,sum,sum] + Expand [i_category,i_class,ss_ext_sales_price,ss_net_profit] + Project [i_category,i_class,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [i_category,i_class,ss_ext_sales_price,ss_net_profit,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_class,i_category] + WholeStageCodegen + Project [i_category,i_class,i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_category,i_class,i_item_sk] [i_category,i_class,i_item_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (3) + WholeStageCodegen Project [s_store_sk] Filter [s_state,s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_state] + Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/explain.txt index 896d6c571..9ffa2c5ee 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/explain.txt @@ -1,160 +1,26 @@ == Physical Plan == -TakeOrderedAndProject (28) -+- * HashAggregate (27) - +- Exchange (26) - +- * HashAggregate (25) - +- * Project (24) - +- * BroadcastHashJoin Inner BuildRight (23) - :- * Project (18) - : +- * BroadcastHashJoin Inner BuildRight (17) - : :- * Project (11) - : : +- * BroadcastHashJoin Inner BuildRight (10) - : : :- * Project (4) - : : : +- * Filter (3) - : : : +- * ColumnarToRow (2) - : : : +- Scan parquet default.item (1) - : : +- BroadcastExchange (9) - : : +- * Project (8) - : : +- * Filter (7) - : : +- * ColumnarToRow (6) - : : +- Scan parquet default.inventory (5) - : +- BroadcastExchange (16) - : +- * Project (15) - : +- * Filter (14) - : +- * ColumnarToRow (13) - : +- Scan parquet default.date_dim (12) - +- BroadcastExchange (22) - +- * Filter (21) - +- * ColumnarToRow (20) - +- Scan parquet default.catalog_sales (19) - - -(1) Scan parquet default.item -Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,68.00), In(i_manufact_id, [677,940,694,808]), IsNotNull(i_item_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] - -(3) Filter [codegen id : 4] -Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] -Condition : ((((isnotnull(i_current_price#4) AND (i_current_price#4 >= 68.00)) AND (cast(i_current_price#4 as decimal(12,2)) <= 98.00)) AND i_manufact_id#5 IN (677,940,694,808)) AND isnotnull(i_item_sk#1)) - -(4) Project [codegen id : 4] -Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] -Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] - -(5) Scan parquet default.inventory -Output [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/inventory] -PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(inv_quantity_on_hand,500), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)] -ReadSchema: struct - -(6) ColumnarToRow [codegen id : 1] -Input [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] - -(7) Filter [codegen id : 1] -Input [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] -Condition : ((((isnotnull(inv_quantity_on_hand#8) AND (inv_quantity_on_hand#8 >= 100)) AND (inv_quantity_on_hand#8 <= 500)) AND isnotnull(inv_item_sk#7)) AND isnotnull(inv_date_sk#6)) - -(8) Project [codegen id : 1] -Output [2]: [inv_date_sk#6, inv_item_sk#7] -Input [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] - -(9) BroadcastExchange -Input [2]: [inv_date_sk#6, inv_item_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#9] - -(10) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_item_sk#1] -Right keys [1]: [inv_item_sk#7] -Join condition: None - -(11) Project [codegen id : 4] -Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#6] -Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#6, inv_item_sk#7] - -(12) Scan parquet default.date_dim -Output [2]: [d_date_sk#10, d_date#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-01), LessThanOrEqual(d_date,2000-04-01), IsNotNull(d_date_sk)] -ReadSchema: struct - -(13) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#10, d_date#11] - -(14) Filter [codegen id : 2] -Input [2]: [d_date_sk#10, d_date#11] -Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 10988)) AND (d_date#11 <= 11048)) AND isnotnull(d_date_sk#10)) - -(15) Project [codegen id : 2] -Output [1]: [d_date_sk#10] -Input [2]: [d_date_sk#10, d_date#11] - -(16) BroadcastExchange -Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(17) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [inv_date_sk#6] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(18) Project [codegen id : 4] -Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] -Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#6, d_date_sk#10] - -(19) Scan parquet default.catalog_sales -Output [1]: [cs_item_sk#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_item_sk)] -ReadSchema: struct - -(20) ColumnarToRow [codegen id : 3] -Input [1]: [cs_item_sk#13] - -(21) Filter [codegen id : 3] -Input [1]: [cs_item_sk#13] -Condition : isnotnull(cs_item_sk#13) - -(22) BroadcastExchange -Input [1]: [cs_item_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] - -(23) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_item_sk#1] -Right keys [1]: [cs_item_sk#13] -Join condition: None - -(24) Project [codegen id : 4] -Output [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, cs_item_sk#13] - -(25) HashAggregate [codegen id : 4] -Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Functions: [] -Aggregate Attributes: [] -Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] - -(26) Exchange -Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), true, [id=#15] - -(27) HashAggregate [codegen id : 5] -Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Functions: [] -Aggregate Attributes: [] -Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] - -(28) TakeOrderedAndProject -Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Arguments: 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_current_price#4] - +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,i_current_price#3]) ++- *(5) HashAggregate(keys=[i_item_id#1, i_item_desc#2, i_current_price#3], functions=[]) + +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, i_current_price#3, 5) + +- *(4) HashAggregate(keys=[i_item_id#1, i_item_desc#2, i_current_price#3], functions=[]) + +- *(4) Project [i_item_id#1, i_item_desc#2, i_current_price#3] + +- *(4) BroadcastHashJoin [i_item_sk#4], [cs_item_sk#5], Inner, BuildRight + :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3] + : +- *(4) BroadcastHashJoin [inv_date_sk#6], [d_date_sk#7], Inner, BuildRight + : :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3, inv_date_sk#6] + : : +- *(4) BroadcastHashJoin [i_item_sk#4], [inv_item_sk#8], Inner, BuildRight + : : :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3] + : : : +- *(4) Filter ((((isnotnull(i_current_price#3) && (i_current_price#3 >= 68.00)) && (cast(i_current_price#3 as decimal(12,2)) <= 98.00)) && i_manufact_id#9 IN (677,940,694,808)) && isnotnull(i_item_sk#4)) + : : : +- *(4) FileScan parquet default.item[i_item_sk#4,i_item_id#1,i_item_desc#2,i_current_price#3,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,68.00), In(i_manufact_id, [677,94..., ReadSchema: struct= 100)) && (inv_quantity_on_hand#10 <= 500)) && isnotnull(inv_item_sk#8)) && isnotnull(inv_date_sk#6)) + : : +- *(1) FileScan parquet default.inventory[inv_date_sk#6,inv_item_sk#8,inv_quantity_on_hand#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(i..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#7] + : +- *(2) Filter (((isnotnull(d_date#11) && (d_date#11 >= 10988)) && (d_date#11 <= 11048)) && isnotnull(d_date_sk#7)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_date#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-01), LessThanOrEqual(d_date,2000-04-01), Is..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [cs_item_sk#5] + +- *(3) Filter isnotnull(cs_item_sk#5) + +- *(3) FileScan parquet default.catalog_sales[cs_item_sk#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/simplified.txt index 6d3216fff..23745be9e 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/simplified.txt @@ -1,41 +1,34 @@ -TakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] - WholeStageCodegen (5) - HashAggregate [i_item_id,i_item_desc,i_current_price] +TakeOrderedAndProject [i_current_price,i_item_desc,i_item_id] + WholeStageCodegen + HashAggregate [i_current_price,i_item_desc,i_item_id] InputAdapter - Exchange [i_item_id,i_item_desc,i_current_price] #1 - WholeStageCodegen (4) - HashAggregate [i_item_id,i_item_desc,i_current_price] - Project [i_item_id,i_item_desc,i_current_price] - BroadcastHashJoin [i_item_sk,cs_item_sk] - Project [i_item_sk,i_item_id,i_item_desc,i_current_price] - BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk] + Exchange [i_current_price,i_item_desc,i_item_id] #1 + WholeStageCodegen + HashAggregate [i_current_price,i_item_desc,i_item_id] + Project [i_current_price,i_item_desc,i_item_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk,inv_date_sk] BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [i_item_sk,i_item_id,i_item_desc,i_current_price] - Filter [i_current_price,i_manufact_id,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk] + Filter [i_current_price,i_item_sk,i_manufact_id] + Scan parquet default.item [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) + WholeStageCodegen Project [inv_date_sk,inv_item_sk] - Filter [inv_quantity_on_hand,inv_item_sk,inv_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) - Filter [cs_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_item_sk] + WholeStageCodegen + Project [cs_item_sk] + Filter [cs_item_sk] + Scan parquet default.catalog_sales [cs_item_sk] [cs_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/explain.txt index 74454cf32..8072eda0e 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/explain.txt @@ -1,323 +1,55 @@ == Physical Plan == -* HashAggregate (54) -+- Exchange (53) - +- * HashAggregate (52) - +- * HashAggregate (51) - +- * HashAggregate (50) - +- * HashAggregate (49) - +- * HashAggregate (48) - +- * HashAggregate (47) - +- Exchange (46) - +- * HashAggregate (45) - +- * BroadcastHashJoin LeftSemi BuildRight (44) - :- * BroadcastHashJoin LeftSemi BuildRight (30) - : :- * Project (16) - : : +- * BroadcastHashJoin Inner BuildRight (15) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.customer (11) - : +- BroadcastExchange (29) - : +- * HashAggregate (28) - : +- Exchange (27) - : +- * HashAggregate (26) - : +- * Project (25) - : +- * BroadcastHashJoin Inner BuildRight (24) - : :- * Project (22) - : : +- * BroadcastHashJoin Inner BuildRight (21) - : : :- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.catalog_sales (17) - : : +- ReusedExchange (20) - : +- ReusedExchange (23) - +- BroadcastExchange (43) - +- * HashAggregate (42) - +- Exchange (41) - +- * HashAggregate (40) - +- * Project (39) - +- * BroadcastHashJoin Inner BuildRight (38) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) - : :- * Filter (33) - : : +- * ColumnarToRow (32) - : : +- Scan parquet default.web_sales (31) - : +- ReusedExchange (34) - +- ReusedExchange (37) - - -(1) Scan parquet default.store_sales -Output [2]: [ss_sold_date_sk#1, ss_customer_sk#2] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 11] -Input [2]: [ss_sold_date_sk#1, ss_customer_sk#2] - -(3) Filter [codegen id : 11] -Input [2]: [ss_sold_date_sk#1, ss_customer_sk#2] -Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_customer_sk#2)) - -(4) Scan parquet default.date_dim -Output [3]: [d_date_sk#3, d_date#4, d_month_seq#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] - -(6) Filter [codegen id : 1] -Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] -Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#3)) - -(7) Project [codegen id : 1] -Output [2]: [d_date_sk#3, d_date#4] -Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] - -(8) BroadcastExchange -Input [2]: [d_date_sk#3, d_date#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] - -(9) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#3] -Join condition: None - -(10) Project [codegen id : 11] -Output [2]: [ss_customer_sk#2, d_date#4] -Input [4]: [ss_sold_date_sk#1, ss_customer_sk#2, d_date_sk#3, d_date#4] - -(11) Scan parquet default.customer -Output [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] - -(13) Filter [codegen id : 2] -Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] -Condition : isnotnull(c_customer_sk#7) - -(14) BroadcastExchange -Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] - -(15) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#7] -Join condition: None - -(16) Project [codegen id : 11] -Output [3]: [d_date#4, c_first_name#8, c_last_name#9] -Input [5]: [ss_customer_sk#2, d_date#4, c_customer_sk#7, c_first_name#8, c_last_name#9] - -(17) Scan parquet default.catalog_sales -Output [2]: [cs_sold_date_sk#11, cs_bill_customer_sk#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)] -ReadSchema: struct - -(18) ColumnarToRow [codegen id : 5] -Input [2]: [cs_sold_date_sk#11, cs_bill_customer_sk#12] - -(19) Filter [codegen id : 5] -Input [2]: [cs_sold_date_sk#11, cs_bill_customer_sk#12] -Condition : (isnotnull(cs_sold_date_sk#11) AND isnotnull(cs_bill_customer_sk#12)) - -(20) ReusedExchange [Reuses operator id: 8] -Output [2]: [d_date_sk#13, d_date#14] - -(21) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_sold_date_sk#11] -Right keys [1]: [d_date_sk#13] -Join condition: None - -(22) Project [codegen id : 5] -Output [2]: [cs_bill_customer_sk#12, d_date#14] -Input [4]: [cs_sold_date_sk#11, cs_bill_customer_sk#12, d_date_sk#13, d_date#14] - -(23) ReusedExchange [Reuses operator id: 14] -Output [3]: [c_customer_sk#15, c_first_name#16, c_last_name#17] - -(24) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_bill_customer_sk#12] -Right keys [1]: [c_customer_sk#15] -Join condition: None - -(25) Project [codegen id : 5] -Output [3]: [c_last_name#17, c_first_name#16, d_date#14] -Input [5]: [cs_bill_customer_sk#12, d_date#14, c_customer_sk#15, c_first_name#16, c_last_name#17] - -(26) HashAggregate [codegen id : 5] -Input [3]: [c_last_name#17, c_first_name#16, d_date#14] -Keys [3]: [c_last_name#17, c_first_name#16, d_date#14] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#17, c_first_name#16, d_date#14] - -(27) Exchange -Input [3]: [c_last_name#17, c_first_name#16, d_date#14] -Arguments: hashpartitioning(c_last_name#17, c_first_name#16, d_date#14, 5), true, [id=#18] - -(28) HashAggregate [codegen id : 6] -Input [3]: [c_last_name#17, c_first_name#16, d_date#14] -Keys [3]: [c_last_name#17, c_first_name#16, d_date#14] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#17, c_first_name#16, d_date#14] - -(29) BroadcastExchange -Input [3]: [c_last_name#17, c_first_name#16, d_date#14] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 0), isnull(input[2, date, true])),false), [id=#19] - -(30) BroadcastHashJoin [codegen id : 11] -Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#4, 0), isnull(d_date#4)] -Right keys [6]: [coalesce(c_last_name#17, ), isnull(c_last_name#17), coalesce(c_first_name#16, ), isnull(c_first_name#16), coalesce(d_date#14, 0), isnull(d_date#14)] -Join condition: None - -(31) Scan parquet default.web_sales -Output [2]: [ws_sold_date_sk#20, ws_bill_customer_sk#21] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)] -ReadSchema: struct - -(32) ColumnarToRow [codegen id : 9] -Input [2]: [ws_sold_date_sk#20, ws_bill_customer_sk#21] - -(33) Filter [codegen id : 9] -Input [2]: [ws_sold_date_sk#20, ws_bill_customer_sk#21] -Condition : (isnotnull(ws_sold_date_sk#20) AND isnotnull(ws_bill_customer_sk#21)) - -(34) ReusedExchange [Reuses operator id: 8] -Output [2]: [d_date_sk#22, d_date#23] - -(35) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_sold_date_sk#20] -Right keys [1]: [d_date_sk#22] -Join condition: None - -(36) Project [codegen id : 9] -Output [2]: [ws_bill_customer_sk#21, d_date#23] -Input [4]: [ws_sold_date_sk#20, ws_bill_customer_sk#21, d_date_sk#22, d_date#23] - -(37) ReusedExchange [Reuses operator id: 14] -Output [3]: [c_customer_sk#24, c_first_name#25, c_last_name#26] - -(38) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_bill_customer_sk#21] -Right keys [1]: [c_customer_sk#24] -Join condition: None - -(39) Project [codegen id : 9] -Output [3]: [c_last_name#26, c_first_name#25, d_date#23] -Input [5]: [ws_bill_customer_sk#21, d_date#23, c_customer_sk#24, c_first_name#25, c_last_name#26] - -(40) HashAggregate [codegen id : 9] -Input [3]: [c_last_name#26, c_first_name#25, d_date#23] -Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#26, c_first_name#25, d_date#23] - -(41) Exchange -Input [3]: [c_last_name#26, c_first_name#25, d_date#23] -Arguments: hashpartitioning(c_last_name#26, c_first_name#25, d_date#23, 5), true, [id=#27] - -(42) HashAggregate [codegen id : 10] -Input [3]: [c_last_name#26, c_first_name#25, d_date#23] -Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#26, c_first_name#25, d_date#23] - -(43) BroadcastExchange -Input [3]: [c_last_name#26, c_first_name#25, d_date#23] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 0), isnull(input[2, date, true])),false), [id=#28] - -(44) BroadcastHashJoin [codegen id : 11] -Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#4, 0), isnull(d_date#4)] -Right keys [6]: [coalesce(c_last_name#26, ), isnull(c_last_name#26), coalesce(c_first_name#25, ), isnull(c_first_name#25), coalesce(d_date#23, 0), isnull(d_date#23)] -Join condition: None - -(45) HashAggregate [codegen id : 11] -Input [3]: [d_date#4, c_first_name#8, c_last_name#9] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#4] - -(46) Exchange -Input [3]: [c_last_name#9, c_first_name#8, d_date#4] -Arguments: hashpartitioning(c_last_name#9, c_first_name#8, d_date#4, 5), true, [id=#29] - -(47) HashAggregate [codegen id : 12] -Input [3]: [c_last_name#9, c_first_name#8, d_date#4] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#4] - -(48) HashAggregate [codegen id : 12] -Input [3]: [c_last_name#9, c_first_name#8, d_date#4] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#4] - -(49) HashAggregate [codegen id : 12] -Input [3]: [c_last_name#9, c_first_name#8, d_date#4] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#4] - -(50) HashAggregate [codegen id : 12] -Input [3]: [c_last_name#9, c_first_name#8, d_date#4] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#4] - -(51) HashAggregate [codegen id : 12] -Input [3]: [c_last_name#9, c_first_name#8, d_date#4] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] -Functions: [] -Aggregate Attributes: [] -Results: [] - -(52) HashAggregate [codegen id : 12] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#30] -Results [1]: [count#31] - -(53) Exchange -Input [1]: [count#31] -Arguments: SinglePartition, true, [id=#32] - -(54) HashAggregate [codegen id : 13] -Input [1]: [count#31] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#33] -Results [1]: [count(1)#33 AS count(1)#34] - +CollectLimit 100 ++- *(13) HashAggregate(keys=[], functions=[count(1)]) + +- Exchange SinglePartition + +- *(12) HashAggregate(keys=[], functions=[partial_count(1)]) + +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#4, ), coalesce(c_first_name#5, ), coalesce(d_date#6, 0)], LeftSemi, BuildRight, (((c_last_name#1 <=> c_last_name#4) && (c_first_name#2 <=> c_first_name#5)) && (d_date#3 <=> d_date#6)) + :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#7, ), coalesce(c_first_name#8, ), coalesce(d_date#9, 0)], LeftSemi, BuildRight, (((c_last_name#1 <=> c_last_name#7) && (c_first_name#2 <=> c_first_name#8)) && (d_date#3 <=> d_date#9)) + : :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, d_date#3, 5) + : : +- *(3) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : : +- *(3) Project [c_last_name#1, c_first_name#2, d_date#3] + : : +- *(3) BroadcastHashJoin [ss_customer_sk#10], [c_customer_sk#11], Inner, BuildRight + : : :- *(3) Project [ss_customer_sk#10, d_date#3] + : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + : : : :- *(3) Project [ss_sold_date_sk#12, ss_customer_sk#10] + : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#12) && isnotnull(ss_customer_sk#10)) + : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_customer_sk#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#13, d_date#3] + : : : +- *(1) Filter (((isnotnull(d_month_seq#14) && (d_month_seq#14 >= 1200)) && (d_month_seq#14 <= 1211)) && isnotnull(d_date_sk#13)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#13,d_date#3,d_month_seq#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [c_customer_sk#11, c_first_name#2, c_last_name#1] + : : +- *(2) Filter isnotnull(c_customer_sk#11) + : : +- *(2) FileScan parquet default.customer[c_customer_sk#11,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) + : +- *(7) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) + : +- Exchange hashpartitioning(c_last_name#7, c_first_name#8, d_date#9, 5) + : +- *(6) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) + : +- *(6) Project [c_last_name#7, c_first_name#8, d_date#9] + : +- *(6) BroadcastHashJoin [cs_bill_customer_sk#15], [c_customer_sk#16], Inner, BuildRight + : :- *(6) Project [cs_bill_customer_sk#15, d_date#9] + : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + : : :- *(6) Project [cs_sold_date_sk#17, cs_bill_customer_sk#15] + : : : +- *(6) Filter (isnotnull(cs_sold_date_sk#17) && isnotnull(cs_bill_customer_sk#15)) + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#17,cs_bill_customer_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#18, d_date#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [c_customer_sk#16, c_first_name#8, c_last_name#7], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) + +- *(11) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) + +- Exchange hashpartitioning(c_last_name#4, c_first_name#5, d_date#6, 5) + +- *(10) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) + +- *(10) Project [c_last_name#4, c_first_name#5, d_date#6] + +- *(10) BroadcastHashJoin [ws_bill_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + :- *(10) Project [ws_bill_customer_sk#19, d_date#6] + : +- *(10) BroadcastHashJoin [ws_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight + : :- *(10) Project [ws_sold_date_sk#21, ws_bill_customer_sk#19] + : : +- *(10) Filter (isnotnull(ws_sold_date_sk#21) && isnotnull(ws_bill_customer_sk#19)) + : : +- *(10) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#22, d_date#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [c_customer_sk#20, c_first_name#5, c_last_name#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/simplified.txt index a5b57a4ac..c7e30522f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/simplified.txt @@ -1,80 +1,75 @@ -WholeStageCodegen (13) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #1 - WholeStageCodegen (12) - HashAggregate [count,count] - HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #2 - WholeStageCodegen (11) - HashAggregate [c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - Project [d_date,c_first_name,c_last_name] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (2) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] +CollectLimit + WholeStageCodegen + HashAggregate [count,count(1)] [count,count(1),count(1)] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [count,count] [count,count] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #2 + WholeStageCodegen + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [d_date,ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date,d_date_sk] + Filter [d_date_sk,d_month_seq] + Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] [d_date,d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_sk] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + HashAggregate [c_first_name,c_last_name,d_date] InputAdapter - BroadcastExchange #5 - WholeStageCodegen (6) - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #6 - WholeStageCodegen (5) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk,cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] - InputAdapter - ReusedExchange [d_date_sk,d_date] #3 - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (10) - HashAggregate [c_last_name,c_first_name,d_date] + Exchange [c_first_name,c_last_name,d_date] #6 + WholeStageCodegen + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_bill_customer_sk,cs_sold_date_sk] + Filter [cs_bill_customer_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] [cs_bill_customer_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #8 + WholeStageCodegen + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Project [d_date,ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_customer_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_sold_date_sk] + InputAdapter + ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #3 InputAdapter - Exchange [c_last_name,c_first_name,d_date] #8 - WholeStageCodegen (9) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_bill_customer_sk,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] - InputAdapter - ReusedExchange [d_date_sk,d_date] #3 - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/explain.txt index b2cc849c6..5455e1849 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/explain.txt @@ -1,292 +1,51 @@ == Physical Plan == -* Sort (52) -+- Exchange (51) - +- * BroadcastHashJoin Inner BuildRight (50) - :- * Project (27) - : +- * Filter (26) - : +- * HashAggregate (25) - : +- Exchange (24) - : +- * HashAggregate (23) - : +- * Project (22) - : +- * BroadcastHashJoin Inner BuildRight (21) - : :- * Project (15) - : : +- * BroadcastHashJoin Inner BuildRight (14) - : : :- * Project (9) - : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.inventory (1) - : : : +- BroadcastExchange (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.item (4) - : : +- BroadcastExchange (13) - : : +- * Filter (12) - : : +- * ColumnarToRow (11) - : : +- Scan parquet default.warehouse (10) - : +- BroadcastExchange (20) - : +- * Project (19) - : +- * Filter (18) - : +- * ColumnarToRow (17) - : +- Scan parquet default.date_dim (16) - +- BroadcastExchange (49) - +- * Project (48) - +- * Filter (47) - +- * HashAggregate (46) - +- Exchange (45) - +- * HashAggregate (44) - +- * Project (43) - +- * BroadcastHashJoin Inner BuildRight (42) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) - : :- * Project (33) - : : +- * BroadcastHashJoin Inner BuildRight (32) - : : :- * Filter (30) - : : : +- * ColumnarToRow (29) - : : : +- Scan parquet default.inventory (28) - : : +- ReusedExchange (31) - : +- ReusedExchange (34) - +- BroadcastExchange (41) - +- * Project (40) - +- * Filter (39) - +- * ColumnarToRow (38) - +- Scan parquet default.date_dim (37) - - -(1) Scan parquet default.inventory -Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/inventory] -PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] - -(3) Filter [codegen id : 4] -Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] -Condition : ((isnotnull(inv_item_sk#2) AND isnotnull(inv_warehouse_sk#3)) AND isnotnull(inv_date_sk#1)) - -(4) Scan parquet default.item -Output [1]: [i_item_sk#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [1]: [i_item_sk#5] - -(6) Filter [codegen id : 1] -Input [1]: [i_item_sk#5] -Condition : isnotnull(i_item_sk#5) - -(7) BroadcastExchange -Input [1]: [i_item_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] - -(8) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [inv_item_sk#2] -Right keys [1]: [i_item_sk#5] -Join condition: None - -(9) Project [codegen id : 4] -Output [4]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5] -Input [5]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5] - -(10) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#7, w_warehouse_name#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/warehouse] -PushedFilters: [IsNotNull(w_warehouse_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] - -(12) Filter [codegen id : 2] -Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] -Condition : isnotnull(w_warehouse_sk#7) - -(13) BroadcastExchange -Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] - -(14) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [inv_warehouse_sk#3] -Right keys [1]: [w_warehouse_sk#7] -Join condition: None - -(15) Project [codegen id : 4] -Output [5]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8] -Input [6]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8] - -(16) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#11, d_moy#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 3] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] - -(18) Filter [codegen id : 3] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] -Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#12)) AND (d_year#11 = 2001)) AND (d_moy#12 = 1)) AND isnotnull(d_date_sk#10)) - -(19) Project [codegen id : 3] -Output [2]: [d_date_sk#10, d_moy#12] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] - -(20) BroadcastExchange -Input [2]: [d_date_sk#10, d_moy#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] - -(21) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [inv_date_sk#1] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(22) Project [codegen id : 4] -Output [5]: [inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8, d_moy#12] -Input [7]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8, d_date_sk#10, d_moy#12] - -(23) HashAggregate [codegen id : 4] -Input [5]: [inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8, d_moy#12] -Keys [4]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12] -Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#4 as double)), partial_avg(cast(inv_quantity_on_hand#4 as bigint))] -Aggregate Attributes [5]: [n#14, avg#15, m2#16, sum#17, count#18] -Results [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, n#19, avg#20, m2#21, sum#22, count#23] - -(24) Exchange -Input [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, n#19, avg#20, m2#21, sum#22, count#23] -Arguments: hashpartitioning(w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, 5), true, [id=#24] - -(25) HashAggregate [codegen id : 10] -Input [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, n#19, avg#20, m2#21, sum#22, count#23] -Keys [4]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12] -Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double)), avg(cast(inv_quantity_on_hand#4 as bigint))] -Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double))#25, avg(cast(inv_quantity_on_hand#4 as bigint))#26] -Results [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, stddev_samp(cast(inv_quantity_on_hand#4 as double))#25 AS stdev#27, avg(cast(inv_quantity_on_hand#4 as bigint))#26 AS mean#28] - -(26) Filter [codegen id : 10] -Input [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, stdev#27, mean#28] -Condition : (CASE WHEN (mean#28 = 0.0) THEN 0.0 ELSE (stdev#27 / mean#28) END > 1.0) - -(27) Project [codegen id : 10] -Output [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, mean#28, CASE WHEN (mean#28 = 0.0) THEN null ELSE (stdev#27 / mean#28) END AS cov#29] -Input [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, stdev#27, mean#28] - -(28) Scan parquet default.inventory -Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/inventory] -PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] -ReadSchema: struct - -(29) ColumnarToRow [codegen id : 8] -Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] - -(30) Filter [codegen id : 8] -Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] -Condition : ((isnotnull(inv_item_sk#2) AND isnotnull(inv_warehouse_sk#3)) AND isnotnull(inv_date_sk#1)) - -(31) ReusedExchange [Reuses operator id: 7] -Output [1]: [i_item_sk#30] - -(32) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [inv_item_sk#2] -Right keys [1]: [i_item_sk#30] -Join condition: None - -(33) Project [codegen id : 8] -Output [4]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#30] -Input [5]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#30] - -(34) ReusedExchange [Reuses operator id: 13] -Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] - -(35) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [inv_warehouse_sk#3] -Right keys [1]: [w_warehouse_sk#31] -Join condition: None - -(36) Project [codegen id : 8] -Output [5]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] -Input [6]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] - -(37) Scan parquet default.date_dim -Output [3]: [d_date_sk#33, d_year#34, d_moy#35] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] -ReadSchema: struct - -(38) ColumnarToRow [codegen id : 7] -Input [3]: [d_date_sk#33, d_year#34, d_moy#35] - -(39) Filter [codegen id : 7] -Input [3]: [d_date_sk#33, d_year#34, d_moy#35] -Condition : ((((isnotnull(d_year#34) AND isnotnull(d_moy#35)) AND (d_year#34 = 2001)) AND (d_moy#35 = 2)) AND isnotnull(d_date_sk#33)) - -(40) Project [codegen id : 7] -Output [2]: [d_date_sk#33, d_moy#35] -Input [3]: [d_date_sk#33, d_year#34, d_moy#35] - -(41) BroadcastExchange -Input [2]: [d_date_sk#33, d_moy#35] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#36] - -(42) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [inv_date_sk#1] -Right keys [1]: [d_date_sk#33] -Join condition: None - -(43) Project [codegen id : 8] -Output [5]: [inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] -Input [7]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_date_sk#33, d_moy#35] - -(44) HashAggregate [codegen id : 8] -Input [5]: [inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] -Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] -Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#4 as double)), partial_avg(cast(inv_quantity_on_hand#4 as bigint))] -Aggregate Attributes [5]: [n#37, avg#38, m2#39, sum#40, count#41] -Results [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#42, avg#43, m2#44, sum#45, count#46] - -(45) Exchange -Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#42, avg#43, m2#44, sum#45, count#46] -Arguments: hashpartitioning(w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, 5), true, [id=#47] - -(46) HashAggregate [codegen id : 9] -Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#42, avg#43, m2#44, sum#45, count#46] -Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] -Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double)), avg(cast(inv_quantity_on_hand#4 as bigint))] -Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double))#48, avg(cast(inv_quantity_on_hand#4 as bigint))#49] -Results [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stddev_samp(cast(inv_quantity_on_hand#4 as double))#48 AS stdev#50, avg(cast(inv_quantity_on_hand#4 as bigint))#49 AS mean#51] - -(47) Filter [codegen id : 9] -Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#50, mean#51] -Condition : (CASE WHEN (mean#51 = 0.0) THEN 0.0 ELSE (stdev#50 / mean#51) END > 1.0) - -(48) Project [codegen id : 9] -Output [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, CASE WHEN (mean#51 = 0.0) THEN null ELSE (stdev#50 / mean#51) END AS cov#52] -Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#50, mean#51] - -(49) BroadcastExchange -Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, cov#52] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [id=#53] - -(50) BroadcastHashJoin [codegen id : 10] -Left keys [2]: [i_item_sk#5, w_warehouse_sk#7] -Right keys [2]: [i_item_sk#30, w_warehouse_sk#31] -Join condition: None - -(51) Exchange -Input [10]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, mean#28, cov#29, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, cov#52] -Arguments: rangepartitioning(w_warehouse_sk#7 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, mean#28 ASC NULLS FIRST, cov#29 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#51 ASC NULLS FIRST, cov#52 ASC NULLS FIRST, 5), true, [id=#54] - -(52) Sort [codegen id : 11] -Input [10]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, mean#28, cov#29, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, cov#52] -Arguments: [w_warehouse_sk#7 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, mean#28 ASC NULLS FIRST, cov#29 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#51 ASC NULLS FIRST, cov#52 ASC NULLS FIRST], true, 0 - +*(11) Sort [w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST], true, 0 ++- Exchange rangepartitioning(w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST, 5) + +- *(10) BroadcastHashJoin [i_item_sk#2, w_warehouse_sk#1], [i_item_sk#9, w_warehouse_sk#10], Inner, BuildRight + :- *(10) Project [w_warehouse_sk#1, i_item_sk#2, d_moy#3, mean#4, CASE WHEN (mean#4 = 0.0) THEN null ELSE (stdev#11 / mean#4) END AS cov#5] + : +- *(10) Filter (CASE WHEN (mean#4 = 0.0) THEN 0.0 ELSE (stdev#11 / mean#4) END > 1.0) + : +- *(10) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) + : +- Exchange hashpartitioning(w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3, 5) + : +- *(4) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) + : +- *(4) Project [inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12, d_moy#3] + : +- *(4) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#15], Inner, BuildRight + : :- *(4) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12] + : : +- *(4) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#1], Inner, BuildRight + : : :- *(4) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#2] + : : : +- *(4) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#2], Inner, BuildRight + : : : :- *(4) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] + : : : : +- *(4) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) + : : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [i_item_sk#2] + : : : +- *(1) Filter isnotnull(i_item_sk#2) + : : : +- *(1) FileScan parquet default.item[i_item_sk#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [w_warehouse_sk#1, w_warehouse_name#12] + : : +- *(2) Filter isnotnull(w_warehouse_sk#1) + : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#1,w_warehouse_name#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [d_date_sk#15, d_moy#3] + : +- *(3) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#3)) && (d_year#18 = 2001)) && (d_moy#3 = 1)) && isnotnull(d_date_sk#15)) + : +- *(3) FileScan parquet default.date_dim[d_date_sk#15,d_year#18,d_moy#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) + +- *(9) Project [w_warehouse_sk#10, i_item_sk#9, d_moy#6, mean#7, CASE WHEN (mean#7 = 0.0) THEN null ELSE (stdev#19 / mean#7) END AS cov#8] + +- *(9) Filter (CASE WHEN (mean#7 = 0.0) THEN 0.0 ELSE (stdev#19 / mean#7) END > 1.0) + +- *(9) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) + +- Exchange hashpartitioning(w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6, 5) + +- *(8) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) + +- *(8) Project [inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20, d_moy#6] + +- *(8) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#21], Inner, BuildRight + :- *(8) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20] + : +- *(8) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#10], Inner, BuildRight + : :- *(8) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#9] + : : +- *(8) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#9], Inner, BuildRight + : : :- *(8) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] + : : : +- *(8) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) + : : : +- *(8) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : +- ReusedExchange [i_item_sk#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [w_warehouse_sk#10, w_warehouse_name#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) Project [d_date_sk#21, d_moy#6] + +- *(7) Filter ((((isnotnull(d_year#22) && isnotnull(d_moy#6)) && (d_year#22 = 2001)) && (d_moy#6 = 2)) && isnotnull(d_date_sk#21)) + +- *(7) FileScan parquet default.date_dim[d_date_sk#21,d_year#22,d_moy#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/simplified.txt index f4e23c837..d80f4a4ee 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/simplified.txt @@ -1,77 +1,69 @@ -WholeStageCodegen (11) - Sort [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] +WholeStageCodegen + Sort [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] InputAdapter - Exchange [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] #1 - WholeStageCodegen (10) - BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] - Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] + Exchange [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] #1 + WholeStageCodegen + BroadcastHashJoin [i_item_sk,i_item_sk,w_warehouse_sk,w_warehouse_sk] + Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] Filter [mean,stdev] - HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(cast(inv_quantity_on_hand as bigint)),stdev,mean,n,avg,m2,sum,count] + HashAggregate [avg,avg(cast(inv_quantity_on_hand as bigint)),count,d_moy,i_item_sk,m2,n,stddev_samp(cast(inv_quantity_on_hand as double)),sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] InputAdapter - Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 - WholeStageCodegen (4) - HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] - Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] - BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [inv_date_sk,inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name] + Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #2 + WholeStageCodegen + HashAggregate [avg,avg,count,count,d_moy,i_item_sk,inv_quantity_on_hand,m2,m2,n,n,sum,sum,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] + Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] - BroadcastHashJoin [inv_item_sk,i_item_sk] - Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk] + WholeStageCodegen + Project [i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk] [i_item_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) - Filter [w_warehouse_sk] - ColumnarToRow - InputAdapter - Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] + WholeStageCodegen + Project [w_warehouse_name,w_warehouse_sk] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (3) + WholeStageCodegen Project [d_date_sk,d_moy] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #6 - WholeStageCodegen (9) - Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] + WholeStageCodegen + Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] Filter [mean,stdev] - HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(cast(inv_quantity_on_hand as bigint)),stdev,mean,n,avg,m2,sum,count] + HashAggregate [avg,avg(cast(inv_quantity_on_hand as bigint)),count,d_moy,i_item_sk,m2,n,stddev_samp(cast(inv_quantity_on_hand as double)),sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] InputAdapter - Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 - WholeStageCodegen (8) - HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] - Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] - BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [inv_date_sk,inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name] + Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #7 + WholeStageCodegen + HashAggregate [avg,avg,count,count,d_moy,i_item_sk,inv_quantity_on_hand,m2,m2,n,n,sum,sum,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] + Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] - BroadcastHashJoin [inv_item_sk,i_item_sk] - Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] InputAdapter - ReusedExchange [i_item_sk] #3 + ReusedExchange [i_item_sk] [i_item_sk] #3 InputAdapter - ReusedExchange [w_warehouse_sk,w_warehouse_name] #4 + ReusedExchange [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] #4 InputAdapter BroadcastExchange #8 - WholeStageCodegen (7) + WholeStageCodegen Project [d_date_sk,d_moy] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/explain.txt index 92c2d5ed4..3c845e77b 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/explain.txt @@ -1,292 +1,51 @@ == Physical Plan == -* Sort (52) -+- Exchange (51) - +- * BroadcastHashJoin Inner BuildRight (50) - :- * Project (27) - : +- * Filter (26) - : +- * HashAggregate (25) - : +- Exchange (24) - : +- * HashAggregate (23) - : +- * Project (22) - : +- * BroadcastHashJoin Inner BuildRight (21) - : :- * Project (15) - : : +- * BroadcastHashJoin Inner BuildRight (14) - : : :- * Project (9) - : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.inventory (1) - : : : +- BroadcastExchange (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.item (4) - : : +- BroadcastExchange (13) - : : +- * Filter (12) - : : +- * ColumnarToRow (11) - : : +- Scan parquet default.warehouse (10) - : +- BroadcastExchange (20) - : +- * Project (19) - : +- * Filter (18) - : +- * ColumnarToRow (17) - : +- Scan parquet default.date_dim (16) - +- BroadcastExchange (49) - +- * Project (48) - +- * Filter (47) - +- * HashAggregate (46) - +- Exchange (45) - +- * HashAggregate (44) - +- * Project (43) - +- * BroadcastHashJoin Inner BuildRight (42) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) - : :- * Project (33) - : : +- * BroadcastHashJoin Inner BuildRight (32) - : : :- * Filter (30) - : : : +- * ColumnarToRow (29) - : : : +- Scan parquet default.inventory (28) - : : +- ReusedExchange (31) - : +- ReusedExchange (34) - +- BroadcastExchange (41) - +- * Project (40) - +- * Filter (39) - +- * ColumnarToRow (38) - +- Scan parquet default.date_dim (37) - - -(1) Scan parquet default.inventory -Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/inventory] -PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] - -(3) Filter [codegen id : 4] -Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] -Condition : ((isnotnull(inv_item_sk#2) AND isnotnull(inv_warehouse_sk#3)) AND isnotnull(inv_date_sk#1)) - -(4) Scan parquet default.item -Output [1]: [i_item_sk#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [1]: [i_item_sk#5] - -(6) Filter [codegen id : 1] -Input [1]: [i_item_sk#5] -Condition : isnotnull(i_item_sk#5) - -(7) BroadcastExchange -Input [1]: [i_item_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] - -(8) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [inv_item_sk#2] -Right keys [1]: [i_item_sk#5] -Join condition: None - -(9) Project [codegen id : 4] -Output [4]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5] -Input [5]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5] - -(10) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#7, w_warehouse_name#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/warehouse] -PushedFilters: [IsNotNull(w_warehouse_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] - -(12) Filter [codegen id : 2] -Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] -Condition : isnotnull(w_warehouse_sk#7) - -(13) BroadcastExchange -Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] - -(14) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [inv_warehouse_sk#3] -Right keys [1]: [w_warehouse_sk#7] -Join condition: None - -(15) Project [codegen id : 4] -Output [5]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8] -Input [6]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8] - -(16) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#11, d_moy#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 3] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] - -(18) Filter [codegen id : 3] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] -Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#12)) AND (d_year#11 = 2001)) AND (d_moy#12 = 1)) AND isnotnull(d_date_sk#10)) - -(19) Project [codegen id : 3] -Output [2]: [d_date_sk#10, d_moy#12] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] - -(20) BroadcastExchange -Input [2]: [d_date_sk#10, d_moy#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] - -(21) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [inv_date_sk#1] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(22) Project [codegen id : 4] -Output [5]: [inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8, d_moy#12] -Input [7]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8, d_date_sk#10, d_moy#12] - -(23) HashAggregate [codegen id : 4] -Input [5]: [inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8, d_moy#12] -Keys [4]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12] -Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#4 as double)), partial_avg(cast(inv_quantity_on_hand#4 as bigint))] -Aggregate Attributes [5]: [n#14, avg#15, m2#16, sum#17, count#18] -Results [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, n#19, avg#20, m2#21, sum#22, count#23] - -(24) Exchange -Input [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, n#19, avg#20, m2#21, sum#22, count#23] -Arguments: hashpartitioning(w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, 5), true, [id=#24] - -(25) HashAggregate [codegen id : 10] -Input [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, n#19, avg#20, m2#21, sum#22, count#23] -Keys [4]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12] -Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double)), avg(cast(inv_quantity_on_hand#4 as bigint))] -Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double))#25, avg(cast(inv_quantity_on_hand#4 as bigint))#26] -Results [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, stddev_samp(cast(inv_quantity_on_hand#4 as double))#25 AS stdev#27, avg(cast(inv_quantity_on_hand#4 as bigint))#26 AS mean#28] - -(26) Filter [codegen id : 10] -Input [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, stdev#27, mean#28] -Condition : ((CASE WHEN (mean#28 = 0.0) THEN 0.0 ELSE (stdev#27 / mean#28) END > 1.0) AND (CASE WHEN (mean#28 = 0.0) THEN null ELSE (stdev#27 / mean#28) END > 1.5)) - -(27) Project [codegen id : 10] -Output [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, mean#28, CASE WHEN (mean#28 = 0.0) THEN null ELSE (stdev#27 / mean#28) END AS cov#29] -Input [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, stdev#27, mean#28] - -(28) Scan parquet default.inventory -Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/inventory] -PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] -ReadSchema: struct - -(29) ColumnarToRow [codegen id : 8] -Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] - -(30) Filter [codegen id : 8] -Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] -Condition : ((isnotnull(inv_item_sk#2) AND isnotnull(inv_warehouse_sk#3)) AND isnotnull(inv_date_sk#1)) - -(31) ReusedExchange [Reuses operator id: 7] -Output [1]: [i_item_sk#30] - -(32) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [inv_item_sk#2] -Right keys [1]: [i_item_sk#30] -Join condition: None - -(33) Project [codegen id : 8] -Output [4]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#30] -Input [5]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#30] - -(34) ReusedExchange [Reuses operator id: 13] -Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] - -(35) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [inv_warehouse_sk#3] -Right keys [1]: [w_warehouse_sk#31] -Join condition: None - -(36) Project [codegen id : 8] -Output [5]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] -Input [6]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] - -(37) Scan parquet default.date_dim -Output [3]: [d_date_sk#33, d_year#34, d_moy#35] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] -ReadSchema: struct - -(38) ColumnarToRow [codegen id : 7] -Input [3]: [d_date_sk#33, d_year#34, d_moy#35] - -(39) Filter [codegen id : 7] -Input [3]: [d_date_sk#33, d_year#34, d_moy#35] -Condition : ((((isnotnull(d_year#34) AND isnotnull(d_moy#35)) AND (d_year#34 = 2001)) AND (d_moy#35 = 2)) AND isnotnull(d_date_sk#33)) - -(40) Project [codegen id : 7] -Output [2]: [d_date_sk#33, d_moy#35] -Input [3]: [d_date_sk#33, d_year#34, d_moy#35] - -(41) BroadcastExchange -Input [2]: [d_date_sk#33, d_moy#35] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#36] - -(42) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [inv_date_sk#1] -Right keys [1]: [d_date_sk#33] -Join condition: None - -(43) Project [codegen id : 8] -Output [5]: [inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] -Input [7]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_date_sk#33, d_moy#35] - -(44) HashAggregate [codegen id : 8] -Input [5]: [inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] -Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] -Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#4 as double)), partial_avg(cast(inv_quantity_on_hand#4 as bigint))] -Aggregate Attributes [5]: [n#37, avg#38, m2#39, sum#40, count#41] -Results [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#42, avg#43, m2#44, sum#45, count#46] - -(45) Exchange -Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#42, avg#43, m2#44, sum#45, count#46] -Arguments: hashpartitioning(w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, 5), true, [id=#47] - -(46) HashAggregate [codegen id : 9] -Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#42, avg#43, m2#44, sum#45, count#46] -Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] -Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double)), avg(cast(inv_quantity_on_hand#4 as bigint))] -Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double))#48, avg(cast(inv_quantity_on_hand#4 as bigint))#49] -Results [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stddev_samp(cast(inv_quantity_on_hand#4 as double))#48 AS stdev#50, avg(cast(inv_quantity_on_hand#4 as bigint))#49 AS mean#51] - -(47) Filter [codegen id : 9] -Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#50, mean#51] -Condition : (CASE WHEN (mean#51 = 0.0) THEN 0.0 ELSE (stdev#50 / mean#51) END > 1.0) - -(48) Project [codegen id : 9] -Output [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, CASE WHEN (mean#51 = 0.0) THEN null ELSE (stdev#50 / mean#51) END AS cov#52] -Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#50, mean#51] - -(49) BroadcastExchange -Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, cov#52] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [id=#53] - -(50) BroadcastHashJoin [codegen id : 10] -Left keys [2]: [i_item_sk#5, w_warehouse_sk#7] -Right keys [2]: [i_item_sk#30, w_warehouse_sk#31] -Join condition: None - -(51) Exchange -Input [10]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, mean#28, cov#29, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, cov#52] -Arguments: rangepartitioning(w_warehouse_sk#7 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, mean#28 ASC NULLS FIRST, cov#29 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#51 ASC NULLS FIRST, cov#52 ASC NULLS FIRST, 5), true, [id=#54] - -(52) Sort [codegen id : 11] -Input [10]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, mean#28, cov#29, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, cov#52] -Arguments: [w_warehouse_sk#7 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, mean#28 ASC NULLS FIRST, cov#29 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#51 ASC NULLS FIRST, cov#52 ASC NULLS FIRST], true, 0 - +*(11) Sort [w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST], true, 0 ++- Exchange rangepartitioning(w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST, 5) + +- *(10) BroadcastHashJoin [i_item_sk#2, w_warehouse_sk#1], [i_item_sk#9, w_warehouse_sk#10], Inner, BuildRight + :- *(10) Project [w_warehouse_sk#1, i_item_sk#2, d_moy#3, mean#4, CASE WHEN (mean#4 = 0.0) THEN null ELSE (stdev#11 / mean#4) END AS cov#5] + : +- *(10) Filter ((CASE WHEN (mean#4 = 0.0) THEN 0.0 ELSE (stdev#11 / mean#4) END > 1.0) && (CASE WHEN (mean#4 = 0.0) THEN null ELSE (stdev#11 / mean#4) END > 1.5)) + : +- *(10) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) + : +- Exchange hashpartitioning(w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3, 5) + : +- *(4) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) + : +- *(4) Project [inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12, d_moy#3] + : +- *(4) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#15], Inner, BuildRight + : :- *(4) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12] + : : +- *(4) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#1], Inner, BuildRight + : : :- *(4) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#2] + : : : +- *(4) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#2], Inner, BuildRight + : : : :- *(4) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] + : : : : +- *(4) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) + : : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [i_item_sk#2] + : : : +- *(1) Filter isnotnull(i_item_sk#2) + : : : +- *(1) FileScan parquet default.item[i_item_sk#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [w_warehouse_sk#1, w_warehouse_name#12] + : : +- *(2) Filter isnotnull(w_warehouse_sk#1) + : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#1,w_warehouse_name#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [d_date_sk#15, d_moy#3] + : +- *(3) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#3)) && (d_year#18 = 2001)) && (d_moy#3 = 1)) && isnotnull(d_date_sk#15)) + : +- *(3) FileScan parquet default.date_dim[d_date_sk#15,d_year#18,d_moy#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) + +- *(9) Project [w_warehouse_sk#10, i_item_sk#9, d_moy#6, mean#7, CASE WHEN (mean#7 = 0.0) THEN null ELSE (stdev#19 / mean#7) END AS cov#8] + +- *(9) Filter (CASE WHEN (mean#7 = 0.0) THEN 0.0 ELSE (stdev#19 / mean#7) END > 1.0) + +- *(9) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) + +- Exchange hashpartitioning(w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6, 5) + +- *(8) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) + +- *(8) Project [inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20, d_moy#6] + +- *(8) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#21], Inner, BuildRight + :- *(8) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20] + : +- *(8) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#10], Inner, BuildRight + : :- *(8) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#9] + : : +- *(8) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#9], Inner, BuildRight + : : :- *(8) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] + : : : +- *(8) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) + : : : +- *(8) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : +- ReusedExchange [i_item_sk#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [w_warehouse_sk#10, w_warehouse_name#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) Project [d_date_sk#21, d_moy#6] + +- *(7) Filter ((((isnotnull(d_year#22) && isnotnull(d_moy#6)) && (d_year#22 = 2001)) && (d_moy#6 = 2)) && isnotnull(d_date_sk#21)) + +- *(7) FileScan parquet default.date_dim[d_date_sk#21,d_year#22,d_moy#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/simplified.txt index f4e23c837..d80f4a4ee 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/simplified.txt @@ -1,77 +1,69 @@ -WholeStageCodegen (11) - Sort [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] +WholeStageCodegen + Sort [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] InputAdapter - Exchange [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] #1 - WholeStageCodegen (10) - BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] - Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] + Exchange [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] #1 + WholeStageCodegen + BroadcastHashJoin [i_item_sk,i_item_sk,w_warehouse_sk,w_warehouse_sk] + Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] Filter [mean,stdev] - HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(cast(inv_quantity_on_hand as bigint)),stdev,mean,n,avg,m2,sum,count] + HashAggregate [avg,avg(cast(inv_quantity_on_hand as bigint)),count,d_moy,i_item_sk,m2,n,stddev_samp(cast(inv_quantity_on_hand as double)),sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] InputAdapter - Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 - WholeStageCodegen (4) - HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] - Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] - BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [inv_date_sk,inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name] + Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #2 + WholeStageCodegen + HashAggregate [avg,avg,count,count,d_moy,i_item_sk,inv_quantity_on_hand,m2,m2,n,n,sum,sum,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] + Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] - BroadcastHashJoin [inv_item_sk,i_item_sk] - Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk] + WholeStageCodegen + Project [i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk] [i_item_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) - Filter [w_warehouse_sk] - ColumnarToRow - InputAdapter - Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] + WholeStageCodegen + Project [w_warehouse_name,w_warehouse_sk] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (3) + WholeStageCodegen Project [d_date_sk,d_moy] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #6 - WholeStageCodegen (9) - Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] + WholeStageCodegen + Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] Filter [mean,stdev] - HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(cast(inv_quantity_on_hand as bigint)),stdev,mean,n,avg,m2,sum,count] + HashAggregate [avg,avg(cast(inv_quantity_on_hand as bigint)),count,d_moy,i_item_sk,m2,n,stddev_samp(cast(inv_quantity_on_hand as double)),sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] InputAdapter - Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 - WholeStageCodegen (8) - HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] - Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] - BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [inv_date_sk,inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name] + Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #7 + WholeStageCodegen + HashAggregate [avg,avg,count,count,d_moy,i_item_sk,inv_quantity_on_hand,m2,m2,n,n,sum,sum,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] + Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] - BroadcastHashJoin [inv_item_sk,i_item_sk] - Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] + BroadcastHashJoin [i_item_sk,inv_item_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] InputAdapter - ReusedExchange [i_item_sk] #3 + ReusedExchange [i_item_sk] [i_item_sk] #3 InputAdapter - ReusedExchange [w_warehouse_sk,w_warehouse_name] #4 + ReusedExchange [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] #4 InputAdapter BroadcastExchange #8 - WholeStageCodegen (7) + WholeStageCodegen Project [d_date_sk,d_moy] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/explain.txt index 79a7abdcf..16fa8a95c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/explain.txt @@ -1,606 +1,124 @@ == Physical Plan == -TakeOrderedAndProject (107) -+- * Project (106) - +- * BroadcastHashJoin Inner BuildRight (105) - :- * Project (91) - : +- * BroadcastHashJoin Inner BuildRight (90) - : :- * Project (71) - : : +- * BroadcastHashJoin Inner BuildRight (70) - : : :- * Project (56) - : : : +- * BroadcastHashJoin Inner BuildRight (55) - : : : :- * BroadcastHashJoin Inner BuildRight (36) - : : : : :- * Filter (19) - : : : : : +- * HashAggregate (18) - : : : : : +- Exchange (17) - : : : : : +- * HashAggregate (16) - : : : : : +- * Project (15) - : : : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : : : :- * Project (9) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : : : : :- * Filter (3) - : : : : : : : +- * ColumnarToRow (2) - : : : : : : : +- Scan parquet default.customer (1) - : : : : : : +- BroadcastExchange (7) - : : : : : : +- * Filter (6) - : : : : : : +- * ColumnarToRow (5) - : : : : : : +- Scan parquet default.store_sales (4) - : : : : : +- BroadcastExchange (13) - : : : : : +- * Filter (12) - : : : : : +- * ColumnarToRow (11) - : : : : : +- Scan parquet default.date_dim (10) - : : : : +- BroadcastExchange (35) - : : : : +- * HashAggregate (34) - : : : : +- Exchange (33) - : : : : +- * HashAggregate (32) - : : : : +- * Project (31) - : : : : +- * BroadcastHashJoin Inner BuildRight (30) - : : : : :- * Project (25) - : : : : : +- * BroadcastHashJoin Inner BuildRight (24) - : : : : : :- * Filter (22) - : : : : : : +- * ColumnarToRow (21) - : : : : : : +- Scan parquet default.customer (20) - : : : : : +- ReusedExchange (23) - : : : : +- BroadcastExchange (29) - : : : : +- * Filter (28) - : : : : +- * ColumnarToRow (27) - : : : : +- Scan parquet default.date_dim (26) - : : : +- BroadcastExchange (54) - : : : +- * Project (53) - : : : +- * Filter (52) - : : : +- * HashAggregate (51) - : : : +- Exchange (50) - : : : +- * HashAggregate (49) - : : : +- * Project (48) - : : : +- * BroadcastHashJoin Inner BuildRight (47) - : : : :- * Project (45) - : : : : +- * BroadcastHashJoin Inner BuildRight (44) - : : : : :- * Filter (39) - : : : : : +- * ColumnarToRow (38) - : : : : : +- Scan parquet default.customer (37) - : : : : +- BroadcastExchange (43) - : : : : +- * Filter (42) - : : : : +- * ColumnarToRow (41) - : : : : +- Scan parquet default.catalog_sales (40) - : : : +- ReusedExchange (46) - : : +- BroadcastExchange (69) - : : +- * HashAggregate (68) - : : +- Exchange (67) - : : +- * HashAggregate (66) - : : +- * Project (65) - : : +- * BroadcastHashJoin Inner BuildRight (64) - : : :- * Project (62) - : : : +- * BroadcastHashJoin Inner BuildRight (61) - : : : :- * Filter (59) - : : : : +- * ColumnarToRow (58) - : : : : +- Scan parquet default.customer (57) - : : : +- ReusedExchange (60) - : : +- ReusedExchange (63) - : +- BroadcastExchange (89) - : +- * Project (88) - : +- * Filter (87) - : +- * HashAggregate (86) - : +- Exchange (85) - : +- * HashAggregate (84) - : +- * Project (83) - : +- * BroadcastHashJoin Inner BuildRight (82) - : :- * Project (80) - : : +- * BroadcastHashJoin Inner BuildRight (79) - : : :- * Filter (74) - : : : +- * ColumnarToRow (73) - : : : +- Scan parquet default.customer (72) - : : +- BroadcastExchange (78) - : : +- * Filter (77) - : : +- * ColumnarToRow (76) - : : +- Scan parquet default.web_sales (75) - : +- ReusedExchange (81) - +- BroadcastExchange (104) - +- * HashAggregate (103) - +- Exchange (102) - +- * HashAggregate (101) - +- * Project (100) - +- * BroadcastHashJoin Inner BuildRight (99) - :- * Project (97) - : +- * BroadcastHashJoin Inner BuildRight (96) - : :- * Filter (94) - : : +- * ColumnarToRow (93) - : : +- Scan parquet default.customer (92) - : +- ReusedExchange (95) - +- ReusedExchange (98) - - -(1) Scan parquet default.customer -Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] - -(3) Filter [codegen id : 3] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) - -(4) Scan parquet default.store_sales -Output [6]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [6]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] - -(6) Filter [codegen id : 1] -Input [6]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] -Condition : (isnotnull(ss_customer_sk#10) AND isnotnull(ss_sold_date_sk#9)) - -(7) BroadcastExchange -Input [6]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#15] - -(8) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ss_customer_sk#10] -Join condition: None - -(9) Project [codegen id : 3] -Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] -Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] - -(10) Scan parquet default.date_dim -Output [2]: [d_date_sk#16, d_year#17] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#16, d_year#17] - -(12) Filter [codegen id : 2] -Input [2]: [d_date_sk#16, d_year#17] -Condition : ((isnotnull(d_year#17) AND (d_year#17 = 2001)) AND isnotnull(d_date_sk#16)) - -(13) BroadcastExchange -Input [2]: [d_date_sk#16, d_year#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] - -(14) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#9] -Right keys [1]: [d_date_sk#16] -Join condition: None - -(15) Project [codegen id : 3] -Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14, d_year#17] -Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14, d_date_sk#16, d_year#17] - -(16) HashAggregate [codegen id : 3] -Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14, d_year#17] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [2]: [sum#19, isEmpty#20] -Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#21, isEmpty#22] - -(17) Exchange -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#21, isEmpty#22] -Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), true, [id=#23] - -(18) HashAggregate [codegen id : 24] -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#21, isEmpty#22] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#24] -Results [2]: [c_customer_id#2 AS customer_id#25, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#24 AS year_total#26] - -(19) Filter [codegen id : 24] -Input [2]: [customer_id#25, year_total#26] -Condition : (isnotnull(year_total#26) AND (year_total#26 > 0.000000)) - -(20) Scan parquet default.customer -Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] -ReadSchema: struct - -(21) ColumnarToRow [codegen id : 6] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] - -(22) Filter [codegen id : 6] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) - -(23) ReusedExchange [Reuses operator id: 7] -Output [6]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] - -(24) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ss_customer_sk#10] -Join condition: None - -(25) Project [codegen id : 6] -Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] -Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] - -(26) Scan parquet default.date_dim -Output [2]: [d_date_sk#16, d_year#17] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] -ReadSchema: struct - -(27) ColumnarToRow [codegen id : 5] -Input [2]: [d_date_sk#16, d_year#17] - -(28) Filter [codegen id : 5] -Input [2]: [d_date_sk#16, d_year#17] -Condition : ((isnotnull(d_year#17) AND (d_year#17 = 2002)) AND isnotnull(d_date_sk#16)) - -(29) BroadcastExchange -Input [2]: [d_date_sk#16, d_year#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] - -(30) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#9] -Right keys [1]: [d_date_sk#16] -Join condition: None - -(31) Project [codegen id : 6] -Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14, d_year#17] -Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14, d_date_sk#16, d_year#17] - -(32) HashAggregate [codegen id : 6] -Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14, d_year#17] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [2]: [sum#28, isEmpty#29] -Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#30, isEmpty#31] - -(33) Exchange -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#30, isEmpty#31] -Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), true, [id=#32] - -(34) HashAggregate [codegen id : 7] -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#30, isEmpty#31] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#33] -Results [8]: [c_customer_id#2 AS customer_id#34, c_first_name#3 AS customer_first_name#35, c_last_name#4 AS customer_last_name#36, c_preferred_cust_flag#5 AS customer_preferred_cust_flag#37, c_birth_country#6 AS customer_birth_country#38, c_login#7 AS customer_login#39, c_email_address#8 AS customer_email_address#40, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#33 AS year_total#41] - -(35) BroadcastExchange -Input [8]: [customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#41] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#42] - -(36) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [customer_id#25] -Right keys [1]: [customer_id#34] -Join condition: None - -(37) Scan parquet default.customer -Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] -ReadSchema: struct - -(38) ColumnarToRow [codegen id : 10] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] - -(39) Filter [codegen id : 10] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) - -(40) Scan parquet default.catalog_sales -Output [6]: [cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(41) ColumnarToRow [codegen id : 8] -Input [6]: [cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] - -(42) Filter [codegen id : 8] -Input [6]: [cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] -Condition : (isnotnull(cs_bill_customer_sk#44) AND isnotnull(cs_sold_date_sk#43)) - -(43) BroadcastExchange -Input [6]: [cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#49] - -(44) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [cs_bill_customer_sk#44] -Join condition: None - -(45) Project [codegen id : 10] -Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_sold_date_sk#43, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] -Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] - -(46) ReusedExchange [Reuses operator id: 13] -Output [2]: [d_date_sk#16, d_year#17] - -(47) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#43] -Right keys [1]: [d_date_sk#16] -Join condition: None - -(48) Project [codegen id : 10] -Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48, d_year#17] -Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_sold_date_sk#43, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48, d_date_sk#16, d_year#17] - -(49) HashAggregate [codegen id : 10] -Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48, d_year#17] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [2]: [sum#50, isEmpty#51] -Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#52, isEmpty#53] - -(50) Exchange -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#52, isEmpty#53] -Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), true, [id=#54] - -(51) HashAggregate [codegen id : 11] -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#52, isEmpty#53] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#55] -Results [2]: [c_customer_id#2 AS customer_id#56, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#55 AS year_total#57] - -(52) Filter [codegen id : 11] -Input [2]: [customer_id#56, year_total#57] -Condition : (isnotnull(year_total#57) AND (year_total#57 > 0.000000)) - -(53) Project [codegen id : 11] -Output [2]: [customer_id#56 AS customer_id#58, year_total#57 AS year_total#59] -Input [2]: [customer_id#56, year_total#57] - -(54) BroadcastExchange -Input [2]: [customer_id#58, year_total#59] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#60] - -(55) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [customer_id#25] -Right keys [1]: [customer_id#58] -Join condition: None - -(56) Project [codegen id : 24] -Output [11]: [customer_id#25, year_total#26, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#41, year_total#59] -Input [12]: [customer_id#25, year_total#26, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#41, customer_id#58, year_total#59] - -(57) Scan parquet default.customer -Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] -ReadSchema: struct - -(58) ColumnarToRow [codegen id : 14] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] - -(59) Filter [codegen id : 14] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) - -(60) ReusedExchange [Reuses operator id: 43] -Output [6]: [cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] - -(61) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [cs_bill_customer_sk#44] -Join condition: None - -(62) Project [codegen id : 14] -Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_sold_date_sk#43, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] -Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] - -(63) ReusedExchange [Reuses operator id: 29] -Output [2]: [d_date_sk#16, d_year#17] - -(64) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [cs_sold_date_sk#43] -Right keys [1]: [d_date_sk#16] -Join condition: None - -(65) Project [codegen id : 14] -Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48, d_year#17] -Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_sold_date_sk#43, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48, d_date_sk#16, d_year#17] - -(66) HashAggregate [codegen id : 14] -Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48, d_year#17] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [2]: [sum#61, isEmpty#62] -Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#63, isEmpty#64] - -(67) Exchange -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#63, isEmpty#64] -Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), true, [id=#65] - -(68) HashAggregate [codegen id : 15] -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#63, isEmpty#64] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#66] -Results [2]: [c_customer_id#2 AS customer_id#67, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#66 AS year_total#68] - -(69) BroadcastExchange -Input [2]: [customer_id#67, year_total#68] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#69] - -(70) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [customer_id#25] -Right keys [1]: [customer_id#67] -Join condition: (CASE WHEN (year_total#59 > 0.000000) THEN CheckOverflow((promote_precision(year_total#68) / promote_precision(year_total#59)), DecimalType(38,14), true) ELSE null END > CASE WHEN (year_total#26 > 0.000000) THEN CheckOverflow((promote_precision(year_total#41) / promote_precision(year_total#26)), DecimalType(38,14), true) ELSE null END) - -(71) Project [codegen id : 24] -Output [10]: [customer_id#25, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#59, year_total#68] -Input [13]: [customer_id#25, year_total#26, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#41, year_total#59, customer_id#67, year_total#68] - -(72) Scan parquet default.customer -Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] -ReadSchema: struct - -(73) ColumnarToRow [codegen id : 18] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] - -(74) Filter [codegen id : 18] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) - -(75) Scan parquet default.web_sales -Output [6]: [ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(76) ColumnarToRow [codegen id : 16] -Input [6]: [ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] - -(77) Filter [codegen id : 16] -Input [6]: [ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] -Condition : (isnotnull(ws_bill_customer_sk#71) AND isnotnull(ws_sold_date_sk#70)) - -(78) BroadcastExchange -Input [6]: [ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#76] - -(79) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ws_bill_customer_sk#71] -Join condition: None - -(80) Project [codegen id : 18] -Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#70, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] -Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] - -(81) ReusedExchange [Reuses operator id: 13] -Output [2]: [d_date_sk#16, d_year#17] - -(82) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ws_sold_date_sk#70] -Right keys [1]: [d_date_sk#16] -Join condition: None - -(83) Project [codegen id : 18] -Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75, d_year#17] -Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#70, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75, d_date_sk#16, d_year#17] - -(84) HashAggregate [codegen id : 18] -Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75, d_year#17] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [2]: [sum#77, isEmpty#78] -Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#79, isEmpty#80] - -(85) Exchange -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#79, isEmpty#80] -Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), true, [id=#81] - -(86) HashAggregate [codegen id : 19] -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#79, isEmpty#80] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#82] -Results [2]: [c_customer_id#2 AS customer_id#83, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#82 AS year_total#84] - -(87) Filter [codegen id : 19] -Input [2]: [customer_id#83, year_total#84] -Condition : (isnotnull(year_total#84) AND (year_total#84 > 0.000000)) - -(88) Project [codegen id : 19] -Output [2]: [customer_id#83 AS customer_id#85, year_total#84 AS year_total#86] -Input [2]: [customer_id#83, year_total#84] - -(89) BroadcastExchange -Input [2]: [customer_id#85, year_total#86] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#87] - -(90) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [customer_id#25] -Right keys [1]: [customer_id#85] -Join condition: None - -(91) Project [codegen id : 24] -Output [11]: [customer_id#25, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#59, year_total#68, year_total#86] -Input [12]: [customer_id#25, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#59, year_total#68, customer_id#85, year_total#86] - -(92) Scan parquet default.customer -Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] -ReadSchema: struct - -(93) ColumnarToRow [codegen id : 22] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] - -(94) Filter [codegen id : 22] -Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] -Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) - -(95) ReusedExchange [Reuses operator id: 78] -Output [6]: [ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] - -(96) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ws_bill_customer_sk#71] -Join condition: None - -(97) Project [codegen id : 22] -Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#70, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] -Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] - -(98) ReusedExchange [Reuses operator id: 29] -Output [2]: [d_date_sk#16, d_year#17] - -(99) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [ws_sold_date_sk#70] -Right keys [1]: [d_date_sk#16] -Join condition: None - -(100) Project [codegen id : 22] -Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75, d_year#17] -Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#70, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75, d_date_sk#16, d_year#17] - -(101) HashAggregate [codegen id : 22] -Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75, d_year#17] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [2]: [sum#88, isEmpty#89] -Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#90, isEmpty#91] - -(102) Exchange -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#90, isEmpty#91] -Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), true, [id=#92] - -(103) HashAggregate [codegen id : 23] -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#90, isEmpty#91] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#93] -Results [2]: [c_customer_id#2 AS customer_id#94, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#93 AS year_total#95] - -(104) BroadcastExchange -Input [2]: [customer_id#94, year_total#95] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#96] - -(105) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [customer_id#25] -Right keys [1]: [customer_id#94] -Join condition: (CASE WHEN (year_total#59 > 0.000000) THEN CheckOverflow((promote_precision(year_total#68) / promote_precision(year_total#59)), DecimalType(38,14), true) ELSE null END > CASE WHEN (year_total#86 > 0.000000) THEN CheckOverflow((promote_precision(year_total#95) / promote_precision(year_total#86)), DecimalType(38,14), true) ELSE null END) - -(106) Project [codegen id : 24] -Output [7]: [customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40] -Input [13]: [customer_id#25, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#59, year_total#68, year_total#86, customer_id#94, year_total#95] - -(107) TakeOrderedAndProject -Input [7]: [customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40] -Arguments: 100, [customer_id#34 ASC NULLS FIRST, customer_first_name#35 ASC NULLS FIRST, customer_last_name#36 ASC NULLS FIRST, customer_preferred_cust_flag#37 ASC NULLS FIRST, customer_birth_country#38 ASC NULLS FIRST, customer_login#39 ASC NULLS FIRST, customer_email_address#40 ASC NULLS FIRST], [customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40] - +TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer_first_name#2 ASC NULLS FIRST,customer_last_name#3 ASC NULLS FIRST,customer_preferred_cust_flag#4 ASC NULLS FIRST,customer_birth_country#5 ASC NULLS FIRST,customer_login#6 ASC NULLS FIRST,customer_email_address#7 ASC NULLS FIRST], output=[customer_id#1,customer_first_name#2,customer_last_name#3,customer_preferred_cust_flag#4,customer_birth_country#5,customer_login#6,customer_email_address#7]) ++- *(25) Project [customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7] + +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#9], Inner, BuildRight, (CASE WHEN (year_total#10 > 0.000000) THEN CheckOverflow((promote_precision(year_total#11) / promote_precision(year_total#10)), DecimalType(38,14)) ELSE null END > CASE WHEN (year_total#12 > 0.000000) THEN CheckOverflow((promote_precision(year_total#13) / promote_precision(year_total#12)), DecimalType(38,14)) ELSE null END) + :- *(25) Project [customer_id#8, customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7, year_total#10, year_total#11, year_total#12] + : +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#14], Inner, BuildRight + : :- *(25) Project [customer_id#8, customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7, year_total#10, year_total#11] + : : +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#15], Inner, BuildRight, (CASE WHEN (year_total#10 > 0.000000) THEN CheckOverflow((promote_precision(year_total#11) / promote_precision(year_total#10)), DecimalType(38,14)) ELSE null END > CASE WHEN (year_total#16 > 0.000000) THEN CheckOverflow((promote_precision(year_total#17) / promote_precision(year_total#16)), DecimalType(38,14)) ELSE null END) + : : :- *(25) Project [customer_id#8, year_total#16, customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7, year_total#17, year_total#10] + : : : +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#18], Inner, BuildRight + : : : :- *(25) BroadcastHashJoin [customer_id#8], [customer_id#1], Inner, BuildRight + : : : : :- Union + : : : : : :- *(4) Filter (isnotnull(year_total#16) && (year_total#16 > 0.000000)) + : : : : : : +- *(4) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) + : : : : : : +- *(3) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : : : : +- *(3) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27, d_year#26] + : : : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight + : : : : : : :- *(3) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_sold_date_sk#31, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27] + : : : : : : : +- *(3) BroadcastHashJoin [c_customer_sk#33], [ss_customer_sk#34], Inner, BuildRight + : : : : : : : :- *(3) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + : : : : : : : : +- *(3) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) + : : : : : : : : +- *(3) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : : : :- LocalTableScan , [customer_id#35, year_total#36] + : : : : : +- LocalTableScan , [customer_id#37, year_total#38] + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : : : +- Union + : : : : :- *(8) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) + : : : : : +- *(7) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : : : +- *(7) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27, d_year#26] + : : : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight + : : : : : :- *(7) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_sold_date_sk#31, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27] + : : : : : : +- *(7) BroadcastHashJoin [c_customer_sk#33], [ss_customer_sk#34], Inner, BuildRight + : : : : : : :- *(7) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + : : : : : : : +- *(7) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) + : : : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : : :- LocalTableScan , [customer_id#35, customer_first_name#39, customer_last_name#40, customer_preferred_cust_flag#41, customer_birth_country#42, customer_login#43, customer_email_address#44, year_total#36] + : : : : +- LocalTableScan , [customer_id#37, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#38] + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : : +- Union + : : : :- LocalTableScan , [customer_id#18, year_total#10] + : : : :- *(12) Filter (isnotnull(year_total#36) && (year_total#36 > 0.000000)) + : : : : +- *(12) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) + : : : : +- *(11) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : : +- *(11) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51, d_year#26] + : : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#55], [d_date_sk#32], Inner, BuildRight + : : : : :- *(11) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_sold_date_sk#55, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51] + : : : : : +- *(11) BroadcastHashJoin [c_customer_sk#33], [cs_bill_customer_sk#56], Inner, BuildRight + : : : : : :- *(11) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + : : : : : : +- *(11) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) + : : : : : : +- *(11) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#37, year_total#38] + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : +- Union + : : :- LocalTableScan , [customer_id#15, year_total#11] + : : :- *(16) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) + : : : +- *(15) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : : : +- *(15) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51, d_year#26] + : : : +- *(15) BroadcastHashJoin [cs_sold_date_sk#55], [d_date_sk#32], Inner, BuildRight + : : : :- *(15) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_sold_date_sk#55, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51] + : : : : +- *(15) BroadcastHashJoin [c_customer_sk#33], [cs_bill_customer_sk#56], Inner, BuildRight + : : : : :- *(15) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + : : : : : +- *(15) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) + : : : : : +- *(15) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#37, year_total#38] + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- Union + : :- LocalTableScan , [customer_id#14, year_total#12] + : :- LocalTableScan , [customer_id#35, year_total#36] + : +- *(20) Filter (isnotnull(year_total#38) && (year_total#38 > 0.000000)) + : +- *(20) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) + : +- *(19) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + : +- *(19) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57, d_year#26] + : +- *(19) BroadcastHashJoin [ws_sold_date_sk#61], [d_date_sk#32], Inner, BuildRight + : :- *(19) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_sold_date_sk#61, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57] + : : +- *(19) BroadcastHashJoin [c_customer_sk#33], [ws_bill_customer_sk#62], Inner, BuildRight + : : :- *(19) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + : : : +- *(19) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) + : : : +- *(19) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#9, year_total#13] + :- LocalTableScan , [customer_id#35, year_total#36] + +- *(24) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) + +- *(23) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) + +- *(23) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57, d_year#26] + +- *(23) BroadcastHashJoin [ws_sold_date_sk#61], [d_date_sk#32], Inner, BuildRight + :- *(23) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_sold_date_sk#61, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57] + : +- *(23) BroadcastHashJoin [c_customer_sk#33], [ws_bill_customer_sk#62], Inner, BuildRight + : :- *(23) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] + : : +- *(23) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) + : : +- *(23) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - -(2) ColumnarToRow [codegen id : 5] -Input [5]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_order_number#4, cs_sales_price#5] - -(3) Filter [codegen id : 5] -Input [5]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_order_number#4, cs_sales_price#5] -Condition : ((isnotnull(cs_warehouse_sk#2) AND isnotnull(cs_item_sk#3)) AND isnotnull(cs_sold_date_sk#1)) - -(4) Scan parquet default.catalog_returns -Output [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] - -(6) Filter [codegen id : 1] -Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] -Condition : (isnotnull(cr_order_number#7) AND isnotnull(cr_item_sk#6)) - -(7) BroadcastExchange -Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#9] - -(8) BroadcastHashJoin [codegen id : 5] -Left keys [2]: [cs_order_number#4, cs_item_sk#3] -Right keys [2]: [cr_order_number#7, cr_item_sk#6] -Join condition: None - -(9) Project [codegen id : 5] -Output [5]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_sales_price#5, cr_refunded_cash#8] -Input [8]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_order_number#4, cs_sales_price#5, cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] - -(10) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#10, w_state#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/warehouse] -PushedFilters: [IsNotNull(w_warehouse_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [2]: [w_warehouse_sk#10, w_state#11] - -(12) Filter [codegen id : 2] -Input [2]: [w_warehouse_sk#10, w_state#11] -Condition : isnotnull(w_warehouse_sk#10) - -(13) BroadcastExchange -Input [2]: [w_warehouse_sk#10, w_state#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] - -(14) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_warehouse_sk#2] -Right keys [1]: [w_warehouse_sk#10] -Join condition: None - -(15) Project [codegen id : 5] -Output [5]: [cs_sold_date_sk#1, cs_item_sk#3, cs_sales_price#5, cr_refunded_cash#8, w_state#11] -Input [7]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_sales_price#5, cr_refunded_cash#8, w_warehouse_sk#10, w_state#11] - -(16) Scan parquet default.item -Output [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_price,1.49), IsNotNull(i_item_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 3] -Input [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] - -(18) Filter [codegen id : 3] -Input [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] -Condition : (((isnotnull(i_current_price#15) AND (i_current_price#15 >= 0.99)) AND (i_current_price#15 <= 1.49)) AND isnotnull(i_item_sk#13)) - -(19) Project [codegen id : 3] -Output [2]: [i_item_sk#13, i_item_id#14] -Input [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] - -(20) BroadcastExchange -Input [2]: [i_item_sk#13, i_item_id#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] - -(21) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_item_sk#3] -Right keys [1]: [i_item_sk#13] -Join condition: None - -(22) Project [codegen id : 5] -Output [5]: [cs_sold_date_sk#1, cs_sales_price#5, cr_refunded_cash#8, w_state#11, i_item_id#14] -Input [7]: [cs_sold_date_sk#1, cs_item_sk#3, cs_sales_price#5, cr_refunded_cash#8, w_state#11, i_item_sk#13, i_item_id#14] - -(23) Scan parquet default.date_dim -Output [2]: [d_date_sk#17, d_date#18] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), IsNotNull(d_date_sk)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 4] -Input [2]: [d_date_sk#17, d_date#18] - -(25) Filter [codegen id : 4] -Input [2]: [d_date_sk#17, d_date#18] -Condition : (((isnotnull(d_date#18) AND (d_date#18 >= 10997)) AND (d_date#18 <= 11057)) AND isnotnull(d_date_sk#17)) - -(26) BroadcastExchange -Input [2]: [d_date_sk#17, d_date#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] - -(27) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#17] -Join condition: None - -(28) Project [codegen id : 5] -Output [5]: [cs_sales_price#5, cr_refunded_cash#8, w_state#11, i_item_id#14, d_date#18] -Input [7]: [cs_sold_date_sk#1, cs_sales_price#5, cr_refunded_cash#8, w_state#11, i_item_id#14, d_date_sk#17, d_date#18] - -(29) HashAggregate [codegen id : 5] -Input [5]: [cs_sales_price#5, cr_refunded_cash#8, w_state#11, i_item_id#14, d_date#18] -Keys [2]: [w_state#11, i_item_id#14] -Functions [2]: [partial_sum(CASE WHEN (d_date#18 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#18 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)] -Aggregate Attributes [4]: [sum#20, isEmpty#21, sum#22, isEmpty#23] -Results [6]: [w_state#11, i_item_id#14, sum#24, isEmpty#25, sum#26, isEmpty#27] - -(30) Exchange -Input [6]: [w_state#11, i_item_id#14, sum#24, isEmpty#25, sum#26, isEmpty#27] -Arguments: hashpartitioning(w_state#11, i_item_id#14, 5), true, [id=#28] - -(31) HashAggregate [codegen id : 6] -Input [6]: [w_state#11, i_item_id#14, sum#24, isEmpty#25, sum#26, isEmpty#27] -Keys [2]: [w_state#11, i_item_id#14] -Functions [2]: [sum(CASE WHEN (d_date#18 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END), sum(CASE WHEN (d_date#18 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)] -Aggregate Attributes [2]: [sum(CASE WHEN (d_date#18 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#29, sum(CASE WHEN (d_date#18 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#30] -Results [4]: [w_state#11, i_item_id#14, sum(CASE WHEN (d_date#18 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#29 AS sales_before#31, sum(CASE WHEN (d_date#18 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#30 AS sales_after#32] - -(32) TakeOrderedAndProject -Input [4]: [w_state#11, i_item_id#14, sales_before#31, sales_after#32] -Arguments: 100, [w_state#11 ASC NULLS FIRST, i_item_id#14 ASC NULLS FIRST], [w_state#11, i_item_id#14, sales_before#31, sales_after#32] - +TakeOrderedAndProject(limit=100, orderBy=[w_state#1 ASC NULLS FIRST,i_item_id#2 ASC NULLS FIRST], output=[w_state#1,i_item_id#2,sales_before#3,sales_after#4]) ++- *(6) HashAggregate(keys=[w_state#1, i_item_id#2], functions=[sum(CASE WHEN (d_date#5 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), sum(CASE WHEN (d_date#5 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)]) + +- Exchange hashpartitioning(w_state#1, i_item_id#2, 5) + +- *(5) HashAggregate(keys=[w_state#1, i_item_id#2], functions=[partial_sum(CASE WHEN (d_date#5 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#5 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)]) + +- *(5) Project [cs_sales_price#6, cr_refunded_cash#7, w_state#1, i_item_id#2, d_date#5] + +- *(5) BroadcastHashJoin [cs_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + :- *(5) Project [cs_sold_date_sk#8, cs_sales_price#6, cr_refunded_cash#7, w_state#1, i_item_id#2] + : +- *(5) BroadcastHashJoin [cs_item_sk#10], [i_item_sk#11], Inner, BuildRight + : :- *(5) Project [cs_sold_date_sk#8, cs_item_sk#10, cs_sales_price#6, cr_refunded_cash#7, w_state#1] + : : +- *(5) BroadcastHashJoin [cs_warehouse_sk#12], [w_warehouse_sk#13], Inner, BuildRight + : : :- *(5) Project [cs_sold_date_sk#8, cs_warehouse_sk#12, cs_item_sk#10, cs_sales_price#6, cr_refunded_cash#7] + : : : +- *(5) BroadcastHashJoin [cs_order_number#14, cs_item_sk#10], [cr_order_number#15, cr_item_sk#16], LeftOuter, BuildRight + : : : :- *(5) Project [cs_sold_date_sk#8, cs_warehouse_sk#12, cs_item_sk#10, cs_order_number#14, cs_sales_price#6] + : : : : +- *(5) Filter ((isnotnull(cs_warehouse_sk#12) && isnotnull(cs_item_sk#10)) && isnotnull(cs_sold_date_sk#8)) + : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#8,cs_warehouse_sk#12,cs_item_sk#10,cs_order_number#14,cs_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [w_warehouse_sk#13, w_state#1] + : : +- *(2) Filter isnotnull(w_warehouse_sk#13) + : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#13,w_state#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [i_item_sk#11, i_item_id#2] + : +- *(3) Filter (((isnotnull(i_current_price#17) && (i_current_price#17 >= 0.99)) && (i_current_price#17 <= 1.49)) && isnotnull(i_item_sk#11)) + : +- *(3) FileScan parquet default.item[i_item_sk#11,i_item_id#2,i_current_price#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [d_date_sk#9, d_date#5] + +- *(4) Filter (((isnotnull(d_date#5) && (d_date#5 >= 10997)) && (d_date#5 <= 11057)) && isnotnull(d_date_sk#9)) + +- *(4) FileScan parquet default.date_dim[d_date_sk#9,d_date#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/simplified.txt index b1e0f1f17..fcc311d0d 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/simplified.txt @@ -1,48 +1,42 @@ -TakeOrderedAndProject [w_state,i_item_id,sales_before,sales_after] - WholeStageCodegen (6) - HashAggregate [w_state,i_item_id,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END),sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END),sales_before,sales_after,sum,isEmpty,sum,isEmpty] +TakeOrderedAndProject [i_item_id,sales_after,sales_before,w_state] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),w_state] [sales_after,sales_before,sum,sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] InputAdapter - Exchange [w_state,i_item_id] #1 - WholeStageCodegen (5) - HashAggregate [w_state,i_item_id,d_date,cs_sales_price,cr_refunded_cash] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - Project [cs_sales_price,cr_refunded_cash,w_state,i_item_id,d_date] + Exchange [i_item_id,w_state] #1 + WholeStageCodegen + HashAggregate [cr_refunded_cash,cs_sales_price,d_date,i_item_id,sum,sum,sum,sum,w_state] [sum,sum,sum,sum] + Project [cr_refunded_cash,cs_sales_price,d_date,i_item_id,w_state] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_sales_price,cr_refunded_cash,w_state,i_item_id] + Project [cr_refunded_cash,cs_sales_price,cs_sold_date_sk,i_item_id,w_state] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_sales_price,cr_refunded_cash,w_state] + Project [cr_refunded_cash,cs_item_sk,cs_sales_price,cs_sold_date_sk,w_state] BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] - Project [cs_sold_date_sk,cs_warehouse_sk,cs_item_sk,cs_sales_price,cr_refunded_cash] - BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Filter [cs_warehouse_sk,cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_warehouse_sk,cs_item_sk,cs_order_number,cs_sales_price] + Project [cr_refunded_cash,cs_item_sk,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] + Filter [cs_item_sk,cs_sold_date_sk,cs_warehouse_sk] + Scan parquet default.catalog_sales [cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] [cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [cr_order_number,cr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash] + WholeStageCodegen + Project [cr_item_sk,cr_order_number,cr_refunded_cash] + Filter [cr_item_sk,cr_order_number] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash] [cr_item_sk,cr_order_number,cr_refunded_cash] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Filter [w_warehouse_sk] - ColumnarToRow - InputAdapter - Scan parquet default.warehouse [w_warehouse_sk,w_state] + WholeStageCodegen + Project [w_state,w_warehouse_sk] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_state,w_warehouse_sk] [w_state,w_warehouse_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) - Project [i_item_sk,i_item_id] + WholeStageCodegen + Project [i_item_id,i_item_sk] Filter [i_current_price,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id,i_current_price] + Scan parquet default.item [i_current_price,i_item_id,i_item_sk] [i_current_price,i_item_id,i_item_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + WholeStageCodegen + Project [d_date,d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/explain.txt index 13d73e61e..31bc2e514 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/explain.txt @@ -1,120 +1,19 @@ == Physical Plan == -TakeOrderedAndProject (20) -+- * HashAggregate (19) - +- Exchange (18) - +- * HashAggregate (17) - +- * Project (16) - +- * BroadcastHashJoin Inner BuildRight (15) - :- * Project (4) - : +- * Filter (3) - : +- * ColumnarToRow (2) - : +- Scan parquet default.item (1) - +- BroadcastExchange (14) - +- * Project (13) - +- * Filter (12) - +- * HashAggregate (11) - +- Exchange (10) - +- * HashAggregate (9) - +- * Project (8) - +- * Filter (7) - +- * ColumnarToRow (6) - +- Scan parquet default.item (5) - - -(1) Scan parquet default.item -Output [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_manufact_id), GreaterThanOrEqual(i_manufact_id,738), LessThanOrEqual(i_manufact_id,778), IsNotNull(i_manufact)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] - -(3) Filter [codegen id : 3] -Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] -Condition : (((isnotnull(i_manufact_id#1) AND (i_manufact_id#1 >= 738)) AND (i_manufact_id#1 <= 778)) AND isnotnull(i_manufact#2)) - -(4) Project [codegen id : 3] -Output [2]: [i_manufact#2, i_product_name#3] -Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] - -(5) Scan parquet default.item -Output [5]: [i_category#4, i_manufact#2, i_size#5, i_color#6, i_units#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [Or(Or(And(EqualTo(i_category,Women),Or(And(And(Or(EqualTo(i_color,powder),EqualTo(i_color,khaki)),Or(EqualTo(i_units,Ounce),EqualTo(i_units,Oz))),Or(EqualTo(i_size,medium),EqualTo(i_size,extra large))),And(And(Or(EqualTo(i_color,brown),EqualTo(i_color,honeydew)),Or(EqualTo(i_units,Bunch),EqualTo(i_units,Ton))),Or(EqualTo(i_size,N/A),EqualTo(i_size,small))))),And(EqualTo(i_category,Men),Or(And(And(Or(EqualTo(i_color,floral),EqualTo(i_color,deep)),Or(EqualTo(i_units,N/A),EqualTo(i_units,Dozen))),Or(EqualTo(i_size,petite),EqualTo(i_size,large))),And(And(Or(EqualTo(i_color,light),EqualTo(i_color,cornflower)),Or(EqualTo(i_units,Box),EqualTo(i_units,Pound))),Or(EqualTo(i_size,medium),EqualTo(i_size,extra large)))))),Or(And(EqualTo(i_category,Women),Or(And(And(Or(EqualTo(i_color,midnight),EqualTo(i_color,snow)),Or(EqualTo(i_units,Pallet),EqualTo(i_units,Gross))),Or(EqualTo(i_size,medium),EqualTo(i_size,extra large))),And(And(Or(EqualTo(i_color,cyan),EqualTo(i_color,papaya)),Or(EqualTo(i_units,Cup),EqualTo(i_units,Dram))),Or(EqualTo(i_size,N/A),EqualTo(i_size,small))))),And(EqualTo(i_category,Men),Or(And(And(Or(EqualTo(i_color,orange),EqualTo(i_color,frosted)),Or(EqualTo(i_units,Each),EqualTo(i_units,Tbl))),Or(EqualTo(i_size,petite),EqualTo(i_size,large))),And(And(Or(EqualTo(i_color,forest),EqualTo(i_color,ghost)),Or(EqualTo(i_units,Lb),EqualTo(i_units,Bundle))),Or(EqualTo(i_size,medium),EqualTo(i_size,extra large))))))), IsNotNull(i_manufact)] -ReadSchema: struct - -(6) ColumnarToRow [codegen id : 1] -Input [5]: [i_category#4, i_manufact#2, i_size#5, i_color#6, i_units#7] - -(7) Filter [codegen id : 1] -Input [5]: [i_category#4, i_manufact#2, i_size#5, i_color#6, i_units#7] -Condition : (((((i_category#4 = Women) AND (((((i_color#6 = powder) OR (i_color#6 = khaki)) AND ((i_units#7 = Ounce) OR (i_units#7 = Oz))) AND ((i_size#5 = medium) OR (i_size#5 = extra large))) OR ((((i_color#6 = brown) OR (i_color#6 = honeydew)) AND ((i_units#7 = Bunch) OR (i_units#7 = Ton))) AND ((i_size#5 = N/A) OR (i_size#5 = small))))) OR ((i_category#4 = Men) AND (((((i_color#6 = floral) OR (i_color#6 = deep)) AND ((i_units#7 = N/A) OR (i_units#7 = Dozen))) AND ((i_size#5 = petite) OR (i_size#5 = large))) OR ((((i_color#6 = light) OR (i_color#6 = cornflower)) AND ((i_units#7 = Box) OR (i_units#7 = Pound))) AND ((i_size#5 = medium) OR (i_size#5 = extra large)))))) OR (((i_category#4 = Women) AND (((((i_color#6 = midnight) OR (i_color#6 = snow)) AND ((i_units#7 = Pallet) OR (i_units#7 = Gross))) AND ((i_size#5 = medium) OR (i_size#5 = extra large))) OR ((((i_color#6 = cyan) OR (i_color#6 = papaya)) AND ((i_units#7 = Cup) OR (i_units#7 = Dram))) AND ((i_size#5 = N/A) OR (i_size#5 = small))))) OR ((i_category#4 = Men) AND (((((i_color#6 = orange) OR (i_color#6 = frosted)) AND ((i_units#7 = Each) OR (i_units#7 = Tbl))) AND ((i_size#5 = petite) OR (i_size#5 = large))) OR ((((i_color#6 = forest) OR (i_color#6 = ghost)) AND ((i_units#7 = Lb) OR (i_units#7 = Bundle))) AND ((i_size#5 = medium) OR (i_size#5 = extra large))))))) AND isnotnull(i_manufact#2)) - -(8) Project [codegen id : 1] -Output [1]: [i_manufact#2] -Input [5]: [i_category#4, i_manufact#2, i_size#5, i_color#6, i_units#7] - -(9) HashAggregate [codegen id : 1] -Input [1]: [i_manufact#2] -Keys [1]: [i_manufact#2] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#8] -Results [2]: [i_manufact#2, count#9] - -(10) Exchange -Input [2]: [i_manufact#2, count#9] -Arguments: hashpartitioning(i_manufact#2, 5), true, [id=#10] - -(11) HashAggregate [codegen id : 2] -Input [2]: [i_manufact#2, count#9] -Keys [1]: [i_manufact#2] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#11] -Results [2]: [count(1)#11 AS item_cnt#12, i_manufact#2 AS i_manufact#2#13] - -(12) Filter [codegen id : 2] -Input [2]: [item_cnt#12, i_manufact#2#13] -Condition : (item_cnt#12 > 0) - -(13) Project [codegen id : 2] -Output [1]: [i_manufact#2#13] -Input [2]: [item_cnt#12, i_manufact#2#13] - -(14) BroadcastExchange -Input [1]: [i_manufact#2#13] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#14] - -(15) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [i_manufact#2] -Right keys [1]: [i_manufact#2#13] -Join condition: None - -(16) Project [codegen id : 3] -Output [1]: [i_product_name#3] -Input [3]: [i_manufact#2, i_product_name#3, i_manufact#2#13] - -(17) HashAggregate [codegen id : 3] -Input [1]: [i_product_name#3] -Keys [1]: [i_product_name#3] -Functions: [] -Aggregate Attributes: [] -Results [1]: [i_product_name#3] - -(18) Exchange -Input [1]: [i_product_name#3] -Arguments: hashpartitioning(i_product_name#3, 5), true, [id=#15] - -(19) HashAggregate [codegen id : 4] -Input [1]: [i_product_name#3] -Keys [1]: [i_product_name#3] -Functions: [] -Aggregate Attributes: [] -Results [1]: [i_product_name#3] - -(20) TakeOrderedAndProject -Input [1]: [i_product_name#3] -Arguments: 100, [i_product_name#3 ASC NULLS FIRST], [i_product_name#3] - +TakeOrderedAndProject(limit=100, orderBy=[i_product_name#1 ASC NULLS FIRST], output=[i_product_name#1]) ++- *(4) HashAggregate(keys=[i_product_name#1], functions=[]) + +- Exchange hashpartitioning(i_product_name#1, 5) + +- *(3) HashAggregate(keys=[i_product_name#1], functions=[]) + +- *(3) Project [i_product_name#1] + +- *(3) BroadcastHashJoin [i_manufact#2], [i_manufact#2#3], Inner, BuildRight + :- *(3) Project [i_manufact#2, i_product_name#1] + : +- *(3) Filter (((isnotnull(i_manufact_id#4) && (i_manufact_id#4 >= 738)) && (i_manufact_id#4 <= 778)) && isnotnull(i_manufact#2)) + : +- *(3) FileScan parquet default.item[i_manufact_id#4,i_manufact#2,i_product_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), GreaterThanOrEqual(i_manufact_id,738), LessThanOrEqual(i_manufact_id,7..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- *(2) Project [i_manufact#2#3] + +- *(2) Filter (if (isnull(alwaysTrue#5)) 0 else item_cnt#6 > 0) + +- *(2) HashAggregate(keys=[i_manufact#2], functions=[count(1)]) + +- Exchange hashpartitioning(i_manufact#2, 5) + +- *(1) HashAggregate(keys=[i_manufact#2], functions=[partial_count(1)]) + +- *(1) Project [i_manufact#2] + +- *(1) Filter (((((i_category#7 = Women) && (((((i_color#8 = powder) || (i_color#8 = khaki)) && ((i_units#9 = Ounce) || (i_units#9 = Oz))) && ((i_size#10 = medium) || (i_size#10 = extra large))) || ((((i_color#8 = brown) || (i_color#8 = honeydew)) && ((i_units#9 = Bunch) || (i_units#9 = Ton))) && ((i_size#10 = N/A) || (i_size#10 = small))))) || ((i_category#7 = Men) && (((((i_color#8 = floral) || (i_color#8 = deep)) && ((i_units#9 = N/A) || (i_units#9 = Dozen))) && ((i_size#10 = petite) || (i_size#10 = large))) || ((((i_color#8 = light) || (i_color#8 = cornflower)) && ((i_units#9 = Box) || (i_units#9 = Pound))) && ((i_size#10 = medium) || (i_size#10 = extra large)))))) || (((i_category#7 = Women) && (((((i_color#8 = midnight) || (i_color#8 = snow)) && ((i_units#9 = Pallet) || (i_units#9 = Gross))) && ((i_size#10 = medium) || (i_size#10 = extra large))) || ((((i_color#8 = cyan) || (i_color#8 = papaya)) && ((i_units#9 = Cup) || (i_units#9 = Dram))) && ((i_size#10 = N/A) || (i_size#10 = small))))) || ((i_category#7 = Men) && (((((i_color#8 = orange) || (i_color#8 = frosted)) && ((i_units#9 = Each) || (i_units#9 = Tbl))) && ((i_size#10 = petite) || (i_size#10 = large))) || ((((i_color#8 = forest) || (i_color#8 = ghost)) && ((i_units#9 = Lb) || (i_units#9 = Bundle))) && ((i_size#10 = medium) || (i_size#10 = extra large))))))) && isnotnull(i_manufact#2)) + +- *(1) FileScan parquet default.item[i_category#7,i_manufact#2,i_size#10,i_color#8,i_units#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(Or(And(EqualTo(i_category,Women),Or(And(And(Or(EqualTo(i_color,powder),EqualTo(i_color,khaki)..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/simplified.txt index 2d14d75ca..7196f719a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/simplified.txt @@ -1,29 +1,25 @@ TakeOrderedAndProject [i_product_name] - WholeStageCodegen (4) + WholeStageCodegen HashAggregate [i_product_name] InputAdapter Exchange [i_product_name] #1 - WholeStageCodegen (3) + WholeStageCodegen HashAggregate [i_product_name] Project [i_product_name] BroadcastHashJoin [i_manufact,i_manufact] Project [i_manufact,i_product_name] - Filter [i_manufact_id,i_manufact] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_manufact_id,i_manufact,i_product_name] + Filter [i_manufact,i_manufact_id] + Scan parquet default.item [i_manufact,i_manufact_id,i_product_name] [i_manufact,i_manufact_id,i_product_name] InputAdapter BroadcastExchange #2 - WholeStageCodegen (2) + WholeStageCodegen Project [i_manufact] - Filter [item_cnt] - HashAggregate [i_manufact,count] [count(1),item_cnt,i_manufact,count] + Filter [alwaysTrue,item_cnt] + HashAggregate [count,count(1),i_manufact] [alwaysTrue,count,count(1),i_manufact,item_cnt] InputAdapter Exchange [i_manufact] #3 - WholeStageCodegen (1) - HashAggregate [i_manufact] [count,count] + WholeStageCodegen + HashAggregate [count,count,i_manufact] [count,count] Project [i_manufact] - Filter [i_category,i_color,i_units,i_size,i_manufact] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_category,i_manufact,i_size,i_color,i_units] + Filter [i_category,i_color,i_manufact,i_size,i_units] + Scan parquet default.item [i_category,i_color,i_manufact,i_size,i_units] [i_category,i_color,i_manufact,i_size,i_units] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/explain.txt index f7732f3c8..1e560817c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/explain.txt @@ -1,122 +1,20 @@ == Physical Plan == -TakeOrderedAndProject (21) -+- * HashAggregate (20) - +- Exchange (19) - +- * HashAggregate (18) - +- * Project (17) - +- * BroadcastHashJoin Inner BuildRight (16) - :- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Project (4) - : : +- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.date_dim (1) - : +- BroadcastExchange (8) - : +- * Filter (7) - : +- * ColumnarToRow (6) - : +- Scan parquet default.store_sales (5) - +- BroadcastExchange (15) - +- * Project (14) - +- * Filter (13) - +- * ColumnarToRow (12) - +- Scan parquet default.item (11) - - -(1) Scan parquet default.date_dim -Output [3]: [d_date_sk#1, d_year#2, d_moy#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [3]: [d_date_sk#1, d_year#2, d_moy#3] - -(3) Filter [codegen id : 3] -Input [3]: [d_date_sk#1, d_year#2, d_moy#3] -Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) - -(4) Project [codegen id : 3] -Output [2]: [d_date_sk#1, d_year#2] -Input [3]: [d_date_sk#1, d_year#2, d_moy#3] - -(5) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct - -(6) ColumnarToRow [codegen id : 1] -Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] - -(7) Filter [codegen id : 1] -Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] -Condition : (isnotnull(ss_sold_date_sk#4) AND isnotnull(ss_item_sk#5)) - -(8) BroadcastExchange -Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] - -(9) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#4] -Join condition: None - -(10) Project [codegen id : 3] -Output [3]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6] -Input [5]: [d_date_sk#1, d_year#2, ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] - -(11) Scan parquet default.item -Output [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] - -(13) Filter [codegen id : 2] -Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] -Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 1)) AND isnotnull(i_item_sk#8)) - -(14) Project [codegen id : 2] -Output [3]: [i_item_sk#8, i_category_id#9, i_category#10] -Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] - -(15) BroadcastExchange -Input [3]: [i_item_sk#8, i_category_id#9, i_category#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(16) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_item_sk#5] -Right keys [1]: [i_item_sk#8] -Join condition: None - -(17) Project [codegen id : 3] -Output [4]: [d_year#2, ss_ext_sales_price#6, i_category_id#9, i_category#10] -Input [6]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6, i_item_sk#8, i_category_id#9, i_category#10] - -(18) HashAggregate [codegen id : 3] -Input [4]: [d_year#2, ss_ext_sales_price#6, i_category_id#9, i_category#10] -Keys [3]: [d_year#2, i_category_id#9, i_category#10] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum#13] -Results [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] - -(19) Exchange -Input [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] -Arguments: hashpartitioning(d_year#2, i_category_id#9, i_category#10, 5), true, [id=#15] - -(20) HashAggregate [codegen id : 4] -Input [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] -Keys [3]: [d_year#2, i_category_id#9, i_category#10] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#16] -Results [4]: [d_year#2, i_category_id#9, i_category#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#16,17,2) AS sum(ss_ext_sales_price)#17] - -(21) TakeOrderedAndProject -Input [4]: [d_year#2, i_category_id#9, i_category#10, sum(ss_ext_sales_price)#17] -Arguments: 100, [sum(ss_ext_sales_price)#17 DESC NULLS LAST, d_year#2 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_category#10 ASC NULLS FIRST], [d_year#2, i_category_id#9, i_category#10, sum(ss_ext_sales_price)#17] - +TakeOrderedAndProject(limit=100, orderBy=[sum(ss_ext_sales_price)#1 DESC NULLS LAST,d_year#2 ASC NULLS FIRST,i_category_id#3 ASC NULLS FIRST,i_category#4 ASC NULLS FIRST], output=[d_year#2,i_category_id#3,i_category#4,sum(ss_ext_sales_price)#1]) ++- *(4) HashAggregate(keys=[d_year#2, i_category_id#3, i_category#4], functions=[sum(UnscaledValue(ss_ext_sales_price#5))]) + +- Exchange hashpartitioning(d_year#2, i_category_id#3, i_category#4, 5) + +- *(3) HashAggregate(keys=[d_year#2, i_category_id#3, i_category#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#5))]) + +- *(3) Project [d_year#2, ss_ext_sales_price#5, i_category_id#3, i_category#4] + +- *(3) BroadcastHashJoin [ss_item_sk#6], [i_item_sk#7], Inner, BuildRight + :- *(3) Project [d_year#2, ss_item_sk#6, ss_ext_sales_price#5] + : +- *(3) BroadcastHashJoin [d_date_sk#8], [ss_sold_date_sk#9], Inner, BuildRight + : :- *(3) Project [d_date_sk#8, d_year#2] + : : +- *(3) Filter ((((isnotnull(d_moy#10) && isnotnull(d_year#2)) && (d_moy#10 = 11)) && (d_year#2 = 2000)) && isnotnull(d_date_sk#8)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_year#2,d_moy#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [ss_sold_date_sk#9, ss_item_sk#6, ss_ext_sales_price#5] + : +- *(1) Filter (isnotnull(ss_sold_date_sk#9) && isnotnull(ss_item_sk#6)) + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#6,ss_ext_sales_price#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [i_item_sk#7, i_category_id#3, i_category#4] + +- *(2) Filter ((isnotnull(i_manager_id#11) && (i_manager_id#11 = 1)) && isnotnull(i_item_sk#7)) + +- *(2) FileScan parquet default.item[i_item_sk#7,i_category_id#3,i_category#4,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/simplified.txt index d9bb6de20..67de156ac 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/simplified.txt @@ -1,31 +1,26 @@ -TakeOrderedAndProject [sum(ss_ext_sales_price),d_year,i_category_id,i_category] - WholeStageCodegen (4) - HashAggregate [d_year,i_category_id,i_category,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price),sum] +TakeOrderedAndProject [d_year,i_category,i_category_id,sum(ss_ext_sales_price)] + WholeStageCodegen + HashAggregate [d_year,i_category,i_category_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price)] InputAdapter - Exchange [d_year,i_category_id,i_category] #1 - WholeStageCodegen (3) - HashAggregate [d_year,i_category_id,i_category,ss_ext_sales_price] [sum,sum] - Project [d_year,ss_ext_sales_price,i_category_id,i_category] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [d_year,ss_item_sk,ss_ext_sales_price] + Exchange [d_year,i_category,i_category_id] #1 + WholeStageCodegen + HashAggregate [d_year,i_category,i_category_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [d_year,i_category,i_category_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_year,ss_ext_sales_price,ss_item_sk] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] Project [d_date_sk,d_year] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [ss_sold_date_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + WholeStageCodegen + Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Project [i_item_sk,i_category_id,i_category] - Filter [i_manager_id,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_category_id,i_category,i_manager_id] + WholeStageCodegen + Project [i_category,i_category_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + Scan parquet default.item [i_category,i_category_id,i_item_sk,i_manager_id] [i_category,i_category_id,i_item_sk,i_manager_id] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/explain.txt index 8f3ef7fee..b68348e44 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/explain.txt @@ -1,122 +1,20 @@ == Physical Plan == -TakeOrderedAndProject (21) -+- * HashAggregate (20) - +- Exchange (19) - +- * HashAggregate (18) - +- * Project (17) - +- * BroadcastHashJoin Inner BuildRight (16) - :- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Project (4) - : : +- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.date_dim (1) - : +- BroadcastExchange (8) - : +- * Filter (7) - : +- * ColumnarToRow (6) - : +- Scan parquet default.store_sales (5) - +- BroadcastExchange (15) - +- * Project (14) - +- * Filter (13) - +- * ColumnarToRow (12) - +- Scan parquet default.store (11) - - -(1) Scan parquet default.date_dim -Output [3]: [d_date_sk#1, d_year#2, d_day_name#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] - -(3) Filter [codegen id : 3] -Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] -Condition : ((isnotnull(d_year#2) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) - -(4) Project [codegen id : 3] -Output [2]: [d_date_sk#1, d_day_name#3] -Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] - -(5) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(6) ColumnarToRow [codegen id : 1] -Input [3]: [ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] - -(7) Filter [codegen id : 1] -Input [3]: [ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] -Condition : (isnotnull(ss_sold_date_sk#4) AND isnotnull(ss_store_sk#5)) - -(8) BroadcastExchange -Input [3]: [ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] - -(9) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#4] -Join condition: None - -(10) Project [codegen id : 3] -Output [3]: [d_day_name#3, ss_store_sk#5, ss_sales_price#6] -Input [5]: [d_date_sk#1, d_day_name#3, ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] - -(11) Scan parquet default.store -Output [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] - -(13) Filter [codegen id : 2] -Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] -Condition : ((isnotnull(s_gmt_offset#11) AND (s_gmt_offset#11 = -5.00)) AND isnotnull(s_store_sk#8)) - -(14) Project [codegen id : 2] -Output [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] -Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] - -(15) BroadcastExchange -Input [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(16) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_store_sk#5] -Right keys [1]: [s_store_sk#8] -Join condition: None - -(17) Project [codegen id : 3] -Output [4]: [d_day_name#3, ss_sales_price#6, s_store_id#9, s_store_name#10] -Input [6]: [d_day_name#3, ss_store_sk#5, ss_sales_price#6, s_store_sk#8, s_store_id#9, s_store_name#10] - -(18) HashAggregate [codegen id : 3] -Input [4]: [d_day_name#3, ss_sales_price#6, s_store_id#9, s_store_name#10] -Keys [2]: [s_store_name#10, s_store_id#9] -Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#6 ELSE null END))] -Aggregate Attributes [7]: [sum#13, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19] -Results [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] - -(19) Exchange -Input [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] -Arguments: hashpartitioning(s_store_name#10, s_store_id#9, 5), true, [id=#27] - -(20) HashAggregate [codegen id : 4] -Input [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] -Keys [2]: [s_store_name#10, s_store_id#9] -Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#6 ELSE null END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#6 ELSE null END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#6 ELSE null END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#6 ELSE null END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 ELSE null END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#6 ELSE null END))#32, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#6 ELSE null END))#33, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#6 ELSE null END))#34] -Results [9]: [s_store_name#10, s_store_id#9, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#6 ELSE null END))#28,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#6 ELSE null END))#29,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#6 ELSE null END))#30,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 ELSE null END))#31,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#6 ELSE null END))#32,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#6 ELSE null END))#33,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#6 ELSE null END))#34,17,2) AS sat_sales#41] - -(21) TakeOrderedAndProject -Input [9]: [s_store_name#10, s_store_id#9, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41] -Arguments: 100, [s_store_name#10 ASC NULLS FIRST, s_store_id#9 ASC NULLS FIRST, sun_sales#35 ASC NULLS FIRST, mon_sales#36 ASC NULLS FIRST, tue_sales#37 ASC NULLS FIRST, wed_sales#38 ASC NULLS FIRST, thu_sales#39 ASC NULLS FIRST, fri_sales#40 ASC NULLS FIRST, sat_sales#41 ASC NULLS FIRST], [s_store_name#10, s_store_id#9, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41] - +TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,s_store_id#2 ASC NULLS FIRST,sun_sales#3 ASC NULLS FIRST,mon_sales#4 ASC NULLS FIRST,tue_sales#5 ASC NULLS FIRST,wed_sales#6 ASC NULLS FIRST,thu_sales#7 ASC NULLS FIRST,fri_sales#8 ASC NULLS FIRST,sat_sales#9 ASC NULLS FIRST], output=[s_store_name#1,s_store_id#2,sun_sales#3,mon_sales#4,tue_sales#5,wed_sales#6,thu_sales#7,fri_sales#8,sat_sales#9]) ++- *(4) HashAggregate(keys=[s_store_name#1, s_store_id#2], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#10 = Sunday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Monday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Tuesday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Wednesday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Thursday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Friday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Saturday) THEN ss_sales_price#11 ELSE null END))]) + +- Exchange hashpartitioning(s_store_name#1, s_store_id#2, 5) + +- *(3) HashAggregate(keys=[s_store_name#1, s_store_id#2], functions=[partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Sunday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Monday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Tuesday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Wednesday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Thursday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Friday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Saturday) THEN ss_sales_price#11 ELSE null END))]) + +- *(3) Project [d_day_name#10, ss_sales_price#11, s_store_id#2, s_store_name#1] + +- *(3) BroadcastHashJoin [ss_store_sk#12], [s_store_sk#13], Inner, BuildRight + :- *(3) Project [d_day_name#10, ss_store_sk#12, ss_sales_price#11] + : +- *(3) BroadcastHashJoin [d_date_sk#14], [ss_sold_date_sk#15], Inner, BuildRight + : :- *(3) Project [d_date_sk#14, d_day_name#10] + : : +- *(3) Filter ((isnotnull(d_year#16) && (d_year#16 = 2000)) && isnotnull(d_date_sk#14)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_day_name#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [ss_sold_date_sk#15, ss_store_sk#12, ss_sales_price#11] + : +- *(1) Filter (isnotnull(ss_sold_date_sk#15) && isnotnull(ss_store_sk#12)) + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#15,ss_store_sk#12,ss_sales_price#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [s_store_sk#13, s_store_id#2, s_store_name#1] + +- *(2) Filter ((isnotnull(s_gmt_offset#17) && (s_gmt_offset#17 = -5.00)) && isnotnull(s_store_sk#13)) + +- *(2) FileScan parquet default.store[s_store_sk#13,s_store_id#2,s_store_name#1,s_gmt_offset#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/simplified.txt index 1694f3aab..136a277e1 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/simplified.txt @@ -1,31 +1,26 @@ -TakeOrderedAndProject [s_store_name,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] - WholeStageCodegen (4) - HashAggregate [s_store_name,s_store_id,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] +TakeOrderedAndProject [fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + WholeStageCodegen + HashAggregate [s_store_id,s_store_name,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] InputAdapter - Exchange [s_store_name,s_store_id] #1 - WholeStageCodegen (3) - HashAggregate [s_store_name,s_store_id,d_day_name,ss_sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [d_day_name,ss_sales_price,s_store_id,s_store_name] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [d_day_name,ss_store_sk,ss_sales_price] + Exchange [s_store_id,s_store_name] #1 + WholeStageCodegen + HashAggregate [d_day_name,s_store_id,s_store_name,ss_sales_price,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,s_store_id,s_store_name,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_day_name,ss_sales_price,ss_store_sk] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] Project [d_date_sk,d_day_name] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_day_name] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_day_name,d_year] [d_date_sk,d_day_name,d_year] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_sales_price] + WholeStageCodegen + Project [ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Project [s_store_sk,s_store_id,s_store_name] + WholeStageCodegen + Project [s_store_id,s_store_name,s_store_sk] Filter [s_gmt_offset,s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id,s_store_name,s_gmt_offset] + Scan parquet default.store [s_gmt_offset,s_store_id,s_store_name,s_store_sk] [s_gmt_offset,s_store_id,s_store_name,s_store_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt index 096bd45f0..99c27e16a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt @@ -1,248 +1,50 @@ == Physical Plan == -TakeOrderedAndProject (36) -+- * Project (35) - +- * BroadcastHashJoin Inner BuildRight (34) - :- * Project (32) - : +- * BroadcastHashJoin Inner BuildRight (31) - : :- * Project (26) - : : +- * BroadcastHashJoin Inner BuildRight (25) - : : :- * Project (14) - : : : +- * Filter (13) - : : : +- Window (12) - : : : +- * Sort (11) - : : : +- Exchange (10) - : : : +- * Project (9) - : : : +- * Filter (8) - : : : +- * HashAggregate (7) - : : : +- Exchange (6) - : : : +- * HashAggregate (5) - : : : +- * Project (4) - : : : +- * Filter (3) - : : : +- * ColumnarToRow (2) - : : : +- Scan parquet default.store_sales (1) - : : +- BroadcastExchange (24) - : : +- * Project (23) - : : +- * Filter (22) - : : +- Window (21) - : : +- * Sort (20) - : : +- Exchange (19) - : : +- * Project (18) - : : +- * Filter (17) - : : +- * HashAggregate (16) - : : +- ReusedExchange (15) - : +- BroadcastExchange (30) - : +- * Filter (29) - : +- * ColumnarToRow (28) - : +- Scan parquet default.item (27) - +- ReusedExchange (33) - - -(1) Scan parquet default.store_sales -Output [3]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 1] -Input [3]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3] - -(3) Filter [codegen id : 1] -Input [3]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3] -Condition : (isnotnull(ss_store_sk#2) AND (ss_store_sk#2 = 4)) - -(4) Project [codegen id : 1] -Output [2]: [ss_item_sk#1, ss_net_profit#3] -Input [3]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3] - -(5) HashAggregate [codegen id : 1] -Input [2]: [ss_item_sk#1, ss_net_profit#3] -Keys [1]: [ss_item_sk#1] -Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [2]: [sum#4, count#5] -Results [3]: [ss_item_sk#1, sum#6, count#7] - -(6) Exchange -Input [3]: [ss_item_sk#1, sum#6, count#7] -Arguments: hashpartitioning(ss_item_sk#1, 5), true, [id=#8] - -(7) HashAggregate [codegen id : 2] -Input [3]: [ss_item_sk#1, sum#6, count#7] -Keys [1]: [ss_item_sk#1] -Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#9] -Results [3]: [ss_item_sk#1 AS item_sk#10, cast((avg(UnscaledValue(ss_net_profit#3))#9 / 100.0) as decimal(11,6)) AS rank_col#11, cast((avg(UnscaledValue(ss_net_profit#3))#9 / 100.0) as decimal(11,6)) AS avg(ss_net_profit#3)#12] - -(8) Filter [codegen id : 2] -Input [3]: [item_sk#10, rank_col#11, avg(ss_net_profit#3)#12] -Condition : (isnotnull(avg(ss_net_profit#3)#12) AND (cast(avg(ss_net_profit#3)#12 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery scalar-subquery#13, [id=#14])), DecimalType(13,7), true))) - -(9) Project [codegen id : 2] -Output [2]: [item_sk#10, rank_col#11] -Input [3]: [item_sk#10, rank_col#11, avg(ss_net_profit#3)#12] - -(10) Exchange -Input [2]: [item_sk#10, rank_col#11] -Arguments: SinglePartition, true, [id=#15] - -(11) Sort [codegen id : 3] -Input [2]: [item_sk#10, rank_col#11] -Arguments: [rank_col#11 ASC NULLS FIRST], false, 0 - -(12) Window -Input [2]: [item_sk#10, rank_col#11] -Arguments: [rank(rank_col#11) windowspecdefinition(rank_col#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#16], [rank_col#11 ASC NULLS FIRST] - -(13) Filter [codegen id : 10] -Input [3]: [item_sk#10, rank_col#11, rnk#16] -Condition : ((isnotnull(rnk#16) AND (rnk#16 < 11)) AND isnotnull(item_sk#10)) - -(14) Project [codegen id : 10] -Output [2]: [item_sk#10, rnk#16] -Input [3]: [item_sk#10, rank_col#11, rnk#16] - -(15) ReusedExchange [Reuses operator id: 6] -Output [3]: [ss_item_sk#1, sum#17, count#18] - -(16) HashAggregate [codegen id : 5] -Input [3]: [ss_item_sk#1, sum#17, count#18] -Keys [1]: [ss_item_sk#1] -Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#19] -Results [3]: [ss_item_sk#1 AS item_sk#20, cast((avg(UnscaledValue(ss_net_profit#3))#19 / 100.0) as decimal(11,6)) AS rank_col#21, cast((avg(UnscaledValue(ss_net_profit#3))#19 / 100.0) as decimal(11,6)) AS avg(ss_net_profit#3)#22] - -(17) Filter [codegen id : 5] -Input [3]: [item_sk#20, rank_col#21, avg(ss_net_profit#3)#22] -Condition : (isnotnull(avg(ss_net_profit#3)#22) AND (cast(avg(ss_net_profit#3)#22 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(ReusedSubquery Subquery scalar-subquery#13, [id=#14])), DecimalType(13,7), true))) - -(18) Project [codegen id : 5] -Output [2]: [item_sk#20, rank_col#21] -Input [3]: [item_sk#20, rank_col#21, avg(ss_net_profit#3)#22] - -(19) Exchange -Input [2]: [item_sk#20, rank_col#21] -Arguments: SinglePartition, true, [id=#23] - -(20) Sort [codegen id : 6] -Input [2]: [item_sk#20, rank_col#21] -Arguments: [rank_col#21 DESC NULLS LAST], false, 0 - -(21) Window -Input [2]: [item_sk#20, rank_col#21] -Arguments: [rank(rank_col#21) windowspecdefinition(rank_col#21 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#24], [rank_col#21 DESC NULLS LAST] - -(22) Filter [codegen id : 7] -Input [3]: [item_sk#20, rank_col#21, rnk#24] -Condition : ((isnotnull(rnk#24) AND (rnk#24 < 11)) AND isnotnull(item_sk#20)) - -(23) Project [codegen id : 7] -Output [2]: [item_sk#20, rnk#24] -Input [3]: [item_sk#20, rank_col#21, rnk#24] - -(24) BroadcastExchange -Input [2]: [item_sk#20, rnk#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#25] - -(25) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [rnk#16] -Right keys [1]: [rnk#24] -Join condition: None - -(26) Project [codegen id : 10] -Output [3]: [item_sk#10, rnk#16, item_sk#20] -Input [4]: [item_sk#10, rnk#16, item_sk#20, rnk#24] - -(27) Scan parquet default.item -Output [2]: [i_item_sk#26, i_product_name#27] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(28) ColumnarToRow [codegen id : 8] -Input [2]: [i_item_sk#26, i_product_name#27] - -(29) Filter [codegen id : 8] -Input [2]: [i_item_sk#26, i_product_name#27] -Condition : isnotnull(i_item_sk#26) - -(30) BroadcastExchange -Input [2]: [i_item_sk#26, i_product_name#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] - -(31) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [item_sk#10] -Right keys [1]: [i_item_sk#26] -Join condition: None - -(32) Project [codegen id : 10] -Output [3]: [rnk#16, item_sk#20, i_product_name#27] -Input [5]: [item_sk#10, rnk#16, item_sk#20, i_item_sk#26, i_product_name#27] - -(33) ReusedExchange [Reuses operator id: 30] -Output [2]: [i_item_sk#29, i_product_name#30] - -(34) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [item_sk#20] -Right keys [1]: [i_item_sk#29] -Join condition: None - -(35) Project [codegen id : 10] -Output [3]: [rnk#16, i_product_name#27 AS best_performing#31, i_product_name#30 AS worst_performing#32] -Input [5]: [rnk#16, item_sk#20, i_product_name#27, i_item_sk#29, i_product_name#30] - -(36) TakeOrderedAndProject -Input [3]: [rnk#16, best_performing#31, worst_performing#32] -Arguments: 100, [rnk#16 ASC NULLS FIRST], [rnk#16, best_performing#31, worst_performing#32] - -===== Subqueries ===== - -Subquery:1 Hosting operator id = 8 Hosting Expression = Subquery scalar-subquery#13, [id=#14] -* HashAggregate (43) -+- Exchange (42) - +- * HashAggregate (41) - +- * Project (40) - +- * Filter (39) - +- * ColumnarToRow (38) - +- Scan parquet default.store_sales (37) - - -(37) Scan parquet default.store_sales -Output [3]: [ss_addr_sk#33, ss_store_sk#2, ss_net_profit#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)] -ReadSchema: struct - -(38) ColumnarToRow [codegen id : 1] -Input [3]: [ss_addr_sk#33, ss_store_sk#2, ss_net_profit#3] - -(39) Filter [codegen id : 1] -Input [3]: [ss_addr_sk#33, ss_store_sk#2, ss_net_profit#3] -Condition : ((isnotnull(ss_store_sk#2) AND (ss_store_sk#2 = 4)) AND isnull(ss_addr_sk#33)) - -(40) Project [codegen id : 1] -Output [2]: [ss_store_sk#2, ss_net_profit#3] -Input [3]: [ss_addr_sk#33, ss_store_sk#2, ss_net_profit#3] - -(41) HashAggregate [codegen id : 1] -Input [2]: [ss_store_sk#2, ss_net_profit#3] -Keys [1]: [ss_store_sk#2] -Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [2]: [sum#34, count#35] -Results [3]: [ss_store_sk#2, sum#36, count#37] - -(42) Exchange -Input [3]: [ss_store_sk#2, sum#36, count#37] -Arguments: hashpartitioning(ss_store_sk#2, 5), true, [id=#38] - -(43) HashAggregate [codegen id : 2] -Input [3]: [ss_store_sk#2, sum#36, count#37] -Keys [1]: [ss_store_sk#2] -Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#39] -Results [1]: [cast((avg(UnscaledValue(ss_net_profit#3))#39 / 100.0) as decimal(11,6)) AS rank_col#40] - -Subquery:2 Hosting operator id = 17 Hosting Expression = ReusedSubquery Subquery scalar-subquery#13, [id=#14] - - +TakeOrderedAndProject(limit=100, orderBy=[rnk#1 ASC NULLS FIRST], output=[rnk#1,best_performing#2,worst_performing#3]) ++- *(10) Project [rnk#1, i_product_name#4 AS best_performing#2, i_product_name#5 AS worst_performing#3] + +- *(10) BroadcastHashJoin [item_sk#6], [i_item_sk#7], Inner, BuildRight + :- *(10) Project [rnk#1, item_sk#6, i_product_name#4] + : +- *(10) BroadcastHashJoin [item_sk#8], [i_item_sk#9], Inner, BuildRight + : :- *(10) Project [item_sk#8, rnk#1, item_sk#6] + : : +- *(10) BroadcastHashJoin [rnk#1], [rnk#10], Inner, BuildRight + : : :- *(10) Project [item_sk#8, rnk#1] + : : : +- *(10) Filter ((isnotnull(rnk#1) && (rnk#1 < 11)) && isnotnull(item_sk#8)) + : : : +- Window [rank(rank_col#11) windowspecdefinition(rank_col#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#1], [rank_col#11 ASC NULLS FIRST] + : : : +- *(3) Sort [rank_col#11 ASC NULLS FIRST], false, 0 + : : : +- Exchange SinglePartition + : : : +- *(2) Project [item_sk#8, rank_col#11] + : : : +- *(2) Filter (isnotnull(avg(ss_net_profit#12)#13) && (cast(avg(ss_net_profit#12)#13 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery7089)), DecimalType(13,7)))) + : : : : +- Subquery subquery7089 + : : : : +- *(2) HashAggregate(keys=[ss_store_sk#14], functions=[avg(UnscaledValue(ss_net_profit#12))]) + : : : : +- Exchange hashpartitioning(ss_store_sk#14, 5) + : : : : +- *(1) HashAggregate(keys=[ss_store_sk#14], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) + : : : : +- *(1) Project [ss_store_sk#14, ss_net_profit#12] + : : : : +- *(1) Filter ((isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) && isnull(ss_addr_sk#15)) + : : : : +- *(1) FileScan parquet default.store_sales[ss_addr_sk#15,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)], ReadSchema: struct + : : : +- *(2) HashAggregate(keys=[ss_item_sk#16], functions=[avg(UnscaledValue(ss_net_profit#12))]) + : : : +- Exchange hashpartitioning(ss_item_sk#16, 5) + : : : +- *(1) HashAggregate(keys=[ss_item_sk#16], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) + : : : +- *(1) Project [ss_item_sk#16, ss_net_profit#12] + : : : +- *(1) Filter (isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) + : : : +- *(1) FileScan parquet default.store_sales[ss_item_sk#16,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : +- *(7) Project [item_sk#6, rnk#10] + : : +- *(7) Filter ((isnotnull(rnk#10) && (rnk#10 < 11)) && isnotnull(item_sk#6)) + : : +- Window [rank(rank_col#17) windowspecdefinition(rank_col#17 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#10], [rank_col#17 DESC NULLS LAST] + : : +- *(6) Sort [rank_col#17 DESC NULLS LAST], false, 0 + : : +- Exchange SinglePartition + : : +- *(5) Project [item_sk#6, rank_col#17] + : : +- *(5) Filter (isnotnull(avg(ss_net_profit#12)#18) && (cast(avg(ss_net_profit#12)#18 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery7094)), DecimalType(13,7)))) + : : : +- Subquery subquery7094 + : : : +- *(2) HashAggregate(keys=[ss_store_sk#14], functions=[avg(UnscaledValue(ss_net_profit#12))]) + : : : +- Exchange hashpartitioning(ss_store_sk#14, 5) + : : : +- *(1) HashAggregate(keys=[ss_store_sk#14], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) + : : : +- *(1) Project [ss_store_sk#14, ss_net_profit#12] + : : : +- *(1) Filter ((isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) && isnull(ss_addr_sk#15)) + : : : +- *(1) FileScan parquet default.store_sales[ss_addr_sk#15,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)], ReadSchema: struct + : : +- *(5) HashAggregate(keys=[ss_item_sk#16], functions=[avg(UnscaledValue(ss_net_profit#12))]) + : : +- ReusedExchange [ss_item_sk#16, sum#19, count#20], Exchange hashpartitioning(ss_item_sk#16, 5) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(8) Project [i_item_sk#9, i_product_name#4] + : +- *(8) Filter isnotnull(i_item_sk#9) + : +- *(8) FileScan parquet default.item[i_item_sk#9,i_product_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- ReusedExchange [i_item_sk#7, i_product_name#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/simplified.txt index f2106ad7a..139fd1f7b 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/simplified.txt @@ -1,68 +1,72 @@ -TakeOrderedAndProject [rnk,best_performing,worst_performing] - WholeStageCodegen (10) - Project [rnk,i_product_name,i_product_name] - BroadcastHashJoin [item_sk,i_item_sk] - Project [rnk,item_sk,i_product_name] - BroadcastHashJoin [item_sk,i_item_sk] - Project [item_sk,rnk,item_sk] +TakeOrderedAndProject [best_performing,rnk,worst_performing] + WholeStageCodegen + Project [i_product_name,i_product_name,rnk] + BroadcastHashJoin [i_item_sk,item_sk] + Project [i_product_name,item_sk,rnk] + BroadcastHashJoin [i_item_sk,item_sk] + Project [item_sk,item_sk,rnk] BroadcastHashJoin [rnk,rnk] Project [item_sk,rnk] - Filter [rnk,item_sk] + Filter [item_sk,rnk] InputAdapter Window [rank_col] - WholeStageCodegen (3) + WholeStageCodegen Sort [rank_col] InputAdapter Exchange #1 - WholeStageCodegen (2) + WholeStageCodegen Project [item_sk,rank_col] Filter [avg(ss_net_profit)] Subquery #1 - WholeStageCodegen (2) - HashAggregate [ss_store_sk,sum,count] [avg(UnscaledValue(ss_net_profit)),rank_col,sum,count] + WholeStageCodegen + HashAggregate [avg(UnscaledValue(ss_net_profit)),count,ss_store_sk,sum] [avg(UnscaledValue(ss_net_profit)),count,rank_col,sum] InputAdapter Exchange [ss_store_sk] #3 - WholeStageCodegen (1) - HashAggregate [ss_store_sk,ss_net_profit] [sum,count,sum,count] - Project [ss_store_sk,ss_net_profit] - Filter [ss_store_sk,ss_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_addr_sk,ss_store_sk,ss_net_profit] - HashAggregate [ss_item_sk,sum,count] [avg(UnscaledValue(ss_net_profit)),item_sk,rank_col,avg(ss_net_profit),sum,count] + WholeStageCodegen + HashAggregate [count,count,ss_net_profit,ss_store_sk,sum,sum] [count,count,sum,sum] + Project [ss_net_profit,ss_store_sk] + Filter [ss_addr_sk,ss_store_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_net_profit,ss_store_sk] [ss_addr_sk,ss_net_profit,ss_store_sk] + HashAggregate [avg(UnscaledValue(ss_net_profit)),count,ss_item_sk,sum] [avg(UnscaledValue(ss_net_profit)),avg(ss_net_profit),count,item_sk,rank_col,sum] InputAdapter Exchange [ss_item_sk] #2 - WholeStageCodegen (1) - HashAggregate [ss_item_sk,ss_net_profit] [sum,count,sum,count] + WholeStageCodegen + HashAggregate [count,count,ss_item_sk,ss_net_profit,sum,sum] [count,count,sum,sum] Project [ss_item_sk,ss_net_profit] Filter [ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_profit] + Scan parquet default.store_sales [ss_item_sk,ss_net_profit,ss_store_sk] [ss_item_sk,ss_net_profit,ss_store_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (7) + WholeStageCodegen Project [item_sk,rnk] - Filter [rnk,item_sk] + Filter [item_sk,rnk] InputAdapter Window [rank_col] - WholeStageCodegen (6) + WholeStageCodegen Sort [rank_col] InputAdapter Exchange #5 - WholeStageCodegen (5) + WholeStageCodegen Project [item_sk,rank_col] Filter [avg(ss_net_profit)] - ReusedSubquery [rank_col] #1 - HashAggregate [ss_item_sk,sum,count] [avg(UnscaledValue(ss_net_profit)),item_sk,rank_col,avg(ss_net_profit),sum,count] + Subquery #2 + WholeStageCodegen + HashAggregate [avg(UnscaledValue(ss_net_profit)),count,ss_store_sk,sum] [avg(UnscaledValue(ss_net_profit)),count,rank_col,sum] + InputAdapter + Exchange [ss_store_sk] #6 + WholeStageCodegen + HashAggregate [count,count,ss_net_profit,ss_store_sk,sum,sum] [count,count,sum,sum] + Project [ss_net_profit,ss_store_sk] + Filter [ss_addr_sk,ss_store_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_net_profit,ss_store_sk] [ss_addr_sk,ss_net_profit,ss_store_sk] + HashAggregate [avg(UnscaledValue(ss_net_profit)),count,ss_item_sk,sum] [avg(UnscaledValue(ss_net_profit)),avg(ss_net_profit),count,item_sk,rank_col,sum] InputAdapter - ReusedExchange [ss_item_sk,sum,count] #2 + ReusedExchange [count,ss_item_sk,sum] [count,ss_item_sk,sum] #2 InputAdapter - BroadcastExchange #6 - WholeStageCodegen (8) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_product_name] + BroadcastExchange #7 + WholeStageCodegen + Project [i_item_sk,i_product_name] + Filter [i_item_sk] + Scan parquet default.item [i_item_sk,i_product_name] [i_item_sk,i_product_name] InputAdapter - ReusedExchange [i_item_sk,i_product_name] #6 + ReusedExchange [i_item_sk,i_product_name] [i_item_sk,i_product_name] #7 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/explain.txt index f556e9b0d..ad97e7d1a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/explain.txt @@ -1,226 +1,39 @@ == Physical Plan == -TakeOrderedAndProject (40) -+- * HashAggregate (39) - +- Exchange (38) - +- * HashAggregate (37) - +- * Project (36) - +- * Filter (35) - +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (34) - :- * Project (28) - : +- * BroadcastHashJoin Inner BuildRight (27) - : :- * Project (22) - : : +- * BroadcastHashJoin Inner BuildRight (21) - : : :- * Project (15) - : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : :- * Project (9) - : : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.web_sales (1) - : : : : +- BroadcastExchange (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.customer (4) - : : : +- BroadcastExchange (13) - : : : +- * Filter (12) - : : : +- * ColumnarToRow (11) - : : : +- Scan parquet default.customer_address (10) - : : +- BroadcastExchange (20) - : : +- * Project (19) - : : +- * Filter (18) - : : +- * ColumnarToRow (17) - : : +- Scan parquet default.date_dim (16) - : +- BroadcastExchange (26) - : +- * Filter (25) - : +- * ColumnarToRow (24) - : +- Scan parquet default.item (23) - +- BroadcastExchange (33) - +- * Project (32) - +- * Filter (31) - +- * ColumnarToRow (30) - +- Scan parquet default.item (29) - - -(1) Scan parquet default.web_sales -Output [4]: [ws_sold_date_sk#2, ws_item_sk#3, ws_bill_customer_sk#4, ws_sales_price#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 6] -Input [4]: [ws_sold_date_sk#2, ws_item_sk#3, ws_bill_customer_sk#4, ws_sales_price#5] - -(3) Filter [codegen id : 6] -Input [4]: [ws_sold_date_sk#2, ws_item_sk#3, ws_bill_customer_sk#4, ws_sales_price#5] -Condition : ((isnotnull(ws_bill_customer_sk#4) AND isnotnull(ws_sold_date_sk#2)) AND isnotnull(ws_item_sk#3)) - -(4) Scan parquet default.customer -Output [2]: [c_customer_sk#6, c_current_addr_sk#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [c_customer_sk#6, c_current_addr_sk#7] - -(6) Filter [codegen id : 1] -Input [2]: [c_customer_sk#6, c_current_addr_sk#7] -Condition : (isnotnull(c_customer_sk#6) AND isnotnull(c_current_addr_sk#7)) - -(7) BroadcastExchange -Input [2]: [c_customer_sk#6, c_current_addr_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] - -(8) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_bill_customer_sk#4] -Right keys [1]: [c_customer_sk#6] -Join condition: None - -(9) Project [codegen id : 6] -Output [4]: [ws_sold_date_sk#2, ws_item_sk#3, ws_sales_price#5, c_current_addr_sk#7] -Input [6]: [ws_sold_date_sk#2, ws_item_sk#3, ws_bill_customer_sk#4, ws_sales_price#5, c_customer_sk#6, c_current_addr_sk#7] - -(10) Scan parquet default.customer_address -Output [3]: [ca_address_sk#9, ca_city#10, ca_zip#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [3]: [ca_address_sk#9, ca_city#10, ca_zip#11] - -(12) Filter [codegen id : 2] -Input [3]: [ca_address_sk#9, ca_city#10, ca_zip#11] -Condition : isnotnull(ca_address_sk#9) - -(13) BroadcastExchange -Input [3]: [ca_address_sk#9, ca_city#10, ca_zip#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] - -(14) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [c_current_addr_sk#7] -Right keys [1]: [ca_address_sk#9] -Join condition: None - -(15) Project [codegen id : 6] -Output [5]: [ws_sold_date_sk#2, ws_item_sk#3, ws_sales_price#5, ca_city#10, ca_zip#11] -Input [7]: [ws_sold_date_sk#2, ws_item_sk#3, ws_sales_price#5, c_current_addr_sk#7, ca_address_sk#9, ca_city#10, ca_zip#11] - -(16) Scan parquet default.date_dim -Output [3]: [d_date_sk#13, d_year#14, d_qoy#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 3] -Input [3]: [d_date_sk#13, d_year#14, d_qoy#15] - -(18) Filter [codegen id : 3] -Input [3]: [d_date_sk#13, d_year#14, d_qoy#15] -Condition : ((((isnotnull(d_qoy#15) AND isnotnull(d_year#14)) AND (d_qoy#15 = 2)) AND (d_year#14 = 2001)) AND isnotnull(d_date_sk#13)) - -(19) Project [codegen id : 3] -Output [1]: [d_date_sk#13] -Input [3]: [d_date_sk#13, d_year#14, d_qoy#15] - -(20) BroadcastExchange -Input [1]: [d_date_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] - -(21) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#2] -Right keys [1]: [d_date_sk#13] -Join condition: None - -(22) Project [codegen id : 6] -Output [4]: [ws_item_sk#3, ws_sales_price#5, ca_city#10, ca_zip#11] -Input [6]: [ws_sold_date_sk#2, ws_item_sk#3, ws_sales_price#5, ca_city#10, ca_zip#11, d_date_sk#13] - -(23) Scan parquet default.item -Output [2]: [i_item_sk#17, i_item_id#18] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#17, i_item_id#18] - -(25) Filter [codegen id : 4] -Input [2]: [i_item_sk#17, i_item_id#18] -Condition : isnotnull(i_item_sk#17) - -(26) BroadcastExchange -Input [2]: [i_item_sk#17, i_item_id#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] - -(27) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_item_sk#3] -Right keys [1]: [i_item_sk#17] -Join condition: None - -(28) Project [codegen id : 6] -Output [4]: [ws_sales_price#5, ca_city#10, ca_zip#11, i_item_id#18] -Input [6]: [ws_item_sk#3, ws_sales_price#5, ca_city#10, ca_zip#11, i_item_sk#17, i_item_id#18] - -(29) Scan parquet default.item -Output [2]: [i_item_sk#17, i_item_id#18] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [In(i_item_sk, [2,3,5,7,11,13,17,19,23,29])] -ReadSchema: struct - -(30) ColumnarToRow [codegen id : 5] -Input [2]: [i_item_sk#17, i_item_id#18] - -(31) Filter [codegen id : 5] -Input [2]: [i_item_sk#17, i_item_id#18] -Condition : i_item_sk#17 IN (2,3,5,7,11,13,17,19,23,29) - -(32) Project [codegen id : 5] -Output [1]: [i_item_id#18 AS i_item_id#18#20] -Input [2]: [i_item_sk#17, i_item_id#18] - -(33) BroadcastExchange -Input [1]: [i_item_id#18#20] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#21] - -(34) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [i_item_id#18] -Right keys [1]: [i_item_id#18#20] -Join condition: None - -(35) Filter [codegen id : 6] -Input [5]: [ws_sales_price#5, ca_city#10, ca_zip#11, i_item_id#18, exists#1] -Condition : (substr(ca_zip#11, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR exists#1) - -(36) Project [codegen id : 6] -Output [3]: [ws_sales_price#5, ca_city#10, ca_zip#11] -Input [5]: [ws_sales_price#5, ca_city#10, ca_zip#11, i_item_id#18, exists#1] - -(37) HashAggregate [codegen id : 6] -Input [3]: [ws_sales_price#5, ca_city#10, ca_zip#11] -Keys [2]: [ca_zip#11, ca_city#10] -Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#5))] -Aggregate Attributes [1]: [sum#22] -Results [3]: [ca_zip#11, ca_city#10, sum#23] - -(38) Exchange -Input [3]: [ca_zip#11, ca_city#10, sum#23] -Arguments: hashpartitioning(ca_zip#11, ca_city#10, 5), true, [id=#24] - -(39) HashAggregate [codegen id : 7] -Input [3]: [ca_zip#11, ca_city#10, sum#23] -Keys [2]: [ca_zip#11, ca_city#10] -Functions [1]: [sum(UnscaledValue(ws_sales_price#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#5))#25] -Results [3]: [ca_zip#11, ca_city#10, MakeDecimal(sum(UnscaledValue(ws_sales_price#5))#25,17,2) AS sum(ws_sales_price)#26] - -(40) TakeOrderedAndProject -Input [3]: [ca_zip#11, ca_city#10, sum(ws_sales_price)#26] -Arguments: 100, [ca_zip#11 ASC NULLS FIRST, ca_city#10 ASC NULLS FIRST], [ca_zip#11, ca_city#10, sum(ws_sales_price)#26] - +TakeOrderedAndProject(limit=100, orderBy=[ca_zip#1 ASC NULLS FIRST,ca_city#2 ASC NULLS FIRST], output=[ca_zip#1,ca_city#2,sum(ws_sales_price)#3]) ++- *(7) HashAggregate(keys=[ca_zip#1, ca_city#2], functions=[sum(UnscaledValue(ws_sales_price#4))]) + +- Exchange hashpartitioning(ca_zip#1, ca_city#2, 5) + +- *(6) HashAggregate(keys=[ca_zip#1, ca_city#2], functions=[partial_sum(UnscaledValue(ws_sales_price#4))]) + +- *(6) Project [ws_sales_price#4, ca_city#2, ca_zip#1] + +- *(6) Filter (substring(ca_zip#1, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) || exists#5) + +- *(6) BroadcastHashJoin [i_item_id#6], [i_item_id#6#7], ExistenceJoin(exists#5), BuildRight + :- *(6) Project [ws_sales_price#4, ca_city#2, ca_zip#1, i_item_id#6] + : +- *(6) BroadcastHashJoin [ws_item_sk#8], [i_item_sk#9], Inner, BuildRight + : :- *(6) Project [ws_item_sk#8, ws_sales_price#4, ca_city#2, ca_zip#1] + : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#10], [d_date_sk#11], Inner, BuildRight + : : :- *(6) Project [ws_sold_date_sk#10, ws_item_sk#8, ws_sales_price#4, ca_city#2, ca_zip#1] + : : : +- *(6) BroadcastHashJoin [c_current_addr_sk#12], [ca_address_sk#13], Inner, BuildRight + : : : :- *(6) Project [ws_sold_date_sk#10, ws_item_sk#8, ws_sales_price#4, c_current_addr_sk#12] + : : : : +- *(6) BroadcastHashJoin [ws_bill_customer_sk#14], [c_customer_sk#15], Inner, BuildRight + : : : : :- *(6) Project [ws_sold_date_sk#10, ws_item_sk#8, ws_bill_customer_sk#14, ws_sales_price#4] + : : : : : +- *(6) Filter ((isnotnull(ws_bill_customer_sk#14) && isnotnull(ws_sold_date_sk#10)) && isnotnull(ws_item_sk#8)) + : : : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#10,ws_item_sk#8,ws_bill_customer_sk#14,ws_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [c_customer_sk#15, c_current_addr_sk#12] + : : : : +- *(1) Filter (isnotnull(c_customer_sk#15) && isnotnull(c_current_addr_sk#12)) + : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#15,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [ca_address_sk#13, ca_city#2, ca_zip#1] + : : : +- *(2) Filter isnotnull(ca_address_sk#13) + : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#13,ca_city#2,ca_zip#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [d_date_sk#11] + : : +- *(3) Filter ((((isnotnull(d_qoy#16) && isnotnull(d_year#17)) && (d_qoy#16 = 2)) && (d_year#17 = 2001)) && isnotnull(d_date_sk#11)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#11,d_year#17,d_qoy#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [i_item_sk#9, i_item_id#6] + : +- *(4) Filter isnotnull(i_item_sk#9) + : +- *(4) FileScan parquet default.item[i_item_sk#9,i_item_id#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- *(5) Project [i_item_id#6 AS i_item_id#6#7] + +- *(5) Filter i_item_sk#9 IN (2,3,5,7,11,13,17,19,23,29) + +- *(5) FileScan parquet default.item[i_item_sk#9,i_item_id#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_item_sk, [2,3,5,7,11,13,17,19,23,29])], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/simplified.txt index 7cc474fb5..72b6b03ea 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/simplified.txt @@ -1,59 +1,51 @@ -TakeOrderedAndProject [ca_zip,ca_city,sum(ws_sales_price)] - WholeStageCodegen (7) - HashAggregate [ca_zip,ca_city,sum] [sum(UnscaledValue(ws_sales_price)),sum(ws_sales_price),sum] +TakeOrderedAndProject [ca_city,ca_zip,sum(ws_sales_price)] + WholeStageCodegen + HashAggregate [ca_city,ca_zip,sum,sum(UnscaledValue(ws_sales_price))] [sum,sum(UnscaledValue(ws_sales_price)),sum(ws_sales_price)] InputAdapter - Exchange [ca_zip,ca_city] #1 - WholeStageCodegen (6) - HashAggregate [ca_zip,ca_city,ws_sales_price] [sum,sum] - Project [ws_sales_price,ca_city,ca_zip] + Exchange [ca_city,ca_zip] #1 + WholeStageCodegen + HashAggregate [ca_city,ca_zip,sum,sum,ws_sales_price] [sum,sum] + Project [ca_city,ca_zip,ws_sales_price] Filter [ca_zip,exists] BroadcastHashJoin [i_item_id,i_item_id] - Project [ws_sales_price,ca_city,ca_zip,i_item_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk,ws_sales_price,ca_city,ca_zip] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_sales_price,ca_city,ca_zip] + Project [ca_city,ca_zip,i_item_id,ws_sales_price] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ca_city,ca_zip,ws_item_sk,ws_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ca_city,ca_zip,ws_item_sk,ws_sales_price,ws_sold_date_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_sales_price,c_current_addr_sk] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Filter [ws_bill_customer_sk,ws_sold_date_sk,ws_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_sales_price] + Project [c_current_addr_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Project [ws_bill_customer_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] [ws_bill_customer_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] + WholeStageCodegen + Project [c_current_addr_sk,c_customer_sk] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Filter [ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_city,ca_zip] + WholeStageCodegen + Project [ca_address_sk,ca_city,ca_zip] + Filter [ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_city,ca_zip] [ca_address_sk,ca_city,ca_zip] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) + WholeStageCodegen Project [d_date_sk] - Filter [d_qoy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] + Filter [d_date_sk,d_qoy,d_year] + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id] + WholeStageCodegen + Project [i_item_id,i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] InputAdapter BroadcastExchange #6 - WholeStageCodegen (5) + WholeStageCodegen Project [i_item_id] Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/explain.txt index a6a3c3c46..a5d4817de 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/explain.txt @@ -1,241 +1,41 @@ == Physical Plan == -TakeOrderedAndProject (43) -+- * Project (42) - +- * BroadcastHashJoin Inner BuildRight (41) - :- * Project (39) - : +- * BroadcastHashJoin Inner BuildRight (38) - : :- * HashAggregate (33) - : : +- Exchange (32) - : : +- * HashAggregate (31) - : : +- * Project (30) - : : +- * BroadcastHashJoin Inner BuildRight (29) - : : :- * Project (24) - : : : +- * BroadcastHashJoin Inner BuildRight (23) - : : : :- * Project (17) - : : : : +- * BroadcastHashJoin Inner BuildRight (16) - : : : : :- * Project (10) - : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.store_sales (1) - : : : : : +- BroadcastExchange (8) - : : : : : +- * Project (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.date_dim (4) - : : : : +- BroadcastExchange (15) - : : : : +- * Project (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.store (11) - : : : +- BroadcastExchange (22) - : : : +- * Project (21) - : : : +- * Filter (20) - : : : +- * ColumnarToRow (19) - : : : +- Scan parquet default.household_demographics (18) - : : +- BroadcastExchange (28) - : : +- * Filter (27) - : : +- * ColumnarToRow (26) - : : +- Scan parquet default.customer_address (25) - : +- BroadcastExchange (37) - : +- * Filter (36) - : +- * ColumnarToRow (35) - : +- Scan parquet default.customer (34) - +- ReusedExchange (40) - - -(1) Scan parquet default.store_sales -Output [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 5] -Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] - -(3) Filter [codegen id : 5] -Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] -Condition : ((((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_addr_sk#4)) AND isnotnull(ss_customer_sk#2)) - -(4) Scan parquet default.date_dim -Output [3]: [d_date_sk#9, d_year#10, d_dow#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [In(d_dow, [6,0]), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#9, d_year#10, d_dow#11] - -(6) Filter [codegen id : 1] -Input [3]: [d_date_sk#9, d_year#10, d_dow#11] -Condition : ((d_dow#11 IN (6,0) AND d_year#10 IN (1999,2000,2001)) AND isnotnull(d_date_sk#9)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#9] -Input [3]: [d_date_sk#9, d_year#10, d_dow#11] - -(8) BroadcastExchange -Input [1]: [d_date_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#9] -Join condition: None - -(10) Project [codegen id : 5] -Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] -Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, d_date_sk#9] - -(11) Scan parquet default.store -Output [2]: [s_store_sk#13, s_city#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [In(s_city, [Fairview,Midway]), IsNotNull(s_store_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [2]: [s_store_sk#13, s_city#14] - -(13) Filter [codegen id : 2] -Input [2]: [s_store_sk#13, s_city#14] -Condition : (s_city#14 IN (Fairview,Midway) AND isnotnull(s_store_sk#13)) - -(14) Project [codegen id : 2] -Output [1]: [s_store_sk#13] -Input [2]: [s_store_sk#13, s_city#14] - -(15) BroadcastExchange -Input [1]: [s_store_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] - -(16) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_store_sk#5] -Right keys [1]: [s_store_sk#13] -Join condition: None - -(17) Project [codegen id : 5] -Output [6]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] -Input [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_store_sk#13] - -(18) Scan parquet default.household_demographics -Output [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] -Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 3] -Input [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] - -(20) Filter [codegen id : 3] -Input [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] -Condition : (((hd_dep_count#17 = 4) OR (hd_vehicle_count#18 = 3)) AND isnotnull(hd_demo_sk#16)) - -(21) Project [codegen id : 3] -Output [1]: [hd_demo_sk#16] -Input [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] - -(22) BroadcastExchange -Input [1]: [hd_demo_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] - -(23) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_hdemo_sk#3] -Right keys [1]: [hd_demo_sk#16] -Join condition: None - -(24) Project [codegen id : 5] -Output [5]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] -Input [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, hd_demo_sk#16] - -(25) Scan parquet default.customer_address -Output [2]: [ca_address_sk#20, ca_city#21] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] -ReadSchema: struct - -(26) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#20, ca_city#21] - -(27) Filter [codegen id : 4] -Input [2]: [ca_address_sk#20, ca_city#21] -Condition : (isnotnull(ca_address_sk#20) AND isnotnull(ca_city#21)) - -(28) BroadcastExchange -Input [2]: [ca_address_sk#20, ca_city#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] - -(29) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_addr_sk#4] -Right keys [1]: [ca_address_sk#20] -Join condition: None - -(30) Project [codegen id : 5] -Output [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, ca_city#21] -Input [7]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, ca_address_sk#20, ca_city#21] - -(31) HashAggregate [codegen id : 5] -Input [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, ca_city#21] -Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21] -Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#7)), partial_sum(UnscaledValue(ss_net_profit#8))] -Aggregate Attributes [2]: [sum#23, sum#24] -Results [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21, sum#25, sum#26] - -(32) Exchange -Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21, sum#25, sum#26] -Arguments: hashpartitioning(ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21, 5), true, [id=#27] - -(33) HashAggregate [codegen id : 8] -Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21, sum#25, sum#26] -Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21] -Functions [2]: [sum(UnscaledValue(ss_coupon_amt#7)), sum(UnscaledValue(ss_net_profit#8))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#7))#28, sum(UnscaledValue(ss_net_profit#8))#29] -Results [5]: [ss_ticket_number#6, ss_customer_sk#2, ca_city#21 AS bought_city#30, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#7))#28,17,2) AS amt#31, MakeDecimal(sum(UnscaledValue(ss_net_profit#8))#29,17,2) AS profit#32] - -(34) Scan parquet default.customer -Output [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct - -(35) ColumnarToRow [codegen id : 6] -Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] - -(36) Filter [codegen id : 6] -Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] -Condition : (isnotnull(c_customer_sk#33) AND isnotnull(c_current_addr_sk#34)) - -(37) BroadcastExchange -Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#37] - -(38) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#33] -Join condition: None - -(39) Project [codegen id : 8] -Output [7]: [ss_ticket_number#6, bought_city#30, amt#31, profit#32, c_current_addr_sk#34, c_first_name#35, c_last_name#36] -Input [9]: [ss_ticket_number#6, ss_customer_sk#2, bought_city#30, amt#31, profit#32, c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] - -(40) ReusedExchange [Reuses operator id: 28] -Output [2]: [ca_address_sk#20, ca_city#21] - -(41) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [c_current_addr_sk#34] -Right keys [1]: [ca_address_sk#20] -Join condition: NOT (ca_city#21 = bought_city#30) - -(42) Project [codegen id : 8] -Output [7]: [c_last_name#36, c_first_name#35, ca_city#21, bought_city#30, ss_ticket_number#6, amt#31, profit#32] -Input [9]: [ss_ticket_number#6, bought_city#30, amt#31, profit#32, c_current_addr_sk#34, c_first_name#35, c_last_name#36, ca_address_sk#20, ca_city#21] - -(43) TakeOrderedAndProject -Input [7]: [c_last_name#36, c_first_name#35, ca_city#21, bought_city#30, ss_ticket_number#6, amt#31, profit#32] -Arguments: 100, [c_last_name#36 ASC NULLS FIRST, c_first_name#35 ASC NULLS FIRST, ca_city#21 ASC NULLS FIRST, bought_city#30 ASC NULLS FIRST, ss_ticket_number#6 ASC NULLS FIRST], [c_last_name#36, c_first_name#35, ca_city#21, bought_city#30, ss_ticket_number#6, amt#31, profit#32] - +TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_name#2 ASC NULLS FIRST,ca_city#3 ASC NULLS FIRST,bought_city#4 ASC NULLS FIRST,ss_ticket_number#5 ASC NULLS FIRST], output=[c_last_name#1,c_first_name#2,ca_city#3,bought_city#4,ss_ticket_number#5,amt#6,profit#7]) ++- *(8) Project [c_last_name#1, c_first_name#2, ca_city#3, bought_city#4, ss_ticket_number#5, amt#6, profit#7] + +- *(8) BroadcastHashJoin [c_current_addr_sk#8], [ca_address_sk#9], Inner, BuildRight, NOT (ca_city#3 = bought_city#4) + :- *(8) Project [ss_ticket_number#5, bought_city#4, amt#6, profit#7, c_current_addr_sk#8, c_first_name#2, c_last_name#1] + : +- *(8) BroadcastHashJoin [ss_customer_sk#10], [c_customer_sk#11], Inner, BuildRight + : :- *(8) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#10, ss_addr_sk#12, ca_city#3], functions=[sum(UnscaledValue(ss_coupon_amt#13)), sum(UnscaledValue(ss_net_profit#14))]) + : : +- Exchange hashpartitioning(ss_ticket_number#5, ss_customer_sk#10, ss_addr_sk#12, ca_city#3, 5) + : : +- *(5) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#10, ss_addr_sk#12, ca_city#3], functions=[partial_sum(UnscaledValue(ss_coupon_amt#13)), partial_sum(UnscaledValue(ss_net_profit#14))]) + : : +- *(5) Project [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14, ca_city#3] + : : +- *(5) BroadcastHashJoin [ss_addr_sk#12], [ca_address_sk#9], Inner, BuildRight + : : :- *(5) Project [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] + : : : +- *(5) BroadcastHashJoin [ss_hdemo_sk#15], [hd_demo_sk#16], Inner, BuildRight + : : : :- *(5) Project [ss_customer_sk#10, ss_hdemo_sk#15, ss_addr_sk#12, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] + : : : : +- *(5) BroadcastHashJoin [ss_store_sk#17], [s_store_sk#18], Inner, BuildRight + : : : : :- *(5) Project [ss_customer_sk#10, ss_hdemo_sk#15, ss_addr_sk#12, ss_store_sk#17, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] + : : : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight + : : : : : :- *(5) Project [ss_sold_date_sk#19, ss_customer_sk#10, ss_hdemo_sk#15, ss_addr_sk#12, ss_store_sk#17, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] + : : : : : : +- *(5) Filter ((((isnotnull(ss_sold_date_sk#19) && isnotnull(ss_store_sk#17)) && isnotnull(ss_hdemo_sk#15)) && isnotnull(ss_addr_sk#12)) && isnotnull(ss_customer_sk#10)) + : : : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_customer_sk#10,ss_hdemo_sk#15,ss_addr_sk#12,ss_store_sk#17,ss_ticket_number#5,ss_coupon_amt#13,ss_net_profit#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [s_store_sk#18] + : : : : +- *(2) Filter (s_city#23 IN (Fairview,Midway) && isnotnull(s_store_sk#18)) + : : : : +- *(2) FileScan parquet default.store[s_store_sk#18,s_city#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [In(s_city, [Fairview,Midway]), IsNotNull(s_store_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(3) Project [hd_demo_sk#16] + : : : +- *(3) Filter (((hd_dep_count#24 = 4) || (hd_vehicle_count#25 = 3)) && isnotnull(hd_demo_sk#16)) + : : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#16,hd_dep_count#24,hd_vehicle_count#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(4) Project [ca_address_sk#9, ca_city#3] + : : +- *(4) Filter (isnotnull(ca_address_sk#9) && isnotnull(ca_city#3)) + : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#9,ca_city#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [c_customer_sk#11, c_current_addr_sk#8, c_first_name#2, c_last_name#1] + : +- *(6) Filter (isnotnull(c_customer_sk#11) && isnotnull(c_current_addr_sk#8)) + : +- *(6) FileScan parquet default.customer[c_customer_sk#11,c_current_addr_sk#8,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + +- ReusedExchange [ca_address_sk#9, ca_city#3], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/simplified.txt index abdc7a3ba..213915231 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/simplified.txt @@ -1,63 +1,54 @@ -TakeOrderedAndProject [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,amt,profit] - WholeStageCodegen (8) - Project [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,amt,profit] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] - Project [ss_ticket_number,bought_city,amt,profit,c_current_addr_sk,c_first_name,c_last_name] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,sum,sum] [sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit)),bought_city,amt,profit,sum,sum] +TakeOrderedAndProject [amt,bought_city,c_first_name,c_last_name,ca_city,profit,ss_ticket_number] + WholeStageCodegen + Project [amt,bought_city,c_first_name,c_last_name,ca_city,profit,ss_ticket_number] + BroadcastHashJoin [bought_city,c_current_addr_sk,ca_address_sk,ca_city] + Project [amt,bought_city,c_current_addr_sk,c_first_name,c_last_name,profit,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] [amt,bought_city,profit,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] InputAdapter - Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 - WholeStageCodegen (5) - HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,ss_coupon_amt,ss_net_profit] [sum,sum,sum,sum] - Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ca_city] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_store_sk,ss_hdemo_sk,ss_addr_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + Exchange [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #1 + WholeStageCodegen + HashAggregate [ca_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number,sum,sum,sum,sum] [sum,sum,sum,sum] + Project [ca_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Filter [ss_addr_sk,ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) + WholeStageCodegen Project [d_date_sk] - Filter [d_dow,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_dow] + Filter [d_date_sk,d_dow,d_year] + Scan parquet default.date_dim [d_date_sk,d_dow,d_year] [d_date_sk,d_dow,d_year] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) + WholeStageCodegen Project [s_store_sk] Filter [s_city,s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_city] + Scan parquet default.store [s_city,s_store_sk] [s_city,s_store_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) + WholeStageCodegen Project [hd_demo_sk] - Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) - Filter [ca_address_sk,ca_city] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_city] + WholeStageCodegen + Project [ca_address_sk,ca_city] + Filter [ca_address_sk,ca_city] + Scan parquet default.customer_address [ca_address_sk,ca_city] [ca_address_sk,ca_city] InputAdapter BroadcastExchange #6 - WholeStageCodegen (6) - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + WholeStageCodegen + Project [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] InputAdapter - ReusedExchange [ca_address_sk,ca_city] #5 + ReusedExchange [ca_address_sk,ca_city] [ca_address_sk,ca_city] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/explain.txt index a84ba3a03..e7dcf37e4 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/explain.txt @@ -1,278 +1,51 @@ == Physical Plan == -TakeOrderedAndProject (51) -+- * Project (50) - +- * BroadcastHashJoin Inner BuildRight (49) - :- * Project (42) - : +- * BroadcastHashJoin Inner BuildRight (41) - : :- * Filter (32) - : : +- Window (31) - : : +- * Sort (30) - : : +- Exchange (29) - : : +- * Project (28) - : : +- Window (27) - : : +- * Sort (26) - : : +- Exchange (25) - : : +- * HashAggregate (24) - : : +- Exchange (23) - : : +- * HashAggregate (22) - : : +- * Project (21) - : : +- * BroadcastHashJoin Inner BuildRight (20) - : : :- * Project (15) - : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : :- * Project (9) - : : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.item (1) - : : : : +- BroadcastExchange (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.store_sales (4) - : : : +- BroadcastExchange (13) - : : : +- * Filter (12) - : : : +- * ColumnarToRow (11) - : : : +- Scan parquet default.date_dim (10) - : : +- BroadcastExchange (19) - : : +- * Filter (18) - : : +- * ColumnarToRow (17) - : : +- Scan parquet default.store (16) - : +- BroadcastExchange (40) - : +- * Project (39) - : +- * Filter (38) - : +- Window (37) - : +- * Sort (36) - : +- Exchange (35) - : +- * HashAggregate (34) - : +- ReusedExchange (33) - +- BroadcastExchange (48) - +- * Project (47) - +- * Filter (46) - +- Window (45) - +- * Sort (44) - +- ReusedExchange (43) - - -(1) Scan parquet default.item -Output [3]: [i_item_sk#1, i_brand#2, i_category#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [3]: [i_item_sk#1, i_brand#2, i_category#3] - -(3) Filter [codegen id : 4] -Input [3]: [i_item_sk#1, i_brand#2, i_category#3] -Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) - -(4) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [4]: [ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] - -(6) Filter [codegen id : 1] -Input [4]: [ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] -Condition : ((isnotnull(ss_item_sk#5) AND isnotnull(ss_sold_date_sk#4)) AND isnotnull(ss_store_sk#6)) - -(7) BroadcastExchange -Input [4]: [ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#8] - -(8) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_item_sk#1] -Right keys [1]: [ss_item_sk#5] -Join condition: None - -(9) Project [codegen id : 4] -Output [5]: [i_brand#2, i_category#3, ss_sold_date_sk#4, ss_store_sk#6, ss_sales_price#7] -Input [7]: [i_item_sk#1, i_brand#2, i_category#3, ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] - -(10) Scan parquet default.date_dim -Output [3]: [d_date_sk#9, d_year#10, d_moy#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [3]: [d_date_sk#9, d_year#10, d_moy#11] - -(12) Filter [codegen id : 2] -Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) - -(13) BroadcastExchange -Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] - -(14) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#4] -Right keys [1]: [d_date_sk#9] -Join condition: None - -(15) Project [codegen id : 4] -Output [6]: [i_brand#2, i_category#3, ss_store_sk#6, ss_sales_price#7, d_year#10, d_moy#11] -Input [8]: [i_brand#2, i_category#3, ss_sold_date_sk#4, ss_store_sk#6, ss_sales_price#7, d_date_sk#9, d_year#10, d_moy#11] - -(16) Scan parquet default.store -Output [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 3] -Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] - -(18) Filter [codegen id : 3] -Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] -Condition : ((isnotnull(s_store_sk#13) AND isnotnull(s_store_name#14)) AND isnotnull(s_company_name#15)) - -(19) BroadcastExchange -Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] - -(20) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#6] -Right keys [1]: [s_store_sk#13] -Join condition: None - -(21) Project [codegen id : 4] -Output [7]: [i_brand#2, i_category#3, ss_sales_price#7, d_year#10, d_moy#11, s_store_name#14, s_company_name#15] -Input [9]: [i_brand#2, i_category#3, ss_store_sk#6, ss_sales_price#7, d_year#10, d_moy#11, s_store_sk#13, s_store_name#14, s_company_name#15] - -(22) HashAggregate [codegen id : 4] -Input [7]: [i_brand#2, i_category#3, ss_sales_price#7, d_year#10, d_moy#11, s_store_name#14, s_company_name#15] -Keys [6]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#7))] -Aggregate Attributes [1]: [sum#17] -Results [7]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum#18] - -(23) Exchange -Input [7]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum#18] -Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, 5), true, [id=#19] - -(24) HashAggregate [codegen id : 5] -Input [7]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum#18] -Keys [6]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11] -Functions [1]: [sum(UnscaledValue(ss_sales_price#7))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#7))#20] -Results [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#20,17,2) AS _w0#22] - -(25) Exchange -Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, _w0#22] -Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, 5), true, [id=#23] - -(26) Sort [codegen id : 6] -Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, _w0#22] -Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST, s_company_name#15 ASC NULLS FIRST, d_year#10 ASC NULLS FIRST], false, 0 - -(27) Window -Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, _w0#22] -Arguments: [avg(_w0#22) windowspecdefinition(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#24], [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10] - -(28) Project [codegen id : 7] -Output [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24] -Input [9]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, _w0#22, avg_monthly_sales#24] - -(29) Exchange -Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24] -Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, 5), true, [id=#25] - -(30) Sort [codegen id : 8] -Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24] -Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST, s_company_name#15 ASC NULLS FIRST, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST], false, 0 - -(31) Window -Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24] -Arguments: [rank(d_year#10, d_moy#11) windowspecdefinition(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#26], [i_category#3, i_brand#2, s_store_name#14, s_company_name#15], [d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST] - -(32) Filter [codegen id : 23] -Input [9]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24, rn#26] -Condition : (((((isnotnull(d_year#10) AND isnotnull(avg_monthly_sales#24)) AND (d_year#10 = 1999)) AND (avg_monthly_sales#24 > 0.000000)) AND (CASE WHEN (avg_monthly_sales#24 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000)) AND isnotnull(rn#26)) - -(33) ReusedExchange [Reuses operator id: 23] -Output [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum#33] - -(34) HashAggregate [codegen id : 13] -Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum#33] -Keys [6]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32] -Functions [1]: [sum(UnscaledValue(ss_sales_price#7))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#7))#34] -Results [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#34,17,2) AS sum_sales#35] - -(35) Exchange -Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35] -Arguments: hashpartitioning(i_category#27, i_brand#28, s_store_name#29, s_company_name#30, 5), true, [id=#36] - -(36) Sort [codegen id : 14] -Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35] -Arguments: [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, s_store_name#29 ASC NULLS FIRST, s_company_name#30 ASC NULLS FIRST, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST], false, 0 - -(37) Window -Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35] -Arguments: [rank(d_year#31, d_moy#32) windowspecdefinition(i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#37], [i_category#27, i_brand#28, s_store_name#29, s_company_name#30], [d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST] - -(38) Filter [codegen id : 15] -Input [8]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35, rn#37] -Condition : isnotnull(rn#37) - -(39) Project [codegen id : 15] -Output [6]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, sum_sales#35, rn#37] -Input [8]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35, rn#37] - -(40) BroadcastExchange -Input [6]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, sum_sales#35, rn#37] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] + 1)),false), [id=#38] - -(41) BroadcastHashJoin [codegen id : 23] -Left keys [5]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, rn#26] -Right keys [5]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, (rn#37 + 1)] -Join condition: None - -(42) Project [codegen id : 23] -Output [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24, rn#26, sum_sales#35] -Input [15]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24, rn#26, i_category#27, i_brand#28, s_store_name#29, s_company_name#30, sum_sales#35, rn#37] - -(43) ReusedExchange [Reuses operator id: 35] -Output [7]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45] - -(44) Sort [codegen id : 21] -Input [7]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45] -Arguments: [i_category#39 ASC NULLS FIRST, i_brand#40 ASC NULLS FIRST, s_store_name#41 ASC NULLS FIRST, s_company_name#42 ASC NULLS FIRST, d_year#43 ASC NULLS FIRST, d_moy#44 ASC NULLS FIRST], false, 0 - -(45) Window -Input [7]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45] -Arguments: [rank(d_year#43, d_moy#44) windowspecdefinition(i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43 ASC NULLS FIRST, d_moy#44 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#46], [i_category#39, i_brand#40, s_store_name#41, s_company_name#42], [d_year#43 ASC NULLS FIRST, d_moy#44 ASC NULLS FIRST] - -(46) Filter [codegen id : 22] -Input [8]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45, rn#46] -Condition : isnotnull(rn#46) - -(47) Project [codegen id : 22] -Output [6]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, sum_sales#45, rn#46] -Input [8]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45, rn#46] - -(48) BroadcastExchange -Input [6]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, sum_sales#45, rn#46] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] - 1)),false), [id=#47] - -(49) BroadcastHashJoin [codegen id : 23] -Left keys [5]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, rn#26] -Right keys [5]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, (rn#46 - 1)] -Join condition: None - -(50) Project [codegen id : 23] -Output [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, avg_monthly_sales#24, sum_sales#21, sum_sales#35 AS psum#48, sum_sales#45 AS nsum#49] -Input [16]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24, rn#26, sum_sales#35, i_category#39, i_brand#40, s_store_name#41, s_company_name#42, sum_sales#45, rn#46] - -(51) TakeOrderedAndProject -Input [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, avg_monthly_sales#24, sum_sales#21, psum#48, nsum#49] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST], [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, avg_monthly_sales#24, sum_sales#21, psum#48, nsum#49] - +TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,s_store_name#3 ASC NULLS FIRST], output=[i_category#4,i_brand#5,s_store_name#3,s_company_name#6,d_year#7,d_moy#8,avg_monthly_sales#2,sum_sales#1,psum#9,nsum#10]) ++- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, avg_monthly_sales#2, sum_sales#1, sum_sales#11 AS psum#9, sum_sales#12 AS nsum#10] + +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, (rn#18 - 1)], Inner, BuildRight + :- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13, sum_sales#11] + : +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, (rn#23 + 1)], Inner, BuildRight + : :- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13] + : : +- *(22) Filter (((isnotnull(avg_monthly_sales#2) && (avg_monthly_sales#2 > 0.000000)) && (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000)) && isnotnull(rn#13)) + : : +- Window [avg(_w0#24) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7] + : : +- *(7) Filter (isnotnull(d_year#7) && (d_year#7 = 1999)) + : : +- Window [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#13], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] + : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#6 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, 5) + : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#25))]) + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 5) + : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#25))]) + : : +- *(4) Project [i_brand#5, i_category#4, ss_sales_price#25, d_year#7, d_moy#8, s_store_name#3, s_company_name#6] + : : +- *(4) BroadcastHashJoin [ss_store_sk#26], [s_store_sk#27], Inner, BuildRight + : : :- *(4) Project [i_brand#5, i_category#4, ss_store_sk#26, ss_sales_price#25, d_year#7, d_moy#8] + : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#28], [d_date_sk#29], Inner, BuildRight + : : : :- *(4) Project [i_brand#5, i_category#4, ss_sold_date_sk#28, ss_store_sk#26, ss_sales_price#25] + : : : : +- *(4) BroadcastHashJoin [i_item_sk#30], [ss_item_sk#31], Inner, BuildRight + : : : : :- *(4) Project [i_item_sk#30, i_brand#5, i_category#4] + : : : : : +- *(4) Filter ((isnotnull(i_item_sk#30) && isnotnull(i_brand#5)) && isnotnull(i_category#4)) + : : : : : +- *(4) FileScan parquet default.item[i_item_sk#30,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : : +- *(1) Project [ss_sold_date_sk#28, ss_item_sk#31, ss_store_sk#26, ss_sales_price#25] + : : : : +- *(1) Filter ((isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#28)) && isnotnull(ss_store_sk#26)) + : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#28,ss_item_sk#31,ss_store_sk#26,ss_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [d_date_sk#29, d_year#7, d_moy#8] + : : : +- *(2) Filter ((((d_year#7 = 1999) || ((d_year#7 = 1998) && (d_moy#8 = 12))) || ((d_year#7 = 2000) && (d_moy#8 = 1))) && isnotnull(d_date_sk#29)) + : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#29,d_year#7,d_moy#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [s_store_sk#27, s_store_name#3, s_company_name#6] + : : +- *(3) Filter ((isnotnull(s_store_sk#27) && isnotnull(s_store_name#3)) && isnotnull(s_company_name#6)) + : : +- *(3) FileScan parquet default.store[s_store_sk#27,s_store_name#3,s_company_name#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] + 1))) + : +- *(14) Project [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#11, rn#23] + : +- *(14) Filter isnotnull(rn#23) + : +- Window [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#23], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] + : +- *(13) Sort [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, s_store_name#21 ASC NULLS FIRST, s_company_name#22 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 5) + : +- *(12) HashAggregate(keys=[i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33], functions=[sum(UnscaledValue(ss_sales_price#25))]) + : +- ReusedExchange [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33, sum#34], Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 5) + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] - 1))) + +- *(21) Project [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, sum_sales#12, rn#18] + +- *(21) Filter isnotnull(rn#18) + +- Window [rank(d_year#35, d_moy#36) windowspecdefinition(i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#18], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17], [d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST] + +- *(20) Sort [i_category#14 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#16 ASC NULLS FIRST, s_company_name#17 ASC NULLS FIRST, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST], false, 0 + +- ReusedExchange [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35, d_moy#36, sum_sales#12], Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 5) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/simplified.txt index 66ccfa4a9..f0ed21cce 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/simplified.txt @@ -1,84 +1,77 @@ -TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_brand,s_company_name,d_year,d_moy,psum,nsum] - WholeStageCodegen (23) - Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] - BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] - Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] - BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] - Filter [d_year,avg_monthly_sales,sum_sales,rn] - InputAdapter - Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] - WholeStageCodegen (8) - Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] - InputAdapter - Exchange [i_category,i_brand,s_store_name,s_company_name] #1 - WholeStageCodegen (7) - Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales] - InputAdapter - Window [_w0,i_category,i_brand,s_store_name,s_company_name,d_year] - WholeStageCodegen (6) - Sort [i_category,i_brand,s_store_name,s_company_name,d_year] - InputAdapter - Exchange [i_category,i_brand,s_store_name,s_company_name,d_year] #2 - WholeStageCodegen (5) - HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] - InputAdapter - Exchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] #3 - WholeStageCodegen (4) - HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,ss_sales_price] [sum,sum] - Project [i_brand,i_category,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [i_brand,i_category,ss_store_sk,ss_sales_price,d_year,d_moy] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [i_brand,i_category,ss_sold_date_sk,ss_store_sk,ss_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Filter [i_item_sk,i_category,i_brand] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand,i_category] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (1) - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (3) - Filter [s_store_sk,s_store_name,s_company_name] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_name,s_company_name] +TakeOrderedAndProject [avg_monthly_sales,d_moy,d_year,i_brand,i_category,nsum,psum,s_company_name,s_store_name,sum_sales] + WholeStageCodegen + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,rn,s_company_name,s_store_name,sum_sales,sum_sales] + BroadcastHashJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] + Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] + Filter [avg_monthly_sales,rn,sum_sales] + InputAdapter + Window [_w0,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen + Filter [d_year] + InputAdapter + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + InputAdapter + Exchange [i_brand,i_category,s_company_name,s_store_name] #1 + WholeStageCodegen + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + InputAdapter + Exchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] #2 + WholeStageCodegen + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price,sum,sum] [sum,sum] + Project [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,d_year,i_brand,i_category,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand,i_category,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [i_brand,i_category,i_item_sk] + Filter [i_brand,i_category,i_item_sk] + Scan parquet default.item [i_brand,i_category,i_item_sk] [i_brand,i_category,i_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk,d_moy,d_year] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [s_company_name,s_store_name,s_store_sk] + Filter [s_company_name,s_store_name,s_store_sk] + Scan parquet default.store [s_company_name,s_store_name,s_store_sk] [s_company_name,s_store_name,s_store_sk] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (15) - Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + BroadcastExchange #6 + WholeStageCodegen + Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] Filter [rn] InputAdapter - Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] - WholeStageCodegen (14) - Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] InputAdapter - Exchange [i_category,i_brand,s_store_name,s_company_name] #8 - WholeStageCodegen (13) - HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,sum] + Exchange [i_brand,i_category,s_company_name,s_store_name] #7 + WholeStageCodegen + HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum,sum(UnscaledValue(ss_sales_price))] [sum,sum(UnscaledValue(ss_sales_price)),sum_sales] InputAdapter - ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #3 + ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] #2 InputAdapter - BroadcastExchange #9 - WholeStageCodegen (22) - Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] + BroadcastExchange #8 + WholeStageCodegen + Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] Filter [rn] InputAdapter - Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] - WholeStageCodegen (21) - Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen + Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] InputAdapter - ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #8 + ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] #7 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/explain.txt index f317bcdf1..735a739a8 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/explain.txt @@ -1,183 +1,31 @@ == Physical Plan == -* HashAggregate (32) -+- Exchange (31) - +- * HashAggregate (30) - +- * Project (29) - +- * BroadcastHashJoin Inner BuildRight (28) - :- * Project (22) - : +- * BroadcastHashJoin Inner BuildRight (21) - : :- * Project (15) - : : +- * BroadcastHashJoin Inner BuildRight (14) - : : :- * Project (9) - : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.store (4) - : : +- BroadcastExchange (13) - : : +- * Filter (12) - : : +- * ColumnarToRow (11) - : : +- Scan parquet default.customer_demographics (10) - : +- BroadcastExchange (20) - : +- * Project (19) - : +- * Filter (18) - : +- * ColumnarToRow (17) - : +- Scan parquet default.customer_address (16) - +- BroadcastExchange (27) - +- * Project (26) - +- * Filter (25) - +- * ColumnarToRow (24) - +- Scan parquet default.date_dim (23) - - -(1) Scan parquet default.store_sales -Output [7]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_net_profit#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk), Or(Or(And(GreaterThanOrEqual(ss_sales_price,100.00),LessThanOrEqual(ss_sales_price,150.00)),And(GreaterThanOrEqual(ss_sales_price,50.00),LessThanOrEqual(ss_sales_price,100.00))),And(GreaterThanOrEqual(ss_sales_price,150.00),LessThanOrEqual(ss_sales_price,200.00))), Or(Or(And(GreaterThanOrEqual(ss_net_profit,0.00),LessThanOrEqual(ss_net_profit,2000.00)),And(GreaterThanOrEqual(ss_net_profit,150.00),LessThanOrEqual(ss_net_profit,3000.00))),And(GreaterThanOrEqual(ss_net_profit,50.00),LessThanOrEqual(ss_net_profit,25000.00)))] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 5] -Input [7]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_net_profit#7] - -(3) Filter [codegen id : 5] -Input [7]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_net_profit#7] -Condition : (((((isnotnull(ss_store_sk#4) AND isnotnull(ss_cdemo_sk#2)) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_sold_date_sk#1)) AND ((((ss_sales_price#6 >= 100.00) AND (ss_sales_price#6 <= 150.00)) OR ((ss_sales_price#6 >= 50.00) AND (ss_sales_price#6 <= 100.00))) OR ((ss_sales_price#6 >= 150.00) AND (ss_sales_price#6 <= 200.00)))) AND ((((ss_net_profit#7 >= 0.00) AND (ss_net_profit#7 <= 2000.00)) OR ((ss_net_profit#7 >= 150.00) AND (ss_net_profit#7 <= 3000.00))) OR ((ss_net_profit#7 >= 50.00) AND (ss_net_profit#7 <= 25000.00)))) - -(4) Scan parquet default.store -Output [1]: [s_store_sk#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [1]: [s_store_sk#8] - -(6) Filter [codegen id : 1] -Input [1]: [s_store_sk#8] -Condition : isnotnull(s_store_sk#8) - -(7) BroadcastExchange -Input [1]: [s_store_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] - -(8) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_store_sk#4] -Right keys [1]: [s_store_sk#8] -Join condition: None - -(9) Project [codegen id : 5] -Output [6]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_net_profit#7] -Input [8]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_net_profit#7, s_store_sk#8] - -(10) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,4 yr Degree)),And(EqualTo(cd_marital_status,D),EqualTo(cd_education_status,2 yr Degree))),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College)))] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] - -(12) Filter [codegen id : 2] -Input [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] -Condition : (isnotnull(cd_demo_sk#10) AND ((((cd_marital_status#11 = M) AND (cd_education_status#12 = 4 yr Degree)) OR ((cd_marital_status#11 = D) AND (cd_education_status#12 = 2 yr Degree))) OR ((cd_marital_status#11 = S) AND (cd_education_status#12 = College)))) - -(13) BroadcastExchange -Input [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] - -(14) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#10] -Join condition: ((((((cd_marital_status#11 = M) AND (cd_education_status#12 = 4 yr Degree)) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) OR ((((cd_marital_status#11 = D) AND (cd_education_status#12 = 2 yr Degree)) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00))) OR ((((cd_marital_status#11 = S) AND (cd_education_status#12 = College)) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00))) - -(15) Project [codegen id : 5] -Output [4]: [ss_sold_date_sk#1, ss_addr_sk#3, ss_quantity#5, ss_net_profit#7] -Input [9]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_net_profit#7, cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] - -(16) Scan parquet default.customer_address -Output [3]: [ca_address_sk#14, ca_state#15, ca_country#16] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [CO,OH,TX]),In(ca_state, [OR,MN,KY])),In(ca_state, [VA,CA,MS]))] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 3] -Input [3]: [ca_address_sk#14, ca_state#15, ca_country#16] - -(18) Filter [codegen id : 3] -Input [3]: [ca_address_sk#14, ca_state#15, ca_country#16] -Condition : (((isnotnull(ca_country#16) AND (ca_country#16 = United States)) AND isnotnull(ca_address_sk#14)) AND ((ca_state#15 IN (CO,OH,TX) OR ca_state#15 IN (OR,MN,KY)) OR ca_state#15 IN (VA,CA,MS))) - -(19) Project [codegen id : 3] -Output [2]: [ca_address_sk#14, ca_state#15] -Input [3]: [ca_address_sk#14, ca_state#15, ca_country#16] - -(20) BroadcastExchange -Input [2]: [ca_address_sk#14, ca_state#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] - -(21) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_addr_sk#3] -Right keys [1]: [ca_address_sk#14] -Join condition: ((((ca_state#15 IN (CO,OH,TX) AND (ss_net_profit#7 >= 0.00)) AND (ss_net_profit#7 <= 2000.00)) OR ((ca_state#15 IN (OR,MN,KY) AND (ss_net_profit#7 >= 150.00)) AND (ss_net_profit#7 <= 3000.00))) OR ((ca_state#15 IN (VA,CA,MS) AND (ss_net_profit#7 >= 50.00)) AND (ss_net_profit#7 <= 25000.00))) - -(22) Project [codegen id : 5] -Output [2]: [ss_sold_date_sk#1, ss_quantity#5] -Input [6]: [ss_sold_date_sk#1, ss_addr_sk#3, ss_quantity#5, ss_net_profit#7, ca_address_sk#14, ca_state#15] - -(23) Scan parquet default.date_dim -Output [2]: [d_date_sk#18, d_year#19] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 4] -Input [2]: [d_date_sk#18, d_year#19] - -(25) Filter [codegen id : 4] -Input [2]: [d_date_sk#18, d_year#19] -Condition : ((isnotnull(d_year#19) AND (d_year#19 = 2001)) AND isnotnull(d_date_sk#18)) - -(26) Project [codegen id : 4] -Output [1]: [d_date_sk#18] -Input [2]: [d_date_sk#18, d_year#19] - -(27) BroadcastExchange -Input [1]: [d_date_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] - -(28) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#18] -Join condition: None - -(29) Project [codegen id : 5] -Output [1]: [ss_quantity#5] -Input [3]: [ss_sold_date_sk#1, ss_quantity#5, d_date_sk#18] - -(30) HashAggregate [codegen id : 5] -Input [1]: [ss_quantity#5] -Keys: [] -Functions [1]: [partial_sum(cast(ss_quantity#5 as bigint))] -Aggregate Attributes [1]: [sum#21] -Results [1]: [sum#22] - -(31) Exchange -Input [1]: [sum#22] -Arguments: SinglePartition, true, [id=#23] - -(32) HashAggregate [codegen id : 6] -Input [1]: [sum#22] -Keys: [] -Functions [1]: [sum(cast(ss_quantity#5 as bigint))] -Aggregate Attributes [1]: [sum(cast(ss_quantity#5 as bigint))#24] -Results [1]: [sum(cast(ss_quantity#5 as bigint))#24 AS sum(ss_quantity)#25] - +*(6) HashAggregate(keys=[], functions=[sum(cast(ss_quantity#1 as bigint))]) ++- Exchange SinglePartition + +- *(5) HashAggregate(keys=[], functions=[partial_sum(cast(ss_quantity#1 as bigint))]) + +- *(5) Project [ss_quantity#1] + +- *(5) BroadcastHashJoin [ss_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight + :- *(5) Project [ss_sold_date_sk#2, ss_quantity#1] + : +- *(5) BroadcastHashJoin [ss_addr_sk#4], [ca_address_sk#5], Inner, BuildRight, ((((ca_state#6 IN (CO,OH,TX) && (ss_net_profit#7 >= 0.00)) && (ss_net_profit#7 <= 2000.00)) || ((ca_state#6 IN (OR,MN,KY) && (ss_net_profit#7 >= 150.00)) && (ss_net_profit#7 <= 3000.00))) || ((ca_state#6 IN (VA,CA,MS) && (ss_net_profit#7 >= 50.00)) && (ss_net_profit#7 <= 25000.00))) + : :- *(5) Project [ss_sold_date_sk#2, ss_addr_sk#4, ss_quantity#1, ss_net_profit#7] + : : +- *(5) BroadcastHashJoin [ss_cdemo_sk#8], [cd_demo_sk#9], Inner, BuildRight, ((((((cd_marital_status#10 = M) && (cd_education_status#11 = 4 yr Degree)) && (ss_sales_price#12 >= 100.00)) && (ss_sales_price#12 <= 150.00)) || ((((cd_marital_status#10 = D) && (cd_education_status#11 = 2 yr Degree)) && (ss_sales_price#12 >= 50.00)) && (ss_sales_price#12 <= 100.00))) || ((((cd_marital_status#10 = S) && (cd_education_status#11 = College)) && (ss_sales_price#12 >= 150.00)) && (ss_sales_price#12 <= 200.00))) + : : :- *(5) Project [ss_sold_date_sk#2, ss_cdemo_sk#8, ss_addr_sk#4, ss_quantity#1, ss_sales_price#12, ss_net_profit#7] + : : : +- *(5) BroadcastHashJoin [ss_store_sk#13], [s_store_sk#14], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#2, ss_cdemo_sk#8, ss_addr_sk#4, ss_store_sk#13, ss_quantity#1, ss_sales_price#12, ss_net_profit#7] + : : : : +- *(5) Filter (((isnotnull(ss_store_sk#13) && isnotnull(ss_cdemo_sk#8)) && isnotnull(ss_addr_sk#4)) && isnotnull(ss_sold_date_sk#2)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#2,ss_cdemo_sk#8,ss_addr_sk#4,ss_store_sk#13,ss_quantity#1,ss_sales_price#12,ss_net_profit#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] + : : +- *(2) Filter isnotnull(cd_demo_sk#9) + : : +- *(2) FileScan parquet default.customer_demographics[cd_demo_sk#9,cd_marital_status#10,cd_education_status#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [ca_address_sk#5, ca_state#6] + : +- *(3) Filter ((isnotnull(ca_country#15) && (ca_country#15 = United States)) && isnotnull(ca_address_sk#5)) + : +- *(3) FileScan parquet default.customer_address[ca_address_sk#5,ca_state#6,ca_country#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [d_date_sk#3] + +- *(4) Filter ((isnotnull(d_year#16) && (d_year#16 = 2001)) && isnotnull(d_date_sk#3)) + +- *(4) FileScan parquet default.date_dim[d_date_sk#3,d_year#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/simplified.txt index 710fbdd72..b4f78ab5c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/simplified.txt @@ -1,48 +1,41 @@ -WholeStageCodegen (6) - HashAggregate [sum] [sum(cast(ss_quantity as bigint)),sum(ss_quantity),sum] +WholeStageCodegen + HashAggregate [sum,sum(cast(ss_quantity as bigint))] [sum,sum(cast(ss_quantity as bigint)),sum(ss_quantity)] InputAdapter Exchange #1 - WholeStageCodegen (5) - HashAggregate [ss_quantity] [sum,sum] + WholeStageCodegen + HashAggregate [ss_quantity,sum,sum] [sum,sum] Project [ss_quantity] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity] - BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] - Project [ss_sold_date_sk,ss_addr_sk,ss_quantity,ss_net_profit] - BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk,cd_marital_status,cd_education_status,ss_sales_price] - Project [ss_sold_date_sk,ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_net_profit] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Filter [ss_store_sk,ss_cdemo_sk,ss_addr_sk,ss_sold_date_sk,ss_sales_price,ss_net_profit] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_cdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_net_profit] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [ca_address_sk,ca_state,ss_addr_sk,ss_net_profit] + Project [ss_addr_sk,ss_net_profit,ss_quantity,ss_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cd_education_status,cd_marital_status,ss_cdemo_sk,ss_sales_price] + Project [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_addr_sk,ss_cdemo_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk] + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk] [s_store_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Filter [cd_demo_sk,cd_marital_status,cd_education_status] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + WholeStageCodegen + Project [cd_demo_sk,cd_education_status,cd_marital_status] + Filter [cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) + WholeStageCodegen Project [ca_address_sk,ca_state] - Filter [ca_country,ca_address_sk,ca_state] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] + Filter [ca_address_sk,ca_country] + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] [ca_address_sk,ca_country,ca_state] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/explain.txt index 15f0cda0b..0bad61dd2 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/explain.txt @@ -1,435 +1,76 @@ == Physical Plan == -TakeOrderedAndProject (78) -+- * HashAggregate (77) - +- Exchange (76) - +- * HashAggregate (75) - +- * Expand (74) - +- Union (73) - :- * HashAggregate (25) - : +- Exchange (24) - : +- * HashAggregate (23) - : +- * Project (22) - : +- * BroadcastHashJoin Inner BuildRight (21) - : :- * Project (16) - : : +- * BroadcastHashJoin Inner BuildRight (15) - : : :- Union (9) - : : : :- * Project (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- * Project (8) - : : : +- * Filter (7) - : : : +- * ColumnarToRow (6) - : : : +- Scan parquet default.store_returns (5) - : : +- BroadcastExchange (14) - : : +- * Project (13) - : : +- * Filter (12) - : : +- * ColumnarToRow (11) - : : +- Scan parquet default.date_dim (10) - : +- BroadcastExchange (20) - : +- * Filter (19) - : +- * ColumnarToRow (18) - : +- Scan parquet default.store (17) - :- * HashAggregate (46) - : +- Exchange (45) - : +- * HashAggregate (44) - : +- * Project (43) - : +- * BroadcastHashJoin Inner BuildRight (42) - : :- * Project (37) - : : +- * BroadcastHashJoin Inner BuildRight (36) - : : :- Union (34) - : : : :- * Project (29) - : : : : +- * Filter (28) - : : : : +- * ColumnarToRow (27) - : : : : +- Scan parquet default.catalog_sales (26) - : : : +- * Project (33) - : : : +- * Filter (32) - : : : +- * ColumnarToRow (31) - : : : +- Scan parquet default.catalog_returns (30) - : : +- ReusedExchange (35) - : +- BroadcastExchange (41) - : +- * Filter (40) - : +- * ColumnarToRow (39) - : +- Scan parquet default.catalog_page (38) - +- * HashAggregate (72) - +- Exchange (71) - +- * HashAggregate (70) - +- * Project (69) - +- * BroadcastHashJoin Inner BuildRight (68) - :- * Project (63) - : +- * BroadcastHashJoin Inner BuildRight (62) - : :- Union (60) - : : :- * Project (50) - : : : +- * Filter (49) - : : : +- * ColumnarToRow (48) - : : : +- Scan parquet default.web_sales (47) - : : +- * Project (59) - : : +- * BroadcastHashJoin Inner BuildRight (58) - : : :- * Filter (53) - : : : +- * ColumnarToRow (52) - : : : +- Scan parquet default.web_returns (51) - : : +- BroadcastExchange (57) - : : +- * Filter (56) - : : +- * ColumnarToRow (55) - : : +- Scan parquet default.web_sales (54) - : +- ReusedExchange (61) - +- BroadcastExchange (67) - +- * Filter (66) - +- * ColumnarToRow (65) - +- Scan parquet default.web_site (64) - - -(1) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 1] -Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] - -(3) Filter [codegen id : 1] -Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] -Condition : (isnotnull(cast(ss_sold_date_sk#1 as bigint)) AND isnotnull(cast(ss_store_sk#2 as bigint))) - -(4) Project [codegen id : 1] -Output [6]: [cast(ss_store_sk#2 as bigint) AS store_sk#5, cast(ss_sold_date_sk#1 as bigint) AS date_sk#6, ss_ext_sales_price#3 AS sales_price#7, ss_net_profit#4 AS profit#8, 0.00 AS return_amt#9, 0.00 AS net_loss#10] -Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] - -(5) Scan parquet default.store_returns -Output [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)] -ReadSchema: struct - -(6) ColumnarToRow [codegen id : 2] -Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] - -(7) Filter [codegen id : 2] -Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] -Condition : (isnotnull(sr_returned_date_sk#11) AND isnotnull(sr_store_sk#12)) - -(8) Project [codegen id : 2] -Output [6]: [sr_store_sk#12 AS store_sk#15, sr_returned_date_sk#11 AS date_sk#16, 0.00 AS sales_price#17, 0.00 AS profit#18, sr_return_amt#13 AS return_amt#19, sr_net_loss#14 AS net_loss#20] -Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] - -(9) Union - -(10) Scan parquet default.date_dim -Output [2]: [d_date_sk#21, d_date#22] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), IsNotNull(d_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#21, d_date#22] - -(12) Filter [codegen id : 3] -Input [2]: [d_date_sk#21, d_date#22] -Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 11192)) AND (d_date#22 <= 11206)) AND isnotnull(d_date_sk#21)) - -(13) Project [codegen id : 3] -Output [1]: [d_date_sk#21] -Input [2]: [d_date_sk#21, d_date#22] - -(14) BroadcastExchange -Input [1]: [d_date_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] - -(15) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [date_sk#6] -Right keys [1]: [cast(d_date_sk#21 as bigint)] -Join condition: None - -(16) Project [codegen id : 5] -Output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] -Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] - -(17) Scan parquet default.store -Output [2]: [s_store_sk#24, s_store_id#25] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct - -(18) ColumnarToRow [codegen id : 4] -Input [2]: [s_store_sk#24, s_store_id#25] - -(19) Filter [codegen id : 4] -Input [2]: [s_store_sk#24, s_store_id#25] -Condition : isnotnull(s_store_sk#24) - -(20) BroadcastExchange -Input [2]: [s_store_sk#24, s_store_id#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] - -(21) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [store_sk#5] -Right keys [1]: [cast(s_store_sk#24 as bigint)] -Join condition: None - -(22) Project [codegen id : 5] -Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] -Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#24, s_store_id#25] - -(23) HashAggregate [codegen id : 5] -Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] -Keys [1]: [s_store_id#25] -Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] -Aggregate Attributes [4]: [sum#27, sum#28, sum#29, sum#30] -Results [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] - -(24) Exchange -Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] -Arguments: hashpartitioning(s_store_id#25, 5), true, [id=#35] - -(25) HashAggregate [codegen id : 6] -Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] -Keys [1]: [s_store_id#25] -Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] -Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#36, sum(UnscaledValue(return_amt#9))#37, sum(UnscaledValue(profit#8))#38, sum(UnscaledValue(net_loss#10))#39] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#40, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS RETURNS#41, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#42, store channel AS channel#43, concat(store, s_store_id#25) AS id#44] - -(26) Scan parquet default.catalog_sales -Output [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk)] -ReadSchema: struct - -(27) ColumnarToRow [codegen id : 7] -Input [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] - -(28) Filter [codegen id : 7] -Input [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] -Condition : (isnotnull(cs_sold_date_sk#45) AND isnotnull(cs_catalog_page_sk#46)) - -(29) Project [codegen id : 7] -Output [6]: [cs_catalog_page_sk#46 AS page_sk#49, cs_sold_date_sk#45 AS date_sk#50, cs_ext_sales_price#47 AS sales_price#51, cs_net_profit#48 AS profit#52, 0.00 AS return_amt#53, 0.00 AS net_loss#54] -Input [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] - -(30) Scan parquet default.catalog_returns -Output [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_catalog_page_sk)] -ReadSchema: struct - -(31) ColumnarToRow [codegen id : 8] -Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] - -(32) Filter [codegen id : 8] -Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] -Condition : (isnotnull(cr_returned_date_sk#55) AND isnotnull(cr_catalog_page_sk#56)) - -(33) Project [codegen id : 8] -Output [6]: [cr_catalog_page_sk#56 AS page_sk#59, cr_returned_date_sk#55 AS date_sk#60, 0.00 AS sales_price#61, 0.00 AS profit#62, cr_return_amount#57 AS return_amt#63, cr_net_loss#58 AS net_loss#64] -Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] - -(34) Union - -(35) ReusedExchange [Reuses operator id: 14] -Output [1]: [d_date_sk#21] - -(36) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [date_sk#50] -Right keys [1]: [d_date_sk#21] -Join condition: None - -(37) Project [codegen id : 11] -Output [5]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54] -Input [7]: [page_sk#49, date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, d_date_sk#21] - -(38) Scan parquet default.catalog_page -Output [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_page] -PushedFilters: [IsNotNull(cp_catalog_page_sk)] -ReadSchema: struct - -(39) ColumnarToRow [codegen id : 10] -Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] - -(40) Filter [codegen id : 10] -Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] -Condition : isnotnull(cp_catalog_page_sk#65) - -(41) BroadcastExchange -Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#67] - -(42) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [page_sk#49] -Right keys [1]: [cp_catalog_page_sk#65] -Join condition: None - -(43) Project [codegen id : 11] -Output [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] -Input [7]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_sk#65, cp_catalog_page_id#66] - -(44) HashAggregate [codegen id : 11] -Input [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] -Keys [1]: [cp_catalog_page_id#66] -Functions [4]: [partial_sum(UnscaledValue(sales_price#51)), partial_sum(UnscaledValue(return_amt#53)), partial_sum(UnscaledValue(profit#52)), partial_sum(UnscaledValue(net_loss#54))] -Aggregate Attributes [4]: [sum#68, sum#69, sum#70, sum#71] -Results [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] - -(45) Exchange -Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] -Arguments: hashpartitioning(cp_catalog_page_id#66, 5), true, [id=#76] - -(46) HashAggregate [codegen id : 12] -Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] -Keys [1]: [cp_catalog_page_id#66] -Functions [4]: [sum(UnscaledValue(sales_price#51)), sum(UnscaledValue(return_amt#53)), sum(UnscaledValue(profit#52)), sum(UnscaledValue(net_loss#54))] -Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#51))#77, sum(UnscaledValue(return_amt#53))#78, sum(UnscaledValue(profit#52))#79, sum(UnscaledValue(net_loss#54))#80] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#51))#77,17,2) AS sales#81, MakeDecimal(sum(UnscaledValue(return_amt#53))#78,17,2) AS RETURNS#82, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#52))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#54))#80,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#83, catalog channel AS channel#84, concat(catalog_page, cp_catalog_page_id#66) AS id#85] - -(47) Scan parquet default.web_sales -Output [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_web_site_sk)] -ReadSchema: struct - -(48) ColumnarToRow [codegen id : 13] -Input [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] - -(49) Filter [codegen id : 13] -Input [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] -Condition : (isnotnull(cast(ws_sold_date_sk#86 as bigint)) AND isnotnull(ws_web_site_sk#87)) - -(50) Project [codegen id : 13] -Output [6]: [ws_web_site_sk#87 AS wsr_web_site_sk#90, cast(ws_sold_date_sk#86 as bigint) AS date_sk#91, ws_ext_sales_price#88 AS sales_price#92, ws_net_profit#89 AS profit#93, 0.00 AS return_amt#94, 0.00 AS net_loss#95] -Input [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] - -(51) Scan parquet default.web_returns -Output [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_returns] -PushedFilters: [IsNotNull(wr_returned_date_sk)] -ReadSchema: struct - -(52) ColumnarToRow [codegen id : 15] -Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] - -(53) Filter [codegen id : 15] -Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] -Condition : isnotnull(wr_returned_date_sk#96) - -(54) Scan parquet default.web_sales -Output [3]: [ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)] -ReadSchema: struct - -(55) ColumnarToRow [codegen id : 14] -Input [3]: [ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] - -(56) Filter [codegen id : 14] -Input [3]: [ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] -Condition : ((isnotnull(ws_item_sk#101) AND isnotnull(ws_order_number#102)) AND isnotnull(ws_web_site_sk#87)) - -(57) BroadcastExchange -Input [3]: [ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint), cast(input[2, int, false] as bigint)),false), [id=#103] - -(58) BroadcastHashJoin [codegen id : 15] -Left keys [2]: [wr_item_sk#97, wr_order_number#98] -Right keys [2]: [cast(ws_item_sk#101 as bigint), cast(ws_order_number#102 as bigint)] -Join condition: None - -(59) Project [codegen id : 15] -Output [6]: [ws_web_site_sk#87 AS wsr_web_site_sk#104, wr_returned_date_sk#96 AS date_sk#105, 0.00 AS sales_price#106, 0.00 AS profit#107, wr_return_amt#99 AS return_amt#108, wr_net_loss#100 AS net_loss#109] -Input [8]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100, ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] - -(60) Union - -(61) ReusedExchange [Reuses operator id: 14] -Output [1]: [d_date_sk#21] - -(62) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [date_sk#91] -Right keys [1]: [cast(d_date_sk#21 as bigint)] -Join condition: None - -(63) Project [codegen id : 18] -Output [5]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95] -Input [7]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, d_date_sk#21] - -(64) Scan parquet default.web_site -Output [2]: [web_site_sk#110, web_site_id#111] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_site] -PushedFilters: [IsNotNull(web_site_sk)] -ReadSchema: struct - -(65) ColumnarToRow [codegen id : 17] -Input [2]: [web_site_sk#110, web_site_id#111] - -(66) Filter [codegen id : 17] -Input [2]: [web_site_sk#110, web_site_id#111] -Condition : isnotnull(web_site_sk#110) - -(67) BroadcastExchange -Input [2]: [web_site_sk#110, web_site_id#111] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#112] - -(68) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [wsr_web_site_sk#90] -Right keys [1]: [web_site_sk#110] -Join condition: None - -(69) Project [codegen id : 18] -Output [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#111] -Input [7]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#110, web_site_id#111] - -(70) HashAggregate [codegen id : 18] -Input [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#111] -Keys [1]: [web_site_id#111] -Functions [4]: [partial_sum(UnscaledValue(sales_price#92)), partial_sum(UnscaledValue(return_amt#94)), partial_sum(UnscaledValue(profit#93)), partial_sum(UnscaledValue(net_loss#95))] -Aggregate Attributes [4]: [sum#113, sum#114, sum#115, sum#116] -Results [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120] - -(71) Exchange -Input [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120] -Arguments: hashpartitioning(web_site_id#111, 5), true, [id=#121] - -(72) HashAggregate [codegen id : 19] -Input [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120] -Keys [1]: [web_site_id#111] -Functions [4]: [sum(UnscaledValue(sales_price#92)), sum(UnscaledValue(return_amt#94)), sum(UnscaledValue(profit#93)), sum(UnscaledValue(net_loss#95))] -Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#92))#122, sum(UnscaledValue(return_amt#94))#123, sum(UnscaledValue(profit#93))#124, sum(UnscaledValue(net_loss#95))#125] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#92))#122,17,2) AS sales#126, MakeDecimal(sum(UnscaledValue(return_amt#94))#123,17,2) AS RETURNS#127, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#124,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#125,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#128, web channel AS channel#129, concat(web_site, web_site_id#111) AS id#130] - -(73) Union - -(74) Expand [codegen id : 20] -Input [5]: [sales#40, RETURNS#41, profit#42, channel#43, id#44] -Arguments: [List(sales#40, returns#41, profit#42, channel#43, id#44, 0), List(sales#40, returns#41, profit#42, channel#43, null, 1), List(sales#40, returns#41, profit#42, null, null, 3)], [sales#40, returns#41, profit#42, channel#131, id#132, spark_grouping_id#133] - -(75) HashAggregate [codegen id : 20] -Input [6]: [sales#40, returns#41, profit#42, channel#131, id#132, spark_grouping_id#133] -Keys [3]: [channel#131, id#132, spark_grouping_id#133] -Functions [3]: [partial_sum(sales#40), partial_sum(returns#41), partial_sum(profit#42)] -Aggregate Attributes [6]: [sum#134, isEmpty#135, sum#136, isEmpty#137, sum#138, isEmpty#139] -Results [9]: [channel#131, id#132, spark_grouping_id#133, sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] - -(76) Exchange -Input [9]: [channel#131, id#132, spark_grouping_id#133, sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] -Arguments: hashpartitioning(channel#131, id#132, spark_grouping_id#133, 5), true, [id=#146] - -(77) HashAggregate [codegen id : 21] -Input [9]: [channel#131, id#132, spark_grouping_id#133, sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] -Keys [3]: [channel#131, id#132, spark_grouping_id#133] -Functions [3]: [sum(sales#40), sum(returns#41), sum(profit#42)] -Aggregate Attributes [3]: [sum(sales#40)#147, sum(returns#41)#148, sum(profit#42)#149] -Results [5]: [channel#131, id#132, sum(sales#40)#147 AS sales#150, sum(returns#41)#148 AS returns#151, sum(profit#42)#149 AS profit#152] - -(78) TakeOrderedAndProject -Input [5]: [channel#131, id#132, sales#150, returns#151, profit#152] -Arguments: 100, [channel#131 ASC NULLS FIRST, id#132 ASC NULLS FIRST], [channel#131, id#132, sales#150, returns#151, profit#152] - +TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NULLS FIRST], output=[channel#1,id#2,sales#3,returns#4,profit#5]) ++- *(21) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[sum(sales#7), sum(returns#8), sum(profit#9)]) + +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 5) + +- *(20) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[partial_sum(sales#7), partial_sum(returns#8), partial_sum(profit#9)]) + +- *(20) Expand [List(sales#7, returns#8, profit#9, channel#10, id#11, 0), List(sales#7, returns#8, profit#9, channel#10, null, 1), List(sales#7, returns#8, profit#9, null, null, 3)], [sales#7, returns#8, profit#9, channel#1, id#2, spark_grouping_id#6] + +- Union + :- *(6) HashAggregate(keys=[s_store_id#12], functions=[sum(UnscaledValue(sales_price#13)), sum(UnscaledValue(return_amt#14)), sum(UnscaledValue(profit#15)), sum(UnscaledValue(net_loss#16))]) + : +- Exchange hashpartitioning(s_store_id#12, 5) + : +- *(5) HashAggregate(keys=[s_store_id#12], functions=[partial_sum(UnscaledValue(sales_price#13)), partial_sum(UnscaledValue(return_amt#14)), partial_sum(UnscaledValue(profit#15)), partial_sum(UnscaledValue(net_loss#16))]) + : +- *(5) Project [sales_price#13, profit#15, return_amt#14, net_loss#16, s_store_id#12] + : +- *(5) BroadcastHashJoin [store_sk#17], [cast(s_store_sk#18 as bigint)], Inner, BuildRight + : :- *(5) Project [store_sk#17, sales_price#13, profit#15, return_amt#14, net_loss#16] + : : +- *(5) BroadcastHashJoin [date_sk#19], [cast(d_date_sk#20 as bigint)], Inner, BuildRight + : : :- Union + : : : :- *(1) Project [cast(ss_store_sk#21 as bigint) AS store_sk#17, cast(ss_sold_date_sk#22 as bigint) AS date_sk#19, ss_ext_sales_price#23 AS sales_price#13, ss_net_profit#24 AS profit#15, 0.00 AS return_amt#14, 0.00 AS net_loss#16] + : : : : +- *(1) Filter (isnotnull(cast(ss_sold_date_sk#22 as bigint)) && isnotnull(cast(ss_store_sk#21 as bigint))) + : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_store_sk#21,ss_ext_sales_price#23,ss_net_profit#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [], ReadSchema: struct= 11192)) && (d_date#35 <= 11206)) && isnotnull(d_date_sk#20)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_date#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), Is..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [s_store_sk#18, s_store_id#12] + : +- *(4) Filter isnotnull(s_store_sk#18) + : +- *(4) FileScan parquet default.store[s_store_sk#18,s_store_id#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + :- *(12) HashAggregate(keys=[cp_catalog_page_id#36], functions=[sum(UnscaledValue(sales_price#37)), sum(UnscaledValue(return_amt#38)), sum(UnscaledValue(profit#39)), sum(UnscaledValue(net_loss#40))]) + : +- Exchange hashpartitioning(cp_catalog_page_id#36, 5) + : +- *(11) HashAggregate(keys=[cp_catalog_page_id#36], functions=[partial_sum(UnscaledValue(sales_price#37)), partial_sum(UnscaledValue(return_amt#38)), partial_sum(UnscaledValue(profit#39)), partial_sum(UnscaledValue(net_loss#40))]) + : +- *(11) Project [sales_price#37, profit#39, return_amt#38, net_loss#40, cp_catalog_page_id#36] + : +- *(11) BroadcastHashJoin [page_sk#41], [cp_catalog_page_sk#42], Inner, BuildRight + : :- *(11) Project [page_sk#41, sales_price#37, profit#39, return_amt#38, net_loss#40] + : : +- *(11) BroadcastHashJoin [date_sk#43], [d_date_sk#20], Inner, BuildRight + : : :- Union + : : : :- *(7) Project [cs_catalog_page_sk#44 AS page_sk#41, cs_sold_date_sk#45 AS date_sk#43, cs_ext_sales_price#46 AS sales_price#37, cs_net_profit#47 AS profit#39, 0.00 AS return_amt#38, 0.00 AS net_loss#40] + : : : : +- *(7) Filter (isnotnull(cs_sold_date_sk#45) && isnotnull(cs_catalog_page_sk#44)) + : : : : +- *(7) FileScan parquet default.catalog_sales[cs_sold_date_sk#45,cs_catalog_page_sk#44,cs_ext_sales_price#46,cs_net_profit#47] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk)], ReadSchema: struct= 11192)) && (d_date#35 <= 11206)) && isnotnull(d_date_sk#20)) + : : +- *(9) FileScan parquet default.date_dim[d_date_sk#20,d_date#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), Is..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(10) Project [cp_catalog_page_sk#42, cp_catalog_page_id#36] + : +- *(10) Filter isnotnull(cp_catalog_page_sk#42) + : +- *(10) FileScan parquet default.catalog_page[cp_catalog_page_sk#42,cp_catalog_page_id#36] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_page], PartitionFilters: [], PushedFilters: [IsNotNull(cp_catalog_page_sk)], ReadSchema: struct + +- *(19) HashAggregate(keys=[web_site_id#58], functions=[sum(UnscaledValue(sales_price#59)), sum(UnscaledValue(return_amt#60)), sum(UnscaledValue(profit#61)), sum(UnscaledValue(net_loss#62))]) + +- Exchange hashpartitioning(web_site_id#58, 5) + +- *(18) HashAggregate(keys=[web_site_id#58], functions=[partial_sum(UnscaledValue(sales_price#59)), partial_sum(UnscaledValue(return_amt#60)), partial_sum(UnscaledValue(profit#61)), partial_sum(UnscaledValue(net_loss#62))]) + +- *(18) Project [sales_price#59, profit#61, return_amt#60, net_loss#62, web_site_id#58] + +- *(18) BroadcastHashJoin [wsr_web_site_sk#63], [web_site_sk#64], Inner, BuildRight + :- *(18) Project [wsr_web_site_sk#63, sales_price#59, profit#61, return_amt#60, net_loss#62] + : +- *(18) BroadcastHashJoin [date_sk#65], [cast(d_date_sk#20 as bigint)], Inner, BuildRight + : :- Union + : : :- *(13) Project [ws_web_site_sk#66 AS wsr_web_site_sk#63, cast(ws_sold_date_sk#67 as bigint) AS date_sk#65, ws_ext_sales_price#68 AS sales_price#59, ws_net_profit#69 AS profit#61, 0.00 AS return_amt#60, 0.00 AS net_loss#62] + : : : +- *(13) Filter (isnotnull(cast(ws_sold_date_sk#67 as bigint)) && isnotnull(ws_web_site_sk#66)) + : : : +- *(13) FileScan parquet default.web_sales[ws_sold_date_sk#67,ws_web_site_sk#66,ws_ext_sales_price#68,ws_net_profit#69] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_web_site_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(17) Project [web_site_sk#64, web_site_id#58] + +- *(17) Filter isnotnull(web_site_sk#64) + +- *(17) FileScan parquet default.web_site[web_site_sk#64,web_site_id#58] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/simplified.txt index 9b7cc3360..257c8efa6 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/simplified.txt @@ -1,123 +1,110 @@ -TakeOrderedAndProject [channel,id,sales,returns,profit] - WholeStageCodegen (21) - HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] +TakeOrderedAndProject [channel,id,profit,returns,sales] + WholeStageCodegen + HashAggregate [channel,id,spark_grouping_id,sum,sum,sum,sum(profit),sum(returns),sum(sales)] [profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] InputAdapter Exchange [channel,id,spark_grouping_id] #1 - WholeStageCodegen (20) - HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - Expand [sales,returns,profit,channel,id] + WholeStageCodegen + HashAggregate [channel,id,profit,returns,sales,spark_grouping_id,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Expand [channel,id,profit,returns,sales] InputAdapter Union - WholeStageCodegen (6) - HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum] + WholeStageCodegen + HashAggregate [s_store_id,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] [RETURNS,channel,id,profit,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] InputAdapter Exchange [s_store_id] #2 - WholeStageCodegen (5) - HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,profit,return_amt,net_loss,s_store_id] - BroadcastHashJoin [store_sk,s_store_sk] - Project [store_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + WholeStageCodegen + HashAggregate [net_loss,profit,return_amt,s_store_id,sales_price,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [net_loss,profit,return_amt,s_store_id,sales_price] + BroadcastHashJoin [s_store_sk,store_sk] + Project [net_loss,profit,return_amt,sales_price,store_sk] + BroadcastHashJoin [d_date_sk,date_sk] InputAdapter Union - WholeStageCodegen (1) - Project [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] + WholeStageCodegen + Project [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - WholeStageCodegen (2) - Project [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] + Scan parquet default.store_sales [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] + WholeStageCodegen + Project [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] Filter [sr_returned_date_sk,sr_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] + Scan parquet default.store_returns [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (3) + WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (4) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id] - WholeStageCodegen (12) - HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum] + WholeStageCodegen + Project [s_store_id,s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_id,s_store_sk] [s_store_id,s_store_sk] + WholeStageCodegen + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] [RETURNS,channel,id,profit,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] InputAdapter Exchange [cp_catalog_page_id] #5 - WholeStageCodegen (11) - HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] - BroadcastHashJoin [page_sk,cp_catalog_page_sk] - Project [page_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + WholeStageCodegen + HashAggregate [cp_catalog_page_id,net_loss,profit,return_amt,sales_price,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [cp_catalog_page_id,net_loss,profit,return_amt,sales_price] + BroadcastHashJoin [cp_catalog_page_sk,page_sk] + Project [net_loss,page_sk,profit,return_amt,sales_price] + BroadcastHashJoin [d_date_sk,date_sk] InputAdapter Union - WholeStageCodegen (7) - Project [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] - Filter [cs_sold_date_sk,cs_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit] - WholeStageCodegen (8) - Project [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] - Filter [cr_returned_date_sk,cr_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] + WholeStageCodegen + Project [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + Filter [cs_catalog_page_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + WholeStageCodegen + Project [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] + Filter [cr_catalog_page_sk,cr_returned_date_sk] + Scan parquet default.catalog_returns [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] InputAdapter - ReusedExchange [d_date_sk] #3 + BroadcastExchange #6 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter - BroadcastExchange #6 - WholeStageCodegen (10) - Filter [cp_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] - WholeStageCodegen (19) - HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum] + BroadcastExchange #7 + WholeStageCodegen + Project [cp_catalog_page_id,cp_catalog_page_sk] + Filter [cp_catalog_page_sk] + Scan parquet default.catalog_page [cp_catalog_page_id,cp_catalog_page_sk] [cp_catalog_page_id,cp_catalog_page_sk] + WholeStageCodegen + HashAggregate [sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price)),web_site_id] [RETURNS,channel,id,profit,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] InputAdapter - Exchange [web_site_id] #7 - WholeStageCodegen (18) - HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,profit,return_amt,net_loss,web_site_id] - BroadcastHashJoin [wsr_web_site_sk,web_site_sk] - Project [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + Exchange [web_site_id] #8 + WholeStageCodegen + HashAggregate [net_loss,profit,return_amt,sales_price,sum,sum,sum,sum,sum,sum,sum,sum,web_site_id] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [net_loss,profit,return_amt,sales_price,web_site_id] + BroadcastHashJoin [web_site_sk,wsr_web_site_sk] + Project [net_loss,profit,return_amt,sales_price,wsr_web_site_sk] + BroadcastHashJoin [d_date_sk,date_sk] InputAdapter Union - WholeStageCodegen (13) - Project [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] + WholeStageCodegen + Project [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] Filter [ws_sold_date_sk,ws_web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit] - WholeStageCodegen (15) - Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] + Scan parquet default.web_sales [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] + WholeStageCodegen + Project [wr_net_loss,wr_return_amt,wr_returned_date_sk,ws_web_site_sk] BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Filter [wr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_returned_date_sk,wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + Project [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt,wr_returned_date_sk] + Filter [wr_returned_date_sk] + Scan parquet default.web_returns [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt,wr_returned_date_sk] [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt,wr_returned_date_sk] InputAdapter - BroadcastExchange #8 - WholeStageCodegen (14) - Filter [ws_item_sk,ws_order_number,ws_web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] + BroadcastExchange #9 + WholeStageCodegen + Project [ws_item_sk,ws_order_number,ws_web_site_sk] + Filter [ws_item_sk,ws_order_number,ws_web_site_sk] + Scan parquet default.web_sales [ws_item_sk,ws_order_number,ws_web_site_sk] [ws_item_sk,ws_order_number,ws_web_site_sk] InputAdapter - ReusedExchange [d_date_sk] #3 + ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter - BroadcastExchange #9 - WholeStageCodegen (17) - Filter [web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_site_id] + BroadcastExchange #10 + WholeStageCodegen + Project [web_site_id,web_site_sk] + Filter [web_site_sk] + Scan parquet default.web_site [web_site_id,web_site_sk] [web_site_id,web_site_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/explain.txt index e083affa7..6716b98d6 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/explain.txt @@ -1,183 +1,32 @@ == Physical Plan == -TakeOrderedAndProject (32) -+- * HashAggregate (31) - +- Exchange (30) - +- * HashAggregate (29) - +- * Project (28) - +- * BroadcastHashJoin Inner BuildRight (27) - :- * Project (21) - : +- * BroadcastHashJoin Inner BuildRight (20) - : :- * Project (15) - : : +- * BroadcastHashJoin Inner BuildRight (14) - : : :- * Project (9) - : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.store_returns (4) - : : +- BroadcastExchange (13) - : : +- * Filter (12) - : : +- * ColumnarToRow (11) - : : +- Scan parquet default.store (10) - : +- BroadcastExchange (19) - : +- * Filter (18) - : +- * ColumnarToRow (17) - : +- Scan parquet default.date_dim (16) - +- BroadcastExchange (26) - +- * Project (25) - +- * Filter (24) - +- * ColumnarToRow (23) - +- Scan parquet default.date_dim (22) - - -(1) Scan parquet default.store_sales -Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 5] -Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5] - -(3) Filter [codegen id : 5] -Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5] -Condition : ((((isnotnull(ss_ticket_number#5) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_sold_date_sk#1)) - -(4) Scan parquet default.store_returns -Output [4]: [sr_returned_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk), IsNotNull(sr_customer_sk), IsNotNull(sr_returned_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [4]: [sr_returned_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9] - -(6) Filter [codegen id : 1] -Input [4]: [sr_returned_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9] -Condition : (((isnotnull(sr_ticket_number#9) AND isnotnull(sr_item_sk#7)) AND isnotnull(sr_customer_sk#8)) AND isnotnull(sr_returned_date_sk#6)) - -(7) BroadcastExchange -Input [4]: [sr_returned_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9] -Arguments: HashedRelationBroadcastMode(List(input[3, bigint, false], input[1, bigint, false], input[2, bigint, false]),false), [id=#10] - -(8) BroadcastHashJoin [codegen id : 5] -Left keys [3]: [cast(ss_ticket_number#5 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_customer_sk#3 as bigint)] -Right keys [3]: [sr_ticket_number#9, sr_item_sk#7, sr_customer_sk#8] -Join condition: None - -(9) Project [codegen id : 5] -Output [3]: [ss_sold_date_sk#1, ss_store_sk#4, sr_returned_date_sk#6] -Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, sr_returned_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9] - -(10) Scan parquet default.store -Output [11]: [s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [11]: [s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] - -(12) Filter [codegen id : 2] -Input [11]: [s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] -Condition : isnotnull(s_store_sk#11) - -(13) BroadcastExchange -Input [11]: [s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] - -(14) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_store_sk#4] -Right keys [1]: [s_store_sk#11] -Join condition: None - -(15) Project [codegen id : 5] -Output [12]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] -Input [14]: [ss_sold_date_sk#1, ss_store_sk#4, sr_returned_date_sk#6, s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] - -(16) Scan parquet default.date_dim -Output [1]: [d_date_sk#23] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 3] -Input [1]: [d_date_sk#23] - -(18) Filter [codegen id : 3] -Input [1]: [d_date_sk#23] -Condition : isnotnull(d_date_sk#23) - -(19) BroadcastExchange -Input [1]: [d_date_sk#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#24] - -(20) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#23] -Join condition: None - -(21) Project [codegen id : 5] -Output [12]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] -Input [13]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, d_date_sk#23] - -(22) Scan parquet default.date_dim -Output [3]: [d_date_sk#25, d_year#26, d_moy#27] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)] -ReadSchema: struct - -(23) ColumnarToRow [codegen id : 4] -Input [3]: [d_date_sk#25, d_year#26, d_moy#27] - -(24) Filter [codegen id : 4] -Input [3]: [d_date_sk#25, d_year#26, d_moy#27] -Condition : ((((isnotnull(d_year#26) AND isnotnull(d_moy#27)) AND (d_year#26 = 2001)) AND (d_moy#27 = 8)) AND isnotnull(d_date_sk#25)) - -(25) Project [codegen id : 4] -Output [1]: [d_date_sk#25] -Input [3]: [d_date_sk#25, d_year#26, d_moy#27] - -(26) BroadcastExchange -Input [1]: [d_date_sk#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] - -(27) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [sr_returned_date_sk#6] -Right keys [1]: [cast(d_date_sk#25 as bigint)] -Join condition: None - -(28) Project [codegen id : 5] -Output [12]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] -Input [13]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, d_date_sk#25] - -(29) HashAggregate [codegen id : 5] -Input [12]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] -Keys [10]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] -Functions [5]: [partial_sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum#29, sum#30, sum#31, sum#32, sum#33] -Results [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum#34, sum#35, sum#36, sum#37, sum#38] - -(30) Exchange -Input [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum#34, sum#35, sum#36, sum#37, sum#38] -Arguments: hashpartitioning(s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, 5), true, [id=#39] - -(31) HashAggregate [codegen id : 6] -Input [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum#34, sum#35, sum#36, sum#37, sum#38] -Keys [10]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] -Functions [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#40, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#41, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#42, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#43, sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#44] -Results [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#40 AS 30 days #45, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#41 AS 31 - 60 days #46, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#42 AS 61 - 90 days #47, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#43 AS 91 - 120 days #48, sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#44 AS >120 days #49] - -(32) TakeOrderedAndProject -Input [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, 30 days #45, 31 - 60 days #46, 61 - 90 days #47, 91 - 120 days #48, >120 days #49] -Arguments: 100, [s_store_name#12 ASC NULLS FIRST, s_company_id#13 ASC NULLS FIRST, s_street_number#14 ASC NULLS FIRST, s_street_name#15 ASC NULLS FIRST, s_street_type#16 ASC NULLS FIRST, s_suite_number#17 ASC NULLS FIRST, s_city#18 ASC NULLS FIRST, s_county#19 ASC NULLS FIRST, s_state#20 ASC NULLS FIRST, s_zip#21 ASC NULLS FIRST], [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, 30 days #45, 31 - 60 days #46, 61 - 90 days #47, 91 - 120 days #48, >120 days #49] - +TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,s_company_id#2 ASC NULLS FIRST,s_street_number#3 ASC NULLS FIRST,s_street_name#4 ASC NULLS FIRST,s_street_type#5 ASC NULLS FIRST,s_suite_number#6 ASC NULLS FIRST,s_city#7 ASC NULLS FIRST,s_county#8 ASC NULLS FIRST,s_state#9 ASC NULLS FIRST,s_zip#10 ASC NULLS FIRST], output=[s_store_name#1,s_company_id#2,s_street_number#3,s_street_name#4,s_street_type#5,s_suite_number#6,s_city#7,s_county#8,s_state#9,s_zip#10,30 days #11,31 - 60 days #12,61 - 90 days #13,91 - 120 days #14,>120 days #15]) ++- *(6) HashAggregate(keys=[s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10], functions=[sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 30) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 60) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 90) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))]) + +- Exchange hashpartitioning(s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10, 5) + +- *(5) HashAggregate(keys=[s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10], functions=[partial_sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 30) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 60) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 90) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))]) + +- *(5) Project [ss_sold_date_sk#17, sr_returned_date_sk#16, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] + +- *(5) BroadcastHashJoin [sr_returned_date_sk#16], [cast(d_date_sk#18 as bigint)], Inner, BuildRight + :- *(5) Project [ss_sold_date_sk#17, sr_returned_date_sk#16, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] + : +- *(5) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#19], Inner, BuildRight + : :- *(5) Project [ss_sold_date_sk#17, sr_returned_date_sk#16, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] + : : +- *(5) BroadcastHashJoin [ss_store_sk#20], [s_store_sk#21], Inner, BuildRight + : : :- *(5) Project [ss_sold_date_sk#17, ss_store_sk#20, sr_returned_date_sk#16] + : : : +- *(5) BroadcastHashJoin [cast(ss_ticket_number#22 as bigint), cast(ss_item_sk#23 as bigint), cast(ss_customer_sk#24 as bigint)], [sr_ticket_number#25, sr_item_sk#26, sr_customer_sk#27], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#17, ss_item_sk#23, ss_customer_sk#24, ss_store_sk#20, ss_ticket_number#22] + : : : : +- *(5) Filter ((((isnotnull(ss_ticket_number#22) && isnotnull(ss_item_sk#23)) && isnotnull(ss_customer_sk#24)) && isnotnull(ss_store_sk#20)) && isnotnull(ss_sold_date_sk#17)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_item_sk#23,ss_customer_sk#24,ss_store_sk#20,ss_ticket_number#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_stor..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[3, bigint, true], input[1, bigint, true], input[2, bigint, true])) + : : : +- *(1) Project [sr_returned_date_sk#16, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#25] + : : : +- *(1) Filter (((isnotnull(sr_ticket_number#25) && isnotnull(sr_customer_sk#27)) && isnotnull(sr_item_sk#26)) && isnotnull(sr_returned_date_sk#16)) + : : : +- *(1) FileScan parquet default.store_returns[sr_returned_date_sk#16,sr_item_sk#26,sr_customer_sk#27,sr_ticket_number#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_retu..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [s_store_sk#21, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] + : : +- *(2) Filter isnotnull(s_store_sk#21) + : : +- *(2) FileScan parquet default.store[s_store_sk#21,s_store_name#1,s_company_id#2,s_street_number#3,s_street_name#4,s_street_type#5,s_suite_number#6,s_city#7,s_county#8,s_state#9,s_zip#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [d_date_sk#18] + +- *(4) Filter ((((isnotnull(d_year#28) && isnotnull(d_moy#29)) && (d_year#28 = 2001)) && (d_moy#29 = 8)) && isnotnull(d_date_sk#18)) + +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#28,d_moy#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/simplified.txt index 43e777385..c3f728d0a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/simplified.txt @@ -1,48 +1,42 @@ -TakeOrderedAndProject [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] - WholeStageCodegen (6) - HashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sum,sum,sum,sum,sum] [sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] +TakeOrderedAndProject [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] + WholeStageCodegen + HashAggregate [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] InputAdapter - Exchange [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] #1 - WholeStageCodegen (5) - HashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sr_returned_date_sk,ss_sold_date_sk] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_sold_date_sk,ss_store_sk,sr_returned_date_sk] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,ss_customer_sk,sr_ticket_number,sr_item_sk,sr_customer_sk] - Filter [ss_ticket_number,ss_item_sk,ss_customer_sk,ss_store_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number] + Exchange [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] #1 + WholeStageCodegen + HashAggregate [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [sr_returned_date_sk,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] + Project [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [sr_ticket_number,sr_item_sk,sr_customer_sk,sr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] + WholeStageCodegen + Project [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + WholeStageCodegen + Project [s_city,s_company_id,s_county,s_state,s_store_name,s_store_sk,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] + Filter [s_store_sk] + Scan parquet default.store [s_city,s_company_id,s_county,s_state,s_store_name,s_store_sk,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] [s_city,s_company_id,s_county,s_state,s_store_name,s_store_sk,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) - Filter [d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk] + WholeStageCodegen + Project [d_date_sk] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk] [d_date_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/explain.txt index b391d90a3..548263929 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/explain.txt @@ -1,228 +1,41 @@ == Physical Plan == -TakeOrderedAndProject (41) -+- * Filter (40) - +- Window (39) - +- * Sort (38) - +- Exchange (37) - +- * Project (36) - +- SortMergeJoin FullOuter (35) - :- * Sort (19) - : +- Exchange (18) - : +- * Project (17) - : +- Window (16) - : +- * Sort (15) - : +- Exchange (14) - : +- * HashAggregate (13) - : +- Exchange (12) - : +- * HashAggregate (11) - : +- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.web_sales (1) - : +- BroadcastExchange (8) - : +- * Project (7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.date_dim (4) - +- * Sort (34) - +- Exchange (33) - +- * Project (32) - +- Window (31) - +- * Sort (30) - +- Exchange (29) - +- * HashAggregate (28) - +- Exchange (27) - +- * HashAggregate (26) - +- * Project (25) - +- * BroadcastHashJoin Inner BuildRight (24) - :- * Filter (22) - : +- * ColumnarToRow (21) - : +- Scan parquet default.store_sales (20) - +- ReusedExchange (23) - - -(1) Scan parquet default.web_sales -Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 2] -Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3] - -(3) Filter [codegen id : 2] -Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3] -Condition : (isnotnull(ws_item_sk#2) AND isnotnull(ws_sold_date_sk#1)) - -(4) Scan parquet default.date_dim -Output [3]: [d_date_sk#4, d_date#5, d_month_seq#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] - -(6) Filter [codegen id : 1] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] -Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1200)) AND (d_month_seq#6 <= 1211)) AND isnotnull(d_date_sk#4)) - -(7) Project [codegen id : 1] -Output [2]: [d_date_sk#4, d_date#5] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] - -(8) BroadcastExchange -Input [2]: [d_date_sk#4, d_date#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] - -(9) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ws_sold_date_sk#1] -Right keys [1]: [d_date_sk#4] -Join condition: None - -(10) Project [codegen id : 2] -Output [3]: [ws_item_sk#2, ws_sales_price#3, d_date#5] -Input [5]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3, d_date_sk#4, d_date#5] - -(11) HashAggregate [codegen id : 2] -Input [3]: [ws_item_sk#2, ws_sales_price#3, d_date#5] -Keys [2]: [ws_item_sk#2, d_date#5] -Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#3))] -Aggregate Attributes [1]: [sum#8] -Results [3]: [ws_item_sk#2, d_date#5, sum#9] - -(12) Exchange -Input [3]: [ws_item_sk#2, d_date#5, sum#9] -Arguments: hashpartitioning(ws_item_sk#2, d_date#5, 5), true, [id=#10] - -(13) HashAggregate [codegen id : 3] -Input [3]: [ws_item_sk#2, d_date#5, sum#9] -Keys [2]: [ws_item_sk#2, d_date#5] -Functions [1]: [sum(UnscaledValue(ws_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#3))#11] -Results [4]: [ws_item_sk#2 AS item_sk#12, d_date#5, MakeDecimal(sum(UnscaledValue(ws_sales_price#3))#11,17,2) AS _w0#13, ws_item_sk#2] - -(14) Exchange -Input [4]: [item_sk#12, d_date#5, _w0#13, ws_item_sk#2] -Arguments: hashpartitioning(ws_item_sk#2, 5), true, [id=#14] - -(15) Sort [codegen id : 4] -Input [4]: [item_sk#12, d_date#5, _w0#13, ws_item_sk#2] -Arguments: [ws_item_sk#2 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 - -(16) Window -Input [4]: [item_sk#12, d_date#5, _w0#13, ws_item_sk#2] -Arguments: [sum(_w0#13) windowspecdefinition(ws_item_sk#2, d_date#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#15], [ws_item_sk#2], [d_date#5 ASC NULLS FIRST] - -(17) Project [codegen id : 5] -Output [3]: [item_sk#12, d_date#5, cume_sales#15] -Input [5]: [item_sk#12, d_date#5, _w0#13, ws_item_sk#2, cume_sales#15] - -(18) Exchange -Input [3]: [item_sk#12, d_date#5, cume_sales#15] -Arguments: hashpartitioning(item_sk#12, d_date#5, 5), true, [id=#16] - -(19) Sort [codegen id : 6] -Input [3]: [item_sk#12, d_date#5, cume_sales#15] -Arguments: [item_sk#12 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 - -(20) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#17, ss_item_sk#18, ss_sales_price#19] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(21) ColumnarToRow [codegen id : 8] -Input [3]: [ss_sold_date_sk#17, ss_item_sk#18, ss_sales_price#19] - -(22) Filter [codegen id : 8] -Input [3]: [ss_sold_date_sk#17, ss_item_sk#18, ss_sales_price#19] -Condition : (isnotnull(ss_item_sk#18) AND isnotnull(ss_sold_date_sk#17)) - -(23) ReusedExchange [Reuses operator id: 8] -Output [2]: [d_date_sk#20, d_date#21] - -(24) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_sold_date_sk#17] -Right keys [1]: [d_date_sk#20] -Join condition: None - -(25) Project [codegen id : 8] -Output [3]: [ss_item_sk#18, ss_sales_price#19, d_date#21] -Input [5]: [ss_sold_date_sk#17, ss_item_sk#18, ss_sales_price#19, d_date_sk#20, d_date#21] - -(26) HashAggregate [codegen id : 8] -Input [3]: [ss_item_sk#18, ss_sales_price#19, d_date#21] -Keys [2]: [ss_item_sk#18, d_date#21] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#19))] -Aggregate Attributes [1]: [sum#22] -Results [3]: [ss_item_sk#18, d_date#21, sum#23] - -(27) Exchange -Input [3]: [ss_item_sk#18, d_date#21, sum#23] -Arguments: hashpartitioning(ss_item_sk#18, d_date#21, 5), true, [id=#24] - -(28) HashAggregate [codegen id : 9] -Input [3]: [ss_item_sk#18, d_date#21, sum#23] -Keys [2]: [ss_item_sk#18, d_date#21] -Functions [1]: [sum(UnscaledValue(ss_sales_price#19))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#19))#25] -Results [4]: [ss_item_sk#18 AS item_sk#26, d_date#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#19))#25,17,2) AS _w0#27, ss_item_sk#18] - -(29) Exchange -Input [4]: [item_sk#26, d_date#21, _w0#27, ss_item_sk#18] -Arguments: hashpartitioning(ss_item_sk#18, 5), true, [id=#28] - -(30) Sort [codegen id : 10] -Input [4]: [item_sk#26, d_date#21, _w0#27, ss_item_sk#18] -Arguments: [ss_item_sk#18 ASC NULLS FIRST, d_date#21 ASC NULLS FIRST], false, 0 - -(31) Window -Input [4]: [item_sk#26, d_date#21, _w0#27, ss_item_sk#18] -Arguments: [sum(_w0#27) windowspecdefinition(ss_item_sk#18, d_date#21 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#29], [ss_item_sk#18], [d_date#21 ASC NULLS FIRST] - -(32) Project [codegen id : 11] -Output [3]: [item_sk#26, d_date#21, cume_sales#29] -Input [5]: [item_sk#26, d_date#21, _w0#27, ss_item_sk#18, cume_sales#29] - -(33) Exchange -Input [3]: [item_sk#26, d_date#21, cume_sales#29] -Arguments: hashpartitioning(item_sk#26, d_date#21, 5), true, [id=#30] - -(34) Sort [codegen id : 12] -Input [3]: [item_sk#26, d_date#21, cume_sales#29] -Arguments: [item_sk#26 ASC NULLS FIRST, d_date#21 ASC NULLS FIRST], false, 0 - -(35) SortMergeJoin -Left keys [2]: [item_sk#12, d_date#5] -Right keys [2]: [item_sk#26, d_date#21] -Join condition: None - -(36) Project [codegen id : 13] -Output [4]: [CASE WHEN isnotnull(item_sk#12) THEN item_sk#12 ELSE item_sk#26 END AS item_sk#31, CASE WHEN isnotnull(d_date#5) THEN d_date#5 ELSE d_date#21 END AS d_date#32, cume_sales#15 AS web_sales#33, cume_sales#29 AS store_sales#34] -Input [6]: [item_sk#12, d_date#5, cume_sales#15, item_sk#26, d_date#21, cume_sales#29] - -(37) Exchange -Input [4]: [item_sk#31, d_date#32, web_sales#33, store_sales#34] -Arguments: hashpartitioning(item_sk#31, 5), true, [id=#35] - -(38) Sort [codegen id : 14] -Input [4]: [item_sk#31, d_date#32, web_sales#33, store_sales#34] -Arguments: [item_sk#31 ASC NULLS FIRST, d_date#32 ASC NULLS FIRST], false, 0 - -(39) Window -Input [4]: [item_sk#31, d_date#32, web_sales#33, store_sales#34] -Arguments: [max(web_sales#33) windowspecdefinition(item_sk#31, d_date#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#36, max(store_sales#34) windowspecdefinition(item_sk#31, d_date#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#37], [item_sk#31], [d_date#32 ASC NULLS FIRST] - -(40) Filter [codegen id : 15] -Input [6]: [item_sk#31, d_date#32, web_sales#33, store_sales#34, web_cumulative#36, store_cumulative#37] -Condition : ((isnotnull(web_cumulative#36) AND isnotnull(store_cumulative#37)) AND (web_cumulative#36 > store_cumulative#37)) - -(41) TakeOrderedAndProject -Input [6]: [item_sk#31, d_date#32, web_sales#33, store_sales#34, web_cumulative#36, store_cumulative#37] -Arguments: 100, [item_sk#31 ASC NULLS FIRST, d_date#32 ASC NULLS FIRST], [item_sk#31, d_date#32, web_sales#33, store_sales#34, web_cumulative#36, store_cumulative#37] - +TakeOrderedAndProject(limit=100, orderBy=[item_sk#1 ASC NULLS FIRST,d_date#2 ASC NULLS FIRST], output=[item_sk#1,d_date#2,web_sales#3,store_sales#4,web_cumulative#5,store_cumulative#6]) ++- *(15) Filter ((isnotnull(web_cumulative#5) && isnotnull(store_cumulative#6)) && (web_cumulative#5 > store_cumulative#6)) + +- Window [max(web_sales#3) windowspecdefinition(item_sk#1, d_date#2 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#5, max(store_sales#4) windowspecdefinition(item_sk#1, d_date#2 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#6], [item_sk#1], [d_date#2 ASC NULLS FIRST] + +- *(14) Sort [item_sk#1 ASC NULLS FIRST, d_date#2 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(item_sk#1, 5) + +- *(13) Project [CASE WHEN isnotnull(item_sk#7) THEN item_sk#7 ELSE item_sk#8 END AS item_sk#1, CASE WHEN isnotnull(d_date#9) THEN d_date#9 ELSE d_date#10 END AS d_date#2, cume_sales#11 AS web_sales#3, cume_sales#12 AS store_sales#4] + +- SortMergeJoin [item_sk#7, d_date#9], [item_sk#8, d_date#10], FullOuter + :- *(6) Sort [item_sk#7 ASC NULLS FIRST, d_date#9 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(item_sk#7, d_date#9, 5) + : +- *(5) Project [item_sk#7, d_date#9, cume_sales#11] + : +- Window [sum(_w0#13) windowspecdefinition(ws_item_sk#14, d_date#9 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#11], [ws_item_sk#14], [d_date#9 ASC NULLS FIRST] + : +- *(4) Sort [ws_item_sk#14 ASC NULLS FIRST, d_date#9 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(ws_item_sk#14, 5) + : +- *(3) HashAggregate(keys=[ws_item_sk#14, d_date#9], functions=[sum(UnscaledValue(ws_sales_price#15))]) + : +- Exchange hashpartitioning(ws_item_sk#14, d_date#9, 5) + : +- *(2) HashAggregate(keys=[ws_item_sk#14, d_date#9], functions=[partial_sum(UnscaledValue(ws_sales_price#15))]) + : +- *(2) Project [ws_item_sk#14, ws_sales_price#15, d_date#9] + : +- *(2) BroadcastHashJoin [ws_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight + : :- *(2) Project [ws_sold_date_sk#16, ws_item_sk#14, ws_sales_price#15] + : : +- *(2) Filter (isnotnull(ws_item_sk#14) && isnotnull(ws_sold_date_sk#16)) + : : +- *(2) FileScan parquet default.web_sales[ws_sold_date_sk#16,ws_item_sk#14,ws_sales_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [d_date_sk#17, d_date#9] + : +- *(1) Filter (((isnotnull(d_month_seq#18) && (d_month_seq#18 >= 1200)) && (d_month_seq#18 <= 1211)) && isnotnull(d_date_sk#17)) + : +- *(1) FileScan parquet default.date_dim[d_date_sk#17,d_date#9,d_month_seq#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + +- *(12) Sort [item_sk#8 ASC NULLS FIRST, d_date#10 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(item_sk#8, d_date#10, 5) + +- *(11) Project [item_sk#8, d_date#10, cume_sales#12] + +- Window [sum(_w0#19) windowspecdefinition(ss_item_sk#20, d_date#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#12], [ss_item_sk#20], [d_date#10 ASC NULLS FIRST] + +- *(10) Sort [ss_item_sk#20 ASC NULLS FIRST, d_date#10 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(ss_item_sk#20, 5) + +- *(9) HashAggregate(keys=[ss_item_sk#20, d_date#10], functions=[sum(UnscaledValue(ss_sales_price#21))]) + +- Exchange hashpartitioning(ss_item_sk#20, d_date#10, 5) + +- *(8) HashAggregate(keys=[ss_item_sk#20, d_date#10], functions=[partial_sum(UnscaledValue(ss_sales_price#21))]) + +- *(8) Project [ss_item_sk#20, ss_sales_price#21, d_date#10] + +- *(8) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#23], Inner, BuildRight + :- *(8) Project [ss_sold_date_sk#22, ss_item_sk#20, ss_sales_price#21] + : +- *(8) Filter (isnotnull(ss_item_sk#20) && isnotnull(ss_sold_date_sk#22)) + : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_item_sk#20,ss_sales_price#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#23, d_date#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/simplified.txt index 17f35514f..2b126d38a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/simplified.txt @@ -1,71 +1,67 @@ -TakeOrderedAndProject [item_sk,d_date,web_sales,store_sales,web_cumulative,store_cumulative] - WholeStageCodegen (15) - Filter [web_cumulative,store_cumulative] +TakeOrderedAndProject [d_date,item_sk,store_cumulative,store_sales,web_cumulative,web_sales] + WholeStageCodegen + Filter [store_cumulative,web_cumulative] InputAdapter - Window [web_sales,item_sk,d_date,store_sales] - WholeStageCodegen (14) - Sort [item_sk,d_date] + Window [d_date,item_sk,store_sales,web_sales] + WholeStageCodegen + Sort [d_date,item_sk] InputAdapter Exchange [item_sk] #1 - WholeStageCodegen (13) - Project [item_sk,item_sk,d_date,d_date,cume_sales,cume_sales] + WholeStageCodegen + Project [cume_sales,cume_sales,d_date,d_date,item_sk,item_sk] InputAdapter - SortMergeJoin [item_sk,d_date,item_sk,d_date] - WholeStageCodegen (6) - Sort [item_sk,d_date] + SortMergeJoin [d_date,d_date,item_sk,item_sk] + WholeStageCodegen + Sort [d_date,item_sk] InputAdapter - Exchange [item_sk,d_date] #2 - WholeStageCodegen (5) - Project [item_sk,d_date,cume_sales] + Exchange [d_date,item_sk] #2 + WholeStageCodegen + Project [cume_sales,d_date,item_sk] InputAdapter - Window [_w0,ws_item_sk,d_date] - WholeStageCodegen (4) - Sort [ws_item_sk,d_date] + Window [_w0,d_date,ws_item_sk] + WholeStageCodegen + Sort [d_date,ws_item_sk] InputAdapter Exchange [ws_item_sk] #3 - WholeStageCodegen (3) - HashAggregate [ws_item_sk,d_date,sum] [sum(UnscaledValue(ws_sales_price)),item_sk,_w0,sum] + WholeStageCodegen + HashAggregate [d_date,sum,sum(UnscaledValue(ws_sales_price)),ws_item_sk] [_w0,item_sk,sum,sum(UnscaledValue(ws_sales_price))] InputAdapter - Exchange [ws_item_sk,d_date] #4 - WholeStageCodegen (2) - HashAggregate [ws_item_sk,d_date,ws_sales_price] [sum,sum] - Project [ws_item_sk,ws_sales_price,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_sales_price] + Exchange [d_date,ws_item_sk] #4 + WholeStageCodegen + HashAggregate [d_date,sum,sum,ws_item_sk,ws_sales_price] [sum,sum] + Project [d_date,ws_item_sk,ws_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_item_sk,ws_sales_price,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_item_sk,ws_sales_price,ws_sold_date_sk] [ws_item_sk,ws_sales_price,ws_sold_date_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] - WholeStageCodegen (12) - Sort [item_sk,d_date] + WholeStageCodegen + Project [d_date,d_date_sk] + Filter [d_date_sk,d_month_seq] + Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] [d_date,d_date_sk,d_month_seq] + WholeStageCodegen + Sort [d_date,item_sk] InputAdapter - Exchange [item_sk,d_date] #6 - WholeStageCodegen (11) - Project [item_sk,d_date,cume_sales] + Exchange [d_date,item_sk] #6 + WholeStageCodegen + Project [cume_sales,d_date,item_sk] InputAdapter - Window [_w0,ss_item_sk,d_date] - WholeStageCodegen (10) - Sort [ss_item_sk,d_date] + Window [_w0,d_date,ss_item_sk] + WholeStageCodegen + Sort [d_date,ss_item_sk] InputAdapter Exchange [ss_item_sk] #7 - WholeStageCodegen (9) - HashAggregate [ss_item_sk,d_date,sum] [sum(UnscaledValue(ss_sales_price)),item_sk,_w0,sum] + WholeStageCodegen + HashAggregate [d_date,ss_item_sk,sum,sum(UnscaledValue(ss_sales_price))] [_w0,item_sk,sum,sum(UnscaledValue(ss_sales_price))] InputAdapter - Exchange [ss_item_sk,d_date] #8 - WholeStageCodegen (8) - HashAggregate [ss_item_sk,d_date,ss_sales_price] [sum,sum] - Project [ss_item_sk,ss_sales_price,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_sales_price] + Exchange [d_date,ss_item_sk] #8 + WholeStageCodegen + HashAggregate [d_date,ss_item_sk,ss_sales_price,sum,sum] [sum,sum] + Project [d_date,ss_item_sk,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_item_sk,ss_sales_price,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk,d_date] #5 + ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/explain.txt index 47235253f..7a9e19e6c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/explain.txt @@ -1,122 +1,20 @@ == Physical Plan == -TakeOrderedAndProject (21) -+- * HashAggregate (20) - +- Exchange (19) - +- * HashAggregate (18) - +- * Project (17) - +- * BroadcastHashJoin Inner BuildRight (16) - :- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Project (4) - : : +- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.date_dim (1) - : +- BroadcastExchange (8) - : +- * Filter (7) - : +- * ColumnarToRow (6) - : +- Scan parquet default.store_sales (5) - +- BroadcastExchange (15) - +- * Project (14) - +- * Filter (13) - +- * ColumnarToRow (12) - +- Scan parquet default.item (11) - - -(1) Scan parquet default.date_dim -Output [3]: [d_date_sk#1, d_year#2, d_moy#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [3]: [d_date_sk#1, d_year#2, d_moy#3] - -(3) Filter [codegen id : 3] -Input [3]: [d_date_sk#1, d_year#2, d_moy#3] -Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) - -(4) Project [codegen id : 3] -Output [2]: [d_date_sk#1, d_year#2] -Input [3]: [d_date_sk#1, d_year#2, d_moy#3] - -(5) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct - -(6) ColumnarToRow [codegen id : 1] -Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] - -(7) Filter [codegen id : 1] -Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] -Condition : (isnotnull(ss_sold_date_sk#4) AND isnotnull(ss_item_sk#5)) - -(8) BroadcastExchange -Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] - -(9) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#4] -Join condition: None - -(10) Project [codegen id : 3] -Output [3]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6] -Input [5]: [d_date_sk#1, d_year#2, ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] - -(11) Scan parquet default.item -Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] - -(13) Filter [codegen id : 2] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] -Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 1)) AND isnotnull(i_item_sk#8)) - -(14) Project [codegen id : 2] -Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] - -(15) BroadcastExchange -Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(16) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_item_sk#5] -Right keys [1]: [i_item_sk#8] -Join condition: None - -(17) Project [codegen id : 3] -Output [4]: [d_year#2, ss_ext_sales_price#6, i_brand_id#9, i_brand#10] -Input [6]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6, i_item_sk#8, i_brand_id#9, i_brand#10] - -(18) HashAggregate [codegen id : 3] -Input [4]: [d_year#2, ss_ext_sales_price#6, i_brand_id#9, i_brand#10] -Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum#13] -Results [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] - -(19) Exchange -Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] -Arguments: hashpartitioning(d_year#2, i_brand#10, i_brand_id#9, 5), true, [id=#15] - -(20) HashAggregate [codegen id : 4] -Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] -Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#16] -Results [4]: [d_year#2, i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#16,17,2) AS ext_price#19] - -(21) TakeOrderedAndProject -Input [4]: [d_year#2, brand_id#17, brand#18, ext_price#19] -Arguments: 100, [d_year#2 ASC NULLS FIRST, ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#2, brand_id#17, brand#18, ext_price#19] - +TakeOrderedAndProject(limit=100, orderBy=[d_year#1 ASC NULLS FIRST,ext_price#2 DESC NULLS LAST,brand_id#3 ASC NULLS FIRST], output=[d_year#1,brand_id#3,brand#4,ext_price#2]) ++- *(4) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[sum(UnscaledValue(ss_ext_sales_price#7))]) + +- Exchange hashpartitioning(d_year#1, i_brand#5, i_brand_id#6, 5) + +- *(3) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#7))]) + +- *(3) Project [d_year#1, ss_ext_sales_price#7, i_brand_id#6, i_brand#5] + +- *(3) BroadcastHashJoin [ss_item_sk#8], [i_item_sk#9], Inner, BuildRight + :- *(3) Project [d_year#1, ss_item_sk#8, ss_ext_sales_price#7] + : +- *(3) BroadcastHashJoin [d_date_sk#10], [ss_sold_date_sk#11], Inner, BuildRight + : :- *(3) Project [d_date_sk#10, d_year#1] + : : +- *(3) Filter ((((isnotnull(d_moy#12) && isnotnull(d_year#1)) && (d_moy#12 = 11)) && (d_year#1 = 2000)) && isnotnull(d_date_sk#10)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#10,d_year#1,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [ss_sold_date_sk#11, ss_item_sk#8, ss_ext_sales_price#7] + : +- *(1) Filter (isnotnull(ss_sold_date_sk#11) && isnotnull(ss_item_sk#8)) + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#8,ss_ext_sales_price#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [i_item_sk#9, i_brand_id#6, i_brand#5] + +- *(2) Filter ((isnotnull(i_manager_id#13) && (i_manager_id#13 = 1)) && isnotnull(i_item_sk#9)) + +- *(2) FileScan parquet default.item[i_item_sk#9,i_brand_id#6,i_brand#5,i_manager_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/simplified.txt index ba48ad5b6..583e5acae 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/simplified.txt @@ -1,31 +1,26 @@ -TakeOrderedAndProject [d_year,ext_price,brand_id,brand] - WholeStageCodegen (4) - HashAggregate [d_year,i_brand,i_brand_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,ext_price,sum] +TakeOrderedAndProject [brand,brand_id,d_year,ext_price] + WholeStageCodegen + HashAggregate [d_year,i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter Exchange [d_year,i_brand,i_brand_id] #1 - WholeStageCodegen (3) - HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] - Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [d_year,ss_item_sk,ss_ext_sales_price] + WholeStageCodegen + HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [d_year,i_brand,i_brand_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_year,ss_ext_sales_price,ss_item_sk] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] Project [d_date_sk,d_year] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [ss_sold_date_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + WholeStageCodegen + Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Project [i_item_sk,i_brand_id,i_brand] - Filter [i_manager_id,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] + WholeStageCodegen + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] [i_brand,i_brand_id,i_item_sk,i_manager_id] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/explain.txt index 249f74e73..f31d6ec98 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/explain.txt @@ -1,180 +1,31 @@ == Physical Plan == -TakeOrderedAndProject (32) -+- * Project (31) - +- * Filter (30) - +- Window (29) - +- * Sort (28) - +- Exchange (27) - +- * HashAggregate (26) - +- Exchange (25) - +- * HashAggregate (24) - +- * Project (23) - +- * BroadcastHashJoin Inner BuildRight (22) - :- * Project (17) - : +- * BroadcastHashJoin Inner BuildRight (16) - : :- * Project (10) - : : +- * BroadcastHashJoin Inner BuildRight (9) - : : :- * Project (4) - : : : +- * Filter (3) - : : : +- * ColumnarToRow (2) - : : : +- Scan parquet default.item (1) - : : +- BroadcastExchange (8) - : : +- * Filter (7) - : : +- * ColumnarToRow (6) - : : +- Scan parquet default.store_sales (5) - : +- BroadcastExchange (15) - : +- * Project (14) - : +- * Filter (13) - : +- * ColumnarToRow (12) - : +- Scan parquet default.date_dim (11) - +- BroadcastExchange (21) - +- * Filter (20) - +- * ColumnarToRow (19) - +- Scan parquet default.store (18) - - -(1) Scan parquet default.item -Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,reference,self-help])),In(i_brand, [scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8])),And(And(In(i_category, [Women,Music,Men]),In(i_class, [accessories,classical,fragrances,pants])),In(i_brand, [amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9]))), IsNotNull(i_item_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] - -(3) Filter [codegen id : 4] -Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] -Condition : ((((i_category#4 IN (Books,Children,Electronics) AND i_class#3 IN (personal,portable,reference,self-help)) AND i_brand#2 IN (scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8)) OR ((i_category#4 IN (Women,Music,Men) AND i_class#3 IN (accessories,classical,fragrances,pants)) AND i_brand#2 IN (amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9))) AND isnotnull(i_item_sk#1)) - -(4) Project [codegen id : 4] -Output [2]: [i_item_sk#1, i_manufact_id#5] -Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] - -(5) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(6) ColumnarToRow [codegen id : 1] -Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] - -(7) Filter [codegen id : 1] -Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] -Condition : ((isnotnull(ss_item_sk#11) AND isnotnull(ss_sold_date_sk#10)) AND isnotnull(ss_store_sk#12)) - -(8) BroadcastExchange -Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#14] - -(9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_item_sk#1] -Right keys [1]: [ss_item_sk#11] -Join condition: None - -(10) Project [codegen id : 4] -Output [4]: [i_manufact_id#5, ss_sold_date_sk#10, ss_store_sk#12, ss_sales_price#13] -Input [6]: [i_item_sk#1, i_manufact_id#5, ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] - -(11) Scan parquet default.date_dim -Output [3]: [d_date_sk#15, d_month_seq#16, d_qoy#17] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [3]: [d_date_sk#15, d_month_seq#16, d_qoy#17] - -(13) Filter [codegen id : 2] -Input [3]: [d_date_sk#15, d_month_seq#16, d_qoy#17] -Condition : (d_month_seq#16 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) AND isnotnull(d_date_sk#15)) - -(14) Project [codegen id : 2] -Output [2]: [d_date_sk#15, d_qoy#17] -Input [3]: [d_date_sk#15, d_month_seq#16, d_qoy#17] - -(15) BroadcastExchange -Input [2]: [d_date_sk#15, d_qoy#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] - -(16) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#10] -Right keys [1]: [d_date_sk#15] -Join condition: None - -(17) Project [codegen id : 4] -Output [4]: [i_manufact_id#5, ss_store_sk#12, ss_sales_price#13, d_qoy#17] -Input [6]: [i_manufact_id#5, ss_sold_date_sk#10, ss_store_sk#12, ss_sales_price#13, d_date_sk#15, d_qoy#17] - -(18) Scan parquet default.store -Output [1]: [s_store_sk#19] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 3] -Input [1]: [s_store_sk#19] - -(20) Filter [codegen id : 3] -Input [1]: [s_store_sk#19] -Condition : isnotnull(s_store_sk#19) - -(21) BroadcastExchange -Input [1]: [s_store_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] - -(22) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#12] -Right keys [1]: [s_store_sk#19] -Join condition: None - -(23) Project [codegen id : 4] -Output [3]: [i_manufact_id#5, ss_sales_price#13, d_qoy#17] -Input [5]: [i_manufact_id#5, ss_store_sk#12, ss_sales_price#13, d_qoy#17, s_store_sk#19] - -(24) HashAggregate [codegen id : 4] -Input [3]: [i_manufact_id#5, ss_sales_price#13, d_qoy#17] -Keys [2]: [i_manufact_id#5, d_qoy#17] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#13))] -Aggregate Attributes [1]: [sum#21] -Results [3]: [i_manufact_id#5, d_qoy#17, sum#22] - -(25) Exchange -Input [3]: [i_manufact_id#5, d_qoy#17, sum#22] -Arguments: hashpartitioning(i_manufact_id#5, d_qoy#17, 5), true, [id=#23] - -(26) HashAggregate [codegen id : 5] -Input [3]: [i_manufact_id#5, d_qoy#17, sum#22] -Keys [2]: [i_manufact_id#5, d_qoy#17] -Functions [1]: [sum(UnscaledValue(ss_sales_price#13))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#13))#24] -Results [3]: [i_manufact_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#24,17,2) AS sum_sales#25, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#24,17,2) AS _w0#26] - -(27) Exchange -Input [3]: [i_manufact_id#5, sum_sales#25, _w0#26] -Arguments: hashpartitioning(i_manufact_id#5, 5), true, [id=#27] - -(28) Sort [codegen id : 6] -Input [3]: [i_manufact_id#5, sum_sales#25, _w0#26] -Arguments: [i_manufact_id#5 ASC NULLS FIRST], false, 0 - -(29) Window -Input [3]: [i_manufact_id#5, sum_sales#25, _w0#26] -Arguments: [avg(_w0#26) windowspecdefinition(i_manufact_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#28], [i_manufact_id#5] - -(30) Filter [codegen id : 7] -Input [4]: [i_manufact_id#5, sum_sales#25, _w0#26, avg_quarterly_sales#28] -Condition : (CASE WHEN (avg_quarterly_sales#28 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#25 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#28 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_quarterly_sales#28 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000) - -(31) Project [codegen id : 7] -Output [3]: [i_manufact_id#5, sum_sales#25, avg_quarterly_sales#28] -Input [4]: [i_manufact_id#5, sum_sales#25, _w0#26, avg_quarterly_sales#28] - -(32) TakeOrderedAndProject -Input [3]: [i_manufact_id#5, sum_sales#25, avg_quarterly_sales#28] -Arguments: 100, [avg_quarterly_sales#28 ASC NULLS FIRST, sum_sales#25 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#25, avg_quarterly_sales#28] - +TakeOrderedAndProject(limit=100, orderBy=[avg_quarterly_sales#1 ASC NULLS FIRST,sum_sales#2 ASC NULLS FIRST,i_manufact_id#3 ASC NULLS FIRST], output=[i_manufact_id#3,sum_sales#2,avg_quarterly_sales#1]) ++- *(7) Project [i_manufact_id#3, sum_sales#2, avg_quarterly_sales#1] + +- *(7) Filter (CASE WHEN (avg_quarterly_sales#1 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#2 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#1 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#1 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000) + +- Window [avg(_w0#4) windowspecdefinition(i_manufact_id#3, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#1], [i_manufact_id#3] + +- *(6) Sort [i_manufact_id#3 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(i_manufact_id#3, 5) + +- *(5) HashAggregate(keys=[i_manufact_id#3, d_qoy#5], functions=[sum(UnscaledValue(ss_sales_price#6))]) + +- Exchange hashpartitioning(i_manufact_id#3, d_qoy#5, 5) + +- *(4) HashAggregate(keys=[i_manufact_id#3, d_qoy#5], functions=[partial_sum(UnscaledValue(ss_sales_price#6))]) + +- *(4) Project [i_manufact_id#3, ss_sales_price#6, d_qoy#5] + +- *(4) BroadcastHashJoin [ss_store_sk#7], [s_store_sk#8], Inner, BuildRight + :- *(4) Project [i_manufact_id#3, ss_store_sk#7, ss_sales_price#6, d_qoy#5] + : +- *(4) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight + : :- *(4) Project [i_manufact_id#3, ss_sold_date_sk#9, ss_store_sk#7, ss_sales_price#6] + : : +- *(4) BroadcastHashJoin [i_item_sk#11], [ss_item_sk#12], Inner, BuildRight + : : :- *(4) Project [i_item_sk#11, i_manufact_id#3] + : : : +- *(4) Filter ((((i_category#13 IN (Books,Children,Electronics) && i_class#14 IN (personal,portable,reference,self-help)) && i_brand#15 IN (scholaramalgamalg #16,scholaramalgamalg #17,exportiunivamalg #18,scholaramalgamalg #18)) || ((i_category#13 IN (Women,Music,Men) && i_class#14 IN (accessories,classical,fragrances,pants)) && i_brand#15 IN (amalgimporto #19,edu packscholar #19,exportiimporto #19,importoamalg #19))) && isnotnull(i_item_sk#11)) + : : : +- *(4) FileScan parquet default.item[i_item_sk#11,i_brand#15,i_class#14,i_category#13,i_manufact_id#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,reference..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : +- *(1) Project [ss_sold_date_sk#9, ss_item_sk#12, ss_store_sk#7, ss_sales_price#6] + : : +- *(1) Filter ((isnotnull(ss_item_sk#12) && isnotnull(ss_sold_date_sk#9)) && isnotnull(ss_store_sk#7)) + : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#12,ss_store_sk#7,ss_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#10, d_qoy#5] + : +- *(2) Filter (d_month_seq#20 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) && isnotnull(d_date_sk#10)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#10,d_month_seq#20,d_qoy#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [s_store_sk#8] + +- *(3) Filter isnotnull(s_store_sk#8) + +- *(3) FileScan parquet default.store[s_store_sk#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/simplified.txt index cd55b60d2..ace96279f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/simplified.txt @@ -1,49 +1,43 @@ -TakeOrderedAndProject [avg_quarterly_sales,sum_sales,i_manufact_id] - WholeStageCodegen (7) - Project [i_manufact_id,sum_sales,avg_quarterly_sales] +TakeOrderedAndProject [avg_quarterly_sales,i_manufact_id,sum_sales] + WholeStageCodegen + Project [avg_quarterly_sales,i_manufact_id,sum_sales] Filter [avg_quarterly_sales,sum_sales] InputAdapter Window [_w0,i_manufact_id] - WholeStageCodegen (6) + WholeStageCodegen Sort [i_manufact_id] InputAdapter Exchange [i_manufact_id] #1 - WholeStageCodegen (5) - HashAggregate [i_manufact_id,d_qoy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + WholeStageCodegen + HashAggregate [d_qoy,i_manufact_id,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] InputAdapter - Exchange [i_manufact_id,d_qoy] #2 - WholeStageCodegen (4) - HashAggregate [i_manufact_id,d_qoy,ss_sales_price] [sum,sum] - Project [i_manufact_id,ss_sales_price,d_qoy] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [i_manufact_id,ss_store_sk,ss_sales_price,d_qoy] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [i_manufact_id,ss_sold_date_sk,ss_store_sk,ss_sales_price] + Exchange [d_qoy,i_manufact_id] #2 + WholeStageCodegen + HashAggregate [d_qoy,i_manufact_id,ss_sales_price,sum,sum] [sum,sum] + Project [d_qoy,i_manufact_id,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_qoy,i_manufact_id,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_manufact_id,ss_sales_price,ss_sold_date_sk,ss_store_sk] BroadcastHashJoin [i_item_sk,ss_item_sk] Project [i_item_sk,i_manufact_id] - Filter [i_category,i_class,i_brand,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand,i_class,i_category,i_manufact_id] + Filter [i_brand,i_category,i_class,i_item_sk] + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_manufact_id] [i_brand,i_category,i_class,i_item_sk,i_manufact_id] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + WholeStageCodegen + Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date_sk,d_qoy] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq,d_qoy] + Filter [d_date_sk,d_month_seq] + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_qoy] [d_date_sk,d_month_seq,d_qoy] InputAdapter BroadcastExchange #5 - WholeStageCodegen (3) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk] + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk] [s_store_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt index 2e93e2605..b7f76879c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt @@ -1,459 +1,88 @@ == Physical Plan == -TakeOrderedAndProject (64) -+- * HashAggregate (63) - +- Exchange (62) - +- * HashAggregate (61) - +- * HashAggregate (60) - +- Exchange (59) - +- * HashAggregate (58) - +- * Project (57) - +- * BroadcastHashJoin Inner BuildRight (56) - :- * Project (50) - : +- * BroadcastHashJoin Inner BuildRight (49) - : :- * Project (44) - : : +- * BroadcastHashJoin Inner BuildRight (43) - : : :- * Project (38) - : : : +- * BroadcastHashJoin Inner BuildRight (37) - : : : :- * HashAggregate (32) - : : : : +- Exchange (31) - : : : : +- * HashAggregate (30) - : : : : +- * Project (29) - : : : : +- * BroadcastHashJoin Inner BuildRight (28) - : : : : :- * Project (23) - : : : : : +- * BroadcastHashJoin Inner BuildRight (22) - : : : : : :- * Project (16) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : : : :- Union (9) - : : : : : : : :- * Project (4) - : : : : : : : : +- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : : : +- * Project (8) - : : : : : : : +- * Filter (7) - : : : : : : : +- * ColumnarToRow (6) - : : : : : : : +- Scan parquet default.web_sales (5) - : : : : : : +- BroadcastExchange (14) - : : : : : : +- * Project (13) - : : : : : : +- * Filter (12) - : : : : : : +- * ColumnarToRow (11) - : : : : : : +- Scan parquet default.item (10) - : : : : : +- BroadcastExchange (21) - : : : : : +- * Project (20) - : : : : : +- * Filter (19) - : : : : : +- * ColumnarToRow (18) - : : : : : +- Scan parquet default.date_dim (17) - : : : : +- BroadcastExchange (27) - : : : : +- * Filter (26) - : : : : +- * ColumnarToRow (25) - : : : : +- Scan parquet default.customer (24) - : : : +- BroadcastExchange (36) - : : : +- * Filter (35) - : : : +- * ColumnarToRow (34) - : : : +- Scan parquet default.store_sales (33) - : : +- BroadcastExchange (42) - : : +- * Filter (41) - : : +- * ColumnarToRow (40) - : : +- Scan parquet default.customer_address (39) - : +- BroadcastExchange (48) - : +- * Filter (47) - : +- * ColumnarToRow (46) - : +- Scan parquet default.store (45) - +- BroadcastExchange (55) - +- * Project (54) - +- * Filter (53) - +- * ColumnarToRow (52) - +- Scan parquet default.date_dim (51) - - -(1) Scan parquet default.catalog_sales -Output [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 1] -Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] - -(3) Filter [codegen id : 1] -Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] -Condition : ((isnotnull(cs_item_sk#3) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_bill_customer_sk#2)) - -(4) Project [codegen id : 1] -Output [3]: [cs_sold_date_sk#1 AS sold_date_sk#4, cs_bill_customer_sk#2 AS customer_sk#5, cs_item_sk#3 AS item_sk#6] -Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] - -(5) Scan parquet default.web_sales -Output [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)] -ReadSchema: struct - -(6) ColumnarToRow [codegen id : 2] -Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] - -(7) Filter [codegen id : 2] -Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] -Condition : ((isnotnull(ws_item_sk#8) AND isnotnull(ws_sold_date_sk#7)) AND isnotnull(ws_bill_customer_sk#9)) - -(8) Project [codegen id : 2] -Output [3]: [ws_sold_date_sk#7 AS sold_date_sk#10, ws_bill_customer_sk#9 AS customer_sk#11, ws_item_sk#8 AS item_sk#12] -Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] - -(9) Union - -(10) Scan parquet default.item -Output [3]: [i_item_sk#13, i_class#14, i_category#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women), EqualTo(i_class,maternity), IsNotNull(i_item_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 3] -Input [3]: [i_item_sk#13, i_class#14, i_category#15] - -(12) Filter [codegen id : 3] -Input [3]: [i_item_sk#13, i_class#14, i_category#15] -Condition : ((((isnotnull(i_category#15) AND isnotnull(i_class#14)) AND (i_category#15 = Women)) AND (i_class#14 = maternity)) AND isnotnull(i_item_sk#13)) - -(13) Project [codegen id : 3] -Output [1]: [i_item_sk#13] -Input [3]: [i_item_sk#13, i_class#14, i_category#15] - -(14) BroadcastExchange -Input [1]: [i_item_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] - -(15) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [item_sk#6] -Right keys [1]: [i_item_sk#13] -Join condition: None - -(16) Project [codegen id : 6] -Output [2]: [sold_date_sk#4, customer_sk#5] -Input [4]: [sold_date_sk#4, customer_sk#5, item_sk#6, i_item_sk#13] - -(17) Scan parquet default.date_dim -Output [3]: [d_date_sk#17, d_year#18, d_moy#19] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)] -ReadSchema: struct - -(18) ColumnarToRow [codegen id : 4] -Input [3]: [d_date_sk#17, d_year#18, d_moy#19] - -(19) Filter [codegen id : 4] -Input [3]: [d_date_sk#17, d_year#18, d_moy#19] -Condition : ((((isnotnull(d_moy#19) AND isnotnull(d_year#18)) AND (d_moy#19 = 12)) AND (d_year#18 = 1998)) AND isnotnull(d_date_sk#17)) - -(20) Project [codegen id : 4] -Output [1]: [d_date_sk#17] -Input [3]: [d_date_sk#17, d_year#18, d_moy#19] - -(21) BroadcastExchange -Input [1]: [d_date_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] - -(22) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [sold_date_sk#4] -Right keys [1]: [d_date_sk#17] -Join condition: None - -(23) Project [codegen id : 6] -Output [1]: [customer_sk#5] -Input [3]: [sold_date_sk#4, customer_sk#5, d_date_sk#17] - -(24) Scan parquet default.customer -Output [2]: [c_customer_sk#21, c_current_addr_sk#22] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct - -(25) ColumnarToRow [codegen id : 5] -Input [2]: [c_customer_sk#21, c_current_addr_sk#22] - -(26) Filter [codegen id : 5] -Input [2]: [c_customer_sk#21, c_current_addr_sk#22] -Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_current_addr_sk#22)) - -(27) BroadcastExchange -Input [2]: [c_customer_sk#21, c_current_addr_sk#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] - -(28) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [customer_sk#5] -Right keys [1]: [c_customer_sk#21] -Join condition: None - -(29) Project [codegen id : 6] -Output [2]: [c_customer_sk#21, c_current_addr_sk#22] -Input [3]: [customer_sk#5, c_customer_sk#21, c_current_addr_sk#22] - -(30) HashAggregate [codegen id : 6] -Input [2]: [c_customer_sk#21, c_current_addr_sk#22] -Keys [2]: [c_customer_sk#21, c_current_addr_sk#22] -Functions: [] -Aggregate Attributes: [] -Results [2]: [c_customer_sk#21, c_current_addr_sk#22] - -(31) Exchange -Input [2]: [c_customer_sk#21, c_current_addr_sk#22] -Arguments: hashpartitioning(c_customer_sk#21, c_current_addr_sk#22, 5), true, [id=#24] - -(32) HashAggregate [codegen id : 11] -Input [2]: [c_customer_sk#21, c_current_addr_sk#22] -Keys [2]: [c_customer_sk#21, c_current_addr_sk#22] -Functions: [] -Aggregate Attributes: [] -Results [2]: [c_customer_sk#21, c_current_addr_sk#22] - -(33) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(34) ColumnarToRow [codegen id : 7] -Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] - -(35) Filter [codegen id : 7] -Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] -Condition : (isnotnull(ss_customer_sk#26) AND isnotnull(ss_sold_date_sk#25)) - -(36) BroadcastExchange -Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#28] - -(37) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [c_customer_sk#21] -Right keys [1]: [ss_customer_sk#26] -Join condition: None - -(38) Project [codegen id : 11] -Output [4]: [c_customer_sk#21, c_current_addr_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27] -Input [5]: [c_customer_sk#21, c_current_addr_sk#22, ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] - -(39) Scan parquet default.customer_address -Output [3]: [ca_address_sk#29, ca_county#30, ca_state#31] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county), IsNotNull(ca_state)] -ReadSchema: struct - -(40) ColumnarToRow [codegen id : 8] -Input [3]: [ca_address_sk#29, ca_county#30, ca_state#31] - -(41) Filter [codegen id : 8] -Input [3]: [ca_address_sk#29, ca_county#30, ca_state#31] -Condition : ((isnotnull(ca_address_sk#29) AND isnotnull(ca_county#30)) AND isnotnull(ca_state#31)) - -(42) BroadcastExchange -Input [3]: [ca_address_sk#29, ca_county#30, ca_state#31] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#32] - -(43) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [c_current_addr_sk#22] -Right keys [1]: [ca_address_sk#29] -Join condition: None - -(44) Project [codegen id : 11] -Output [5]: [c_customer_sk#21, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_county#30, ca_state#31] -Input [7]: [c_customer_sk#21, c_current_addr_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_address_sk#29, ca_county#30, ca_state#31] - -(45) Scan parquet default.store -Output [2]: [s_county#33, s_state#34] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_county), IsNotNull(s_state)] -ReadSchema: struct - -(46) ColumnarToRow [codegen id : 9] -Input [2]: [s_county#33, s_state#34] - -(47) Filter [codegen id : 9] -Input [2]: [s_county#33, s_state#34] -Condition : (isnotnull(s_county#33) AND isnotnull(s_state#34)) - -(48) BroadcastExchange -Input [2]: [s_county#33, s_state#34] -Arguments: HashedRelationBroadcastMode(List(input[0, string, false], input[1, string, false]),false), [id=#35] - -(49) BroadcastHashJoin [codegen id : 11] -Left keys [2]: [ca_county#30, ca_state#31] -Right keys [2]: [s_county#33, s_state#34] -Join condition: None - -(50) Project [codegen id : 11] -Output [3]: [c_customer_sk#21, ss_sold_date_sk#25, ss_ext_sales_price#27] -Input [7]: [c_customer_sk#21, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_county#30, ca_state#31, s_county#33, s_state#34] - -(51) Scan parquet default.date_dim -Output [2]: [d_date_sk#17, d_month_seq#36] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] -ReadSchema: struct - -(52) ColumnarToRow [codegen id : 10] -Input [2]: [d_date_sk#17, d_month_seq#36] - -(53) Filter [codegen id : 10] -Input [2]: [d_date_sk#17, d_month_seq#36] -Condition : (((isnotnull(d_month_seq#36) AND (d_month_seq#36 >= Subquery scalar-subquery#37, [id=#38])) AND (d_month_seq#36 <= Subquery scalar-subquery#39, [id=#40])) AND isnotnull(d_date_sk#17)) - -(54) Project [codegen id : 10] -Output [1]: [d_date_sk#17] -Input [2]: [d_date_sk#17, d_month_seq#36] - -(55) BroadcastExchange -Input [1]: [d_date_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#41] - -(56) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_sold_date_sk#25] -Right keys [1]: [d_date_sk#17] -Join condition: None - -(57) Project [codegen id : 11] -Output [2]: [c_customer_sk#21, ss_ext_sales_price#27] -Input [4]: [c_customer_sk#21, ss_sold_date_sk#25, ss_ext_sales_price#27, d_date_sk#17] - -(58) HashAggregate [codegen id : 11] -Input [2]: [c_customer_sk#21, ss_ext_sales_price#27] -Keys [1]: [c_customer_sk#21] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#27))] -Aggregate Attributes [1]: [sum#42] -Results [2]: [c_customer_sk#21, sum#43] - -(59) Exchange -Input [2]: [c_customer_sk#21, sum#43] -Arguments: hashpartitioning(c_customer_sk#21, 5), true, [id=#44] - -(60) HashAggregate [codegen id : 12] -Input [2]: [c_customer_sk#21, sum#43] -Keys [1]: [c_customer_sk#21] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#27))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#27))#45] -Results [1]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#27))#45,17,2)) / 50.00), DecimalType(21,6), true) as int) AS segment#46] - -(61) HashAggregate [codegen id : 12] -Input [1]: [segment#46] -Keys [1]: [segment#46] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#47] -Results [2]: [segment#46, count#48] - -(62) Exchange -Input [2]: [segment#46, count#48] -Arguments: hashpartitioning(segment#46, 5), true, [id=#49] - -(63) HashAggregate [codegen id : 13] -Input [2]: [segment#46, count#48] -Keys [1]: [segment#46] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#50] -Results [3]: [segment#46, count(1)#50 AS num_customers#51, (segment#46 * 50) AS segment_base#52] - -(64) TakeOrderedAndProject -Input [3]: [segment#46, num_customers#51, segment_base#52] -Arguments: 100, [segment#46 ASC NULLS FIRST, num_customers#51 ASC NULLS FIRST], [segment#46, num_customers#51, segment_base#52] - -===== Subqueries ===== - -Subquery:1 Hosting operator id = 53 Hosting Expression = Subquery scalar-subquery#37, [id=#38] -* HashAggregate (71) -+- Exchange (70) - +- * HashAggregate (69) - +- * Project (68) - +- * Filter (67) - +- * ColumnarToRow (66) - +- Scan parquet default.date_dim (65) - - -(65) Scan parquet default.date_dim -Output [3]: [d_month_seq#36, d_year#18, d_moy#19] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] -ReadSchema: struct - -(66) ColumnarToRow [codegen id : 1] -Input [3]: [d_month_seq#36, d_year#18, d_moy#19] - -(67) Filter [codegen id : 1] -Input [3]: [d_month_seq#36, d_year#18, d_moy#19] -Condition : (((isnotnull(d_year#18) AND isnotnull(d_moy#19)) AND (d_year#18 = 1998)) AND (d_moy#19 = 12)) - -(68) Project [codegen id : 1] -Output [1]: [(d_month_seq#36 + 1) AS (d_month_seq + 1)#53] -Input [3]: [d_month_seq#36, d_year#18, d_moy#19] - -(69) HashAggregate [codegen id : 1] -Input [1]: [(d_month_seq + 1)#53] -Keys [1]: [(d_month_seq + 1)#53] -Functions: [] -Aggregate Attributes: [] -Results [1]: [(d_month_seq + 1)#53] - -(70) Exchange -Input [1]: [(d_month_seq + 1)#53] -Arguments: hashpartitioning((d_month_seq + 1)#53, 5), true, [id=#54] - -(71) HashAggregate [codegen id : 2] -Input [1]: [(d_month_seq + 1)#53] -Keys [1]: [(d_month_seq + 1)#53] -Functions: [] -Aggregate Attributes: [] -Results [1]: [(d_month_seq + 1)#53] - -Subquery:2 Hosting operator id = 53 Hosting Expression = Subquery scalar-subquery#39, [id=#40] -* HashAggregate (78) -+- Exchange (77) - +- * HashAggregate (76) - +- * Project (75) - +- * Filter (74) - +- * ColumnarToRow (73) - +- Scan parquet default.date_dim (72) - - -(72) Scan parquet default.date_dim -Output [3]: [d_month_seq#36, d_year#18, d_moy#19] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] -ReadSchema: struct - -(73) ColumnarToRow [codegen id : 1] -Input [3]: [d_month_seq#36, d_year#18, d_moy#19] - -(74) Filter [codegen id : 1] -Input [3]: [d_month_seq#36, d_year#18, d_moy#19] -Condition : (((isnotnull(d_year#18) AND isnotnull(d_moy#19)) AND (d_year#18 = 1998)) AND (d_moy#19 = 12)) - -(75) Project [codegen id : 1] -Output [1]: [(d_month_seq#36 + 3) AS (d_month_seq + 3)#55] -Input [3]: [d_month_seq#36, d_year#18, d_moy#19] - -(76) HashAggregate [codegen id : 1] -Input [1]: [(d_month_seq + 3)#55] -Keys [1]: [(d_month_seq + 3)#55] -Functions: [] -Aggregate Attributes: [] -Results [1]: [(d_month_seq + 3)#55] - -(77) Exchange -Input [1]: [(d_month_seq + 3)#55] -Arguments: hashpartitioning((d_month_seq + 3)#55, 5), true, [id=#56] - -(78) HashAggregate [codegen id : 2] -Input [1]: [(d_month_seq + 3)#55] -Keys [1]: [(d_month_seq + 3)#55] -Functions: [] -Aggregate Attributes: [] -Results [1]: [(d_month_seq + 3)#55] - - +TakeOrderedAndProject(limit=100, orderBy=[segment#1 ASC NULLS FIRST,num_customers#2 ASC NULLS FIRST], output=[segment#1,num_customers#2,segment_base#3]) ++- *(13) HashAggregate(keys=[segment#1], functions=[count(1)]) + +- Exchange hashpartitioning(segment#1, 5) + +- *(12) HashAggregate(keys=[segment#1], functions=[partial_count(1)]) + +- *(12) HashAggregate(keys=[c_customer_sk#4], functions=[sum(UnscaledValue(ss_ext_sales_price#5))]) + +- Exchange hashpartitioning(c_customer_sk#4, 5) + +- *(11) HashAggregate(keys=[c_customer_sk#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#5))]) + +- *(11) Project [c_customer_sk#4, ss_ext_sales_price#5] + +- *(11) BroadcastHashJoin [ss_sold_date_sk#6], [d_date_sk#7], Inner, BuildRight + :- *(11) Project [c_customer_sk#4, ss_sold_date_sk#6, ss_ext_sales_price#5] + : +- *(11) BroadcastHashJoin [ca_county#8, ca_state#9], [s_county#10, s_state#11], Inner, BuildRight + : :- *(11) Project [c_customer_sk#4, ss_sold_date_sk#6, ss_ext_sales_price#5, ca_county#8, ca_state#9] + : : +- *(11) BroadcastHashJoin [c_current_addr_sk#12], [ca_address_sk#13], Inner, BuildRight + : : :- *(11) Project [c_customer_sk#4, c_current_addr_sk#12, ss_sold_date_sk#6, ss_ext_sales_price#5] + : : : +- *(11) BroadcastHashJoin [c_customer_sk#4], [ss_customer_sk#14], Inner, BuildRight + : : : :- *(11) HashAggregate(keys=[c_customer_sk#4, c_current_addr_sk#12], functions=[]) + : : : : +- Exchange hashpartitioning(c_customer_sk#4, c_current_addr_sk#12, 5) + : : : : +- *(6) HashAggregate(keys=[c_customer_sk#4, c_current_addr_sk#12], functions=[]) + : : : : +- *(6) Project [c_customer_sk#4, c_current_addr_sk#12] + : : : : +- *(6) BroadcastHashJoin [customer_sk#15], [c_customer_sk#4], Inner, BuildRight + : : : : :- *(6) Project [customer_sk#15] + : : : : : +- *(6) BroadcastHashJoin [sold_date_sk#16], [d_date_sk#7], Inner, BuildRight + : : : : : :- *(6) Project [sold_date_sk#16, customer_sk#15] + : : : : : : +- *(6) BroadcastHashJoin [item_sk#17], [i_item_sk#18], Inner, BuildRight + : : : : : : :- Union + : : : : : : : :- *(1) Project [cs_sold_date_sk#19 AS sold_date_sk#16, cs_bill_customer_sk#20 AS customer_sk#15, cs_item_sk#21 AS item_sk#17] + : : : : : : : : +- *(1) Filter ((isnotnull(cs_item_sk#21) && isnotnull(cs_sold_date_sk#19)) && isnotnull(cs_bill_customer_sk#20)) + : : : : : : : : +- *(1) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_customer_sk#20,cs_item_sk#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct + : : : : : : : +- *(2) Project [ws_sold_date_sk#22 AS sold_date_sk#23, ws_bill_customer_sk#24 AS customer_sk#25, ws_item_sk#26 AS item_sk#27] + : : : : : : : +- *(2) Filter ((isnotnull(ws_item_sk#26) && isnotnull(ws_sold_date_sk#22)) && isnotnull(ws_bill_customer_sk#24)) + : : : : : : : +- *(2) FileScan parquet default.web_sales[ws_sold_date_sk#22,ws_item_sk#26,ws_bill_customer_sk#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct + : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : +- *(3) Project [i_item_sk#18] + : : : : : : +- *(3) Filter ((((isnotnull(i_category#28) && isnotnull(i_class#29)) && (i_category#28 = Women)) && (i_class#29 = maternity)) && isnotnull(i_item_sk#18)) + : : : : : : +- *(3) FileScan parquet default.item[i_item_sk#18,i_class#29,i_category#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women), EqualTo(i_class,maternity)..., ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(4) Project [d_date_sk#7] + : : : : : +- *(4) Filter ((((isnotnull(d_moy#30) && isnotnull(d_year#31)) && (d_moy#30 = 12)) && (d_year#31 = 1998)) && isnotnull(d_date_sk#7)) + : : : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#7,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(5) Project [c_customer_sk#4, c_current_addr_sk#12] + : : : : +- *(5) Filter (isnotnull(c_customer_sk#4) && isnotnull(c_current_addr_sk#12)) + : : : : +- *(5) FileScan parquet default.customer[c_customer_sk#4,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : +- *(7) Project [ss_sold_date_sk#6, ss_customer_sk#14, ss_ext_sales_price#5] + : : : +- *(7) Filter (isnotnull(ss_customer_sk#14) && isnotnull(ss_sold_date_sk#6)) + : : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#6,ss_customer_sk#14,ss_ext_sales_price#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(8) Project [ca_address_sk#13, ca_county#8, ca_state#9] + : : +- *(8) Filter ((isnotnull(ca_address_sk#13) && isnotnull(ca_state#9)) && isnotnull(ca_county#8)) + : : +- *(8) FileScan parquet default.customer_address[ca_address_sk#13,ca_county#8,ca_state#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state), IsNotNull(ca_county)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true])) + : +- *(9) Project [s_county#10, s_state#11] + : +- *(9) Filter (isnotnull(s_state#11) && isnotnull(s_county#10)) + : +- *(9) FileScan parquet default.store[s_county#10,s_state#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), IsNotNull(s_county)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(10) Project [d_date_sk#7] + +- *(10) Filter (((isnotnull(d_month_seq#32) && (d_month_seq#32 >= Subquery subquery9801)) && (d_month_seq#32 <= Subquery subquery9802)) && isnotnull(d_date_sk#7)) + : :- Subquery subquery9801 + : : +- *(2) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) + : : +- Exchange hashpartitioning((d_month_seq + 1)#33, 5) + : : +- *(1) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) + : : +- *(1) Project [(d_month_seq#32 + 1) AS (d_month_seq + 1)#33] + : : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) + : : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct + : +- Subquery subquery9802 + : +- *(2) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) + : +- Exchange hashpartitioning((d_month_seq + 3)#34, 5) + : +- *(1) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) + : +- *(1) Project [(d_month_seq#32 + 3) AS (d_month_seq + 3)#34] + : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) + : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct + +- *(10) FileScan parquet default.date_dim[d_date_sk#7,d_month_seq#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct + :- Subquery subquery9801 + : +- *(2) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) + : +- Exchange hashpartitioning((d_month_seq + 1)#33, 5) + : +- *(1) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) + : +- *(1) Project [(d_month_seq#32 + 1) AS (d_month_seq + 1)#33] + : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) + : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct + +- Subquery subquery9802 + +- *(2) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) + +- Exchange hashpartitioning((d_month_seq + 3)#34, 5) + +- *(1) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) + +- *(1) Project [(d_month_seq#32 + 3) AS (d_month_seq + 3)#34] + +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) + +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/simplified.txt index dd92c52db..4abf2165f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/simplified.txt @@ -1,121 +1,123 @@ -TakeOrderedAndProject [segment,num_customers,segment_base] - WholeStageCodegen (13) - HashAggregate [segment,count] [count(1),num_customers,segment_base,count] +TakeOrderedAndProject [num_customers,segment,segment_base] + WholeStageCodegen + HashAggregate [count,count(1),segment] [count,count(1),num_customers,segment_base] InputAdapter Exchange [segment] #1 - WholeStageCodegen (12) - HashAggregate [segment] [count,count] - HashAggregate [c_customer_sk,sum] [sum(UnscaledValue(ss_ext_sales_price)),segment,sum] + WholeStageCodegen + HashAggregate [count,count,segment] [count,count] + HashAggregate [c_customer_sk,sum,sum(UnscaledValue(ss_ext_sales_price))] [segment,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter Exchange [c_customer_sk] #2 - WholeStageCodegen (11) - HashAggregate [c_customer_sk,ss_ext_sales_price] [sum,sum] + WholeStageCodegen + HashAggregate [c_customer_sk,ss_ext_sales_price,sum,sum] [sum,sum] Project [c_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_customer_sk,ss_sold_date_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_customer_sk,ss_ext_sales_price,ss_sold_date_sk] BroadcastHashJoin [ca_county,ca_state,s_county,s_state] - Project [c_customer_sk,ss_sold_date_sk,ss_ext_sales_price,ca_county,ca_state] + Project [c_customer_sk,ca_county,ca_state,ss_ext_sales_price,ss_sold_date_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_customer_sk,c_current_addr_sk,ss_sold_date_sk,ss_ext_sales_price] + Project [c_current_addr_sk,c_customer_sk,ss_ext_sales_price,ss_sold_date_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - HashAggregate [c_customer_sk,c_current_addr_sk] + HashAggregate [c_current_addr_sk,c_customer_sk] InputAdapter - Exchange [c_customer_sk,c_current_addr_sk] #3 - WholeStageCodegen (6) - HashAggregate [c_customer_sk,c_current_addr_sk] - Project [c_customer_sk,c_current_addr_sk] - BroadcastHashJoin [customer_sk,c_customer_sk] + Exchange [c_current_addr_sk,c_customer_sk] #3 + WholeStageCodegen + HashAggregate [c_current_addr_sk,c_customer_sk] + Project [c_current_addr_sk,c_customer_sk] + BroadcastHashJoin [c_customer_sk,customer_sk] Project [customer_sk] - BroadcastHashJoin [sold_date_sk,d_date_sk] - Project [sold_date_sk,customer_sk] - BroadcastHashJoin [item_sk,i_item_sk] + BroadcastHashJoin [d_date_sk,sold_date_sk] + Project [customer_sk,sold_date_sk] + BroadcastHashJoin [i_item_sk,item_sk] InputAdapter Union - WholeStageCodegen (1) - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - Filter [cs_item_sk,cs_sold_date_sk,cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - WholeStageCodegen (2) - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] - Filter [ws_item_sk,ws_sold_date_sk,ws_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] + WholeStageCodegen + Project [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + WholeStageCodegen + Project [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) + WholeStageCodegen Project [i_item_sk] Filter [i_category,i_class,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_class,i_category] + Scan parquet default.item [i_category,i_class,i_item_sk] [i_category,i_class,i_item_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) + WholeStageCodegen Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #6 - WholeStageCodegen (5) - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] + WholeStageCodegen + Project [c_current_addr_sk,c_customer_sk] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] InputAdapter BroadcastExchange #7 - WholeStageCodegen (7) - Filter [ss_customer_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_ext_sales_price] + WholeStageCodegen + Project [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] InputAdapter BroadcastExchange #8 - WholeStageCodegen (8) - Filter [ca_address_sk,ca_county,ca_state] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state] + WholeStageCodegen + Project [ca_address_sk,ca_county,ca_state] + Filter [ca_address_sk,ca_county,ca_state] + Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state] [ca_address_sk,ca_county,ca_state] InputAdapter BroadcastExchange #9 - WholeStageCodegen (9) - Filter [s_county,s_state] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_county,s_state] + WholeStageCodegen + Project [s_county,s_state] + Filter [s_county,s_state] + Scan parquet default.store [s_county,s_state] [s_county,s_state] InputAdapter BroadcastExchange #10 - WholeStageCodegen (10) + WholeStageCodegen Project [d_date_sk] - Filter [d_month_seq,d_date_sk] + Filter [d_date_sk,d_month_seq] Subquery #1 - WholeStageCodegen (2) + WholeStageCodegen HashAggregate [(d_month_seq + 1)] InputAdapter Exchange [(d_month_seq + 1)] #11 - WholeStageCodegen (1) + WholeStageCodegen HashAggregate [(d_month_seq + 1)] Project [d_month_seq] - Filter [d_year,d_moy] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] + Filter [d_moy,d_year] + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] Subquery #2 - WholeStageCodegen (2) + WholeStageCodegen HashAggregate [(d_month_seq + 3)] InputAdapter Exchange [(d_month_seq + 3)] #12 - WholeStageCodegen (1) + WholeStageCodegen HashAggregate [(d_month_seq + 3)] Project [d_month_seq] - Filter [d_year,d_moy] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + Filter [d_moy,d_year] + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + Subquery #1 + WholeStageCodegen + HashAggregate [(d_month_seq + 1)] + InputAdapter + Exchange [(d_month_seq + 1)] #11 + WholeStageCodegen + HashAggregate [(d_month_seq + 1)] + Project [d_month_seq] + Filter [d_moy,d_year] + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] + Subquery #2 + WholeStageCodegen + HashAggregate [(d_month_seq + 3)] + InputAdapter + Exchange [(d_month_seq + 3)] #12 + WholeStageCodegen + HashAggregate [(d_month_seq + 3)] + Project [d_month_seq] + Filter [d_moy,d_year] + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/explain.txt index 99c79d204..32402fb5f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/explain.txt @@ -1,122 +1,20 @@ == Physical Plan == -TakeOrderedAndProject (21) -+- * HashAggregate (20) - +- Exchange (19) - +- * HashAggregate (18) - +- * Project (17) - +- * BroadcastHashJoin Inner BuildRight (16) - :- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Project (4) - : : +- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.date_dim (1) - : +- BroadcastExchange (8) - : +- * Filter (7) - : +- * ColumnarToRow (6) - : +- Scan parquet default.store_sales (5) - +- BroadcastExchange (15) - +- * Project (14) - +- * Filter (13) - +- * ColumnarToRow (12) - +- Scan parquet default.item (11) - - -(1) Scan parquet default.date_dim -Output [3]: [d_date_sk#1, d_year#2, d_moy#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [3]: [d_date_sk#1, d_year#2, d_moy#3] - -(3) Filter [codegen id : 3] -Input [3]: [d_date_sk#1, d_year#2, d_moy#3] -Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1999)) AND isnotnull(d_date_sk#1)) - -(4) Project [codegen id : 3] -Output [1]: [d_date_sk#1] -Input [3]: [d_date_sk#1, d_year#2, d_moy#3] - -(5) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct - -(6) ColumnarToRow [codegen id : 1] -Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] - -(7) Filter [codegen id : 1] -Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] -Condition : (isnotnull(ss_sold_date_sk#4) AND isnotnull(ss_item_sk#5)) - -(8) BroadcastExchange -Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] - -(9) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#4] -Join condition: None - -(10) Project [codegen id : 3] -Output [2]: [ss_item_sk#5, ss_ext_sales_price#6] -Input [4]: [d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] - -(11) Scan parquet default.item -Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] - -(13) Filter [codegen id : 2] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] -Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 28)) AND isnotnull(i_item_sk#8)) - -(14) Project [codegen id : 2] -Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] - -(15) BroadcastExchange -Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(16) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_item_sk#5] -Right keys [1]: [i_item_sk#8] -Join condition: None - -(17) Project [codegen id : 3] -Output [3]: [ss_ext_sales_price#6, i_brand_id#9, i_brand#10] -Input [5]: [ss_item_sk#5, ss_ext_sales_price#6, i_item_sk#8, i_brand_id#9, i_brand#10] - -(18) HashAggregate [codegen id : 3] -Input [3]: [ss_ext_sales_price#6, i_brand_id#9, i_brand#10] -Keys [2]: [i_brand#10, i_brand_id#9] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum#13] -Results [3]: [i_brand#10, i_brand_id#9, sum#14] - -(19) Exchange -Input [3]: [i_brand#10, i_brand_id#9, sum#14] -Arguments: hashpartitioning(i_brand#10, i_brand_id#9, 5), true, [id=#15] - -(20) HashAggregate [codegen id : 4] -Input [3]: [i_brand#10, i_brand_id#9, sum#14] -Keys [2]: [i_brand#10, i_brand_id#9] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#16] -Results [3]: [i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#16,17,2) AS ext_price#19] - -(21) TakeOrderedAndProject -Input [3]: [brand_id#17, brand#18, ext_price#19] -Arguments: 100, [ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [brand_id#17, brand#18, ext_price#19] - +TakeOrderedAndProject(limit=100, orderBy=[ext_price#1 DESC NULLS LAST,brand_id#2 ASC NULLS FIRST], output=[brand_id#2,brand#3,ext_price#1]) ++- *(4) HashAggregate(keys=[i_brand#4, i_brand_id#5], functions=[sum(UnscaledValue(ss_ext_sales_price#6))]) + +- Exchange hashpartitioning(i_brand#4, i_brand_id#5, 5) + +- *(3) HashAggregate(keys=[i_brand#4, i_brand_id#5], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#6))]) + +- *(3) Project [ss_ext_sales_price#6, i_brand_id#5, i_brand#4] + +- *(3) BroadcastHashJoin [ss_item_sk#7], [i_item_sk#8], Inner, BuildRight + :- *(3) Project [ss_item_sk#7, ss_ext_sales_price#6] + : +- *(3) BroadcastHashJoin [d_date_sk#9], [ss_sold_date_sk#10], Inner, BuildRight + : :- *(3) Project [d_date_sk#9] + : : +- *(3) Filter ((((isnotnull(d_moy#11) && isnotnull(d_year#12)) && (d_moy#11 = 11)) && (d_year#12 = 1999)) && isnotnull(d_date_sk#9)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#9,d_year#12,d_moy#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [ss_sold_date_sk#10, ss_item_sk#7, ss_ext_sales_price#6] + : +- *(1) Filter (isnotnull(ss_sold_date_sk#10) && isnotnull(ss_item_sk#7)) + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#10,ss_item_sk#7,ss_ext_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [i_item_sk#8, i_brand_id#5, i_brand#4] + +- *(2) Filter ((isnotnull(i_manager_id#13) && (i_manager_id#13 = 28)) && isnotnull(i_item_sk#8)) + +- *(2) FileScan parquet default.item[i_item_sk#8,i_brand_id#5,i_brand#4,i_manager_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/simplified.txt index bb2ef8e82..478e4a54b 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/simplified.txt @@ -1,31 +1,26 @@ -TakeOrderedAndProject [ext_price,brand_id,brand] - WholeStageCodegen (4) - HashAggregate [i_brand,i_brand_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,ext_price,sum] +TakeOrderedAndProject [brand,brand_id,ext_price] + WholeStageCodegen + HashAggregate [i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter Exchange [i_brand,i_brand_id] #1 - WholeStageCodegen (3) - HashAggregate [i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price,i_brand_id,i_brand] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_ext_sales_price] + WholeStageCodegen + HashAggregate [i_brand,i_brand_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [i_brand,i_brand_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [ss_sold_date_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + WholeStageCodegen + Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Project [i_item_sk,i_brand_id,i_brand] - Filter [i_manager_id,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] + WholeStageCodegen + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] [i_brand,i_brand_id,i_item_sk,i_manager_id] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/explain.txt index d0d647211..8d1ef0a64 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/explain.txt @@ -1,378 +1,65 @@ == Physical Plan == -TakeOrderedAndProject (67) -+- * HashAggregate (66) - +- Exchange (65) - +- * HashAggregate (64) - +- Union (63) - :- * HashAggregate (32) - : +- Exchange (31) - : +- * HashAggregate (30) - : +- * Project (29) - : +- * BroadcastHashJoin Inner BuildRight (28) - : :- * Project (17) - : : +- * BroadcastHashJoin Inner BuildRight (16) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (15) - : : +- * Project (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.customer_address (11) - : +- BroadcastExchange (27) - : +- * BroadcastHashJoin LeftSemi BuildRight (26) - : :- * Filter (20) - : : +- * ColumnarToRow (19) - : : +- Scan parquet default.item (18) - : +- BroadcastExchange (25) - : +- * Project (24) - : +- * Filter (23) - : +- * ColumnarToRow (22) - : +- Scan parquet default.item (21) - :- * HashAggregate (47) - : +- Exchange (46) - : +- * HashAggregate (45) - : +- * Project (44) - : +- * BroadcastHashJoin Inner BuildRight (43) - : :- * Project (41) - : : +- * BroadcastHashJoin Inner BuildRight (40) - : : :- * Project (38) - : : : +- * BroadcastHashJoin Inner BuildRight (37) - : : : :- * Filter (35) - : : : : +- * ColumnarToRow (34) - : : : : +- Scan parquet default.catalog_sales (33) - : : : +- ReusedExchange (36) - : : +- ReusedExchange (39) - : +- ReusedExchange (42) - +- * HashAggregate (62) - +- Exchange (61) - +- * HashAggregate (60) - +- * Project (59) - +- * BroadcastHashJoin Inner BuildRight (58) - :- * Project (56) - : +- * BroadcastHashJoin Inner BuildRight (55) - : :- * Project (53) - : : +- * BroadcastHashJoin Inner BuildRight (52) - : : :- * Filter (50) - : : : +- * ColumnarToRow (49) - : : : +- Scan parquet default.web_sales (48) - : : +- ReusedExchange (51) - : +- ReusedExchange (54) - +- ReusedExchange (57) - - -(1) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 5] -Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] - -(3) Filter [codegen id : 5] -Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] -Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_item_sk#2)) - -(4) Scan parquet default.date_dim -Output [3]: [d_date_sk#5, d_year#6, d_moy#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#5, d_year#6, d_moy#7] - -(6) Filter [codegen id : 1] -Input [3]: [d_date_sk#5, d_year#6, d_moy#7] -Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 2001)) AND (d_moy#7 = 2)) AND isnotnull(d_date_sk#5)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#5] -Input [3]: [d_date_sk#5, d_year#6, d_moy#7] - -(8) BroadcastExchange -Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] - -(9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#5] -Join condition: None - -(10) Project [codegen id : 5] -Output [3]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] -Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, d_date_sk#5] - -(11) Scan parquet default.customer_address -Output [2]: [ca_address_sk#9, ca_gmt_offset#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [2]: [ca_address_sk#9, ca_gmt_offset#10] - -(13) Filter [codegen id : 2] -Input [2]: [ca_address_sk#9, ca_gmt_offset#10] -Condition : ((isnotnull(ca_gmt_offset#10) AND (ca_gmt_offset#10 = -5.00)) AND isnotnull(ca_address_sk#9)) - -(14) Project [codegen id : 2] -Output [1]: [ca_address_sk#9] -Input [2]: [ca_address_sk#9, ca_gmt_offset#10] - -(15) BroadcastExchange -Input [1]: [ca_address_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] - -(16) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_addr_sk#3] -Right keys [1]: [ca_address_sk#9] -Join condition: None - -(17) Project [codegen id : 5] -Output [2]: [ss_item_sk#2, ss_ext_sales_price#4] -Input [4]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, ca_address_sk#9] - -(18) Scan parquet default.item -Output [2]: [i_item_sk#12, i_item_id#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#12, i_item_id#13] - -(20) Filter [codegen id : 4] -Input [2]: [i_item_sk#12, i_item_id#13] -Condition : isnotnull(i_item_sk#12) - -(21) Scan parquet default.item -Output [2]: [i_item_id#13, i_color#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [In(i_color, [slate,blanched,burnished])] -ReadSchema: struct - -(22) ColumnarToRow [codegen id : 3] -Input [2]: [i_item_id#13, i_color#14] - -(23) Filter [codegen id : 3] -Input [2]: [i_item_id#13, i_color#14] -Condition : i_color#14 IN (slate,blanched,burnished) - -(24) Project [codegen id : 3] -Output [1]: [i_item_id#13 AS i_item_id#13#15] -Input [2]: [i_item_id#13, i_color#14] - -(25) BroadcastExchange -Input [1]: [i_item_id#13#15] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#16] - -(26) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_item_id#13] -Right keys [1]: [i_item_id#13#15] -Join condition: None - -(27) BroadcastExchange -Input [2]: [i_item_sk#12, i_item_id#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] - -(28) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#12] -Join condition: None - -(29) Project [codegen id : 5] -Output [2]: [ss_ext_sales_price#4, i_item_id#13] -Input [4]: [ss_item_sk#2, ss_ext_sales_price#4, i_item_sk#12, i_item_id#13] - -(30) HashAggregate [codegen id : 5] -Input [2]: [ss_ext_sales_price#4, i_item_id#13] -Keys [1]: [i_item_id#13] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#4))] -Aggregate Attributes [1]: [sum#18] -Results [2]: [i_item_id#13, sum#19] - -(31) Exchange -Input [2]: [i_item_id#13, sum#19] -Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#20] - -(32) HashAggregate [codegen id : 6] -Input [2]: [i_item_id#13, sum#19] -Keys [1]: [i_item_id#13] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#4))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#4))#21] -Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#21,17,2) AS total_sales#22] - -(33) Scan parquet default.catalog_sales -Output [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] -ReadSchema: struct - -(34) ColumnarToRow [codegen id : 11] -Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] - -(35) Filter [codegen id : 11] -Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] -Condition : ((isnotnull(cs_sold_date_sk#23) AND isnotnull(cs_bill_addr_sk#24)) AND isnotnull(cs_item_sk#25)) - -(36) ReusedExchange [Reuses operator id: 8] -Output [1]: [d_date_sk#5] - -(37) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_sold_date_sk#23] -Right keys [1]: [d_date_sk#5] -Join condition: None - -(38) Project [codegen id : 11] -Output [3]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] -Input [5]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, d_date_sk#5] - -(39) ReusedExchange [Reuses operator id: 15] -Output [1]: [ca_address_sk#9] - -(40) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_bill_addr_sk#24] -Right keys [1]: [ca_address_sk#9] -Join condition: None - -(41) Project [codegen id : 11] -Output [2]: [cs_item_sk#25, cs_ext_sales_price#26] -Input [4]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, ca_address_sk#9] - -(42) ReusedExchange [Reuses operator id: 27] -Output [2]: [i_item_sk#12, i_item_id#13] - -(43) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_item_sk#25] -Right keys [1]: [i_item_sk#12] -Join condition: None - -(44) Project [codegen id : 11] -Output [2]: [cs_ext_sales_price#26, i_item_id#13] -Input [4]: [cs_item_sk#25, cs_ext_sales_price#26, i_item_sk#12, i_item_id#13] - -(45) HashAggregate [codegen id : 11] -Input [2]: [cs_ext_sales_price#26, i_item_id#13] -Keys [1]: [i_item_id#13] -Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#26))] -Aggregate Attributes [1]: [sum#27] -Results [2]: [i_item_id#13, sum#28] - -(46) Exchange -Input [2]: [i_item_id#13, sum#28] -Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#29] - -(47) HashAggregate [codegen id : 12] -Input [2]: [i_item_id#13, sum#28] -Keys [1]: [i_item_id#13] -Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#26))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#26))#30] -Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#26))#30,17,2) AS total_sales#31] - -(48) Scan parquet default.web_sales -Output [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] -ReadSchema: struct - -(49) ColumnarToRow [codegen id : 17] -Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] - -(50) Filter [codegen id : 17] -Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] -Condition : ((isnotnull(ws_sold_date_sk#32) AND isnotnull(ws_bill_addr_sk#34)) AND isnotnull(ws_item_sk#33)) - -(51) ReusedExchange [Reuses operator id: 8] -Output [1]: [d_date_sk#5] - -(52) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_sold_date_sk#32] -Right keys [1]: [d_date_sk#5] -Join condition: None - -(53) Project [codegen id : 17] -Output [3]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] -Input [5]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, d_date_sk#5] - -(54) ReusedExchange [Reuses operator id: 15] -Output [1]: [ca_address_sk#9] - -(55) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_bill_addr_sk#34] -Right keys [1]: [ca_address_sk#9] -Join condition: None - -(56) Project [codegen id : 17] -Output [2]: [ws_item_sk#33, ws_ext_sales_price#35] -Input [4]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, ca_address_sk#9] - -(57) ReusedExchange [Reuses operator id: 27] -Output [2]: [i_item_sk#12, i_item_id#13] - -(58) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_item_sk#33] -Right keys [1]: [i_item_sk#12] -Join condition: None - -(59) Project [codegen id : 17] -Output [2]: [ws_ext_sales_price#35, i_item_id#13] -Input [4]: [ws_item_sk#33, ws_ext_sales_price#35, i_item_sk#12, i_item_id#13] - -(60) HashAggregate [codegen id : 17] -Input [2]: [ws_ext_sales_price#35, i_item_id#13] -Keys [1]: [i_item_id#13] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#35))] -Aggregate Attributes [1]: [sum#36] -Results [2]: [i_item_id#13, sum#37] - -(61) Exchange -Input [2]: [i_item_id#13, sum#37] -Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#38] - -(62) HashAggregate [codegen id : 18] -Input [2]: [i_item_id#13, sum#37] -Keys [1]: [i_item_id#13] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#35))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#35))#39] -Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#35))#39,17,2) AS total_sales#40] - -(63) Union - -(64) HashAggregate [codegen id : 19] -Input [2]: [i_item_id#13, total_sales#22] -Keys [1]: [i_item_id#13] -Functions [1]: [partial_sum(total_sales#22)] -Aggregate Attributes [2]: [sum#41, isEmpty#42] -Results [3]: [i_item_id#13, sum#43, isEmpty#44] - -(65) Exchange -Input [3]: [i_item_id#13, sum#43, isEmpty#44] -Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#45] - -(66) HashAggregate [codegen id : 20] -Input [3]: [i_item_id#13, sum#43, isEmpty#44] -Keys [1]: [i_item_id#13] -Functions [1]: [sum(total_sales#22)] -Aggregate Attributes [1]: [sum(total_sales#22)#46] -Results [2]: [i_item_id#13, sum(total_sales#22)#46 AS total_sales#47] - -(67) TakeOrderedAndProject -Input [2]: [i_item_id#13, total_sales#47] -Arguments: 100, [total_sales#47 ASC NULLS FIRST], [i_item_id#13, total_sales#47] - +TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output=[i_item_id#2,total_sales#1]) ++- *(20) HashAggregate(keys=[i_item_id#2], functions=[sum(total_sales#3)]) + +- Exchange hashpartitioning(i_item_id#2, 5) + +- *(19) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(total_sales#3)]) + +- Union + :- *(6) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- Exchange hashpartitioning(i_item_id#2, 5) + : +- *(5) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- *(5) Project [ss_ext_sales_price#4, i_item_id#2] + : +- *(5) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight + : :- *(5) Project [ss_item_sk#5, ss_ext_sales_price#4] + : : +- *(5) BroadcastHashJoin [ss_addr_sk#7], [ca_address_sk#8], Inner, BuildRight + : : :- *(5) Project [ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] + : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#9, ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] + : : : : +- *(5) Filter ((isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#7)) && isnotnull(ss_item_sk#5)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#10] + : : : +- *(1) Filter ((((isnotnull(d_year#11) && isnotnull(d_moy#12)) && (d_year#11 = 2001)) && (d_moy#12 = 2)) && isnotnull(d_date_sk#10)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [ca_address_sk#8] + : : +- *(2) Filter ((isnotnull(ca_gmt_offset#13) && (ca_gmt_offset#13 = -5.00)) && isnotnull(ca_address_sk#8)) + : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) BroadcastHashJoin [i_item_id#2], [i_item_id#2#14], LeftSemi, BuildRight + : :- *(4) Project [i_item_sk#6, i_item_id#2] + : : +- *(4) Filter isnotnull(i_item_sk#6) + : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_item_id#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- *(3) Project [i_item_id#2 AS i_item_id#2#14] + : +- *(3) Filter i_color#15 IN (slate,blanched,burnished) + : +- *(3) FileScan parquet default.item[i_item_id#2,i_color#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_color, [slate,blanched,burnished])], ReadSchema: struct + :- *(12) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) + : +- Exchange hashpartitioning(i_item_id#2, 5) + : +- *(11) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) + : +- *(11) Project [cs_ext_sales_price#16, i_item_id#2] + : +- *(11) BroadcastHashJoin [cs_item_sk#17], [i_item_sk#6], Inner, BuildRight + : :- *(11) Project [cs_item_sk#17, cs_ext_sales_price#16] + : : +- *(11) BroadcastHashJoin [cs_bill_addr_sk#18], [ca_address_sk#8], Inner, BuildRight + : : :- *(11) Project [cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] + : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#19], [d_date_sk#10], Inner, BuildRight + : : : :- *(11) Project [cs_sold_date_sk#19, cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] + : : : : +- *(11) Filter ((isnotnull(cs_sold_date_sk#19) && isnotnull(cs_bill_addr_sk#18)) && isnotnull(cs_item_sk#17)) + : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [i_item_sk#6, i_item_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(18) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(ws_ext_sales_price#20))]) + +- Exchange hashpartitioning(i_item_id#2, 5) + +- *(17) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#20))]) + +- *(17) Project [ws_ext_sales_price#20, i_item_id#2] + +- *(17) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#6], Inner, BuildRight + :- *(17) Project [ws_item_sk#21, ws_ext_sales_price#20] + : +- *(17) BroadcastHashJoin [ws_bill_addr_sk#22], [ca_address_sk#8], Inner, BuildRight + : :- *(17) Project [ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] + : : +- *(17) BroadcastHashJoin [ws_sold_date_sk#23], [d_date_sk#10], Inner, BuildRight + : : :- *(17) Project [ws_sold_date_sk#23, ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] + : : : +- *(17) Filter ((isnotnull(ws_sold_date_sk#23) && isnotnull(ws_bill_addr_sk#22)) && isnotnull(ws_item_sk#21)) + : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [i_item_sk#6, i_item_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/simplified.txt index 5825c6f6e..0433f198d 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/simplified.txt @@ -1,101 +1,91 @@ -TakeOrderedAndProject [total_sales,i_item_id] - WholeStageCodegen (20) - HashAggregate [i_item_id,sum,isEmpty] [sum(total_sales),total_sales,sum,isEmpty] +TakeOrderedAndProject [i_item_id,total_sales] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(total_sales)] [sum,sum(total_sales),total_sales] InputAdapter Exchange [i_item_id] #1 - WholeStageCodegen (19) - HashAggregate [i_item_id,total_sales] [sum,isEmpty,sum,isEmpty] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum,total_sales] [sum,sum] InputAdapter Union - WholeStageCodegen (6) - HashAggregate [i_item_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_ext_sales_price)),total_sales] InputAdapter Exchange [i_item_id] #2 - WholeStageCodegen (5) - HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price,i_item_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_addr_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] + WholeStageCodegen + HashAggregate [i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [i_item_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) + WholeStageCodegen Project [ca_address_sk] - Filter [ca_gmt_offset,ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + Filter [ca_address_sk,ca_gmt_offset] + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) + WholeStageCodegen BroadcastHashJoin [i_item_id,i_item_id] - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id] + Project [i_item_id,i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] InputAdapter BroadcastExchange #6 - WholeStageCodegen (3) + WholeStageCodegen Project [i_item_id] Filter [i_color] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_id,i_color] - WholeStageCodegen (12) - HashAggregate [i_item_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + Scan parquet default.item [i_color,i_item_id] [i_color,i_item_id] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum,sum(UnscaledValue(cs_ext_sales_price)),total_sales] InputAdapter Exchange [i_item_id] #7 - WholeStageCodegen (11) - HashAggregate [i_item_id,cs_ext_sales_price] [sum,sum] + WholeStageCodegen + HashAggregate [cs_ext_sales_price,i_item_id,sum,sum] [sum,sum] Project [cs_ext_sales_price,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk,cs_ext_sales_price] - BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] - Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [ca_address_sk,cs_bill_addr_sk] + Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + Filter [cs_bill_addr_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk] #3 + ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter - ReusedExchange [ca_address_sk] #4 + ReusedExchange [ca_address_sk] [ca_address_sk] #4 InputAdapter - ReusedExchange [i_item_sk,i_item_id] #5 - WholeStageCodegen (18) - HashAggregate [i_item_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #5 + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),total_sales] InputAdapter Exchange [i_item_id] #8 - WholeStageCodegen (17) - HashAggregate [i_item_id,ws_ext_sales_price] [sum,sum] - Project [ws_ext_sales_price,i_item_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk,ws_ext_sales_price] - BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] - Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_bill_addr_sk,ws_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum,ws_ext_sales_price] [sum,sum] + Project [i_item_id,ws_ext_sales_price] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk] #3 + ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter - ReusedExchange [ca_address_sk] #4 + ReusedExchange [ca_address_sk] [ca_address_sk] #4 InputAdapter - ReusedExchange [i_item_sk,i_item_id] #5 + ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/explain.txt index 4e1123185..275c06c2d 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/explain.txt @@ -1,278 +1,51 @@ == Physical Plan == -TakeOrderedAndProject (51) -+- * Project (50) - +- * BroadcastHashJoin Inner BuildRight (49) - :- * Project (42) - : +- * BroadcastHashJoin Inner BuildRight (41) - : :- * Filter (32) - : : +- Window (31) - : : +- * Sort (30) - : : +- Exchange (29) - : : +- * Project (28) - : : +- Window (27) - : : +- * Sort (26) - : : +- Exchange (25) - : : +- * HashAggregate (24) - : : +- Exchange (23) - : : +- * HashAggregate (22) - : : +- * Project (21) - : : +- * BroadcastHashJoin Inner BuildRight (20) - : : :- * Project (15) - : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : :- * Project (9) - : : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.item (1) - : : : : +- BroadcastExchange (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.catalog_sales (4) - : : : +- BroadcastExchange (13) - : : : +- * Filter (12) - : : : +- * ColumnarToRow (11) - : : : +- Scan parquet default.date_dim (10) - : : +- BroadcastExchange (19) - : : +- * Filter (18) - : : +- * ColumnarToRow (17) - : : +- Scan parquet default.call_center (16) - : +- BroadcastExchange (40) - : +- * Project (39) - : +- * Filter (38) - : +- Window (37) - : +- * Sort (36) - : +- Exchange (35) - : +- * HashAggregate (34) - : +- ReusedExchange (33) - +- BroadcastExchange (48) - +- * Project (47) - +- * Filter (46) - +- Window (45) - +- * Sort (44) - +- ReusedExchange (43) - - -(1) Scan parquet default.item -Output [3]: [i_item_sk#1, i_brand#2, i_category#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [3]: [i_item_sk#1, i_brand#2, i_category#3] - -(3) Filter [codegen id : 4] -Input [3]: [i_item_sk#1, i_brand#2, i_category#3] -Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) - -(4) Scan parquet default.catalog_sales -Output [4]: [cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_call_center_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [4]: [cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] - -(6) Filter [codegen id : 1] -Input [4]: [cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] -Condition : ((isnotnull(cs_item_sk#6) AND isnotnull(cs_sold_date_sk#4)) AND isnotnull(cs_call_center_sk#5)) - -(7) BroadcastExchange -Input [4]: [cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, false] as bigint)),false), [id=#8] - -(8) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_item_sk#1] -Right keys [1]: [cs_item_sk#6] -Join condition: None - -(9) Project [codegen id : 4] -Output [5]: [i_brand#2, i_category#3, cs_sold_date_sk#4, cs_call_center_sk#5, cs_sales_price#7] -Input [7]: [i_item_sk#1, i_brand#2, i_category#3, cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] - -(10) Scan parquet default.date_dim -Output [3]: [d_date_sk#9, d_year#10, d_moy#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [3]: [d_date_sk#9, d_year#10, d_moy#11] - -(12) Filter [codegen id : 2] -Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) - -(13) BroadcastExchange -Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] - -(14) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#4] -Right keys [1]: [d_date_sk#9] -Join condition: None - -(15) Project [codegen id : 4] -Output [6]: [i_brand#2, i_category#3, cs_call_center_sk#5, cs_sales_price#7, d_year#10, d_moy#11] -Input [8]: [i_brand#2, i_category#3, cs_sold_date_sk#4, cs_call_center_sk#5, cs_sales_price#7, d_date_sk#9, d_year#10, d_moy#11] - -(16) Scan parquet default.call_center -Output [2]: [cc_call_center_sk#13, cc_name#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/call_center] -PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 3] -Input [2]: [cc_call_center_sk#13, cc_name#14] - -(18) Filter [codegen id : 3] -Input [2]: [cc_call_center_sk#13, cc_name#14] -Condition : (isnotnull(cc_call_center_sk#13) AND isnotnull(cc_name#14)) - -(19) BroadcastExchange -Input [2]: [cc_call_center_sk#13, cc_name#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] - -(20) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_call_center_sk#5] -Right keys [1]: [cc_call_center_sk#13] -Join condition: None - -(21) Project [codegen id : 4] -Output [6]: [i_brand#2, i_category#3, cs_sales_price#7, d_year#10, d_moy#11, cc_name#14] -Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#5, cs_sales_price#7, d_year#10, d_moy#11, cc_call_center_sk#13, cc_name#14] - -(22) HashAggregate [codegen id : 4] -Input [6]: [i_brand#2, i_category#3, cs_sales_price#7, d_year#10, d_moy#11, cc_name#14] -Keys [5]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11] -Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#7))] -Aggregate Attributes [1]: [sum#16] -Results [6]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum#17] - -(23) Exchange -Input [6]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum#17] -Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, 5), true, [id=#18] - -(24) HashAggregate [codegen id : 5] -Input [6]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum#17] -Keys [5]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11] -Functions [1]: [sum(UnscaledValue(cs_sales_price#7))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#7))#19] -Results [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, MakeDecimal(sum(UnscaledValue(cs_sales_price#7))#19,17,2) AS sum_sales#20, MakeDecimal(sum(UnscaledValue(cs_sales_price#7))#19,17,2) AS _w0#21] - -(25) Exchange -Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, _w0#21] -Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#14, d_year#10, 5), true, [id=#22] - -(26) Sort [codegen id : 6] -Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, _w0#21] -Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#14 ASC NULLS FIRST, d_year#10 ASC NULLS FIRST], false, 0 - -(27) Window -Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, _w0#21] -Arguments: [avg(_w0#21) windowspecdefinition(i_category#3, i_brand#2, cc_name#14, d_year#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#23], [i_category#3, i_brand#2, cc_name#14, d_year#10] - -(28) Project [codegen id : 7] -Output [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23] -Input [8]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, _w0#21, avg_monthly_sales#23] - -(29) Exchange -Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23] -Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#14, 5), true, [id=#24] - -(30) Sort [codegen id : 8] -Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23] -Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#14 ASC NULLS FIRST, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST], false, 0 - -(31) Window -Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23] -Arguments: [rank(d_year#10, d_moy#11) windowspecdefinition(i_category#3, i_brand#2, cc_name#14, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#25], [i_category#3, i_brand#2, cc_name#14], [d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST] - -(32) Filter [codegen id : 23] -Input [8]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23, rn#25] -Condition : (((((isnotnull(d_year#10) AND isnotnull(avg_monthly_sales#23)) AND (d_year#10 = 1999)) AND (avg_monthly_sales#23 > 0.000000)) AND (CASE WHEN (avg_monthly_sales#23 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000)) AND isnotnull(rn#25)) - -(33) ReusedExchange [Reuses operator id: 23] -Output [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum#31] - -(34) HashAggregate [codegen id : 13] -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum#31] -Keys [5]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30] -Functions [1]: [sum(UnscaledValue(cs_sales_price#7))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#7))#32] -Results [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, MakeDecimal(sum(UnscaledValue(cs_sales_price#7))#32,17,2) AS sum_sales#33] - -(35) Exchange -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33] -Arguments: hashpartitioning(i_category#26, i_brand#27, cc_name#28, 5), true, [id=#34] - -(36) Sort [codegen id : 14] -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33] -Arguments: [i_category#26 ASC NULLS FIRST, i_brand#27 ASC NULLS FIRST, cc_name#28 ASC NULLS FIRST, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST], false, 0 - -(37) Window -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33] -Arguments: [rank(d_year#29, d_moy#30) windowspecdefinition(i_category#26, i_brand#27, cc_name#28, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#26, i_brand#27, cc_name#28], [d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] - -(38) Filter [codegen id : 15] -Input [7]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33, rn#35] -Condition : isnotnull(rn#35) - -(39) Project [codegen id : 15] -Output [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#33, rn#35] -Input [7]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33, rn#35] - -(40) BroadcastExchange -Input [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#33, rn#35] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] + 1)),false), [id=#36] - -(41) BroadcastHashJoin [codegen id : 23] -Left keys [4]: [i_category#3, i_brand#2, cc_name#14, rn#25] -Right keys [4]: [i_category#26, i_brand#27, cc_name#28, (rn#35 + 1)] -Join condition: None - -(42) Project [codegen id : 23] -Output [9]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23, rn#25, sum_sales#33] -Input [13]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23, rn#25, i_category#26, i_brand#27, cc_name#28, sum_sales#33, rn#35] - -(43) ReusedExchange [Reuses operator id: 35] -Output [6]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42] - -(44) Sort [codegen id : 21] -Input [6]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42] -Arguments: [i_category#37 ASC NULLS FIRST, i_brand#38 ASC NULLS FIRST, cc_name#39 ASC NULLS FIRST, d_year#40 ASC NULLS FIRST, d_moy#41 ASC NULLS FIRST], false, 0 - -(45) Window -Input [6]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42] -Arguments: [rank(d_year#40, d_moy#41) windowspecdefinition(i_category#37, i_brand#38, cc_name#39, d_year#40 ASC NULLS FIRST, d_moy#41 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#43], [i_category#37, i_brand#38, cc_name#39], [d_year#40 ASC NULLS FIRST, d_moy#41 ASC NULLS FIRST] - -(46) Filter [codegen id : 22] -Input [7]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42, rn#43] -Condition : isnotnull(rn#43) - -(47) Project [codegen id : 22] -Output [5]: [i_category#37, i_brand#38, cc_name#39, sum_sales#42, rn#43] -Input [7]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42, rn#43] - -(48) BroadcastExchange -Input [5]: [i_category#37, i_brand#38, cc_name#39, sum_sales#42, rn#43] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] - 1)),false), [id=#44] - -(49) BroadcastHashJoin [codegen id : 23] -Left keys [4]: [i_category#3, i_brand#2, cc_name#14, rn#25] -Right keys [4]: [i_category#37, i_brand#38, cc_name#39, (rn#43 - 1)] -Join condition: None - -(50) Project [codegen id : 23] -Output [9]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, avg_monthly_sales#23, sum_sales#20, sum_sales#33 AS psum#45, sum_sales#42 AS nsum#46] -Input [14]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23, rn#25, sum_sales#33, i_category#37, i_brand#38, cc_name#39, sum_sales#42, rn#43] - -(51) TakeOrderedAndProject -Input [9]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, avg_monthly_sales#23, sum_sales#20, psum#45, nsum#46] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, cc_name#14 ASC NULLS FIRST], [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, avg_monthly_sales#23, sum_sales#20, psum#45, nsum#46] - +TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,cc_name#3 ASC NULLS FIRST], output=[i_category#4,i_brand#5,cc_name#3,d_year#6,d_moy#7,avg_monthly_sales#2,sum_sales#1,psum#8,nsum#9]) ++- *(22) Project [i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, avg_monthly_sales#2, sum_sales#1, sum_sales#10 AS psum#8, sum_sales#11 AS nsum#9] + +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, cc_name#3, rn#12], [i_category#13, i_brand#14, cc_name#15, (rn#16 - 1)], Inner, BuildRight + :- *(22) Project [i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, sum_sales#1, avg_monthly_sales#2, rn#12, sum_sales#10] + : +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, cc_name#3, rn#12], [i_category#17, i_brand#18, cc_name#19, (rn#20 + 1)], Inner, BuildRight + : :- *(22) Project [i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, sum_sales#1, avg_monthly_sales#2, rn#12] + : : +- *(22) Filter (((isnotnull(avg_monthly_sales#2) && (avg_monthly_sales#2 > 0.000000)) && (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000)) && isnotnull(rn#12)) + : : +- Window [avg(_w0#21) windowspecdefinition(i_category#4, i_brand#5, cc_name#3, d_year#6, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#5, cc_name#3, d_year#6] + : : +- *(7) Filter (isnotnull(d_year#6) && (d_year#6 = 1999)) + : : +- Window [rank(d_year#6, d_moy#7) windowspecdefinition(i_category#4, i_brand#5, cc_name#3, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#12], [i_category#4, i_brand#5, cc_name#3], [d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST] + : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, cc_name#3 ASC NULLS FIRST, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST], false, 0 + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, 5) + : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7], functions=[sum(UnscaledValue(cs_sales_price#22))]) + : : +- Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, 5) + : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7], functions=[partial_sum(UnscaledValue(cs_sales_price#22))]) + : : +- *(4) Project [i_brand#5, i_category#4, cs_sales_price#22, d_year#6, d_moy#7, cc_name#3] + : : +- *(4) BroadcastHashJoin [cs_call_center_sk#23], [cc_call_center_sk#24], Inner, BuildRight + : : :- *(4) Project [i_brand#5, i_category#4, cs_call_center_sk#23, cs_sales_price#22, d_year#6, d_moy#7] + : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#26], Inner, BuildRight + : : : :- *(4) Project [i_brand#5, i_category#4, cs_sold_date_sk#25, cs_call_center_sk#23, cs_sales_price#22] + : : : : +- *(4) BroadcastHashJoin [i_item_sk#27], [cs_item_sk#28], Inner, BuildRight + : : : : :- *(4) Project [i_item_sk#27, i_brand#5, i_category#4] + : : : : : +- *(4) Filter ((isnotnull(i_item_sk#27) && isnotnull(i_brand#5)) && isnotnull(i_category#4)) + : : : : : +- *(4) FileScan parquet default.item[i_item_sk#27,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) + : : : : +- *(1) Project [cs_sold_date_sk#25, cs_call_center_sk#23, cs_item_sk#28, cs_sales_price#22] + : : : : +- *(1) Filter ((isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#25)) && isnotnull(cs_call_center_sk#23)) + : : : : +- *(1) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_call_center_sk#23,cs_item_sk#28,cs_sales_price#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_call_center_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [d_date_sk#26, d_year#6, d_moy#7] + : : : +- *(2) Filter ((((d_year#6 = 1999) || ((d_year#6 = 1998) && (d_moy#7 = 12))) || ((d_year#6 = 2000) && (d_moy#7 = 1))) && isnotnull(d_date_sk#26)) + : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#26,d_year#6,d_moy#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [cc_call_center_sk#24, cc_name#3] + : : +- *(3) Filter (isnotnull(cc_call_center_sk#24) && isnotnull(cc_name#3)) + : : +- *(3) FileScan parquet default.call_center[cc_call_center_sk#24,cc_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] + 1))) + : +- *(14) Project [i_category#17, i_brand#18, cc_name#19, sum_sales#10, rn#20] + : +- *(14) Filter isnotnull(rn#20) + : +- Window [rank(d_year#29, d_moy#30) windowspecdefinition(i_category#17, i_brand#18, cc_name#19, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#20], [i_category#17, i_brand#18, cc_name#19], [d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] + : +- *(13) Sort [i_category#17 ASC NULLS FIRST, i_brand#18 ASC NULLS FIRST, cc_name#19 ASC NULLS FIRST, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(i_category#17, i_brand#18, cc_name#19, 5) + : +- *(12) HashAggregate(keys=[i_category#17, i_brand#18, cc_name#19, d_year#29, d_moy#30], functions=[sum(UnscaledValue(cs_sales_price#22))]) + : +- ReusedExchange [i_category#17, i_brand#18, cc_name#19, d_year#29, d_moy#30, sum#31], Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, 5) + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] - 1))) + +- *(21) Project [i_category#13, i_brand#14, cc_name#15, sum_sales#11, rn#16] + +- *(21) Filter isnotnull(rn#16) + +- Window [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#13, i_brand#14, cc_name#15, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#16], [i_category#13, i_brand#14, cc_name#15], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] + +- *(20) Sort [i_category#13 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, cc_name#15 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 + +- ReusedExchange [i_category#13, i_brand#14, cc_name#15, d_year#32, d_moy#33, sum_sales#11], Exchange hashpartitioning(i_category#17, i_brand#18, cc_name#19, 5) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/simplified.txt index 432441bb7..3d71eb1d9 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/simplified.txt @@ -1,84 +1,77 @@ -TakeOrderedAndProject [sum_sales,avg_monthly_sales,cc_name,i_category,i_brand,d_year,d_moy,psum,nsum] - WholeStageCodegen (23) - Project [i_category,i_brand,cc_name,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] - BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] - Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] - BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] - Filter [d_year,avg_monthly_sales,sum_sales,rn] - InputAdapter - Window [d_year,d_moy,i_category,i_brand,cc_name] - WholeStageCodegen (8) - Sort [i_category,i_brand,cc_name,d_year,d_moy] - InputAdapter - Exchange [i_category,i_brand,cc_name] #1 - WholeStageCodegen (7) - Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales] - InputAdapter - Window [_w0,i_category,i_brand,cc_name,d_year] - WholeStageCodegen (6) - Sort [i_category,i_brand,cc_name,d_year] - InputAdapter - Exchange [i_category,i_brand,cc_name,d_year] #2 - WholeStageCodegen (5) - HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,_w0,sum] - InputAdapter - Exchange [i_category,i_brand,cc_name,d_year,d_moy] #3 - WholeStageCodegen (4) - HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,cs_sales_price] [sum,sum] - Project [i_brand,i_category,cs_sales_price,d_year,d_moy,cc_name] - BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] - Project [i_brand,i_category,cs_call_center_sk,cs_sales_price,d_year,d_moy] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [i_brand,i_category,cs_sold_date_sk,cs_call_center_sk,cs_sales_price] - BroadcastHashJoin [i_item_sk,cs_item_sk] - Filter [i_item_sk,i_category,i_brand] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand,i_category] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (1) - Filter [cs_item_sk,cs_sold_date_sk,cs_call_center_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_call_center_sk,cs_item_sk,cs_sales_price] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (3) - Filter [cc_call_center_sk,cc_name] - ColumnarToRow - InputAdapter - Scan parquet default.call_center [cc_call_center_sk,cc_name] +TakeOrderedAndProject [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,nsum,psum,sum_sales] + WholeStageCodegen + Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [cc_name,cc_name,i_brand,i_brand,i_category,i_category,rn,rn] + Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,rn,sum_sales,sum_sales] + BroadcastHashJoin [cc_name,cc_name,i_brand,i_brand,i_category,i_category,rn,rn] + Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,rn,sum_sales] + Filter [avg_monthly_sales,rn,sum_sales] + InputAdapter + Window [_w0,cc_name,d_year,i_brand,i_category] + WholeStageCodegen + Filter [d_year] + InputAdapter + Window [cc_name,d_moy,d_year,i_brand,i_category] + WholeStageCodegen + Sort [cc_name,d_moy,d_year,i_brand,i_category] + InputAdapter + Exchange [cc_name,i_brand,i_category] #1 + WholeStageCodegen + HashAggregate [cc_name,d_moy,d_year,i_brand,i_category,sum,sum(UnscaledValue(cs_sales_price))] [_w0,sum,sum(UnscaledValue(cs_sales_price)),sum_sales] + InputAdapter + Exchange [cc_name,d_moy,d_year,i_brand,i_category] #2 + WholeStageCodegen + HashAggregate [cc_name,cs_sales_price,d_moy,d_year,i_brand,i_category,sum,sum] [sum,sum] + Project [cc_name,cs_sales_price,d_moy,d_year,i_brand,i_category] + BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] + Project [cs_call_center_sk,cs_sales_price,d_moy,d_year,i_brand,i_category] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_call_center_sk,cs_sales_price,cs_sold_date_sk,i_brand,i_category] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [i_brand,i_category,i_item_sk] + Filter [i_brand,i_category,i_item_sk] + Scan parquet default.item [i_brand,i_category,i_item_sk] [i_brand,i_category,i_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + Filter [cs_call_center_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk,d_moy,d_year] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + Project [cc_call_center_sk,cc_name] + Filter [cc_call_center_sk,cc_name] + Scan parquet default.call_center [cc_call_center_sk,cc_name] [cc_call_center_sk,cc_name] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (15) - Project [i_category,i_brand,cc_name,sum_sales,rn] + BroadcastExchange #6 + WholeStageCodegen + Project [cc_name,i_brand,i_category,rn,sum_sales] Filter [rn] InputAdapter - Window [d_year,d_moy,i_category,i_brand,cc_name] - WholeStageCodegen (14) - Sort [i_category,i_brand,cc_name,d_year,d_moy] + Window [cc_name,d_moy,d_year,i_brand,i_category] + WholeStageCodegen + Sort [cc_name,d_moy,d_year,i_brand,i_category] InputAdapter - Exchange [i_category,i_brand,cc_name] #8 - WholeStageCodegen (13) - HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,sum] + Exchange [cc_name,i_brand,i_category] #7 + WholeStageCodegen + HashAggregate [cc_name,d_moy,d_year,i_brand,i_category,sum,sum(UnscaledValue(cs_sales_price))] [sum,sum(UnscaledValue(cs_sales_price)),sum_sales] InputAdapter - ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #3 + ReusedExchange [cc_name,d_moy,d_year,i_brand,i_category,sum] [cc_name,d_moy,d_year,i_brand,i_category,sum] #2 InputAdapter - BroadcastExchange #9 - WholeStageCodegen (22) - Project [i_category,i_brand,cc_name,sum_sales,rn] + BroadcastExchange #8 + WholeStageCodegen + Project [cc_name,i_brand,i_category,rn,sum_sales] Filter [rn] InputAdapter - Window [d_year,d_moy,i_category,i_brand,cc_name] - WholeStageCodegen (21) - Sort [i_category,i_brand,cc_name,d_year,d_moy] + Window [cc_name,d_moy,d_year,i_brand,i_category] + WholeStageCodegen + Sort [cc_name,d_moy,d_year,i_brand,i_category] InputAdapter - ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #8 + ReusedExchange [cc_name,d_moy,d_year,i_brand,i_category,sum_sales] [cc_name,d_moy,d_year,i_brand,i_category,sum_sales] #7 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt index 40a6836ae..81c7f9d64 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt @@ -1,477 +1,101 @@ == Physical Plan == -TakeOrderedAndProject (79) -+- * Project (78) - +- * BroadcastHashJoin Inner BuildRight (77) - :- * Project (52) - : +- * BroadcastHashJoin Inner BuildRight (51) - : :- * Filter (26) - : : +- * HashAggregate (25) - : : +- Exchange (24) - : : +- * HashAggregate (23) - : : +- * Project (22) - : : +- * BroadcastHashJoin Inner BuildRight (21) - : : :- * Project (9) - : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.item (4) - : : +- BroadcastExchange (20) - : : +- * Project (19) - : : +- * BroadcastHashJoin LeftSemi BuildRight (18) - : : :- * Filter (12) - : : : +- * ColumnarToRow (11) - : : : +- Scan parquet default.date_dim (10) - : : +- BroadcastExchange (17) - : : +- * Project (16) - : : +- * Filter (15) - : : +- * ColumnarToRow (14) - : : +- Scan parquet default.date_dim (13) - : +- BroadcastExchange (50) - : +- * Filter (49) - : +- * HashAggregate (48) - : +- Exchange (47) - : +- * HashAggregate (46) - : +- * Project (45) - : +- * BroadcastHashJoin Inner BuildRight (44) - : :- * Project (32) - : : +- * BroadcastHashJoin Inner BuildRight (31) - : : :- * Filter (29) - : : : +- * ColumnarToRow (28) - : : : +- Scan parquet default.catalog_sales (27) - : : +- ReusedExchange (30) - : +- BroadcastExchange (43) - : +- * Project (42) - : +- * BroadcastHashJoin LeftSemi BuildRight (41) - : :- * Filter (35) - : : +- * ColumnarToRow (34) - : : +- Scan parquet default.date_dim (33) - : +- BroadcastExchange (40) - : +- * Project (39) - : +- * Filter (38) - : +- * ColumnarToRow (37) - : +- Scan parquet default.date_dim (36) - +- BroadcastExchange (76) - +- * Filter (75) - +- * HashAggregate (74) - +- Exchange (73) - +- * HashAggregate (72) - +- * Project (71) - +- * BroadcastHashJoin Inner BuildRight (70) - :- * Project (58) - : +- * BroadcastHashJoin Inner BuildRight (57) - : :- * Filter (55) - : : +- * ColumnarToRow (54) - : : +- Scan parquet default.web_sales (53) - : +- ReusedExchange (56) - +- BroadcastExchange (69) - +- * Project (68) - +- * BroadcastHashJoin LeftSemi BuildRight (67) - :- * Filter (61) - : +- * ColumnarToRow (60) - : +- Scan parquet default.date_dim (59) - +- BroadcastExchange (66) - +- * Project (65) - +- * Filter (64) - +- * ColumnarToRow (63) - +- Scan parquet default.date_dim (62) - - -(1) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] - -(3) Filter [codegen id : 4] -Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] -Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) - -(4) Scan parquet default.item -Output [2]: [i_item_sk#4, i_item_id#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [i_item_sk#4, i_item_id#5] - -(6) Filter [codegen id : 1] -Input [2]: [i_item_sk#4, i_item_id#5] -Condition : (isnotnull(i_item_sk#4) AND isnotnull(i_item_id#5)) - -(7) BroadcastExchange -Input [2]: [i_item_sk#4, i_item_id#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] - -(8) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#4] -Join condition: None - -(9) Project [codegen id : 4] -Output [3]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_item_id#5] -Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#4, i_item_id#5] - -(10) Scan parquet default.date_dim -Output [2]: [d_date_sk#7, d_date#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#7, d_date#8] - -(12) Filter [codegen id : 3] -Input [2]: [d_date_sk#7, d_date#8] -Condition : isnotnull(d_date_sk#7) - -(13) Scan parquet default.date_dim -Output [2]: [d_date#8, d_week_seq#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_week_seq)] -ReadSchema: struct - -(14) ColumnarToRow [codegen id : 2] -Input [2]: [d_date#8, d_week_seq#9] - -(15) Filter [codegen id : 2] -Input [2]: [d_date#8, d_week_seq#9] -Condition : (isnotnull(d_week_seq#9) AND (d_week_seq#9 = Subquery scalar-subquery#10, [id=#11])) - -(16) Project [codegen id : 2] -Output [1]: [d_date#8 AS d_date#8#12] -Input [2]: [d_date#8, d_week_seq#9] - -(17) BroadcastExchange -Input [1]: [d_date#8#12] -Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#13] - -(18) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [d_date#8] -Right keys [1]: [d_date#8#12] -Join condition: None - -(19) Project [codegen id : 3] -Output [1]: [d_date_sk#7] -Input [2]: [d_date_sk#7, d_date#8] - -(20) BroadcastExchange -Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] - -(21) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#7] -Join condition: None - -(22) Project [codegen id : 4] -Output [2]: [ss_ext_sales_price#3, i_item_id#5] -Input [4]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_item_id#5, d_date_sk#7] - -(23) HashAggregate [codegen id : 4] -Input [2]: [ss_ext_sales_price#3, i_item_id#5] -Keys [1]: [i_item_id#5] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum#15] -Results [2]: [i_item_id#5, sum#16] - -(24) Exchange -Input [2]: [i_item_id#5, sum#16] -Arguments: hashpartitioning(i_item_id#5, 5), true, [id=#17] - -(25) HashAggregate [codegen id : 15] -Input [2]: [i_item_id#5, sum#16] -Keys [1]: [i_item_id#5] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#18] -Results [2]: [i_item_id#5 AS item_id#19, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#18,17,2) AS ss_item_rev#20] - -(26) Filter [codegen id : 15] -Input [2]: [item_id#19, ss_item_rev#20] -Condition : isnotnull(ss_item_rev#20) - -(27) Scan parquet default.catalog_sales -Output [3]: [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#23] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(28) ColumnarToRow [codegen id : 8] -Input [3]: [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#23] - -(29) Filter [codegen id : 8] -Input [3]: [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#23] -Condition : (isnotnull(cs_item_sk#22) AND isnotnull(cs_sold_date_sk#21)) - -(30) ReusedExchange [Reuses operator id: 7] -Output [2]: [i_item_sk#4, i_item_id#5] - -(31) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_item_sk#22] -Right keys [1]: [i_item_sk#4] -Join condition: None - -(32) Project [codegen id : 8] -Output [3]: [cs_sold_date_sk#21, cs_ext_sales_price#23, i_item_id#5] -Input [5]: [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#23, i_item_sk#4, i_item_id#5] - -(33) Scan parquet default.date_dim -Output [2]: [d_date_sk#7, d_date#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date_sk)] -ReadSchema: struct - -(34) ColumnarToRow [codegen id : 7] -Input [2]: [d_date_sk#7, d_date#8] - -(35) Filter [codegen id : 7] -Input [2]: [d_date_sk#7, d_date#8] -Condition : isnotnull(d_date_sk#7) - -(36) Scan parquet default.date_dim -Output [2]: [d_date#8, d_week_seq#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_week_seq)] -ReadSchema: struct - -(37) ColumnarToRow [codegen id : 6] -Input [2]: [d_date#8, d_week_seq#9] - -(38) Filter [codegen id : 6] -Input [2]: [d_date#8, d_week_seq#9] -Condition : (isnotnull(d_week_seq#9) AND (d_week_seq#9 = ReusedSubquery Subquery scalar-subquery#10, [id=#11])) - -(39) Project [codegen id : 6] -Output [1]: [d_date#8 AS d_date#8#24] -Input [2]: [d_date#8, d_week_seq#9] - -(40) BroadcastExchange -Input [1]: [d_date#8#24] -Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#25] - -(41) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [d_date#8] -Right keys [1]: [d_date#8#24] -Join condition: None - -(42) Project [codegen id : 7] -Output [1]: [d_date_sk#7] -Input [2]: [d_date_sk#7, d_date#8] - -(43) BroadcastExchange -Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] - -(44) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_sold_date_sk#21] -Right keys [1]: [d_date_sk#7] -Join condition: None - -(45) Project [codegen id : 8] -Output [2]: [cs_ext_sales_price#23, i_item_id#5] -Input [4]: [cs_sold_date_sk#21, cs_ext_sales_price#23, i_item_id#5, d_date_sk#7] - -(46) HashAggregate [codegen id : 8] -Input [2]: [cs_ext_sales_price#23, i_item_id#5] -Keys [1]: [i_item_id#5] -Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#23))] -Aggregate Attributes [1]: [sum#27] -Results [2]: [i_item_id#5, sum#28] - -(47) Exchange -Input [2]: [i_item_id#5, sum#28] -Arguments: hashpartitioning(i_item_id#5, 5), true, [id=#29] - -(48) HashAggregate [codegen id : 9] -Input [2]: [i_item_id#5, sum#28] -Keys [1]: [i_item_id#5] -Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#23))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#23))#30] -Results [2]: [i_item_id#5 AS item_id#31, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#23))#30,17,2) AS cs_item_rev#32] - -(49) Filter [codegen id : 9] -Input [2]: [item_id#31, cs_item_rev#32] -Condition : isnotnull(cs_item_rev#32) - -(50) BroadcastExchange -Input [2]: [item_id#31, cs_item_rev#32] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#33] - -(51) BroadcastHashJoin [codegen id : 15] -Left keys [1]: [item_id#19] -Right keys [1]: [item_id#31] -Join condition: ((((cast(ss_item_rev#20 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#32)), DecimalType(19,3), true)) AND (cast(ss_item_rev#20 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#32)), DecimalType(20,3), true))) AND (cast(cs_item_rev#32 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#20)), DecimalType(19,3), true))) AND (cast(cs_item_rev#32 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#20)), DecimalType(20,3), true))) - -(52) Project [codegen id : 15] -Output [3]: [item_id#19, ss_item_rev#20, cs_item_rev#32] -Input [4]: [item_id#19, ss_item_rev#20, item_id#31, cs_item_rev#32] - -(53) Scan parquet default.web_sales -Output [3]: [ws_sold_date_sk#34, ws_item_sk#35, ws_ext_sales_price#36] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(54) ColumnarToRow [codegen id : 13] -Input [3]: [ws_sold_date_sk#34, ws_item_sk#35, ws_ext_sales_price#36] - -(55) Filter [codegen id : 13] -Input [3]: [ws_sold_date_sk#34, ws_item_sk#35, ws_ext_sales_price#36] -Condition : (isnotnull(ws_item_sk#35) AND isnotnull(ws_sold_date_sk#34)) - -(56) ReusedExchange [Reuses operator id: 7] -Output [2]: [i_item_sk#4, i_item_id#5] - -(57) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ws_item_sk#35] -Right keys [1]: [i_item_sk#4] -Join condition: None - -(58) Project [codegen id : 13] -Output [3]: [ws_sold_date_sk#34, ws_ext_sales_price#36, i_item_id#5] -Input [5]: [ws_sold_date_sk#34, ws_item_sk#35, ws_ext_sales_price#36, i_item_sk#4, i_item_id#5] - -(59) Scan parquet default.date_dim -Output [2]: [d_date_sk#7, d_date#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date_sk)] -ReadSchema: struct - -(60) ColumnarToRow [codegen id : 12] -Input [2]: [d_date_sk#7, d_date#8] - -(61) Filter [codegen id : 12] -Input [2]: [d_date_sk#7, d_date#8] -Condition : isnotnull(d_date_sk#7) - -(62) Scan parquet default.date_dim -Output [2]: [d_date#8, d_week_seq#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_week_seq)] -ReadSchema: struct - -(63) ColumnarToRow [codegen id : 11] -Input [2]: [d_date#8, d_week_seq#9] - -(64) Filter [codegen id : 11] -Input [2]: [d_date#8, d_week_seq#9] -Condition : (isnotnull(d_week_seq#9) AND (d_week_seq#9 = ReusedSubquery Subquery scalar-subquery#10, [id=#11])) - -(65) Project [codegen id : 11] -Output [1]: [d_date#8 AS d_date#8#37] -Input [2]: [d_date#8, d_week_seq#9] - -(66) BroadcastExchange -Input [1]: [d_date#8#37] -Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#38] - -(67) BroadcastHashJoin [codegen id : 12] -Left keys [1]: [d_date#8] -Right keys [1]: [d_date#8#37] -Join condition: None - -(68) Project [codegen id : 12] -Output [1]: [d_date_sk#7] -Input [2]: [d_date_sk#7, d_date#8] - -(69) BroadcastExchange -Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#39] - -(70) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ws_sold_date_sk#34] -Right keys [1]: [d_date_sk#7] -Join condition: None - -(71) Project [codegen id : 13] -Output [2]: [ws_ext_sales_price#36, i_item_id#5] -Input [4]: [ws_sold_date_sk#34, ws_ext_sales_price#36, i_item_id#5, d_date_sk#7] - -(72) HashAggregate [codegen id : 13] -Input [2]: [ws_ext_sales_price#36, i_item_id#5] -Keys [1]: [i_item_id#5] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#36))] -Aggregate Attributes [1]: [sum#40] -Results [2]: [i_item_id#5, sum#41] - -(73) Exchange -Input [2]: [i_item_id#5, sum#41] -Arguments: hashpartitioning(i_item_id#5, 5), true, [id=#42] - -(74) HashAggregate [codegen id : 14] -Input [2]: [i_item_id#5, sum#41] -Keys [1]: [i_item_id#5] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#36))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#36))#43] -Results [2]: [i_item_id#5 AS item_id#44, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#36))#43,17,2) AS ws_item_rev#45] - -(75) Filter [codegen id : 14] -Input [2]: [item_id#44, ws_item_rev#45] -Condition : isnotnull(ws_item_rev#45) - -(76) BroadcastExchange -Input [2]: [item_id#44, ws_item_rev#45] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#46] - -(77) BroadcastHashJoin [codegen id : 15] -Left keys [1]: [item_id#19] -Right keys [1]: [item_id#44] -Join condition: ((((((((cast(ss_item_rev#20 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#45)), DecimalType(19,3), true)) AND (cast(ss_item_rev#20 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#45)), DecimalType(20,3), true))) AND (cast(cs_item_rev#32 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#45)), DecimalType(19,3), true))) AND (cast(cs_item_rev#32 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#45)), DecimalType(20,3), true))) AND (cast(ws_item_rev#45 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#20)), DecimalType(19,3), true))) AND (cast(ws_item_rev#45 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#20)), DecimalType(20,3), true))) AND (cast(ws_item_rev#45 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#32)), DecimalType(19,3), true))) AND (cast(ws_item_rev#45 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#32)), DecimalType(20,3), true))) - -(78) Project [codegen id : 15] -Output [8]: [item_id#19, ss_item_rev#20, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(18,2))) + promote_precision(cast(cs_item_rev#32 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#45 as decimal(19,2)))), DecimalType(19,2), true))), DecimalType(38,21), true)) / 3.000000000000000000000), DecimalType(38,21), true)) * 100.000000000000000000000), DecimalType(38,17), true) AS ss_dev#47, cs_item_rev#32, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(cs_item_rev#32 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(18,2))) + promote_precision(cast(cs_item_rev#32 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#45 as decimal(19,2)))), DecimalType(19,2), true))), DecimalType(38,21), true)) / 3.000000000000000000000), DecimalType(38,21), true)) * 100.000000000000000000000), DecimalType(38,17), true) AS cs_dev#48, ws_item_rev#45, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ws_item_rev#45 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(18,2))) + promote_precision(cast(cs_item_rev#32 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#45 as decimal(19,2)))), DecimalType(19,2), true))), DecimalType(38,21), true)) / 3.000000000000000000000), DecimalType(38,21), true)) * 100.000000000000000000000), DecimalType(38,17), true) AS ws_dev#49, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(18,2))) + promote_precision(cast(cs_item_rev#32 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#45 as decimal(19,2)))), DecimalType(19,2), true)) / 3.00), DecimalType(23,6), true) AS average#50] -Input [5]: [item_id#19, ss_item_rev#20, cs_item_rev#32, item_id#44, ws_item_rev#45] - -(79) TakeOrderedAndProject -Input [8]: [item_id#19, ss_item_rev#20, ss_dev#47, cs_item_rev#32, cs_dev#48, ws_item_rev#45, ws_dev#49, average#50] -Arguments: 100, [item_id#19 ASC NULLS FIRST, ss_item_rev#20 ASC NULLS FIRST], [item_id#19, ss_item_rev#20, ss_dev#47, cs_item_rev#32, cs_dev#48, ws_item_rev#45, ws_dev#49, average#50] - -===== Subqueries ===== - -Subquery:1 Hosting operator id = 15 Hosting Expression = Subquery scalar-subquery#10, [id=#11] -* Project (83) -+- * Filter (82) - +- * ColumnarToRow (81) - +- Scan parquet default.date_dim (80) - - -(80) Scan parquet default.date_dim -Output [2]: [d_date#8, d_week_seq#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), EqualTo(d_date,2000-01-03)] -ReadSchema: struct - -(81) ColumnarToRow [codegen id : 1] -Input [2]: [d_date#8, d_week_seq#9] - -(82) Filter [codegen id : 1] -Input [2]: [d_date#8, d_week_seq#9] -Condition : (isnotnull(d_date#8) AND (d_date#8 = 10959)) - -(83) Project [codegen id : 1] -Output [1]: [d_week_seq#9] -Input [2]: [d_date#8, d_week_seq#9] - -Subquery:2 Hosting operator id = 38 Hosting Expression = ReusedSubquery Subquery scalar-subquery#10, [id=#11] - -Subquery:3 Hosting operator id = 64 Hosting Expression = ReusedSubquery Subquery scalar-subquery#10, [id=#11] - - +TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,ss_item_rev#2 ASC NULLS FIRST], output=[item_id#1,ss_item_rev#2,ss_dev#3,cs_item_rev#4,cs_dev#5,ws_item_rev#6,ws_dev#7,average#8]) ++- *(15) Project [item_id#1, ss_item_rev#2, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ss_dev#3, cs_item_rev#4, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(cs_item_rev#4 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS cs_dev#5, ws_item_rev#6, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ws_item_rev#6 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ws_dev#7, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2))) / 3.00), DecimalType(23,6)) AS average#8] + +- *(15) BroadcastHashJoin [item_id#1], [item_id#9], Inner, BuildRight, ((((((((cast(ss_item_rev#2 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#6)), DecimalType(19,3))) && (cast(ss_item_rev#2 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#6)), DecimalType(20,3)))) && (cast(cs_item_rev#4 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#6)), DecimalType(19,3)))) && (cast(cs_item_rev#4 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#6)), DecimalType(20,3)))) && (cast(ws_item_rev#6 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#2)), DecimalType(19,3)))) && (cast(ws_item_rev#6 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#2)), DecimalType(20,3)))) && (cast(ws_item_rev#6 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#4)), DecimalType(19,3)))) && (cast(ws_item_rev#6 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#4)), DecimalType(20,3)))) + :- *(15) Project [item_id#1, ss_item_rev#2, cs_item_rev#4] + : +- *(15) BroadcastHashJoin [item_id#1], [item_id#10], Inner, BuildRight, ((((cast(ss_item_rev#2 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#4)), DecimalType(19,3))) && (cast(ss_item_rev#2 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#4)), DecimalType(20,3)))) && (cast(cs_item_rev#4 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#2)), DecimalType(19,3)))) && (cast(cs_item_rev#4 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#2)), DecimalType(20,3)))) + : :- *(15) Filter isnotnull(ss_item_rev#2) + : : +- *(15) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(ss_ext_sales_price#12))]) + : : +- Exchange hashpartitioning(i_item_id#11, 5) + : : +- *(4) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#12))]) + : : +- *(4) Project [ss_ext_sales_price#12, i_item_id#11] + : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + : : :- *(4) Project [ss_sold_date_sk#13, ss_ext_sales_price#12, i_item_id#11] + : : : +- *(4) BroadcastHashJoin [ss_item_sk#15], [i_item_sk#16], Inner, BuildRight + : : : :- *(4) Project [ss_sold_date_sk#13, ss_item_sk#15, ss_ext_sales_price#12] + : : : : +- *(4) Filter (isnotnull(ss_item_sk#15) && isnotnull(ss_sold_date_sk#13)) + : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_item_sk#15,ss_ext_sales_price#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [i_item_sk#16, i_item_id#11] + : : : +- *(1) Filter (isnotnull(i_item_sk#16) && isnotnull(i_item_id#11)) + : : : +- *(1) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [d_date_sk#14] + : : +- *(3) BroadcastHashJoin [d_date#17], [d_date#17#18], LeftSemi, BuildRight + : : :- *(3) Project [d_date_sk#14, d_date#17] + : : : +- *(3) Filter isnotnull(d_date_sk#14) + : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) + : : +- *(2) Project [d_date#17 AS d_date#17#18] + : : +- *(2) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12128)) + : : : +- Subquery subquery12128 + : : : +- *(1) Project [d_week_seq#19] + : : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) + : : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + : : +- *(2) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct + : : +- Subquery subquery12128 + : : +- *(1) Project [d_week_seq#19] + : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) + : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- *(9) Filter isnotnull(cs_item_rev#4) + : +- *(9) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(cs_ext_sales_price#20))]) + : +- Exchange hashpartitioning(i_item_id#11, 5) + : +- *(8) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#20))]) + : +- *(8) Project [cs_ext_sales_price#20, i_item_id#11] + : +- *(8) BroadcastHashJoin [cs_sold_date_sk#21], [d_date_sk#14], Inner, BuildRight + : :- *(8) Project [cs_sold_date_sk#21, cs_ext_sales_price#20, i_item_id#11] + : : +- *(8) BroadcastHashJoin [cs_item_sk#22], [i_item_sk#16], Inner, BuildRight + : : :- *(8) Project [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#20] + : : : +- *(8) Filter (isnotnull(cs_item_sk#22) && isnotnull(cs_sold_date_sk#21)) + : : : +- *(8) FileScan parquet default.catalog_sales[cs_sold_date_sk#21,cs_item_sk#22,cs_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [i_item_sk#16, i_item_id#11], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [d_date_sk#14] + : +- *(7) BroadcastHashJoin [d_date#17], [d_date#17#23], LeftSemi, BuildRight + : :- *(7) Project [d_date_sk#14, d_date#17] + : : +- *(7) Filter isnotnull(d_date_sk#14) + : : +- *(7) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) + : +- *(6) Project [d_date#17 AS d_date#17#23] + : +- *(6) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12132)) + : : +- Subquery subquery12132 + : : +- *(1) Project [d_week_seq#19] + : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) + : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + : +- *(6) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct + : +- Subquery subquery12132 + : +- *(1) Project [d_week_seq#19] + : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) + : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- *(14) Filter isnotnull(ws_item_rev#6) + +- *(14) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(ws_ext_sales_price#24))]) + +- Exchange hashpartitioning(i_item_id#11, 5) + +- *(13) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#24))]) + +- *(13) Project [ws_ext_sales_price#24, i_item_id#11] + +- *(13) BroadcastHashJoin [ws_sold_date_sk#25], [d_date_sk#14], Inner, BuildRight + :- *(13) Project [ws_sold_date_sk#25, ws_ext_sales_price#24, i_item_id#11] + : +- *(13) BroadcastHashJoin [ws_item_sk#26], [i_item_sk#16], Inner, BuildRight + : :- *(13) Project [ws_sold_date_sk#25, ws_item_sk#26, ws_ext_sales_price#24] + : : +- *(13) Filter (isnotnull(ws_item_sk#26) && isnotnull(ws_sold_date_sk#25)) + : : +- *(13) FileScan parquet default.web_sales[ws_sold_date_sk#25,ws_item_sk#26,ws_ext_sales_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : +- ReusedExchange [i_item_sk#16, i_item_id#11], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(12) Project [d_date_sk#14] + +- *(12) BroadcastHashJoin [d_date#17], [d_date#17#27], LeftSemi, BuildRight + :- *(12) Project [d_date_sk#14, d_date#17] + : +- *(12) Filter isnotnull(d_date_sk#14) + : +- *(12) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) + +- *(11) Project [d_date#17 AS d_date#17#27] + +- *(11) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12136)) + : +- Subquery subquery12136 + : +- *(1) Project [d_week_seq#19] + : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) + : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + +- *(11) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct + +- Subquery subquery12136 + +- *(1) Project [d_week_seq#19] + +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) + +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/simplified.txt index 5081efe94..ea45e8665 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/simplified.txt @@ -1,125 +1,133 @@ -TakeOrderedAndProject [item_id,ss_item_rev,ss_dev,cs_item_rev,cs_dev,ws_item_rev,ws_dev,average] - WholeStageCodegen (15) - Project [item_id,ss_item_rev,cs_item_rev,ws_item_rev] - BroadcastHashJoin [item_id,item_id,ss_item_rev,ws_item_rev,cs_item_rev] - Project [item_id,ss_item_rev,cs_item_rev] - BroadcastHashJoin [item_id,item_id,ss_item_rev,cs_item_rev] +TakeOrderedAndProject [average,cs_dev,cs_item_rev,item_id,ss_dev,ss_item_rev,ws_dev,ws_item_rev] + WholeStageCodegen + Project [cs_item_rev,item_id,ss_item_rev,ws_item_rev] + BroadcastHashJoin [cs_item_rev,item_id,item_id,ss_item_rev,ws_item_rev] + Project [cs_item_rev,item_id,ss_item_rev] + BroadcastHashJoin [cs_item_rev,item_id,item_id,ss_item_rev] Filter [ss_item_rev] - HashAggregate [i_item_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),item_id,ss_item_rev,sum] + HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [item_id,ss_item_rev,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter Exchange [i_item_id] #1 - WholeStageCodegen (4) - HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price,i_item_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_ext_sales_price,i_item_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + WholeStageCodegen + HashAggregate [i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [i_item_id,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_item_id,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [i_item_sk,i_item_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id] + WholeStageCodegen + Project [i_item_id,i_item_sk] + Filter [i_item_id,i_item_sk] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (3) + WholeStageCodegen Project [d_date_sk] BroadcastHashJoin [d_date,d_date] - Filter [d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Project [d_date,d_date_sk] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date] Filter [d_week_seq] Subquery #1 - WholeStageCodegen (1) + WholeStageCodegen Project [d_week_seq] Filter [d_date] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date,d_week_seq] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date,d_week_seq] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + Subquery #1 + WholeStageCodegen + Project [d_week_seq] + Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] InputAdapter BroadcastExchange #5 - WholeStageCodegen (9) + WholeStageCodegen Filter [cs_item_rev] - HashAggregate [i_item_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),item_id,cs_item_rev,sum] + HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [cs_item_rev,item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] InputAdapter Exchange [i_item_id] #6 - WholeStageCodegen (8) - HashAggregate [i_item_id,cs_ext_sales_price] [sum,sum] + WholeStageCodegen + HashAggregate [cs_ext_sales_price,i_item_id,sum,sum] [sum,sum] Project [cs_ext_sales_price,i_item_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_ext_sales_price,i_item_id] + Project [cs_ext_sales_price,cs_sold_date_sk,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] InputAdapter - ReusedExchange [i_item_sk,i_item_id] #2 + ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #2 InputAdapter BroadcastExchange #7 - WholeStageCodegen (7) + WholeStageCodegen Project [d_date_sk] BroadcastHashJoin [d_date,d_date] - Filter [d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Project [d_date,d_date_sk] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #8 - WholeStageCodegen (6) + WholeStageCodegen Project [d_date] Filter [d_week_seq] - ReusedSubquery [d_week_seq] #1 - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date,d_week_seq] + Subquery #2 + WholeStageCodegen + Project [d_week_seq] + Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + Subquery #2 + WholeStageCodegen + Project [d_week_seq] + Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] InputAdapter BroadcastExchange #9 - WholeStageCodegen (14) + WholeStageCodegen Filter [ws_item_rev] - HashAggregate [i_item_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),item_id,ws_item_rev,sum] + HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [item_id,sum,sum(UnscaledValue(ws_ext_sales_price)),ws_item_rev] InputAdapter Exchange [i_item_id] #10 - WholeStageCodegen (13) - HashAggregate [i_item_id,ws_ext_sales_price] [sum,sum] - Project [ws_ext_sales_price,i_item_id] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_ext_sales_price,i_item_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Filter [ws_item_sk,ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum,ws_ext_sales_price] [sum,sum] + Project [i_item_id,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_item_id,ws_ext_sales_price,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] InputAdapter - ReusedExchange [i_item_sk,i_item_id] #2 + ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #2 InputAdapter BroadcastExchange #11 - WholeStageCodegen (12) + WholeStageCodegen Project [d_date_sk] BroadcastHashJoin [d_date,d_date] - Filter [d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Project [d_date,d_date_sk] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #12 - WholeStageCodegen (11) + WholeStageCodegen Project [d_date] Filter [d_week_seq] - ReusedSubquery [d_week_seq] #1 - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date,d_week_seq] + Subquery #3 + WholeStageCodegen + Project [d_week_seq] + Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + Subquery #3 + WholeStageCodegen + Project [d_week_seq] + Filter [d_date] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/explain.txt index 6edd0e4b0..9236b71dd 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/explain.txt @@ -1,249 +1,43 @@ == Physical Plan == -TakeOrderedAndProject (44) -+- * Project (43) - +- * BroadcastHashJoin Inner BuildRight (42) - :- * Project (25) - : +- * BroadcastHashJoin Inner BuildRight (24) - : :- * Project (18) - : : +- * BroadcastHashJoin Inner BuildRight (17) - : : :- * HashAggregate (12) - : : : +- Exchange (11) - : : : +- * HashAggregate (10) - : : : +- * Project (9) - : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (16) - : : +- * Filter (15) - : : +- * ColumnarToRow (14) - : : +- Scan parquet default.store (13) - : +- BroadcastExchange (23) - : +- * Project (22) - : +- * Filter (21) - : +- * ColumnarToRow (20) - : +- Scan parquet default.date_dim (19) - +- BroadcastExchange (41) - +- * Project (40) - +- * BroadcastHashJoin Inner BuildRight (39) - :- * Project (33) - : +- * BroadcastHashJoin Inner BuildRight (32) - : :- * HashAggregate (27) - : : +- ReusedExchange (26) - : +- BroadcastExchange (31) - : +- * Filter (30) - : +- * ColumnarToRow (29) - : +- Scan parquet default.store (28) - +- BroadcastExchange (38) - +- * Project (37) - +- * Filter (36) - +- * ColumnarToRow (35) - +- Scan parquet default.date_dim (34) - - -(1) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 2] -Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] - -(3) Filter [codegen id : 2] -Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] -Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) - -(4) Scan parquet default.date_dim -Output [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] - -(6) Filter [codegen id : 1] -Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] -Condition : (isnotnull(d_date_sk#4) AND isnotnull(d_week_seq#5)) - -(7) BroadcastExchange -Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] - -(8) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#4] -Join condition: None - -(9) Project [codegen id : 2] -Output [4]: [ss_store_sk#2, ss_sales_price#3, d_week_seq#5, d_day_name#6] -Input [6]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3, d_date_sk#4, d_week_seq#5, d_day_name#6] - -(10) HashAggregate [codegen id : 2] -Input [4]: [ss_store_sk#2, ss_sales_price#3, d_week_seq#5, d_day_name#6] -Keys [2]: [d_week_seq#5, ss_store_sk#2] -Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] -Aggregate Attributes [7]: [sum#8, sum#9, sum#10, sum#11, sum#12, sum#13, sum#14] -Results [9]: [d_week_seq#5, ss_store_sk#2, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] - -(11) Exchange -Input [9]: [d_week_seq#5, ss_store_sk#2, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] -Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#2, 5), true, [id=#22] - -(12) HashAggregate [codegen id : 10] -Input [9]: [d_week_seq#5, ss_store_sk#2, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] -Keys [2]: [d_week_seq#5, ss_store_sk#2] -Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#29] -Results [9]: [d_week_seq#5, ss_store_sk#2, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#23,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#24,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#25,17,2) AS tue_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#26,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#27,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#28,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#29,17,2) AS sat_sales#36] - -(13) Scan parquet default.store -Output [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] -ReadSchema: struct - -(14) ColumnarToRow [codegen id : 3] -Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] - -(15) Filter [codegen id : 3] -Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] -Condition : (isnotnull(s_store_sk#37) AND isnotnull(s_store_id#38)) - -(16) BroadcastExchange -Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#40] - -(17) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_store_sk#2] -Right keys [1]: [s_store_sk#37] -Join condition: None - -(18) Project [codegen id : 10] -Output [10]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39] -Input [12]: [d_week_seq#5, ss_store_sk#2, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#37, s_store_id#38, s_store_name#39] - -(19) Scan parquet default.date_dim -Output [2]: [d_month_seq#41, d_week_seq#42] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_week_seq)] -ReadSchema: struct - -(20) ColumnarToRow [codegen id : 4] -Input [2]: [d_month_seq#41, d_week_seq#42] - -(21) Filter [codegen id : 4] -Input [2]: [d_month_seq#41, d_week_seq#42] -Condition : (((isnotnull(d_month_seq#41) AND (d_month_seq#41 >= 1212)) AND (d_month_seq#41 <= 1223)) AND isnotnull(d_week_seq#42)) - -(22) Project [codegen id : 4] -Output [1]: [d_week_seq#42] -Input [2]: [d_month_seq#41, d_week_seq#42] - -(23) BroadcastExchange -Input [1]: [d_week_seq#42] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#43] - -(24) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [d_week_seq#5] -Right keys [1]: [d_week_seq#42] -Join condition: None - -(25) Project [codegen id : 10] -Output [10]: [s_store_name#39 AS s_store_name1#44, d_week_seq#5 AS d_week_seq1#45, s_store_id#38 AS s_store_id1#46, sun_sales#30 AS sun_sales1#47, mon_sales#31 AS mon_sales1#48, tue_sales#32 AS tue_sales1#49, wed_sales#33 AS wed_sales1#50, thu_sales#34 AS thu_sales1#51, fri_sales#35 AS fri_sales1#52, sat_sales#36 AS sat_sales1#53] -Input [11]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39, d_week_seq#42] - -(26) ReusedExchange [Reuses operator id: 11] -Output [9]: [d_week_seq#5, ss_store_sk#2, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59, sum#60] - -(27) HashAggregate [codegen id : 9] -Input [9]: [d_week_seq#5, ss_store_sk#2, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59, sum#60] -Keys [2]: [d_week_seq#5, ss_store_sk#2] -Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#61, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#62, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#63, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#64, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#65, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#66, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#67] -Results [9]: [d_week_seq#5, ss_store_sk#2, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#61,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#62,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#63,17,2) AS tue_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#64,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#65,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#66,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#67,17,2) AS sat_sales#36] - -(28) Scan parquet default.store -Output [2]: [s_store_sk#37, s_store_id#38] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] -ReadSchema: struct - -(29) ColumnarToRow [codegen id : 7] -Input [2]: [s_store_sk#37, s_store_id#38] - -(30) Filter [codegen id : 7] -Input [2]: [s_store_sk#37, s_store_id#38] -Condition : (isnotnull(s_store_sk#37) AND isnotnull(s_store_id#38)) - -(31) BroadcastExchange -Input [2]: [s_store_sk#37, s_store_id#38] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#68] - -(32) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_store_sk#2] -Right keys [1]: [s_store_sk#37] -Join condition: None - -(33) Project [codegen id : 9] -Output [9]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38] -Input [11]: [d_week_seq#5, ss_store_sk#2, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#37, s_store_id#38] - -(34) Scan parquet default.date_dim -Output [2]: [d_month_seq#69, d_week_seq#70] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1224), LessThanOrEqual(d_month_seq,1235), IsNotNull(d_week_seq)] -ReadSchema: struct - -(35) ColumnarToRow [codegen id : 8] -Input [2]: [d_month_seq#69, d_week_seq#70] - -(36) Filter [codegen id : 8] -Input [2]: [d_month_seq#69, d_week_seq#70] -Condition : (((isnotnull(d_month_seq#69) AND (d_month_seq#69 >= 1224)) AND (d_month_seq#69 <= 1235)) AND isnotnull(d_week_seq#70)) - -(37) Project [codegen id : 8] -Output [1]: [d_week_seq#70] -Input [2]: [d_month_seq#69, d_week_seq#70] - -(38) BroadcastExchange -Input [1]: [d_week_seq#70] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#71] - -(39) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [d_week_seq#5] -Right keys [1]: [d_week_seq#70] -Join condition: None - -(40) Project [codegen id : 9] -Output [9]: [d_week_seq#5 AS d_week_seq2#72, s_store_id#38 AS s_store_id2#73, sun_sales#30 AS sun_sales2#74, mon_sales#31 AS mon_sales2#75, tue_sales#32 AS tue_sales2#76, wed_sales#33 AS wed_sales2#77, thu_sales#34 AS thu_sales2#78, fri_sales#35 AS fri_sales2#79, sat_sales#36 AS sat_sales2#80] -Input [10]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, d_week_seq#70] - -(41) BroadcastExchange -Input [9]: [d_week_seq2#72, s_store_id2#73, sun_sales2#74, mon_sales2#75, tue_sales2#76, wed_sales2#77, thu_sales2#78, fri_sales2#79, sat_sales2#80] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52)),false), [id=#81] - -(42) BroadcastHashJoin [codegen id : 10] -Left keys [2]: [s_store_id1#46, d_week_seq1#45] -Right keys [2]: [s_store_id2#73, (d_week_seq2#72 - 52)] -Join condition: None - -(43) Project [codegen id : 10] -Output [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, CheckOverflow((promote_precision(sun_sales1#47) / promote_precision(sun_sales2#74)), DecimalType(37,20), true) AS (sun_sales1 / sun_sales2)#82, CheckOverflow((promote_precision(mon_sales1#48) / promote_precision(mon_sales2#75)), DecimalType(37,20), true) AS (mon_sales1 / mon_sales2)#83, CheckOverflow((promote_precision(tue_sales1#49) / promote_precision(tue_sales2#76)), DecimalType(37,20), true) AS (tue_sales1 / tue_sales2)#84, CheckOverflow((promote_precision(wed_sales1#50) / promote_precision(wed_sales2#77)), DecimalType(37,20), true) AS (wed_sales1 / wed_sales2)#85, CheckOverflow((promote_precision(thu_sales1#51) / promote_precision(thu_sales2#78)), DecimalType(37,20), true) AS (thu_sales1 / thu_sales2)#86, CheckOverflow((promote_precision(fri_sales1#52) / promote_precision(fri_sales2#79)), DecimalType(37,20), true) AS (fri_sales1 / fri_sales2)#87, CheckOverflow((promote_precision(sat_sales1#53) / promote_precision(sat_sales2#80)), DecimalType(37,20), true) AS (sat_sales1 / sat_sales2)#88] -Input [19]: [s_store_name1#44, d_week_seq1#45, s_store_id1#46, sun_sales1#47, mon_sales1#48, tue_sales1#49, wed_sales1#50, thu_sales1#51, fri_sales1#52, sat_sales1#53, d_week_seq2#72, s_store_id2#73, sun_sales2#74, mon_sales2#75, tue_sales2#76, wed_sales2#77, thu_sales2#78, fri_sales2#79, sat_sales2#80] - -(44) TakeOrderedAndProject -Input [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#82, (mon_sales1 / mon_sales2)#83, (tue_sales1 / tue_sales2)#84, (wed_sales1 / wed_sales2)#85, (thu_sales1 / thu_sales2)#86, (fri_sales1 / fri_sales2)#87, (sat_sales1 / sat_sales2)#88] -Arguments: 100, [s_store_name1#44 ASC NULLS FIRST, s_store_id1#46 ASC NULLS FIRST, d_week_seq1#45 ASC NULLS FIRST], [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#82, (mon_sales1 / mon_sales2)#83, (tue_sales1 / tue_sales2)#84, (wed_sales1 / wed_sales2)#85, (thu_sales1 / thu_sales2)#86, (fri_sales1 / fri_sales2)#87, (sat_sales1 / sat_sales2)#88] - +TakeOrderedAndProject(limit=100, orderBy=[s_store_name1#1 ASC NULLS FIRST,s_store_id1#2 ASC NULLS FIRST,d_week_seq1#3 ASC NULLS FIRST], output=[s_store_name1#1,s_store_id1#2,d_week_seq1#3,(sun_sales1 / sun_sales2)#4,(mon_sales1 / mon_sales2)#5,(tue_sales1 / tue_sales2)#6,(wed_sales1 / wed_sales2)#7,(thu_sales1 / thu_sales2)#8,(fri_sales1 / fri_sales2)#9,(sat_sales1 / sat_sales2)#10]) ++- *(10) Project [s_store_name1#1, s_store_id1#2, d_week_seq1#3, CheckOverflow((promote_precision(sun_sales1#11) / promote_precision(sun_sales2#12)), DecimalType(37,20)) AS (sun_sales1 / sun_sales2)#4, CheckOverflow((promote_precision(mon_sales1#13) / promote_precision(mon_sales2#14)), DecimalType(37,20)) AS (mon_sales1 / mon_sales2)#5, CheckOverflow((promote_precision(tue_sales1#15) / promote_precision(tue_sales2#16)), DecimalType(37,20)) AS (tue_sales1 / tue_sales2)#6, CheckOverflow((promote_precision(wed_sales1#17) / promote_precision(wed_sales2#18)), DecimalType(37,20)) AS (wed_sales1 / wed_sales2)#7, CheckOverflow((promote_precision(thu_sales1#19) / promote_precision(thu_sales2#20)), DecimalType(37,20)) AS (thu_sales1 / thu_sales2)#8, CheckOverflow((promote_precision(fri_sales1#21) / promote_precision(fri_sales2#22)), DecimalType(37,20)) AS (fri_sales1 / fri_sales2)#9, CheckOverflow((promote_precision(sat_sales1#23) / promote_precision(sat_sales2#24)), DecimalType(37,20)) AS (sat_sales1 / sat_sales2)#10] + +- *(10) BroadcastHashJoin [s_store_id1#2, d_week_seq1#3], [s_store_id2#25, (d_week_seq2#26 - 52)], Inner, BuildRight + :- *(10) Project [s_store_name#27 AS s_store_name1#1, d_week_seq#28 AS d_week_seq1#3, s_store_id#29 AS s_store_id1#2, sun_sales#30 AS sun_sales1#11, mon_sales#31 AS mon_sales1#13, tue_sales#32 AS tue_sales1#15, wed_sales#33 AS wed_sales1#17, thu_sales#34 AS thu_sales1#19, fri_sales#35 AS fri_sales1#21, sat_sales#36 AS sat_sales1#23] + : +- *(10) BroadcastHashJoin [d_week_seq#28], [d_week_seq#37], Inner, BuildRight + : :- *(10) Project [d_week_seq#28, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#29, s_store_name#27] + : : +- *(10) BroadcastHashJoin [ss_store_sk#38], [s_store_sk#39], Inner, BuildRight + : : :- *(10) HashAggregate(keys=[d_week_seq#28, ss_store_sk#38], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#40 = Sunday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Monday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Tuesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Wednesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Thursday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Friday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Saturday) THEN ss_sales_price#41 ELSE null END))]) + : : : +- Exchange hashpartitioning(d_week_seq#28, ss_store_sk#38, 5) + : : : +- *(2) HashAggregate(keys=[d_week_seq#28, ss_store_sk#38], functions=[partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Sunday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Monday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Tuesday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Wednesday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Thursday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Friday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Saturday) THEN ss_sales_price#41 ELSE null END))]) + : : : +- *(2) Project [ss_store_sk#38, ss_sales_price#41, d_week_seq#28, d_day_name#40] + : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#42], [d_date_sk#43], Inner, BuildRight + : : : :- *(2) Project [ss_sold_date_sk#42, ss_store_sk#38, ss_sales_price#41] + : : : : +- *(2) Filter (isnotnull(ss_sold_date_sk#42) && isnotnull(ss_store_sk#38)) + : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#42,ss_store_sk#38,ss_sales_price#41] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#43, d_week_seq#28, d_day_name#40] + : : : +- *(1) Filter (isnotnull(d_date_sk#43) && isnotnull(d_week_seq#28)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#43,d_week_seq#28,d_day_name#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [s_store_sk#39, s_store_id#29, s_store_name#27] + : : +- *(3) Filter (isnotnull(s_store_sk#39) && isnotnull(s_store_id#29)) + : : +- *(3) FileScan parquet default.store[s_store_sk#39,s_store_id#29,s_store_name#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [d_week_seq#37] + : +- *(4) Filter (((isnotnull(d_month_seq#44) && (d_month_seq#44 >= 1212)) && (d_month_seq#44 <= 1223)) && isnotnull(d_week_seq#37)) + : +- *(4) FileScan parquet default.date_dim[d_month_seq#44,d_week_seq#37] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223),..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52))) + +- *(9) Project [d_week_seq#28 AS d_week_seq2#26, s_store_id#29 AS s_store_id2#25, sun_sales#30 AS sun_sales2#12, mon_sales#31 AS mon_sales2#14, tue_sales#32 AS tue_sales2#16, wed_sales#33 AS wed_sales2#18, thu_sales#34 AS thu_sales2#20, fri_sales#35 AS fri_sales2#22, sat_sales#36 AS sat_sales2#24] + +- *(9) BroadcastHashJoin [d_week_seq#28], [d_week_seq#45], Inner, BuildRight + :- *(9) Project [d_week_seq#28, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#29] + : +- *(9) BroadcastHashJoin [ss_store_sk#38], [s_store_sk#39], Inner, BuildRight + : :- *(9) HashAggregate(keys=[d_week_seq#28, ss_store_sk#38], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#40 = Sunday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Monday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Tuesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Wednesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Thursday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Friday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Saturday) THEN ss_sales_price#41 ELSE null END))]) + : : +- ReusedExchange [d_week_seq#28, ss_store_sk#38, sum#46, sum#47, sum#48, sum#49, sum#50, sum#51, sum#52], Exchange hashpartitioning(d_week_seq#28, ss_store_sk#38, 5) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [s_store_sk#39, s_store_id#29] + : +- *(7) Filter (isnotnull(s_store_sk#39) && isnotnull(s_store_id#29)) + : +- *(7) FileScan parquet default.store[s_store_sk#39,s_store_id#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [d_week_seq#45] + +- *(8) Filter (((isnotnull(d_month_seq#53) && (d_month_seq#53 >= 1224)) && (d_month_seq#53 <= 1235)) && isnotnull(d_week_seq#45)) + +- *(8) FileScan parquet default.date_dim[d_month_seq#53,d_week_seq#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1224), LessThanOrEqual(d_month_seq,1235),..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/simplified.txt index 3f3cc409e..038219a53 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/simplified.txt @@ -1,66 +1,58 @@ -TakeOrderedAndProject [s_store_name1,s_store_id1,d_week_seq1,(sun_sales1 / sun_sales2),(mon_sales1 / mon_sales2),(tue_sales1 / tue_sales2),(wed_sales1 / wed_sales2),(thu_sales1 / thu_sales2),(fri_sales1 / fri_sales2),(sat_sales1 / sat_sales2)] - WholeStageCodegen (10) - Project [s_store_name1,s_store_id1,d_week_seq1,sun_sales1,sun_sales2,mon_sales1,mon_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2,thu_sales1,thu_sales2,fri_sales1,fri_sales2,sat_sales1,sat_sales2] - BroadcastHashJoin [s_store_id1,d_week_seq1,s_store_id2,d_week_seq2] - Project [s_store_name,d_week_seq,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] +TakeOrderedAndProject [(fri_sales1 / fri_sales2),(mon_sales1 / mon_sales2),(sat_sales1 / sat_sales2),(sun_sales1 / sun_sales2),(thu_sales1 / thu_sales2),(tue_sales1 / tue_sales2),(wed_sales1 / wed_sales2),d_week_seq1,s_store_id1,s_store_name1] + WholeStageCodegen + Project [d_week_seq1,fri_sales1,fri_sales2,mon_sales1,mon_sales2,s_store_id1,s_store_name1,sat_sales1,sat_sales2,sun_sales1,sun_sales2,thu_sales1,thu_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2] + BroadcastHashJoin [d_week_seq1,d_week_seq2,s_store_id1,s_store_id2] + Project [d_week_seq,fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_id,s_store_name] - BroadcastHashJoin [ss_store_sk,s_store_sk] - HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + Project [d_week_seq,fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [s_store_sk,ss_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] InputAdapter Exchange [d_week_seq,ss_store_sk] #1 - WholeStageCodegen (2) - HashAggregate [d_week_seq,ss_store_sk,d_day_name,ss_sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [ss_store_sk,ss_sales_price,d_week_seq,d_day_name] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_sales_price] + WholeStageCodegen + HashAggregate [d_day_name,d_week_seq,ss_sales_price,ss_store_sk,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,d_week_seq,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [d_date_sk,d_week_seq] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_week_seq,d_day_name] + WholeStageCodegen + Project [d_date_sk,d_day_name,d_week_seq] + Filter [d_date_sk,d_week_seq] + Scan parquet default.date_dim [d_date_sk,d_day_name,d_week_seq] [d_date_sk,d_day_name,d_week_seq] InputAdapter BroadcastExchange #3 - WholeStageCodegen (3) - Filter [s_store_sk,s_store_id] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id,s_store_name] + WholeStageCodegen + Project [s_store_id,s_store_name,s_store_sk] + Filter [s_store_id,s_store_sk] + Scan parquet default.store [s_store_id,s_store_name,s_store_sk] [s_store_id,s_store_name,s_store_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (4) + WholeStageCodegen Project [d_week_seq] Filter [d_month_seq,d_week_seq] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_month_seq,d_week_seq] + Scan parquet default.date_dim [d_month_seq,d_week_seq] [d_month_seq,d_week_seq] InputAdapter BroadcastExchange #5 - WholeStageCodegen (9) - Project [d_week_seq,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + WholeStageCodegen + Project [d_week_seq,fri_sales,mon_sales,s_store_id,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_id] - BroadcastHashJoin [ss_store_sk,s_store_sk] - HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + Project [d_week_seq,fri_sales,mon_sales,s_store_id,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + BroadcastHashJoin [s_store_sk,ss_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] InputAdapter - ReusedExchange [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] #1 + ReusedExchange [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] #1 InputAdapter BroadcastExchange #6 - WholeStageCodegen (7) - Filter [s_store_sk,s_store_id] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id] + WholeStageCodegen + Project [s_store_id,s_store_sk] + Filter [s_store_id,s_store_sk] + Scan parquet default.store [s_store_id,s_store_sk] [s_store_id,s_store_sk] InputAdapter BroadcastExchange #7 - WholeStageCodegen (8) + WholeStageCodegen Project [d_week_seq] Filter [d_month_seq,d_week_seq] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_month_seq,d_week_seq] + Scan parquet default.date_dim [d_month_seq,d_week_seq] [d_month_seq,d_week_seq] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/explain.txt index 822d24b2f..eb06bf9b7 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/explain.txt @@ -1,301 +1,58 @@ == Physical Plan == -TakeOrderedAndProject (44) -+- * Project (43) - +- * Filter (42) - +- * HashAggregate (41) - +- Exchange (40) - +- * HashAggregate (39) - +- * Project (38) - +- * BroadcastHashJoin Inner BuildRight (37) - :- * Project (22) - : +- * BroadcastHashJoin Inner BuildRight (21) - : :- * Project (15) - : : +- * BroadcastHashJoin Inner BuildRight (14) - : : :- * Project (9) - : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.customer_address (1) - : : : +- BroadcastExchange (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.customer (4) - : : +- BroadcastExchange (13) - : : +- * Filter (12) - : : +- * ColumnarToRow (11) - : : +- Scan parquet default.store_sales (10) - : +- BroadcastExchange (20) - : +- * Project (19) - : +- * Filter (18) - : +- * ColumnarToRow (17) - : +- Scan parquet default.date_dim (16) - +- BroadcastExchange (36) - +- * Project (35) - +- * Filter (34) - +- * BroadcastHashJoin LeftOuter BuildRight (33) - :- * Filter (25) - : +- * ColumnarToRow (24) - : +- Scan parquet default.item (23) - +- BroadcastExchange (32) - +- * HashAggregate (31) - +- Exchange (30) - +- * HashAggregate (29) - +- * Filter (28) - +- * ColumnarToRow (27) - +- Scan parquet default.item (26) - - -(1) Scan parquet default.customer_address -Output [2]: [ca_address_sk#1, ca_state#2] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 7] -Input [2]: [ca_address_sk#1, ca_state#2] - -(3) Filter [codegen id : 7] -Input [2]: [ca_address_sk#1, ca_state#2] -Condition : isnotnull(ca_address_sk#1) - -(4) Scan parquet default.customer -Output [2]: [c_customer_sk#3, c_current_addr_sk#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [c_customer_sk#3, c_current_addr_sk#4] - -(6) Filter [codegen id : 1] -Input [2]: [c_customer_sk#3, c_current_addr_sk#4] -Condition : (isnotnull(c_current_addr_sk#4) AND isnotnull(c_customer_sk#3)) - -(7) BroadcastExchange -Input [2]: [c_customer_sk#3, c_current_addr_sk#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#5] - -(8) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [ca_address_sk#1] -Right keys [1]: [c_current_addr_sk#4] -Join condition: None - -(9) Project [codegen id : 7] -Output [2]: [ca_state#2, c_customer_sk#3] -Input [4]: [ca_address_sk#1, ca_state#2, c_customer_sk#3, c_current_addr_sk#4] - -(10) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [3]: [ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] - -(12) Filter [codegen id : 2] -Input [3]: [ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] -Condition : ((isnotnull(ss_customer_sk#8) AND isnotnull(ss_sold_date_sk#6)) AND isnotnull(ss_item_sk#7)) - -(13) BroadcastExchange -Input [3]: [ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, false] as bigint)),false), [id=#9] - -(14) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_customer_sk#3] -Right keys [1]: [ss_customer_sk#8] -Join condition: None - -(15) Project [codegen id : 7] -Output [3]: [ca_state#2, ss_sold_date_sk#6, ss_item_sk#7] -Input [5]: [ca_state#2, c_customer_sk#3, ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] - -(16) Scan parquet default.date_dim -Output [2]: [d_date_sk#10, d_month_seq#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#10, d_month_seq#11] - -(18) Filter [codegen id : 3] -Input [2]: [d_date_sk#10, d_month_seq#11] -Condition : ((isnotnull(d_month_seq#11) AND (d_month_seq#11 = Subquery scalar-subquery#12, [id=#13])) AND isnotnull(d_date_sk#10)) - -(19) Project [codegen id : 3] -Output [1]: [d_date_sk#10] -Input [2]: [d_date_sk#10, d_month_seq#11] - -(20) BroadcastExchange -Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] - -(21) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [ss_sold_date_sk#6] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(22) Project [codegen id : 7] -Output [2]: [ca_state#2, ss_item_sk#7] -Input [4]: [ca_state#2, ss_sold_date_sk#6, ss_item_sk#7, d_date_sk#10] - -(23) Scan parquet default.item -Output [3]: [i_item_sk#15, i_current_price#16, i_category#17] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 6] -Input [3]: [i_item_sk#15, i_current_price#16, i_category#17] - -(25) Filter [codegen id : 6] -Input [3]: [i_item_sk#15, i_current_price#16, i_category#17] -Condition : (isnotnull(i_current_price#16) AND isnotnull(i_item_sk#15)) - -(26) Scan parquet default.item -Output [2]: [i_current_price#16, i_category#17] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_category)] -ReadSchema: struct - -(27) ColumnarToRow [codegen id : 4] -Input [2]: [i_current_price#16, i_category#17] - -(28) Filter [codegen id : 4] -Input [2]: [i_current_price#16, i_category#17] -Condition : isnotnull(i_category#17) - -(29) HashAggregate [codegen id : 4] -Input [2]: [i_current_price#16, i_category#17] -Keys [1]: [i_category#17] -Functions [1]: [partial_avg(UnscaledValue(i_current_price#16))] -Aggregate Attributes [2]: [sum#18, count#19] -Results [3]: [i_category#17, sum#20, count#21] - -(30) Exchange -Input [3]: [i_category#17, sum#20, count#21] -Arguments: hashpartitioning(i_category#17, 5), true, [id=#22] - -(31) HashAggregate [codegen id : 5] -Input [3]: [i_category#17, sum#20, count#21] -Keys [1]: [i_category#17] -Functions [1]: [avg(UnscaledValue(i_current_price#16))] -Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#16))#23] -Results [2]: [cast((avg(UnscaledValue(i_current_price#16))#23 / 100.0) as decimal(11,6)) AS avg(i_current_price)#24, i_category#17 AS i_category#17#25] - -(32) BroadcastExchange -Input [2]: [avg(i_current_price)#24, i_category#17#25] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#26] - -(33) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [i_category#17] -Right keys [1]: [i_category#17#25] -Join condition: None - -(34) Filter [codegen id : 6] -Input [5]: [i_item_sk#15, i_current_price#16, i_category#17, avg(i_current_price)#24, i_category#17#25] -Condition : (cast(i_current_price#16 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#24)), DecimalType(14,7), true)) - -(35) Project [codegen id : 6] -Output [1]: [i_item_sk#15] -Input [5]: [i_item_sk#15, i_current_price#16, i_category#17, avg(i_current_price)#24, i_category#17#25] - -(36) BroadcastExchange -Input [1]: [i_item_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] - -(37) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [ss_item_sk#7] -Right keys [1]: [i_item_sk#15] -Join condition: None - -(38) Project [codegen id : 7] -Output [1]: [ca_state#2] -Input [3]: [ca_state#2, ss_item_sk#7, i_item_sk#15] - -(39) HashAggregate [codegen id : 7] -Input [1]: [ca_state#2] -Keys [1]: [ca_state#2] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#28] -Results [2]: [ca_state#2, count#29] - -(40) Exchange -Input [2]: [ca_state#2, count#29] -Arguments: hashpartitioning(ca_state#2, 5), true, [id=#30] - -(41) HashAggregate [codegen id : 8] -Input [2]: [ca_state#2, count#29] -Keys [1]: [ca_state#2] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#31] -Results [3]: [ca_state#2 AS state#32, count(1)#31 AS cnt#33, count(1)#31 AS count(1)#34] - -(42) Filter [codegen id : 8] -Input [3]: [state#32, cnt#33, count(1)#34] -Condition : (count(1)#34 >= 10) - -(43) Project [codegen id : 8] -Output [2]: [state#32, cnt#33] -Input [3]: [state#32, cnt#33, count(1)#34] - -(44) TakeOrderedAndProject -Input [2]: [state#32, cnt#33] -Arguments: 100, [cnt#33 ASC NULLS FIRST], [state#32, cnt#33] - -===== Subqueries ===== - -Subquery:1 Hosting operator id = 18 Hosting Expression = Subquery scalar-subquery#12, [id=#13] -* HashAggregate (51) -+- Exchange (50) - +- * HashAggregate (49) - +- * Project (48) - +- * Filter (47) - +- * ColumnarToRow (46) - +- Scan parquet default.date_dim (45) - - -(45) Scan parquet default.date_dim -Output [3]: [d_month_seq#11, d_year#35, d_moy#36] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)] -ReadSchema: struct - -(46) ColumnarToRow [codegen id : 1] -Input [3]: [d_month_seq#11, d_year#35, d_moy#36] - -(47) Filter [codegen id : 1] -Input [3]: [d_month_seq#11, d_year#35, d_moy#36] -Condition : (((isnotnull(d_year#35) AND isnotnull(d_moy#36)) AND (d_year#35 = 2000)) AND (d_moy#36 = 1)) - -(48) Project [codegen id : 1] -Output [1]: [d_month_seq#11] -Input [3]: [d_month_seq#11, d_year#35, d_moy#36] - -(49) HashAggregate [codegen id : 1] -Input [1]: [d_month_seq#11] -Keys [1]: [d_month_seq#11] -Functions: [] -Aggregate Attributes: [] -Results [1]: [d_month_seq#11] - -(50) Exchange -Input [1]: [d_month_seq#11] -Arguments: hashpartitioning(d_month_seq#11, 5), true, [id=#37] - -(51) HashAggregate [codegen id : 2] -Input [1]: [d_month_seq#11] -Keys [1]: [d_month_seq#11] -Functions: [] -Aggregate Attributes: [] -Results [1]: [d_month_seq#11] - - +TakeOrderedAndProject(limit=100, orderBy=[cnt#1 ASC NULLS FIRST], output=[state#2,cnt#1]) ++- *(8) Project [state#2, cnt#1] + +- *(8) Filter (count(1)#3 >= 10) + +- *(8) HashAggregate(keys=[ca_state#4], functions=[count(1)]) + +- Exchange hashpartitioning(ca_state#4, 5) + +- *(7) HashAggregate(keys=[ca_state#4], functions=[partial_count(1)]) + +- *(7) Project [ca_state#4] + +- *(7) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight + :- *(7) Project [ca_state#4, ss_item_sk#5] + : +- *(7) BroadcastHashJoin [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + : :- *(7) Project [ca_state#4, ss_sold_date_sk#7, ss_item_sk#5] + : : +- *(7) BroadcastHashJoin [c_customer_sk#9], [ss_customer_sk#10], Inner, BuildRight + : : :- *(7) Project [ca_state#4, c_customer_sk#9] + : : : +- *(7) BroadcastHashJoin [ca_address_sk#11], [c_current_addr_sk#12], Inner, BuildRight + : : : :- *(7) Project [ca_address_sk#11, ca_state#4] + : : : : +- *(7) Filter isnotnull(ca_address_sk#11) + : : : : +- *(7) FileScan parquet default.customer_address[ca_address_sk#11,ca_state#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : +- *(1) Project [c_customer_sk#9, c_current_addr_sk#12] + : : : +- *(1) Filter (isnotnull(c_current_addr_sk#12) && isnotnull(c_customer_sk#9)) + : : : +- *(1) FileScan parquet default.customer[c_customer_sk#9,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) + : : +- *(2) Project [ss_sold_date_sk#7, ss_item_sk#5, ss_customer_sk#10] + : : +- *(2) Filter ((isnotnull(ss_customer_sk#10) && isnotnull(ss_sold_date_sk#7)) && isnotnull(ss_item_sk#5)) + : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#7,ss_item_sk#5,ss_customer_sk#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [d_date_sk#8] + : +- *(3) Filter ((isnotnull(d_month_seq#13) && (d_month_seq#13 = Subquery subquery982)) && isnotnull(d_date_sk#8)) + : : +- Subquery subquery982 + : : +- *(2) HashAggregate(keys=[d_month_seq#13], functions=[]) + : : +- Exchange hashpartitioning(d_month_seq#13, 5) + : : +- *(1) HashAggregate(keys=[d_month_seq#13], functions=[]) + : : +- *(1) Project [d_month_seq#13] + : : +- *(1) Filter (((isnotnull(d_year#14) && isnotnull(d_moy#15)) && (d_year#14 = 2000)) && (d_moy#15 = 1)) + : : +- *(1) FileScan parquet default.date_dim[d_month_seq#13,d_year#14,d_moy#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)], ReadSchema: struct + : +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_month_seq#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct + : +- Subquery subquery982 + : +- *(2) HashAggregate(keys=[d_month_seq#13], functions=[]) + : +- Exchange hashpartitioning(d_month_seq#13, 5) + : +- *(1) HashAggregate(keys=[d_month_seq#13], functions=[]) + : +- *(1) Project [d_month_seq#13] + : +- *(1) Filter (((isnotnull(d_year#14) && isnotnull(d_moy#15)) && (d_year#14 = 2000)) && (d_moy#15 = 1)) + : +- *(1) FileScan parquet default.date_dim[d_month_seq#13,d_year#14,d_moy#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(6) Project [i_item_sk#6] + +- *(6) Filter (cast(i_current_price#16 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#17)), DecimalType(14,7))) + +- *(6) BroadcastHashJoin [i_category#18], [i_category#18#19], LeftOuter, BuildRight + :- *(6) Project [i_item_sk#6, i_current_price#16, i_category#18] + : +- *(6) Filter (isnotnull(i_current_price#16) && isnotnull(i_item_sk#6)) + : +- *(6) FileScan parquet default.item[i_item_sk#6,i_current_price#16,i_category#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) + +- *(5) HashAggregate(keys=[i_category#18], functions=[avg(UnscaledValue(i_current_price#16))]) + +- Exchange hashpartitioning(i_category#18, 5) + +- *(4) HashAggregate(keys=[i_category#18], functions=[partial_avg(UnscaledValue(i_current_price#16))]) + +- *(4) Project [i_current_price#16, i_category#18] + +- *(4) Filter isnotnull(i_category#18) + +- *(4) FileScan parquet default.item[i_current_price#16,i_category#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/simplified.txt index 97ba16294..2bf134280 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/simplified.txt @@ -1,77 +1,78 @@ TakeOrderedAndProject [cnt,state] - WholeStageCodegen (8) - Project [state,cnt] + WholeStageCodegen + Project [cnt,state] Filter [count(1)] - HashAggregate [ca_state,count] [count(1),state,cnt,count(1),count] + HashAggregate [ca_state,count,count(1)] [cnt,count,count(1),count(1),state] InputAdapter Exchange [ca_state] #1 - WholeStageCodegen (7) - HashAggregate [ca_state] [count,count] + WholeStageCodegen + HashAggregate [ca_state,count,count] [count,count] Project [ca_state] - BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] Project [ca_state,ss_item_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ca_state,ss_sold_date_sk,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ca_state,ss_item_sk,ss_sold_date_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [ca_state,c_customer_sk] - BroadcastHashJoin [ca_address_sk,c_current_addr_sk] - Filter [ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_state] + Project [c_customer_sk,ca_state] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ca_address_sk,ca_state] + Filter [ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [c_current_addr_sk,c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] + WholeStageCodegen + Project [c_current_addr_sk,c_customer_sk] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Filter [ss_customer_sk,ss_sold_date_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + WholeStageCodegen + Project [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk] [ss_customer_sk,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) + WholeStageCodegen Project [d_date_sk] - Filter [d_month_seq,d_date_sk] + Filter [d_date_sk,d_month_seq] Subquery #1 - WholeStageCodegen (2) + WholeStageCodegen HashAggregate [d_month_seq] InputAdapter Exchange [d_month_seq] #5 - WholeStageCodegen (1) + WholeStageCodegen HashAggregate [d_month_seq] Project [d_month_seq] - Filter [d_year,d_moy] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + Filter [d_moy,d_year] + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + Subquery #1 + WholeStageCodegen + HashAggregate [d_month_seq] + InputAdapter + Exchange [d_month_seq] #5 + WholeStageCodegen + HashAggregate [d_month_seq] + Project [d_month_seq] + Filter [d_moy,d_year] + Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] InputAdapter BroadcastExchange #6 - WholeStageCodegen (6) + WholeStageCodegen Project [i_item_sk] - Filter [i_current_price,avg(i_current_price)] + Filter [avg(i_current_price),i_current_price] BroadcastHashJoin [i_category,i_category] - Filter [i_current_price,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_current_price,i_category] + Project [i_category,i_current_price,i_item_sk] + Filter [i_current_price,i_item_sk] + Scan parquet default.item [i_category,i_current_price,i_item_sk] [i_category,i_current_price,i_item_sk] InputAdapter BroadcastExchange #7 - WholeStageCodegen (5) - HashAggregate [i_category,sum,count] [avg(UnscaledValue(i_current_price)),avg(i_current_price),i_category,sum,count] + WholeStageCodegen + HashAggregate [avg(UnscaledValue(i_current_price)),count,i_category,sum] [avg(UnscaledValue(i_current_price)),avg(i_current_price),count,i_category,sum] InputAdapter Exchange [i_category] #8 - WholeStageCodegen (4) - HashAggregate [i_category,i_current_price] [sum,count,sum,count] - Filter [i_category] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_current_price,i_category] + WholeStageCodegen + HashAggregate [count,count,i_category,i_current_price,sum,sum] [count,count,sum,sum] + Project [i_category,i_current_price] + Filter [i_category] + Scan parquet default.item [i_category,i_current_price] [i_category,i_current_price] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/explain.txt index f838f8f1a..e4e212165 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/explain.txt @@ -1,378 +1,65 @@ == Physical Plan == -TakeOrderedAndProject (67) -+- * HashAggregate (66) - +- Exchange (65) - +- * HashAggregate (64) - +- Union (63) - :- * HashAggregate (32) - : +- Exchange (31) - : +- * HashAggregate (30) - : +- * Project (29) - : +- * BroadcastHashJoin Inner BuildRight (28) - : :- * Project (17) - : : +- * BroadcastHashJoin Inner BuildRight (16) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (15) - : : +- * Project (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.customer_address (11) - : +- BroadcastExchange (27) - : +- * BroadcastHashJoin LeftSemi BuildRight (26) - : :- * Filter (20) - : : +- * ColumnarToRow (19) - : : +- Scan parquet default.item (18) - : +- BroadcastExchange (25) - : +- * Project (24) - : +- * Filter (23) - : +- * ColumnarToRow (22) - : +- Scan parquet default.item (21) - :- * HashAggregate (47) - : +- Exchange (46) - : +- * HashAggregate (45) - : +- * Project (44) - : +- * BroadcastHashJoin Inner BuildRight (43) - : :- * Project (41) - : : +- * BroadcastHashJoin Inner BuildRight (40) - : : :- * Project (38) - : : : +- * BroadcastHashJoin Inner BuildRight (37) - : : : :- * Filter (35) - : : : : +- * ColumnarToRow (34) - : : : : +- Scan parquet default.catalog_sales (33) - : : : +- ReusedExchange (36) - : : +- ReusedExchange (39) - : +- ReusedExchange (42) - +- * HashAggregate (62) - +- Exchange (61) - +- * HashAggregate (60) - +- * Project (59) - +- * BroadcastHashJoin Inner BuildRight (58) - :- * Project (56) - : +- * BroadcastHashJoin Inner BuildRight (55) - : :- * Project (53) - : : +- * BroadcastHashJoin Inner BuildRight (52) - : : :- * Filter (50) - : : : +- * ColumnarToRow (49) - : : : +- Scan parquet default.web_sales (48) - : : +- ReusedExchange (51) - : +- ReusedExchange (54) - +- ReusedExchange (57) - - -(1) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 5] -Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] - -(3) Filter [codegen id : 5] -Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] -Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_item_sk#2)) - -(4) Scan parquet default.date_dim -Output [3]: [d_date_sk#5, d_year#6, d_moy#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,9), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#5, d_year#6, d_moy#7] - -(6) Filter [codegen id : 1] -Input [3]: [d_date_sk#5, d_year#6, d_moy#7] -Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 9)) AND isnotnull(d_date_sk#5)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#5] -Input [3]: [d_date_sk#5, d_year#6, d_moy#7] - -(8) BroadcastExchange -Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] - -(9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#5] -Join condition: None - -(10) Project [codegen id : 5] -Output [3]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] -Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, d_date_sk#5] - -(11) Scan parquet default.customer_address -Output [2]: [ca_address_sk#9, ca_gmt_offset#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [2]: [ca_address_sk#9, ca_gmt_offset#10] - -(13) Filter [codegen id : 2] -Input [2]: [ca_address_sk#9, ca_gmt_offset#10] -Condition : ((isnotnull(ca_gmt_offset#10) AND (ca_gmt_offset#10 = -5.00)) AND isnotnull(ca_address_sk#9)) - -(14) Project [codegen id : 2] -Output [1]: [ca_address_sk#9] -Input [2]: [ca_address_sk#9, ca_gmt_offset#10] - -(15) BroadcastExchange -Input [1]: [ca_address_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] - -(16) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_addr_sk#3] -Right keys [1]: [ca_address_sk#9] -Join condition: None - -(17) Project [codegen id : 5] -Output [2]: [ss_item_sk#2, ss_ext_sales_price#4] -Input [4]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, ca_address_sk#9] - -(18) Scan parquet default.item -Output [2]: [i_item_sk#12, i_item_id#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#12, i_item_id#13] - -(20) Filter [codegen id : 4] -Input [2]: [i_item_sk#12, i_item_id#13] -Condition : isnotnull(i_item_sk#12) - -(21) Scan parquet default.item -Output [2]: [i_item_id#13, i_category#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Music)] -ReadSchema: struct - -(22) ColumnarToRow [codegen id : 3] -Input [2]: [i_item_id#13, i_category#14] - -(23) Filter [codegen id : 3] -Input [2]: [i_item_id#13, i_category#14] -Condition : (isnotnull(i_category#14) AND (i_category#14 = Music)) - -(24) Project [codegen id : 3] -Output [1]: [i_item_id#13 AS i_item_id#13#15] -Input [2]: [i_item_id#13, i_category#14] - -(25) BroadcastExchange -Input [1]: [i_item_id#13#15] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#16] - -(26) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_item_id#13] -Right keys [1]: [i_item_id#13#15] -Join condition: None - -(27) BroadcastExchange -Input [2]: [i_item_sk#12, i_item_id#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] - -(28) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#12] -Join condition: None - -(29) Project [codegen id : 5] -Output [2]: [ss_ext_sales_price#4, i_item_id#13] -Input [4]: [ss_item_sk#2, ss_ext_sales_price#4, i_item_sk#12, i_item_id#13] - -(30) HashAggregate [codegen id : 5] -Input [2]: [ss_ext_sales_price#4, i_item_id#13] -Keys [1]: [i_item_id#13] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#4))] -Aggregate Attributes [1]: [sum#18] -Results [2]: [i_item_id#13, sum#19] - -(31) Exchange -Input [2]: [i_item_id#13, sum#19] -Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#20] - -(32) HashAggregate [codegen id : 6] -Input [2]: [i_item_id#13, sum#19] -Keys [1]: [i_item_id#13] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#4))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#4))#21] -Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#21,17,2) AS total_sales#22] - -(33) Scan parquet default.catalog_sales -Output [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] -ReadSchema: struct - -(34) ColumnarToRow [codegen id : 11] -Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] - -(35) Filter [codegen id : 11] -Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] -Condition : ((isnotnull(cs_sold_date_sk#23) AND isnotnull(cs_bill_addr_sk#24)) AND isnotnull(cs_item_sk#25)) - -(36) ReusedExchange [Reuses operator id: 8] -Output [1]: [d_date_sk#5] - -(37) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_sold_date_sk#23] -Right keys [1]: [d_date_sk#5] -Join condition: None - -(38) Project [codegen id : 11] -Output [3]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] -Input [5]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, d_date_sk#5] - -(39) ReusedExchange [Reuses operator id: 15] -Output [1]: [ca_address_sk#9] - -(40) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_bill_addr_sk#24] -Right keys [1]: [ca_address_sk#9] -Join condition: None - -(41) Project [codegen id : 11] -Output [2]: [cs_item_sk#25, cs_ext_sales_price#26] -Input [4]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, ca_address_sk#9] - -(42) ReusedExchange [Reuses operator id: 27] -Output [2]: [i_item_sk#12, i_item_id#13] - -(43) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_item_sk#25] -Right keys [1]: [i_item_sk#12] -Join condition: None - -(44) Project [codegen id : 11] -Output [2]: [cs_ext_sales_price#26, i_item_id#13] -Input [4]: [cs_item_sk#25, cs_ext_sales_price#26, i_item_sk#12, i_item_id#13] - -(45) HashAggregate [codegen id : 11] -Input [2]: [cs_ext_sales_price#26, i_item_id#13] -Keys [1]: [i_item_id#13] -Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#26))] -Aggregate Attributes [1]: [sum#27] -Results [2]: [i_item_id#13, sum#28] - -(46) Exchange -Input [2]: [i_item_id#13, sum#28] -Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#29] - -(47) HashAggregate [codegen id : 12] -Input [2]: [i_item_id#13, sum#28] -Keys [1]: [i_item_id#13] -Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#26))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#26))#30] -Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#26))#30,17,2) AS total_sales#31] - -(48) Scan parquet default.web_sales -Output [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] -ReadSchema: struct - -(49) ColumnarToRow [codegen id : 17] -Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] - -(50) Filter [codegen id : 17] -Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] -Condition : ((isnotnull(ws_sold_date_sk#32) AND isnotnull(ws_bill_addr_sk#34)) AND isnotnull(ws_item_sk#33)) - -(51) ReusedExchange [Reuses operator id: 8] -Output [1]: [d_date_sk#5] - -(52) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_sold_date_sk#32] -Right keys [1]: [d_date_sk#5] -Join condition: None - -(53) Project [codegen id : 17] -Output [3]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] -Input [5]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, d_date_sk#5] - -(54) ReusedExchange [Reuses operator id: 15] -Output [1]: [ca_address_sk#9] - -(55) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_bill_addr_sk#34] -Right keys [1]: [ca_address_sk#9] -Join condition: None - -(56) Project [codegen id : 17] -Output [2]: [ws_item_sk#33, ws_ext_sales_price#35] -Input [4]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, ca_address_sk#9] - -(57) ReusedExchange [Reuses operator id: 27] -Output [2]: [i_item_sk#12, i_item_id#13] - -(58) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_item_sk#33] -Right keys [1]: [i_item_sk#12] -Join condition: None - -(59) Project [codegen id : 17] -Output [2]: [ws_ext_sales_price#35, i_item_id#13] -Input [4]: [ws_item_sk#33, ws_ext_sales_price#35, i_item_sk#12, i_item_id#13] - -(60) HashAggregate [codegen id : 17] -Input [2]: [ws_ext_sales_price#35, i_item_id#13] -Keys [1]: [i_item_id#13] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#35))] -Aggregate Attributes [1]: [sum#36] -Results [2]: [i_item_id#13, sum#37] - -(61) Exchange -Input [2]: [i_item_id#13, sum#37] -Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#38] - -(62) HashAggregate [codegen id : 18] -Input [2]: [i_item_id#13, sum#37] -Keys [1]: [i_item_id#13] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#35))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#35))#39] -Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#35))#39,17,2) AS total_sales#40] - -(63) Union - -(64) HashAggregate [codegen id : 19] -Input [2]: [i_item_id#13, total_sales#22] -Keys [1]: [i_item_id#13] -Functions [1]: [partial_sum(total_sales#22)] -Aggregate Attributes [2]: [sum#41, isEmpty#42] -Results [3]: [i_item_id#13, sum#43, isEmpty#44] - -(65) Exchange -Input [3]: [i_item_id#13, sum#43, isEmpty#44] -Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#45] - -(66) HashAggregate [codegen id : 20] -Input [3]: [i_item_id#13, sum#43, isEmpty#44] -Keys [1]: [i_item_id#13] -Functions [1]: [sum(total_sales#22)] -Aggregate Attributes [1]: [sum(total_sales#22)#46] -Results [2]: [i_item_id#13, sum(total_sales#22)#46 AS total_sales#47] - -(67) TakeOrderedAndProject -Input [2]: [i_item_id#13, total_sales#47] -Arguments: 100, [i_item_id#13 ASC NULLS FIRST, total_sales#47 ASC NULLS FIRST], [i_item_id#13, total_sales#47] - +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,total_sales#2 ASC NULLS FIRST], output=[i_item_id#1,total_sales#2]) ++- *(20) HashAggregate(keys=[i_item_id#1], functions=[sum(total_sales#3)]) + +- Exchange hashpartitioning(i_item_id#1, 5) + +- *(19) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(total_sales#3)]) + +- Union + :- *(6) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- Exchange hashpartitioning(i_item_id#1, 5) + : +- *(5) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- *(5) Project [ss_ext_sales_price#4, i_item_id#1] + : +- *(5) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight + : :- *(5) Project [ss_item_sk#5, ss_ext_sales_price#4] + : : +- *(5) BroadcastHashJoin [ss_addr_sk#7], [ca_address_sk#8], Inner, BuildRight + : : :- *(5) Project [ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] + : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#9, ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] + : : : : +- *(5) Filter ((isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#7)) && isnotnull(ss_item_sk#5)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#10] + : : : +- *(1) Filter ((((isnotnull(d_year#11) && isnotnull(d_moy#12)) && (d_year#11 = 1998)) && (d_moy#12 = 9)) && isnotnull(d_date_sk#10)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,9), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [ca_address_sk#8] + : : +- *(2) Filter ((isnotnull(ca_gmt_offset#13) && (ca_gmt_offset#13 = -5.00)) && isnotnull(ca_address_sk#8)) + : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) BroadcastHashJoin [i_item_id#1], [i_item_id#1#14], LeftSemi, BuildRight + : :- *(4) Project [i_item_sk#6, i_item_id#1] + : : +- *(4) Filter isnotnull(i_item_sk#6) + : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_item_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- *(3) Project [i_item_id#1 AS i_item_id#1#14] + : +- *(3) Filter (isnotnull(i_category#15) && (i_category#15 = Music)) + : +- *(3) FileScan parquet default.item[i_item_id#1,i_category#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Music)], ReadSchema: struct + :- *(12) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) + : +- Exchange hashpartitioning(i_item_id#1, 5) + : +- *(11) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) + : +- *(11) Project [cs_ext_sales_price#16, i_item_id#1] + : +- *(11) BroadcastHashJoin [cs_item_sk#17], [i_item_sk#6], Inner, BuildRight + : :- *(11) Project [cs_item_sk#17, cs_ext_sales_price#16] + : : +- *(11) BroadcastHashJoin [cs_bill_addr_sk#18], [ca_address_sk#8], Inner, BuildRight + : : :- *(11) Project [cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] + : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#19], [d_date_sk#10], Inner, BuildRight + : : : :- *(11) Project [cs_sold_date_sk#19, cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] + : : : : +- *(11) Filter ((isnotnull(cs_sold_date_sk#19) && isnotnull(cs_bill_addr_sk#18)) && isnotnull(cs_item_sk#17)) + : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [i_item_sk#6, i_item_id#1], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(18) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(ws_ext_sales_price#20))]) + +- Exchange hashpartitioning(i_item_id#1, 5) + +- *(17) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#20))]) + +- *(17) Project [ws_ext_sales_price#20, i_item_id#1] + +- *(17) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#6], Inner, BuildRight + :- *(17) Project [ws_item_sk#21, ws_ext_sales_price#20] + : +- *(17) BroadcastHashJoin [ws_bill_addr_sk#22], [ca_address_sk#8], Inner, BuildRight + : :- *(17) Project [ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] + : : +- *(17) BroadcastHashJoin [ws_sold_date_sk#23], [d_date_sk#10], Inner, BuildRight + : : :- *(17) Project [ws_sold_date_sk#23, ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] + : : : +- *(17) Filter ((isnotnull(ws_sold_date_sk#23) && isnotnull(ws_bill_addr_sk#22)) && isnotnull(ws_item_sk#21)) + : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [i_item_sk#6, i_item_id#1], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/simplified.txt index fb9e4e507..01ea963ce 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/simplified.txt @@ -1,101 +1,91 @@ TakeOrderedAndProject [i_item_id,total_sales] - WholeStageCodegen (20) - HashAggregate [i_item_id,sum,isEmpty] [sum(total_sales),total_sales,sum,isEmpty] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(total_sales)] [sum,sum(total_sales),total_sales] InputAdapter Exchange [i_item_id] #1 - WholeStageCodegen (19) - HashAggregate [i_item_id,total_sales] [sum,isEmpty,sum,isEmpty] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum,total_sales] [sum,sum] InputAdapter Union - WholeStageCodegen (6) - HashAggregate [i_item_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_ext_sales_price)),total_sales] InputAdapter Exchange [i_item_id] #2 - WholeStageCodegen (5) - HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price,i_item_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_addr_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] + WholeStageCodegen + HashAggregate [i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [i_item_id,ss_ext_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + Filter [ss_addr_sk,ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) + WholeStageCodegen Project [ca_address_sk] - Filter [ca_gmt_offset,ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + Filter [ca_address_sk,ca_gmt_offset] + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) + WholeStageCodegen BroadcastHashJoin [i_item_id,i_item_id] - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id] + Project [i_item_id,i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] InputAdapter BroadcastExchange #6 - WholeStageCodegen (3) + WholeStageCodegen Project [i_item_id] Filter [i_category] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_id,i_category] - WholeStageCodegen (12) - HashAggregate [i_item_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] + Scan parquet default.item [i_category,i_item_id] [i_category,i_item_id] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum,sum(UnscaledValue(cs_ext_sales_price)),total_sales] InputAdapter Exchange [i_item_id] #7 - WholeStageCodegen (11) - HashAggregate [i_item_id,cs_ext_sales_price] [sum,sum] + WholeStageCodegen + HashAggregate [cs_ext_sales_price,i_item_id,sum,sum] [sum,sum] Project [cs_ext_sales_price,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk,cs_ext_sales_price] - BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] - Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_item_sk] + BroadcastHashJoin [ca_address_sk,cs_bill_addr_sk] + Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + Filter [cs_bill_addr_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk] #3 + ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter - ReusedExchange [ca_address_sk] #4 + ReusedExchange [ca_address_sk] [ca_address_sk] #4 InputAdapter - ReusedExchange [i_item_sk,i_item_id] #5 - WholeStageCodegen (18) - HashAggregate [i_item_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] + ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #5 + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),total_sales] InputAdapter Exchange [i_item_id] #8 - WholeStageCodegen (17) - HashAggregate [i_item_id,ws_ext_sales_price] [sum,sum] - Project [ws_ext_sales_price,i_item_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk,ws_ext_sales_price] - BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] - Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_bill_addr_sk,ws_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum,ws_ext_sales_price] [sum,sum] + Project [i_item_id,ws_ext_sales_price] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + Filter [ws_bill_addr_sk,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk] #3 + ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter - ReusedExchange [ca_address_sk] #4 + ReusedExchange [ca_address_sk] [ca_address_sk] #4 InputAdapter - ReusedExchange [i_item_sk,i_item_id] #5 + ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/explain.txt index f56f48726..49bd37330 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/explain.txt @@ -1,396 +1,68 @@ == Physical Plan == -TakeOrderedAndProject (72) -+- * Project (71) - +- BroadcastNestedLoopJoin Inner BuildRight (70) - :- * HashAggregate (47) - : +- Exchange (46) - : +- * HashAggregate (45) - : +- * Project (44) - : +- * BroadcastHashJoin Inner BuildRight (43) - : :- * Project (37) - : : +- * BroadcastHashJoin Inner BuildRight (36) - : : :- * Project (30) - : : : +- * BroadcastHashJoin Inner BuildRight (29) - : : : :- * Project (24) - : : : : +- * BroadcastHashJoin Inner BuildRight (23) - : : : : :- * Project (17) - : : : : : +- * BroadcastHashJoin Inner BuildRight (16) - : : : : : :- * Project (10) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : : :- * Filter (3) - : : : : : : : +- * ColumnarToRow (2) - : : : : : : : +- Scan parquet default.store_sales (1) - : : : : : : +- BroadcastExchange (8) - : : : : : : +- * Project (7) - : : : : : : +- * Filter (6) - : : : : : : +- * ColumnarToRow (5) - : : : : : : +- Scan parquet default.store (4) - : : : : : +- BroadcastExchange (15) - : : : : : +- * Project (14) - : : : : : +- * Filter (13) - : : : : : +- * ColumnarToRow (12) - : : : : : +- Scan parquet default.promotion (11) - : : : : +- BroadcastExchange (22) - : : : : +- * Project (21) - : : : : +- * Filter (20) - : : : : +- * ColumnarToRow (19) - : : : : +- Scan parquet default.date_dim (18) - : : : +- BroadcastExchange (28) - : : : +- * Filter (27) - : : : +- * ColumnarToRow (26) - : : : +- Scan parquet default.customer (25) - : : +- BroadcastExchange (35) - : : +- * Project (34) - : : +- * Filter (33) - : : +- * ColumnarToRow (32) - : : +- Scan parquet default.customer_address (31) - : +- BroadcastExchange (42) - : +- * Project (41) - : +- * Filter (40) - : +- * ColumnarToRow (39) - : +- Scan parquet default.item (38) - +- BroadcastExchange (69) - +- * HashAggregate (68) - +- Exchange (67) - +- * HashAggregate (66) - +- * Project (65) - +- * BroadcastHashJoin Inner BuildRight (64) - :- * Project (62) - : +- * BroadcastHashJoin Inner BuildRight (61) - : :- * Project (59) - : : +- * BroadcastHashJoin Inner BuildRight (58) - : : :- * Project (56) - : : : +- * BroadcastHashJoin Inner BuildRight (55) - : : : :- * Project (53) - : : : : +- * BroadcastHashJoin Inner BuildRight (52) - : : : : :- * Filter (50) - : : : : : +- * ColumnarToRow (49) - : : : : : +- Scan parquet default.store_sales (48) - : : : : +- ReusedExchange (51) - : : : +- ReusedExchange (54) - : : +- ReusedExchange (57) - : +- ReusedExchange (60) - +- ReusedExchange (63) - - -(1) Scan parquet default.store_sales -Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 7] -Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6] - -(3) Filter [codegen id : 7] -Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6] -Condition : ((((isnotnull(ss_store_sk#4) AND isnotnull(ss_promo_sk#5)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_item_sk#2)) - -(4) Scan parquet default.store -Output [2]: [s_store_sk#7, s_gmt_offset#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [s_store_sk#7, s_gmt_offset#8] - -(6) Filter [codegen id : 1] -Input [2]: [s_store_sk#7, s_gmt_offset#8] -Condition : ((isnotnull(s_gmt_offset#8) AND (s_gmt_offset#8 = -5.00)) AND isnotnull(s_store_sk#7)) - -(7) Project [codegen id : 1] -Output [1]: [s_store_sk#7] -Input [2]: [s_store_sk#7, s_gmt_offset#8] - -(8) BroadcastExchange -Input [1]: [s_store_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] - -(9) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [ss_store_sk#4] -Right keys [1]: [s_store_sk#7] -Join condition: None - -(10) Project [codegen id : 7] -Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_promo_sk#5, ss_ext_sales_price#6] -Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6, s_store_sk#7] - -(11) Scan parquet default.promotion -Output [4]: [p_promo_sk#10, p_channel_dmail#11, p_channel_email#12, p_channel_tv#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/promotion] -PushedFilters: [Or(Or(EqualTo(p_channel_dmail,Y),EqualTo(p_channel_email,Y)),EqualTo(p_channel_tv,Y)), IsNotNull(p_promo_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [4]: [p_promo_sk#10, p_channel_dmail#11, p_channel_email#12, p_channel_tv#13] - -(13) Filter [codegen id : 2] -Input [4]: [p_promo_sk#10, p_channel_dmail#11, p_channel_email#12, p_channel_tv#13] -Condition : ((((p_channel_dmail#11 = Y) OR (p_channel_email#12 = Y)) OR (p_channel_tv#13 = Y)) AND isnotnull(p_promo_sk#10)) - -(14) Project [codegen id : 2] -Output [1]: [p_promo_sk#10] -Input [4]: [p_promo_sk#10, p_channel_dmail#11, p_channel_email#12, p_channel_tv#13] - -(15) BroadcastExchange -Input [1]: [p_promo_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] - -(16) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [ss_promo_sk#5] -Right keys [1]: [p_promo_sk#10] -Join condition: None - -(17) Project [codegen id : 7] -Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6] -Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_promo_sk#5, ss_ext_sales_price#6, p_promo_sk#10] - -(18) Scan parquet default.date_dim -Output [3]: [d_date_sk#15, d_year#16, d_moy#17] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 3] -Input [3]: [d_date_sk#15, d_year#16, d_moy#17] - -(20) Filter [codegen id : 3] -Input [3]: [d_date_sk#15, d_year#16, d_moy#17] -Condition : ((((isnotnull(d_year#16) AND isnotnull(d_moy#17)) AND (d_year#16 = 1998)) AND (d_moy#17 = 11)) AND isnotnull(d_date_sk#15)) - -(21) Project [codegen id : 3] -Output [1]: [d_date_sk#15] -Input [3]: [d_date_sk#15, d_year#16, d_moy#17] - -(22) BroadcastExchange -Input [1]: [d_date_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] - -(23) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#15] -Join condition: None - -(24) Project [codegen id : 7] -Output [3]: [ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6] -Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6, d_date_sk#15] - -(25) Scan parquet default.customer -Output [2]: [c_customer_sk#19, c_current_addr_sk#20] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct - -(26) ColumnarToRow [codegen id : 4] -Input [2]: [c_customer_sk#19, c_current_addr_sk#20] - -(27) Filter [codegen id : 4] -Input [2]: [c_customer_sk#19, c_current_addr_sk#20] -Condition : (isnotnull(c_customer_sk#19) AND isnotnull(c_current_addr_sk#20)) - -(28) BroadcastExchange -Input [2]: [c_customer_sk#19, c_current_addr_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] - -(29) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [ss_customer_sk#3] -Right keys [1]: [c_customer_sk#19] -Join condition: None - -(30) Project [codegen id : 7] -Output [3]: [ss_item_sk#2, ss_ext_sales_price#6, c_current_addr_sk#20] -Input [5]: [ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6, c_customer_sk#19, c_current_addr_sk#20] - -(31) Scan parquet default.customer_address -Output [2]: [ca_address_sk#22, ca_gmt_offset#23] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(32) ColumnarToRow [codegen id : 5] -Input [2]: [ca_address_sk#22, ca_gmt_offset#23] - -(33) Filter [codegen id : 5] -Input [2]: [ca_address_sk#22, ca_gmt_offset#23] -Condition : ((isnotnull(ca_gmt_offset#23) AND (ca_gmt_offset#23 = -5.00)) AND isnotnull(ca_address_sk#22)) - -(34) Project [codegen id : 5] -Output [1]: [ca_address_sk#22] -Input [2]: [ca_address_sk#22, ca_gmt_offset#23] - -(35) BroadcastExchange -Input [1]: [ca_address_sk#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] - -(36) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_addr_sk#20] -Right keys [1]: [ca_address_sk#22] -Join condition: None - -(37) Project [codegen id : 7] -Output [2]: [ss_item_sk#2, ss_ext_sales_price#6] -Input [4]: [ss_item_sk#2, ss_ext_sales_price#6, c_current_addr_sk#20, ca_address_sk#22] - -(38) Scan parquet default.item -Output [2]: [i_item_sk#25, i_category#26] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry), IsNotNull(i_item_sk)] -ReadSchema: struct - -(39) ColumnarToRow [codegen id : 6] -Input [2]: [i_item_sk#25, i_category#26] - -(40) Filter [codegen id : 6] -Input [2]: [i_item_sk#25, i_category#26] -Condition : ((isnotnull(i_category#26) AND (i_category#26 = Jewelry)) AND isnotnull(i_item_sk#25)) - -(41) Project [codegen id : 6] -Output [1]: [i_item_sk#25] -Input [2]: [i_item_sk#25, i_category#26] - -(42) BroadcastExchange -Input [1]: [i_item_sk#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] - -(43) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#25] -Join condition: None - -(44) Project [codegen id : 7] -Output [1]: [ss_ext_sales_price#6] -Input [3]: [ss_item_sk#2, ss_ext_sales_price#6, i_item_sk#25] - -(45) HashAggregate [codegen id : 7] -Input [1]: [ss_ext_sales_price#6] -Keys: [] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum#28] -Results [1]: [sum#29] - -(46) Exchange -Input [1]: [sum#29] -Arguments: SinglePartition, true, [id=#30] - -(47) HashAggregate [codegen id : 8] -Input [1]: [sum#29] -Keys: [] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#31] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#31,17,2) AS promotions#32] - -(48) Scan parquet default.store_sales -Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct - -(49) ColumnarToRow [codegen id : 14] -Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] - -(50) Filter [codegen id : 14] -Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] -Condition : (((isnotnull(ss_store_sk#4) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_item_sk#2)) - -(51) ReusedExchange [Reuses operator id: 8] -Output [1]: [s_store_sk#7] - -(52) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ss_store_sk#4] -Right keys [1]: [s_store_sk#7] -Join condition: None - -(53) Project [codegen id : 14] -Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6] -Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6, s_store_sk#7] - -(54) ReusedExchange [Reuses operator id: 22] -Output [1]: [d_date_sk#15] - -(55) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#15] -Join condition: None - -(56) Project [codegen id : 14] -Output [3]: [ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6] -Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6, d_date_sk#15] - -(57) ReusedExchange [Reuses operator id: 28] -Output [2]: [c_customer_sk#19, c_current_addr_sk#20] - -(58) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ss_customer_sk#3] -Right keys [1]: [c_customer_sk#19] -Join condition: None - -(59) Project [codegen id : 14] -Output [3]: [ss_item_sk#2, ss_ext_sales_price#6, c_current_addr_sk#20] -Input [5]: [ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6, c_customer_sk#19, c_current_addr_sk#20] - -(60) ReusedExchange [Reuses operator id: 35] -Output [1]: [ca_address_sk#22] - -(61) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [c_current_addr_sk#20] -Right keys [1]: [ca_address_sk#22] -Join condition: None - -(62) Project [codegen id : 14] -Output [2]: [ss_item_sk#2, ss_ext_sales_price#6] -Input [4]: [ss_item_sk#2, ss_ext_sales_price#6, c_current_addr_sk#20, ca_address_sk#22] - -(63) ReusedExchange [Reuses operator id: 42] -Output [1]: [i_item_sk#25] - -(64) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#25] -Join condition: None - -(65) Project [codegen id : 14] -Output [1]: [ss_ext_sales_price#6] -Input [3]: [ss_item_sk#2, ss_ext_sales_price#6, i_item_sk#25] - -(66) HashAggregate [codegen id : 14] -Input [1]: [ss_ext_sales_price#6] -Keys: [] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum#33] -Results [1]: [sum#34] - -(67) Exchange -Input [1]: [sum#34] -Arguments: SinglePartition, true, [id=#35] - -(68) HashAggregate [codegen id : 15] -Input [1]: [sum#34] -Keys: [] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#36] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#36,17,2) AS total#37] - -(69) BroadcastExchange -Input [1]: [total#37] -Arguments: IdentityBroadcastMode, [id=#38] - -(70) BroadcastNestedLoopJoin -Join condition: None - -(71) Project [codegen id : 16] -Output [3]: [promotions#32, total#37, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#32 as decimal(15,4))) / promote_precision(cast(total#37 as decimal(15,4)))), DecimalType(35,20), true)) * 100.00000000000000000000), DecimalType(38,19), true) AS (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] -Input [2]: [promotions#32, total#37] - -(72) TakeOrderedAndProject -Input [3]: [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] -Arguments: 100, [promotions#32 ASC NULLS FIRST, total#37 ASC NULLS FIRST], [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] - +TakeOrderedAndProject(limit=100, orderBy=[promotions#1 ASC NULLS FIRST,total#2 ASC NULLS FIRST], output=[promotions#1,total#2,(CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#3]) ++- *(16) Project [promotions#1, total#2, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#1 as decimal(15,4))) / promote_precision(cast(total#2 as decimal(15,4)))), DecimalType(35,20))) * 100.00000000000000000000), DecimalType(38,19)) AS (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#3] + +- BroadcastNestedLoopJoin BuildRight, Inner + :- *(8) HashAggregate(keys=[], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- Exchange SinglePartition + : +- *(7) HashAggregate(keys=[], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) + : +- *(7) Project [ss_ext_sales_price#4] + : +- *(7) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight + : :- *(7) Project [ss_item_sk#5, ss_ext_sales_price#4] + : : +- *(7) BroadcastHashJoin [c_current_addr_sk#7], [ca_address_sk#8], Inner, BuildRight + : : :- *(7) Project [ss_item_sk#5, ss_ext_sales_price#4, c_current_addr_sk#7] + : : : +- *(7) BroadcastHashJoin [ss_customer_sk#9], [c_customer_sk#10], Inner, BuildRight + : : : :- *(7) Project [ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] + : : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#11], [d_date_sk#12], Inner, BuildRight + : : : : :- *(7) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] + : : : : : +- *(7) BroadcastHashJoin [ss_promo_sk#13], [p_promo_sk#14], Inner, BuildRight + : : : : : :- *(7) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_promo_sk#13, ss_ext_sales_price#4] + : : : : : : +- *(7) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight + : : : : : : :- *(7) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_store_sk#15, ss_promo_sk#13, ss_ext_sales_price#4] + : : : : : : : +- *(7) Filter ((((isnotnull(ss_store_sk#15) && isnotnull(ss_promo_sk#13)) && isnotnull(ss_sold_date_sk#11)) && isnotnull(ss_customer_sk#9)) && isnotnull(ss_item_sk#5)) + : : : : : : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#5,ss_customer_sk#9,ss_store_sk#15,ss_promo_sk#13,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(2) Project [p_promo_sk#14] + : : : : : +- *(2) Filter ((((p_channel_dmail#18 = Y) || (p_channel_email#19 = Y)) || (p_channel_tv#20 = Y)) && isnotnull(p_promo_sk#14)) + : : : : : +- *(2) FileScan parquet default.promotion[p_promo_sk#14,p_channel_dmail#18,p_channel_email#19,p_channel_tv#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(p_channel_dmail,Y),EqualTo(p_channel_email,Y)),EqualTo(p_channel_tv,Y)), IsNotNull..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(3) Project [d_date_sk#12] + : : : : +- *(3) Filter ((((isnotnull(d_year#21) && isnotnull(d_moy#22)) && (d_year#21 = 1998)) && (d_moy#22 = 11)) && isnotnull(d_date_sk#12)) + : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_year#21,d_moy#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [c_customer_sk#10, c_current_addr_sk#7] + : : : +- *(4) Filter (isnotnull(c_customer_sk#10) && isnotnull(c_current_addr_sk#7)) + : : : +- *(4) FileScan parquet default.customer[c_customer_sk#10,c_current_addr_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(5) Project [ca_address_sk#8] + : : +- *(5) Filter ((isnotnull(ca_gmt_offset#23) && (ca_gmt_offset#23 = -5.00)) && isnotnull(ca_address_sk#8)) + : : +- *(5) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [i_item_sk#6] + : +- *(6) Filter ((isnotnull(i_category#24) && (i_category#24 = Jewelry)) && isnotnull(i_item_sk#6)) + : +- *(6) FileScan parquet default.item[i_item_sk#6,i_category#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry), IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange IdentityBroadcastMode + +- *(15) HashAggregate(keys=[], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) + +- Exchange SinglePartition + +- *(14) HashAggregate(keys=[], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) + +- *(14) Project [ss_ext_sales_price#4] + +- *(14) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight + :- *(14) Project [ss_item_sk#5, ss_ext_sales_price#4] + : +- *(14) BroadcastHashJoin [c_current_addr_sk#7], [ca_address_sk#8], Inner, BuildRight + : :- *(14) Project [ss_item_sk#5, ss_ext_sales_price#4, c_current_addr_sk#7] + : : +- *(14) BroadcastHashJoin [ss_customer_sk#9], [c_customer_sk#10], Inner, BuildRight + : : :- *(14) Project [ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] + : : : +- *(14) BroadcastHashJoin [ss_sold_date_sk#11], [d_date_sk#12], Inner, BuildRight + : : : :- *(14) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] + : : : : +- *(14) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight + : : : : :- *(14) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_store_sk#15, ss_ext_sales_price#4] + : : : : : +- *(14) Filter (((isnotnull(ss_store_sk#15) && isnotnull(ss_sold_date_sk#11)) && isnotnull(ss_customer_sk#9)) && isnotnull(ss_item_sk#5)) + : : : : : +- *(14) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#5,ss_customer_sk#9,ss_store_sk#15,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item..., ReadSchema: struct - -(2) ColumnarToRow [codegen id : 5] -Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5] - -(3) Filter [codegen id : 5] -Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5] -Condition : (((isnotnull(ws_warehouse_sk#5) AND isnotnull(ws_ship_mode_sk#4)) AND isnotnull(ws_web_site_sk#3)) AND isnotnull(ws_ship_date_sk#2)) - -(4) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/warehouse] -PushedFilters: [IsNotNull(w_warehouse_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] - -(6) Filter [codegen id : 1] -Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] -Condition : isnotnull(w_warehouse_sk#6) - -(7) BroadcastExchange -Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] - -(8) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_warehouse_sk#5] -Right keys [1]: [w_warehouse_sk#6] -Join condition: None - -(9) Project [codegen id : 5] -Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, w_warehouse_name#7] -Input [7]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5, w_warehouse_sk#6, w_warehouse_name#7] - -(10) Scan parquet default.ship_mode -Output [2]: [sm_ship_mode_sk#9, sm_type#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/ship_mode] -PushedFilters: [IsNotNull(sm_ship_mode_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [2]: [sm_ship_mode_sk#9, sm_type#10] - -(12) Filter [codegen id : 2] -Input [2]: [sm_ship_mode_sk#9, sm_type#10] -Condition : isnotnull(sm_ship_mode_sk#9) - -(13) BroadcastExchange -Input [2]: [sm_ship_mode_sk#9, sm_type#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] - -(14) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_ship_mode_sk#4] -Right keys [1]: [sm_ship_mode_sk#9] -Join condition: None - -(15) Project [codegen id : 5] -Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, w_warehouse_name#7, sm_type#10] -Input [7]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, w_warehouse_name#7, sm_ship_mode_sk#9, sm_type#10] - -(16) Scan parquet default.web_site -Output [2]: [web_site_sk#12, web_name#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_site] -PushedFilters: [IsNotNull(web_site_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 3] -Input [2]: [web_site_sk#12, web_name#13] - -(18) Filter [codegen id : 3] -Input [2]: [web_site_sk#12, web_name#13] -Condition : isnotnull(web_site_sk#12) - -(19) BroadcastExchange -Input [2]: [web_site_sk#12, web_name#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] - -(20) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_web_site_sk#3] -Right keys [1]: [web_site_sk#12] -Join condition: None - -(21) Project [codegen id : 5] -Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#7, sm_type#10, web_name#13] -Input [7]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, w_warehouse_name#7, sm_type#10, web_site_sk#12, web_name#13] - -(22) Scan parquet default.date_dim -Output [2]: [d_date_sk#15, d_month_seq#16] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] -ReadSchema: struct - -(23) ColumnarToRow [codegen id : 4] -Input [2]: [d_date_sk#15, d_month_seq#16] - -(24) Filter [codegen id : 4] -Input [2]: [d_date_sk#15, d_month_seq#16] -Condition : (((isnotnull(d_month_seq#16) AND (d_month_seq#16 >= 1200)) AND (d_month_seq#16 <= 1211)) AND isnotnull(d_date_sk#15)) - -(25) Project [codegen id : 4] -Output [1]: [d_date_sk#15] -Input [2]: [d_date_sk#15, d_month_seq#16] - -(26) BroadcastExchange -Input [1]: [d_date_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] - -(27) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_ship_date_sk#2] -Right keys [1]: [d_date_sk#15] -Join condition: None - -(28) Project [codegen id : 5] -Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#7, sm_type#10, web_name#13] -Input [6]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#7, sm_type#10, web_name#13, d_date_sk#15] - -(29) HashAggregate [codegen id : 5] -Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#7, sm_type#10, web_name#13] -Keys [3]: [substr(w_warehouse_name#7, 1, 20) AS substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13] -Functions [5]: [partial_sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] -Results [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] - -(30) Exchange -Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] -Arguments: hashpartitioning(substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, 5), true, [id=#29] - -(31) HashAggregate [codegen id : 6] -Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] -Keys [3]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13] -Functions [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34] -Results [8]: [substr(w_warehouse_name#7, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, web_name#13, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] - -(32) TakeOrderedAndProject -Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#10, web_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] -Arguments: 100, [substr(w_warehouse_name, 1, 20)#35 ASC NULLS FIRST, sm_type#10 ASC NULLS FIRST, web_name#13 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#35, sm_type#10, web_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] - +TakeOrderedAndProject(limit=100, orderBy=[substring(w_warehouse_name, 1, 20)#1 ASC NULLS FIRST,sm_type#2 ASC NULLS FIRST,web_name#3 ASC NULLS FIRST], output=[substring(w_warehouse_name, 1, 20)#1,sm_type#2,web_name#3,30 days #4,31 - 60 days #5,61 - 90 days #6,91 - 120 days #7,>120 days #8]) ++- *(6) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3], functions=[sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 30) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 60) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 90) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) + +- Exchange hashpartitioning(substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3, 5) + +- *(5) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20) AS substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3], functions=[partial_sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 30) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 60) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 90) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) + +- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, w_warehouse_name#9, sm_type#2, web_name#3] + +- *(5) BroadcastHashJoin [ws_ship_date_sk#11], [d_date_sk#13], Inner, BuildRight + :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, w_warehouse_name#9, sm_type#2, web_name#3] + : +- *(5) BroadcastHashJoin [ws_web_site_sk#14], [web_site_sk#15], Inner, BuildRight + : :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, ws_web_site_sk#14, w_warehouse_name#9, sm_type#2] + : : +- *(5) BroadcastHashJoin [ws_ship_mode_sk#16], [sm_ship_mode_sk#17], Inner, BuildRight + : : :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, ws_web_site_sk#14, ws_ship_mode_sk#16, w_warehouse_name#9] + : : : +- *(5) BroadcastHashJoin [ws_warehouse_sk#18], [w_warehouse_sk#19], Inner, BuildRight + : : : :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, ws_web_site_sk#14, ws_ship_mode_sk#16, ws_warehouse_sk#18] + : : : : +- *(5) Filter (((isnotnull(ws_warehouse_sk#18) && isnotnull(ws_ship_mode_sk#16)) && isnotnull(ws_web_site_sk#14)) && isnotnull(ws_ship_date_sk#11)) + : : : : +- *(5) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_ship_date_sk#11,ws_web_site_sk#14,ws_ship_mode_sk#16,ws_warehouse_sk#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_ship_mode_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [sm_ship_mode_sk#17, sm_type#2] + : : +- *(2) Filter isnotnull(sm_ship_mode_sk#17) + : : +- *(2) FileScan parquet default.ship_mode[sm_ship_mode_sk#17,sm_type#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/ship_mode], PartitionFilters: [], PushedFilters: [IsNotNull(sm_ship_mode_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [web_site_sk#15, web_name#3] + : +- *(3) Filter isnotnull(web_site_sk#15) + : +- *(3) FileScan parquet default.web_site[web_site_sk#15,web_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [d_date_sk#13] + +- *(4) Filter (((isnotnull(d_month_seq#20) && (d_month_seq#20 >= 1200)) && (d_month_seq#20 <= 1211)) && isnotnull(d_date_sk#13)) + +- *(4) FileScan parquet default.date_dim[d_date_sk#13,d_month_seq#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/simplified.txt index 803326b2a..b8879730e 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/simplified.txt @@ -1,48 +1,42 @@ -TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,web_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] - WholeStageCodegen (6) - HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,web_name,sum,sum,sum,sum,sum] [sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] +TakeOrderedAndProject [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sm_type,substring(w_warehouse_name, 1, 20),web_name] + WholeStageCodegen + HashAggregate [sm_type,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),web_name] [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint))] InputAdapter - Exchange [substr(w_warehouse_name, 1, 20),sm_type,web_name] #1 - WholeStageCodegen (5) - HashAggregate [w_warehouse_name,sm_type,web_name,ws_ship_date_sk,ws_sold_date_sk] [sum,sum,sum,sum,sum,substr(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum] - Project [ws_sold_date_sk,ws_ship_date_sk,w_warehouse_name,sm_type,web_name] - BroadcastHashJoin [ws_ship_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_ship_date_sk,w_warehouse_name,sm_type,web_name] - BroadcastHashJoin [ws_web_site_sk,web_site_sk] - Project [ws_sold_date_sk,ws_ship_date_sk,ws_web_site_sk,w_warehouse_name,sm_type] - BroadcastHashJoin [ws_ship_mode_sk,sm_ship_mode_sk] - Project [ws_sold_date_sk,ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,w_warehouse_name] - BroadcastHashJoin [ws_warehouse_sk,w_warehouse_sk] - Filter [ws_warehouse_sk,ws_ship_mode_sk,ws_web_site_sk,ws_ship_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk] + Exchange [sm_type,substring(w_warehouse_name, 1, 20),web_name] #1 + WholeStageCodegen + HashAggregate [sm_type,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_warehouse_name,web_name,ws_ship_date_sk,ws_sold_date_sk] [substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [sm_type,w_warehouse_name,web_name,ws_ship_date_sk,ws_sold_date_sk] + BroadcastHashJoin [d_date_sk,ws_ship_date_sk] + Project [sm_type,w_warehouse_name,web_name,ws_ship_date_sk,ws_sold_date_sk] + BroadcastHashJoin [web_site_sk,ws_web_site_sk] + Project [sm_type,w_warehouse_name,ws_ship_date_sk,ws_sold_date_sk,ws_web_site_sk] + BroadcastHashJoin [sm_ship_mode_sk,ws_ship_mode_sk] + Project [w_warehouse_name,ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_web_site_sk] + BroadcastHashJoin [w_warehouse_sk,ws_warehouse_sk] + Project [ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_web_site_sk] + Filter [ws_ship_date_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_web_site_sk] + Scan parquet default.web_sales [ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_web_site_sk] [ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_web_site_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [w_warehouse_sk] - ColumnarToRow - InputAdapter - Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] + WholeStageCodegen + Project [w_warehouse_name,w_warehouse_sk] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Filter [sm_ship_mode_sk] - ColumnarToRow - InputAdapter - Scan parquet default.ship_mode [sm_ship_mode_sk,sm_type] + WholeStageCodegen + Project [sm_ship_mode_sk,sm_type] + Filter [sm_ship_mode_sk] + Scan parquet default.ship_mode [sm_ship_mode_sk,sm_type] [sm_ship_mode_sk,sm_type] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) - Filter [web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_name] + WholeStageCodegen + Project [web_name,web_site_sk] + Filter [web_site_sk] + Scan parquet default.web_site [web_name,web_site_sk] [web_name,web_site_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) + WholeStageCodegen Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + Filter [d_date_sk,d_month_seq] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/explain.txt index 284a9203a..98e27704a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/explain.txt @@ -1,180 +1,31 @@ == Physical Plan == -TakeOrderedAndProject (32) -+- * Project (31) - +- * Filter (30) - +- Window (29) - +- * Sort (28) - +- Exchange (27) - +- * HashAggregate (26) - +- Exchange (25) - +- * HashAggregate (24) - +- * Project (23) - +- * BroadcastHashJoin Inner BuildRight (22) - :- * Project (17) - : +- * BroadcastHashJoin Inner BuildRight (16) - : :- * Project (10) - : : +- * BroadcastHashJoin Inner BuildRight (9) - : : :- * Project (4) - : : : +- * Filter (3) - : : : +- * ColumnarToRow (2) - : : : +- Scan parquet default.item (1) - : : +- BroadcastExchange (8) - : : +- * Filter (7) - : : +- * ColumnarToRow (6) - : : +- Scan parquet default.store_sales (5) - : +- BroadcastExchange (15) - : +- * Project (14) - : +- * Filter (13) - : +- * ColumnarToRow (12) - : +- Scan parquet default.date_dim (11) - +- BroadcastExchange (21) - +- * Filter (20) - +- * ColumnarToRow (19) - +- Scan parquet default.store (18) - - -(1) Scan parquet default.item -Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,refernece,self-help])),In(i_brand, [scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8])),And(And(In(i_category, [Women,Music,Men]),In(i_class, [accessories,classical,fragrances,pants])),In(i_brand, [amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9]))), IsNotNull(i_item_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] - -(3) Filter [codegen id : 4] -Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] -Condition : ((((i_category#4 IN (Books,Children,Electronics) AND i_class#3 IN (personal,portable,refernece,self-help)) AND i_brand#2 IN (scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8)) OR ((i_category#4 IN (Women,Music,Men) AND i_class#3 IN (accessories,classical,fragrances,pants)) AND i_brand#2 IN (amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9))) AND isnotnull(i_item_sk#1)) - -(4) Project [codegen id : 4] -Output [2]: [i_item_sk#1, i_manager_id#5] -Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] - -(5) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(6) ColumnarToRow [codegen id : 1] -Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] - -(7) Filter [codegen id : 1] -Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] -Condition : ((isnotnull(ss_item_sk#11) AND isnotnull(ss_sold_date_sk#10)) AND isnotnull(ss_store_sk#12)) - -(8) BroadcastExchange -Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#14] - -(9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_item_sk#1] -Right keys [1]: [ss_item_sk#11] -Join condition: None - -(10) Project [codegen id : 4] -Output [4]: [i_manager_id#5, ss_sold_date_sk#10, ss_store_sk#12, ss_sales_price#13] -Input [6]: [i_item_sk#1, i_manager_id#5, ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] - -(11) Scan parquet default.date_dim -Output [3]: [d_date_sk#15, d_month_seq#16, d_moy#17] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [3]: [d_date_sk#15, d_month_seq#16, d_moy#17] - -(13) Filter [codegen id : 2] -Input [3]: [d_date_sk#15, d_month_seq#16, d_moy#17] -Condition : (d_month_seq#16 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) AND isnotnull(d_date_sk#15)) - -(14) Project [codegen id : 2] -Output [2]: [d_date_sk#15, d_moy#17] -Input [3]: [d_date_sk#15, d_month_seq#16, d_moy#17] - -(15) BroadcastExchange -Input [2]: [d_date_sk#15, d_moy#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] - -(16) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#10] -Right keys [1]: [d_date_sk#15] -Join condition: None - -(17) Project [codegen id : 4] -Output [4]: [i_manager_id#5, ss_store_sk#12, ss_sales_price#13, d_moy#17] -Input [6]: [i_manager_id#5, ss_sold_date_sk#10, ss_store_sk#12, ss_sales_price#13, d_date_sk#15, d_moy#17] - -(18) Scan parquet default.store -Output [1]: [s_store_sk#19] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 3] -Input [1]: [s_store_sk#19] - -(20) Filter [codegen id : 3] -Input [1]: [s_store_sk#19] -Condition : isnotnull(s_store_sk#19) - -(21) BroadcastExchange -Input [1]: [s_store_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] - -(22) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#12] -Right keys [1]: [s_store_sk#19] -Join condition: None - -(23) Project [codegen id : 4] -Output [3]: [i_manager_id#5, ss_sales_price#13, d_moy#17] -Input [5]: [i_manager_id#5, ss_store_sk#12, ss_sales_price#13, d_moy#17, s_store_sk#19] - -(24) HashAggregate [codegen id : 4] -Input [3]: [i_manager_id#5, ss_sales_price#13, d_moy#17] -Keys [2]: [i_manager_id#5, d_moy#17] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#13))] -Aggregate Attributes [1]: [sum#21] -Results [3]: [i_manager_id#5, d_moy#17, sum#22] - -(25) Exchange -Input [3]: [i_manager_id#5, d_moy#17, sum#22] -Arguments: hashpartitioning(i_manager_id#5, d_moy#17, 5), true, [id=#23] - -(26) HashAggregate [codegen id : 5] -Input [3]: [i_manager_id#5, d_moy#17, sum#22] -Keys [2]: [i_manager_id#5, d_moy#17] -Functions [1]: [sum(UnscaledValue(ss_sales_price#13))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#13))#24] -Results [3]: [i_manager_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#24,17,2) AS sum_sales#25, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#24,17,2) AS _w0#26] - -(27) Exchange -Input [3]: [i_manager_id#5, sum_sales#25, _w0#26] -Arguments: hashpartitioning(i_manager_id#5, 5), true, [id=#27] - -(28) Sort [codegen id : 6] -Input [3]: [i_manager_id#5, sum_sales#25, _w0#26] -Arguments: [i_manager_id#5 ASC NULLS FIRST], false, 0 - -(29) Window -Input [3]: [i_manager_id#5, sum_sales#25, _w0#26] -Arguments: [avg(_w0#26) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#28], [i_manager_id#5] - -(30) Filter [codegen id : 7] -Input [4]: [i_manager_id#5, sum_sales#25, _w0#26, avg_monthly_sales#28] -Condition : (CASE WHEN (avg_monthly_sales#28 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#25 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#28 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#28 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000) - -(31) Project [codegen id : 7] -Output [3]: [i_manager_id#5, sum_sales#25, avg_monthly_sales#28] -Input [4]: [i_manager_id#5, sum_sales#25, _w0#26, avg_monthly_sales#28] - -(32) TakeOrderedAndProject -Input [3]: [i_manager_id#5, sum_sales#25, avg_monthly_sales#28] -Arguments: 100, [i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#28 ASC NULLS FIRST, sum_sales#25 ASC NULLS FIRST], [i_manager_id#5, sum_sales#25, avg_monthly_sales#28] - +TakeOrderedAndProject(limit=100, orderBy=[i_manager_id#1 ASC NULLS FIRST,avg_monthly_sales#2 ASC NULLS FIRST,sum_sales#3 ASC NULLS FIRST], output=[i_manager_id#1,sum_sales#3,avg_monthly_sales#2]) ++- *(7) Project [i_manager_id#1, sum_sales#3, avg_monthly_sales#2] + +- *(7) Filter (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#3 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000) + +- Window [avg(_w0#4) windowspecdefinition(i_manager_id#1, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_manager_id#1] + +- *(6) Sort [i_manager_id#1 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(i_manager_id#1, 5) + +- *(5) HashAggregate(keys=[i_manager_id#1, d_moy#5], functions=[sum(UnscaledValue(ss_sales_price#6))]) + +- Exchange hashpartitioning(i_manager_id#1, d_moy#5, 5) + +- *(4) HashAggregate(keys=[i_manager_id#1, d_moy#5], functions=[partial_sum(UnscaledValue(ss_sales_price#6))]) + +- *(4) Project [i_manager_id#1, ss_sales_price#6, d_moy#5] + +- *(4) BroadcastHashJoin [ss_store_sk#7], [s_store_sk#8], Inner, BuildRight + :- *(4) Project [i_manager_id#1, ss_store_sk#7, ss_sales_price#6, d_moy#5] + : +- *(4) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight + : :- *(4) Project [i_manager_id#1, ss_sold_date_sk#9, ss_store_sk#7, ss_sales_price#6] + : : +- *(4) BroadcastHashJoin [i_item_sk#11], [ss_item_sk#12], Inner, BuildRight + : : :- *(4) Project [i_item_sk#11, i_manager_id#1] + : : : +- *(4) Filter ((((i_category#13 IN (Books,Children,Electronics) && i_class#14 IN (personal,portable,refernece,self-help)) && i_brand#15 IN (scholaramalgamalg #16,scholaramalgamalg #17,exportiunivamalg #18,scholaramalgamalg #18)) || ((i_category#13 IN (Women,Music,Men) && i_class#14 IN (accessories,classical,fragrances,pants)) && i_brand#15 IN (amalgimporto #19,edu packscholar #19,exportiimporto #19,importoamalg #19))) && isnotnull(i_item_sk#11)) + : : : +- *(4) FileScan parquet default.item[i_item_sk#11,i_brand#15,i_class#14,i_category#13,i_manager_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,refernece..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : +- *(1) Project [ss_sold_date_sk#9, ss_item_sk#12, ss_store_sk#7, ss_sales_price#6] + : : +- *(1) Filter ((isnotnull(ss_item_sk#12) && isnotnull(ss_sold_date_sk#9)) && isnotnull(ss_store_sk#7)) + : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#12,ss_store_sk#7,ss_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#10, d_moy#5] + : +- *(2) Filter (d_month_seq#20 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) && isnotnull(d_date_sk#10)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#10,d_month_seq#20,d_moy#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [s_store_sk#8] + +- *(3) Filter isnotnull(s_store_sk#8) + +- *(3) FileScan parquet default.store[s_store_sk#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/simplified.txt index 7272c01ab..fc602334c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/simplified.txt @@ -1,49 +1,43 @@ -TakeOrderedAndProject [i_manager_id,avg_monthly_sales,sum_sales] - WholeStageCodegen (7) - Project [i_manager_id,sum_sales,avg_monthly_sales] +TakeOrderedAndProject [avg_monthly_sales,i_manager_id,sum_sales] + WholeStageCodegen + Project [avg_monthly_sales,i_manager_id,sum_sales] Filter [avg_monthly_sales,sum_sales] InputAdapter Window [_w0,i_manager_id] - WholeStageCodegen (6) + WholeStageCodegen Sort [i_manager_id] InputAdapter Exchange [i_manager_id] #1 - WholeStageCodegen (5) - HashAggregate [i_manager_id,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + WholeStageCodegen + HashAggregate [d_moy,i_manager_id,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] InputAdapter - Exchange [i_manager_id,d_moy] #2 - WholeStageCodegen (4) - HashAggregate [i_manager_id,d_moy,ss_sales_price] [sum,sum] - Project [i_manager_id,ss_sales_price,d_moy] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [i_manager_id,ss_store_sk,ss_sales_price,d_moy] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [i_manager_id,ss_sold_date_sk,ss_store_sk,ss_sales_price] + Exchange [d_moy,i_manager_id] #2 + WholeStageCodegen + HashAggregate [d_moy,i_manager_id,ss_sales_price,sum,sum] [sum,sum] + Project [d_moy,i_manager_id,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,i_manager_id,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_manager_id,ss_sales_price,ss_sold_date_sk,ss_store_sk] BroadcastHashJoin [i_item_sk,ss_item_sk] Project [i_item_sk,i_manager_id] - Filter [i_category,i_class,i_brand,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand,i_class,i_category,i_manager_id] + Filter [i_brand,i_category,i_class,i_item_sk] + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_manager_id] [i_brand,i_category,i_class,i_item_sk,i_manager_id] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + WholeStageCodegen + Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date_sk,d_moy] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy] + Filter [d_date_sk,d_month_seq] + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy] [d_date_sk,d_month_seq,d_moy] InputAdapter BroadcastExchange #5 - WholeStageCodegen (3) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk] + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk] [s_store_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt index 0e658ff99..9319775c4 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt @@ -1,918 +1,170 @@ == Physical Plan == -* Sort (170) -+- Exchange (169) - +- * Project (168) - +- * BroadcastHashJoin Inner BuildRight (167) - :- * HashAggregate (105) - : +- Exchange (104) - : +- * HashAggregate (103) - : +- * Project (102) - : +- * BroadcastHashJoin Inner BuildRight (101) - : :- * Project (95) - : : +- * BroadcastHashJoin Inner BuildRight (94) - : : :- * Project (92) - : : : +- * BroadcastHashJoin Inner BuildRight (91) - : : : :- * Project (86) - : : : : +- * BroadcastHashJoin Inner BuildRight (85) - : : : : :- * Project (83) - : : : : : +- * BroadcastHashJoin Inner BuildRight (82) - : : : : : :- * Project (77) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (76) - : : : : : : :- * Project (74) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (73) - : : : : : : : :- * Project (68) - : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (67) - : : : : : : : : :- * Project (62) - : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (61) - : : : : : : : : : :- * Project (59) - : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (58) - : : : : : : : : : : :- * Project (53) - : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (52) - : : : : : : : : : : : :- * Project (50) - : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (49) - : : : : : : : : : : : : :- * Project (44) - : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (43) - : : : : : : : : : : : : : :- * Project (38) - : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (37) - : : : : : : : : : : : : : : :- * Project (32) - : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (31) - : : : : : : : : : : : : : : : :- * Project (26) - : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (25) - : : : : : : : : : : : : : : : : :- * Project (9) - : : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : : : : : : : : : : : : : : : :- * Filter (3) - : : : : : : : : : : : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : : : : : : : : : : : +- Scan parquet default.store_sales (1) - : : : : : : : : : : : : : : : : : +- BroadcastExchange (7) - : : : : : : : : : : : : : : : : : +- * Filter (6) - : : : : : : : : : : : : : : : : : +- * ColumnarToRow (5) - : : : : : : : : : : : : : : : : : +- Scan parquet default.store_returns (4) - : : : : : : : : : : : : : : : : +- BroadcastExchange (24) - : : : : : : : : : : : : : : : : +- * Project (23) - : : : : : : : : : : : : : : : : +- * Filter (22) - : : : : : : : : : : : : : : : : +- * HashAggregate (21) - : : : : : : : : : : : : : : : : +- Exchange (20) - : : : : : : : : : : : : : : : : +- * HashAggregate (19) - : : : : : : : : : : : : : : : : +- * Project (18) - : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (17) - : : : : : : : : : : : : : : : : :- * Filter (12) - : : : : : : : : : : : : : : : : : +- * ColumnarToRow (11) - : : : : : : : : : : : : : : : : : +- Scan parquet default.catalog_sales (10) - : : : : : : : : : : : : : : : : +- BroadcastExchange (16) - : : : : : : : : : : : : : : : : +- * Filter (15) - : : : : : : : : : : : : : : : : +- * ColumnarToRow (14) - : : : : : : : : : : : : : : : : +- Scan parquet default.catalog_returns (13) - : : : : : : : : : : : : : : : +- BroadcastExchange (30) - : : : : : : : : : : : : : : : +- * Filter (29) - : : : : : : : : : : : : : : : +- * ColumnarToRow (28) - : : : : : : : : : : : : : : : +- Scan parquet default.date_dim (27) - : : : : : : : : : : : : : : +- BroadcastExchange (36) - : : : : : : : : : : : : : : +- * Filter (35) - : : : : : : : : : : : : : : +- * ColumnarToRow (34) - : : : : : : : : : : : : : : +- Scan parquet default.store (33) - : : : : : : : : : : : : : +- BroadcastExchange (42) - : : : : : : : : : : : : : +- * Filter (41) - : : : : : : : : : : : : : +- * ColumnarToRow (40) - : : : : : : : : : : : : : +- Scan parquet default.customer (39) - : : : : : : : : : : : : +- BroadcastExchange (48) - : : : : : : : : : : : : +- * Filter (47) - : : : : : : : : : : : : +- * ColumnarToRow (46) - : : : : : : : : : : : : +- Scan parquet default.date_dim (45) - : : : : : : : : : : : +- ReusedExchange (51) - : : : : : : : : : : +- BroadcastExchange (57) - : : : : : : : : : : +- * Filter (56) - : : : : : : : : : : +- * ColumnarToRow (55) - : : : : : : : : : : +- Scan parquet default.customer_demographics (54) - : : : : : : : : : +- ReusedExchange (60) - : : : : : : : : +- BroadcastExchange (66) - : : : : : : : : +- * Filter (65) - : : : : : : : : +- * ColumnarToRow (64) - : : : : : : : : +- Scan parquet default.promotion (63) - : : : : : : : +- BroadcastExchange (72) - : : : : : : : +- * Filter (71) - : : : : : : : +- * ColumnarToRow (70) - : : : : : : : +- Scan parquet default.household_demographics (69) - : : : : : : +- ReusedExchange (75) - : : : : : +- BroadcastExchange (81) - : : : : : +- * Filter (80) - : : : : : +- * ColumnarToRow (79) - : : : : : +- Scan parquet default.customer_address (78) - : : : : +- ReusedExchange (84) - : : : +- BroadcastExchange (90) - : : : +- * Filter (89) - : : : +- * ColumnarToRow (88) - : : : +- Scan parquet default.income_band (87) - : : +- ReusedExchange (93) - : +- BroadcastExchange (100) - : +- * Project (99) - : +- * Filter (98) - : +- * ColumnarToRow (97) - : +- Scan parquet default.item (96) - +- BroadcastExchange (166) - +- * HashAggregate (165) - +- Exchange (164) - +- * HashAggregate (163) - +- * Project (162) - +- * BroadcastHashJoin Inner BuildRight (161) - :- * Project (159) - : +- * BroadcastHashJoin Inner BuildRight (158) - : :- * Project (156) - : : +- * BroadcastHashJoin Inner BuildRight (155) - : : :- * Project (153) - : : : +- * BroadcastHashJoin Inner BuildRight (152) - : : : :- * Project (150) - : : : : +- * BroadcastHashJoin Inner BuildRight (149) - : : : : :- * Project (147) - : : : : : +- * BroadcastHashJoin Inner BuildRight (146) - : : : : : :- * Project (144) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (143) - : : : : : : :- * Project (141) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (140) - : : : : : : : :- * Project (138) - : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (137) - : : : : : : : : :- * Project (135) - : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (134) - : : : : : : : : : :- * Project (132) - : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (131) - : : : : : : : : : : :- * Project (129) - : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (128) - : : : : : : : : : : : :- * Project (126) - : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (125) - : : : : : : : : : : : : :- * Project (123) - : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (122) - : : : : : : : : : : : : : :- * Project (120) - : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (119) - : : : : : : : : : : : : : : :- * Project (114) - : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (113) - : : : : : : : : : : : : : : : :- * Project (111) - : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (110) - : : : : : : : : : : : : : : : : :- * Filter (108) - : : : : : : : : : : : : : : : : : +- * ColumnarToRow (107) - : : : : : : : : : : : : : : : : : +- Scan parquet default.store_sales (106) - : : : : : : : : : : : : : : : : +- ReusedExchange (109) - : : : : : : : : : : : : : : : +- ReusedExchange (112) - : : : : : : : : : : : : : : +- BroadcastExchange (118) - : : : : : : : : : : : : : : +- * Filter (117) - : : : : : : : : : : : : : : +- * ColumnarToRow (116) - : : : : : : : : : : : : : : +- Scan parquet default.date_dim (115) - : : : : : : : : : : : : : +- ReusedExchange (121) - : : : : : : : : : : : : +- ReusedExchange (124) - : : : : : : : : : : : +- ReusedExchange (127) - : : : : : : : : : : +- ReusedExchange (130) - : : : : : : : : : +- ReusedExchange (133) - : : : : : : : : +- ReusedExchange (136) - : : : : : : : +- ReusedExchange (139) - : : : : : : +- ReusedExchange (142) - : : : : : +- ReusedExchange (145) - : : : : +- ReusedExchange (148) - : : : +- ReusedExchange (151) - : : +- ReusedExchange (154) - : +- ReusedExchange (157) - +- ReusedExchange (160) - - -(1) Scan parquet default.store_sales -Output [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 20] -Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] - -(3) Filter [codegen id : 20] -Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] -Condition : ((((((((isnotnull(ss_item_sk#2) AND isnotnull(ss_ticket_number#9)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_cdemo_sk#4)) AND isnotnull(ss_promo_sk#8)) AND isnotnull(ss_hdemo_sk#5)) AND isnotnull(ss_addr_sk#6)) - -(4) Scan parquet default.store_returns -Output [2]: [sr_item_sk#13, sr_ticket_number#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [sr_item_sk#13, sr_ticket_number#14] - -(6) Filter [codegen id : 1] -Input [2]: [sr_item_sk#13, sr_ticket_number#14] -Condition : (isnotnull(sr_item_sk#13) AND isnotnull(sr_ticket_number#14)) - -(7) BroadcastExchange -Input [2]: [sr_item_sk#13, sr_ticket_number#14] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [id=#15] - -(8) BroadcastHashJoin [codegen id : 20] -Left keys [2]: [cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#9 as bigint)] -Right keys [2]: [sr_item_sk#13, sr_ticket_number#14] -Join condition: None - -(9) Project [codegen id : 20] -Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] -Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, sr_item_sk#13, sr_ticket_number#14] - -(10) Scan parquet default.catalog_sales -Output [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 3] -Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] - -(12) Filter [codegen id : 3] -Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] -Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_order_number#17)) - -(13) Scan parquet default.catalog_returns -Output [5]: [cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] -ReadSchema: struct - -(14) ColumnarToRow [codegen id : 2] -Input [5]: [cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] - -(15) Filter [codegen id : 2] -Input [5]: [cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] -Condition : (isnotnull(cr_item_sk#19) AND isnotnull(cr_order_number#20)) - -(16) BroadcastExchange -Input [5]: [cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [id=#24] - -(17) BroadcastHashJoin [codegen id : 3] -Left keys [2]: [cs_item_sk#16, cs_order_number#17] -Right keys [2]: [cr_item_sk#19, cr_order_number#20] -Join condition: None - -(18) Project [codegen id : 3] -Output [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] -Input [8]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] - -(19) HashAggregate [codegen id : 3] -Input [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] -Keys [1]: [cs_item_sk#16] -Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#18)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))] -Aggregate Attributes [3]: [sum#25, sum#26, isEmpty#27] -Results [4]: [cs_item_sk#16, sum#28, sum#29, isEmpty#30] - -(20) Exchange -Input [4]: [cs_item_sk#16, sum#28, sum#29, isEmpty#30] -Arguments: hashpartitioning(cs_item_sk#16, 5), true, [id=#31] - -(21) HashAggregate [codegen id : 4] -Input [4]: [cs_item_sk#16, sum#28, sum#29, isEmpty#30] -Keys [1]: [cs_item_sk#16] -Functions [2]: [sum(UnscaledValue(cs_ext_list_price#18)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#18))#32, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#33] -Results [3]: [cs_item_sk#16, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#18))#32,17,2) AS sum(cs_ext_list_price#18)#34, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#33 AS sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#35] - -(22) Filter [codegen id : 4] -Input [3]: [cs_item_sk#16, sum(cs_ext_list_price#18)#34, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#35] -Condition : (isnotnull(sum(cs_ext_list_price#18)#34) AND (cast(sum(cs_ext_list_price#18)#34 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#35)), DecimalType(21,2), true))) - -(23) Project [codegen id : 4] -Output [1]: [cs_item_sk#16] -Input [3]: [cs_item_sk#16, sum(cs_ext_list_price#18)#34, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#35] - -(24) BroadcastExchange -Input [1]: [cs_item_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#36] - -(25) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [cs_item_sk#16] -Join condition: None - -(26) Project [codegen id : 20] -Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] -Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, cs_item_sk#16] - -(27) Scan parquet default.date_dim -Output [2]: [d_date_sk#37, d_year#38] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] -ReadSchema: struct - -(28) ColumnarToRow [codegen id : 5] -Input [2]: [d_date_sk#37, d_year#38] - -(29) Filter [codegen id : 5] -Input [2]: [d_date_sk#37, d_year#38] -Condition : ((isnotnull(d_year#38) AND (d_year#38 = 1999)) AND isnotnull(d_date_sk#37)) - -(30) BroadcastExchange -Input [2]: [d_date_sk#37, d_year#38] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#39] - -(31) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#37] -Join condition: None - -(32) Project [codegen id : 20] -Output [11]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38] -Input [13]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_date_sk#37, d_year#38] - -(33) Scan parquet default.store -Output [3]: [s_store_sk#40, s_store_name#41, s_zip#42] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_zip)] -ReadSchema: struct - -(34) ColumnarToRow [codegen id : 6] -Input [3]: [s_store_sk#40, s_store_name#41, s_zip#42] - -(35) Filter [codegen id : 6] -Input [3]: [s_store_sk#40, s_store_name#41, s_zip#42] -Condition : ((isnotnull(s_store_sk#40) AND isnotnull(s_store_name#41)) AND isnotnull(s_zip#42)) - -(36) BroadcastExchange -Input [3]: [s_store_sk#40, s_store_name#41, s_zip#42] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#43] - -(37) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [ss_store_sk#7] -Right keys [1]: [s_store_sk#40] -Join condition: None - -(38) Project [codegen id : 20] -Output [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42] -Input [14]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_sk#40, s_store_name#41, s_zip#42] - -(39) Scan parquet default.customer -Output [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct - -(40) ColumnarToRow [codegen id : 7] -Input [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] - -(41) Filter [codegen id : 7] -Input [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] -Condition : (((((isnotnull(c_customer_sk#44) AND isnotnull(c_first_sales_date_sk#49)) AND isnotnull(c_first_shipto_date_sk#48)) AND isnotnull(c_current_cdemo_sk#45)) AND isnotnull(c_current_hdemo_sk#46)) AND isnotnull(c_current_addr_sk#47)) - -(42) BroadcastExchange -Input [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#50] - -(43) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [ss_customer_sk#3] -Right keys [1]: [c_customer_sk#44] -Join condition: None - -(44) Project [codegen id : 20] -Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] -Input [18]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] - -(45) Scan parquet default.date_dim -Output [2]: [d_date_sk#51, d_year#52] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date_sk)] -ReadSchema: struct - -(46) ColumnarToRow [codegen id : 8] -Input [2]: [d_date_sk#51, d_year#52] - -(47) Filter [codegen id : 8] -Input [2]: [d_date_sk#51, d_year#52] -Condition : isnotnull(d_date_sk#51) - -(48) BroadcastExchange -Input [2]: [d_date_sk#51, d_year#52] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#53] - -(49) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [c_first_sales_date_sk#49] -Right keys [1]: [d_date_sk#51] -Join condition: None - -(50) Project [codegen id : 20] -Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, d_year#52] -Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49, d_date_sk#51, d_year#52] - -(51) ReusedExchange [Reuses operator id: 48] -Output [2]: [d_date_sk#54, d_year#55] - -(52) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [c_first_shipto_date_sk#48] -Right keys [1]: [d_date_sk#54] -Join condition: None - -(53) Project [codegen id : 20] -Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55] -Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, d_year#52, d_date_sk#54, d_year#55] - -(54) Scan parquet default.customer_demographics -Output [2]: [cd_demo_sk#56, cd_marital_status#57] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)] -ReadSchema: struct - -(55) ColumnarToRow [codegen id : 10] -Input [2]: [cd_demo_sk#56, cd_marital_status#57] - -(56) Filter [codegen id : 10] -Input [2]: [cd_demo_sk#56, cd_marital_status#57] -Condition : (isnotnull(cd_demo_sk#56) AND isnotnull(cd_marital_status#57)) - -(57) BroadcastExchange -Input [2]: [cd_demo_sk#56, cd_marital_status#57] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#58] - -(58) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [ss_cdemo_sk#4] -Right keys [1]: [cd_demo_sk#56] -Join condition: None - -(59) Project [codegen id : 20] -Output [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, cd_marital_status#57] -Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, cd_demo_sk#56, cd_marital_status#57] - -(60) ReusedExchange [Reuses operator id: 57] -Output [2]: [cd_demo_sk#59, cd_marital_status#60] - -(61) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [c_current_cdemo_sk#45] -Right keys [1]: [cd_demo_sk#59] -Join condition: NOT (cd_marital_status#57 = cd_marital_status#60) - -(62) Project [codegen id : 20] -Output [14]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55] -Input [18]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, cd_marital_status#57, cd_demo_sk#59, cd_marital_status#60] - -(63) Scan parquet default.promotion -Output [1]: [p_promo_sk#61] -Batched: true -Location [not included in comparison]/{warehouse_dir}/promotion] -PushedFilters: [IsNotNull(p_promo_sk)] -ReadSchema: struct - -(64) ColumnarToRow [codegen id : 12] -Input [1]: [p_promo_sk#61] - -(65) Filter [codegen id : 12] -Input [1]: [p_promo_sk#61] -Condition : isnotnull(p_promo_sk#61) - -(66) BroadcastExchange -Input [1]: [p_promo_sk#61] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] - -(67) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [ss_promo_sk#8] -Right keys [1]: [p_promo_sk#61] -Join condition: None - -(68) Project [codegen id : 20] -Output [13]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55] -Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, p_promo_sk#61] - -(69) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#63, hd_income_band_sk#64] -Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] -ReadSchema: struct - -(70) ColumnarToRow [codegen id : 13] -Input [2]: [hd_demo_sk#63, hd_income_band_sk#64] - -(71) Filter [codegen id : 13] -Input [2]: [hd_demo_sk#63, hd_income_band_sk#64] -Condition : (isnotnull(hd_demo_sk#63) AND isnotnull(hd_income_band_sk#64)) - -(72) BroadcastExchange -Input [2]: [hd_demo_sk#63, hd_income_band_sk#64] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#65] - -(73) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [ss_hdemo_sk#5] -Right keys [1]: [hd_demo_sk#63] -Join condition: None - -(74) Project [codegen id : 20] -Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64] -Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, hd_demo_sk#63, hd_income_band_sk#64] - -(75) ReusedExchange [Reuses operator id: 72] -Output [2]: [hd_demo_sk#66, hd_income_band_sk#67] - -(76) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [c_current_hdemo_sk#46] -Right keys [1]: [hd_demo_sk#66] -Join condition: None - -(77) Project [codegen id : 20] -Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67] -Input [15]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_demo_sk#66, hd_income_band_sk#67] - -(78) Scan parquet default.customer_address -Output [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk)] -ReadSchema: struct - -(79) ColumnarToRow [codegen id : 15] -Input [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] - -(80) Filter [codegen id : 15] -Input [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] -Condition : isnotnull(ca_address_sk#68) - -(81) BroadcastExchange -Input [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#73] - -(82) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [ss_addr_sk#6] -Right keys [1]: [ca_address_sk#68] -Join condition: None - -(83) Project [codegen id : 20] -Output [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] -Input [18]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] - -(84) ReusedExchange [Reuses operator id: 81] -Output [5]: [ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] - -(85) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [c_current_addr_sk#47] -Right keys [1]: [ca_address_sk#74] -Join condition: None - -(86) Project [codegen id : 20] -Output [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] -Input [21]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] - -(87) Scan parquet default.income_band -Output [1]: [ib_income_band_sk#79] -Batched: true -Location [not included in comparison]/{warehouse_dir}/income_band] -PushedFilters: [IsNotNull(ib_income_band_sk)] -ReadSchema: struct - -(88) ColumnarToRow [codegen id : 17] -Input [1]: [ib_income_band_sk#79] - -(89) Filter [codegen id : 17] -Input [1]: [ib_income_band_sk#79] -Condition : isnotnull(ib_income_band_sk#79) - -(90) BroadcastExchange -Input [1]: [ib_income_band_sk#79] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#80] - -(91) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [hd_income_band_sk#64] -Right keys [1]: [ib_income_band_sk#79] -Join condition: None - -(92) Project [codegen id : 20] -Output [18]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] -Input [20]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ib_income_band_sk#79] - -(93) ReusedExchange [Reuses operator id: 90] -Output [1]: [ib_income_band_sk#81] - -(94) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [hd_income_band_sk#67] -Right keys [1]: [ib_income_band_sk#81] -Join condition: None - -(95) Project [codegen id : 20] -Output [17]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] -Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ib_income_band_sk#81] - -(96) Scan parquet default.item -Output [4]: [i_item_sk#82, i_current_price#83, i_color#84, i_product_name#85] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_current_price), In(i_color, [purple,burlywood,indian,spring,floral,medium]), GreaterThanOrEqual(i_current_price,64.00), IsNotNull(i_item_sk)] -ReadSchema: struct - -(97) ColumnarToRow [codegen id : 19] -Input [4]: [i_item_sk#82, i_current_price#83, i_color#84, i_product_name#85] - -(98) Filter [codegen id : 19] -Input [4]: [i_item_sk#82, i_current_price#83, i_color#84, i_product_name#85] -Condition : ((((((isnotnull(i_current_price#83) AND i_color#84 IN (purple,burlywood,indian,spring,floral,medium)) AND (i_current_price#83 >= 64.00)) AND (cast(i_current_price#83 as decimal(12,2)) <= 74.00)) AND (cast(i_current_price#83 as decimal(12,2)) >= 65.00)) AND (cast(i_current_price#83 as decimal(12,2)) <= 79.00)) AND isnotnull(i_item_sk#82)) - -(99) Project [codegen id : 19] -Output [2]: [i_item_sk#82, i_product_name#85] -Input [4]: [i_item_sk#82, i_current_price#83, i_color#84, i_product_name#85] - -(100) BroadcastExchange -Input [2]: [i_item_sk#82, i_product_name#85] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#86] - -(101) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#82] -Join condition: None - -(102) Project [codegen id : 20] -Output [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, d_year#52, d_year#55, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, i_item_sk#82, i_product_name#85] -Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, i_item_sk#82, i_product_name#85] - -(103) HashAggregate [codegen id : 20] -Input [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, d_year#52, d_year#55, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, i_item_sk#82, i_product_name#85] -Keys [15]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55] -Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#10)), partial_sum(UnscaledValue(ss_list_price#11)), partial_sum(UnscaledValue(ss_coupon_amt#12))] -Aggregate Attributes [4]: [count#87, sum#88, sum#89, sum#90] -Results [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55, count#91, sum#92, sum#93, sum#94] - -(104) Exchange -Input [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55, count#91, sum#92, sum#93, sum#94] -Arguments: hashpartitioning(i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55, 5), true, [id=#95] - -(105) HashAggregate [codegen id : 42] -Input [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55, count#91, sum#92, sum#93, sum#94] -Keys [15]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55] -Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#10)), sum(UnscaledValue(ss_list_price#11)), sum(UnscaledValue(ss_coupon_amt#12))] -Aggregate Attributes [4]: [count(1)#96, sum(UnscaledValue(ss_wholesale_cost#10))#97, sum(UnscaledValue(ss_list_price#11))#98, sum(UnscaledValue(ss_coupon_amt#12))#99] -Results [17]: [i_product_name#85 AS product_name#100, i_item_sk#82 AS item_sk#101, s_store_name#41 AS store_name#102, s_zip#42 AS store_zip#103, ca_street_number#69 AS b_street_number#104, ca_street_name#70 AS b_streen_name#105, ca_city#71 AS b_city#106, ca_zip#72 AS b_zip#107, ca_street_number#75 AS c_street_number#108, ca_street_name#76 AS c_street_name#109, ca_city#77 AS c_city#110, ca_zip#78 AS c_zip#111, d_year#38 AS syear#112, count(1)#96 AS cnt#113, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#10))#97,17,2) AS s1#114, MakeDecimal(sum(UnscaledValue(ss_list_price#11))#98,17,2) AS s2#115, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#12))#99,17,2) AS s3#116] - -(106) Scan parquet default.store_sales -Output [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] -ReadSchema: struct - -(107) ColumnarToRow [codegen id : 40] -Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] - -(108) Filter [codegen id : 40] -Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] -Condition : ((((((((isnotnull(ss_item_sk#2) AND isnotnull(ss_ticket_number#9)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_cdemo_sk#4)) AND isnotnull(ss_promo_sk#8)) AND isnotnull(ss_hdemo_sk#5)) AND isnotnull(ss_addr_sk#6)) - -(109) ReusedExchange [Reuses operator id: 7] -Output [2]: [sr_item_sk#13, sr_ticket_number#14] - -(110) BroadcastHashJoin [codegen id : 40] -Left keys [2]: [cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#9 as bigint)] -Right keys [2]: [sr_item_sk#13, sr_ticket_number#14] -Join condition: None - -(111) Project [codegen id : 40] -Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] -Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, sr_item_sk#13, sr_ticket_number#14] - -(112) ReusedExchange [Reuses operator id: 24] -Output [1]: [cs_item_sk#16] - -(113) BroadcastHashJoin [codegen id : 40] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [cs_item_sk#16] -Join condition: None - -(114) Project [codegen id : 40] -Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] -Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, cs_item_sk#16] - -(115) Scan parquet default.date_dim -Output [2]: [d_date_sk#37, d_year#38] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct - -(116) ColumnarToRow [codegen id : 25] -Input [2]: [d_date_sk#37, d_year#38] - -(117) Filter [codegen id : 25] -Input [2]: [d_date_sk#37, d_year#38] -Condition : ((isnotnull(d_year#38) AND (d_year#38 = 2000)) AND isnotnull(d_date_sk#37)) - -(118) BroadcastExchange -Input [2]: [d_date_sk#37, d_year#38] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#117] - -(119) BroadcastHashJoin [codegen id : 40] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#37] -Join condition: None - -(120) Project [codegen id : 40] -Output [11]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38] -Input [13]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_date_sk#37, d_year#38] - -(121) ReusedExchange [Reuses operator id: 36] -Output [3]: [s_store_sk#40, s_store_name#41, s_zip#42] - -(122) BroadcastHashJoin [codegen id : 40] -Left keys [1]: [ss_store_sk#7] -Right keys [1]: [s_store_sk#40] -Join condition: None - -(123) Project [codegen id : 40] -Output [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42] -Input [14]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_sk#40, s_store_name#41, s_zip#42] - -(124) ReusedExchange [Reuses operator id: 42] -Output [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] - -(125) BroadcastHashJoin [codegen id : 40] -Left keys [1]: [ss_customer_sk#3] -Right keys [1]: [c_customer_sk#44] -Join condition: None - -(126) Project [codegen id : 40] -Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] -Input [18]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] - -(127) ReusedExchange [Reuses operator id: 48] -Output [2]: [d_date_sk#118, d_year#119] - -(128) BroadcastHashJoin [codegen id : 40] -Left keys [1]: [c_first_sales_date_sk#49] -Right keys [1]: [d_date_sk#118] -Join condition: None - -(129) Project [codegen id : 40] -Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, d_year#119] -Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49, d_date_sk#118, d_year#119] - -(130) ReusedExchange [Reuses operator id: 48] -Output [2]: [d_date_sk#120, d_year#121] - -(131) BroadcastHashJoin [codegen id : 40] -Left keys [1]: [c_first_shipto_date_sk#48] -Right keys [1]: [d_date_sk#120] -Join condition: None - -(132) Project [codegen id : 40] -Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121] -Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, d_year#119, d_date_sk#120, d_year#121] - -(133) ReusedExchange [Reuses operator id: 57] -Output [2]: [cd_demo_sk#56, cd_marital_status#57] - -(134) BroadcastHashJoin [codegen id : 40] -Left keys [1]: [ss_cdemo_sk#4] -Right keys [1]: [cd_demo_sk#56] -Join condition: None - -(135) Project [codegen id : 40] -Output [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, cd_marital_status#57] -Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, cd_demo_sk#56, cd_marital_status#57] - -(136) ReusedExchange [Reuses operator id: 57] -Output [2]: [cd_demo_sk#122, cd_marital_status#123] - -(137) BroadcastHashJoin [codegen id : 40] -Left keys [1]: [c_current_cdemo_sk#45] -Right keys [1]: [cd_demo_sk#122] -Join condition: NOT (cd_marital_status#57 = cd_marital_status#123) - -(138) Project [codegen id : 40] -Output [14]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121] -Input [18]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, cd_marital_status#57, cd_demo_sk#122, cd_marital_status#123] - -(139) ReusedExchange [Reuses operator id: 66] -Output [1]: [p_promo_sk#61] - -(140) BroadcastHashJoin [codegen id : 40] -Left keys [1]: [ss_promo_sk#8] -Right keys [1]: [p_promo_sk#61] -Join condition: None - -(141) Project [codegen id : 40] -Output [13]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121] -Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, p_promo_sk#61] - -(142) ReusedExchange [Reuses operator id: 72] -Output [2]: [hd_demo_sk#63, hd_income_band_sk#64] - -(143) BroadcastHashJoin [codegen id : 40] -Left keys [1]: [ss_hdemo_sk#5] -Right keys [1]: [hd_demo_sk#63] -Join condition: None - -(144) Project [codegen id : 40] -Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64] -Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, hd_demo_sk#63, hd_income_band_sk#64] - -(145) ReusedExchange [Reuses operator id: 72] -Output [2]: [hd_demo_sk#124, hd_income_band_sk#125] - -(146) BroadcastHashJoin [codegen id : 40] -Left keys [1]: [c_current_hdemo_sk#46] -Right keys [1]: [hd_demo_sk#124] -Join condition: None - -(147) Project [codegen id : 40] -Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125] -Input [15]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_demo_sk#124, hd_income_band_sk#125] - -(148) ReusedExchange [Reuses operator id: 81] -Output [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] - -(149) BroadcastHashJoin [codegen id : 40] -Left keys [1]: [ss_addr_sk#6] -Right keys [1]: [ca_address_sk#68] -Join condition: None - -(150) Project [codegen id : 40] -Output [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] -Input [18]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] - -(151) ReusedExchange [Reuses operator id: 81] -Output [5]: [ca_address_sk#126, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] - -(152) BroadcastHashJoin [codegen id : 40] -Left keys [1]: [c_current_addr_sk#47] -Right keys [1]: [ca_address_sk#126] -Join condition: None - -(153) Project [codegen id : 40] -Output [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] -Input [21]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_address_sk#126, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] - -(154) ReusedExchange [Reuses operator id: 90] -Output [1]: [ib_income_band_sk#79] - -(155) BroadcastHashJoin [codegen id : 40] -Left keys [1]: [hd_income_band_sk#64] -Right keys [1]: [ib_income_band_sk#79] -Join condition: None - -(156) Project [codegen id : 40] -Output [18]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] -Input [20]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, ib_income_band_sk#79] - -(157) ReusedExchange [Reuses operator id: 90] -Output [1]: [ib_income_band_sk#131] - -(158) BroadcastHashJoin [codegen id : 40] -Left keys [1]: [hd_income_band_sk#125] -Right keys [1]: [ib_income_band_sk#131] -Join condition: None - -(159) Project [codegen id : 40] -Output [17]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] -Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, ib_income_band_sk#131] - -(160) ReusedExchange [Reuses operator id: 100] -Output [2]: [i_item_sk#82, i_product_name#85] - -(161) BroadcastHashJoin [codegen id : 40] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#82] -Join condition: None - -(162) Project [codegen id : 40] -Output [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, d_year#119, d_year#121, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, i_item_sk#82, i_product_name#85] -Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, i_item_sk#82, i_product_name#85] - -(163) HashAggregate [codegen id : 40] -Input [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, d_year#119, d_year#121, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, i_item_sk#82, i_product_name#85] -Keys [15]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121] -Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#10)), partial_sum(UnscaledValue(ss_list_price#11)), partial_sum(UnscaledValue(ss_coupon_amt#12))] -Aggregate Attributes [4]: [count#132, sum#133, sum#134, sum#135] -Results [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121, count#136, sum#137, sum#138, sum#139] - -(164) Exchange -Input [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121, count#136, sum#137, sum#138, sum#139] -Arguments: hashpartitioning(i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121, 5), true, [id=#140] - -(165) HashAggregate [codegen id : 41] -Input [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121, count#136, sum#137, sum#138, sum#139] -Keys [15]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121] -Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#10)), sum(UnscaledValue(ss_list_price#11)), sum(UnscaledValue(ss_coupon_amt#12))] -Aggregate Attributes [4]: [count(1)#141, sum(UnscaledValue(ss_wholesale_cost#10))#142, sum(UnscaledValue(ss_list_price#11))#143, sum(UnscaledValue(ss_coupon_amt#12))#144] -Results [8]: [i_item_sk#82 AS item_sk#145, s_store_name#41 AS store_name#146, s_zip#42 AS store_zip#147, d_year#38 AS syear#148, count(1)#141 AS cnt#149, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#10))#142,17,2) AS s1#150, MakeDecimal(sum(UnscaledValue(ss_list_price#11))#143,17,2) AS s2#151, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#12))#144,17,2) AS s3#152] - -(166) BroadcastExchange -Input [8]: [item_sk#145, store_name#146, store_zip#147, syear#148, cnt#149, s1#150, s2#151, s3#152] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, string, true], input[2, string, true]),false), [id=#153] - -(167) BroadcastHashJoin [codegen id : 42] -Left keys [3]: [item_sk#101, store_name#102, store_zip#103] -Right keys [3]: [item_sk#145, store_name#146, store_zip#147] -Join condition: (cnt#149 <= cnt#113) - -(168) Project [codegen id : 42] -Output [21]: [product_name#100, store_name#102, store_zip#103, b_street_number#104, b_streen_name#105, b_city#106, b_zip#107, c_street_number#108, c_street_name#109, c_city#110, c_zip#111, syear#112, cnt#113, s1#114, s2#115, s3#116, s1#150, s2#151, s3#152, syear#148, cnt#149] -Input [25]: [product_name#100, item_sk#101, store_name#102, store_zip#103, b_street_number#104, b_streen_name#105, b_city#106, b_zip#107, c_street_number#108, c_street_name#109, c_city#110, c_zip#111, syear#112, cnt#113, s1#114, s2#115, s3#116, item_sk#145, store_name#146, store_zip#147, syear#148, cnt#149, s1#150, s2#151, s3#152] - -(169) Exchange -Input [21]: [product_name#100, store_name#102, store_zip#103, b_street_number#104, b_streen_name#105, b_city#106, b_zip#107, c_street_number#108, c_street_name#109, c_city#110, c_zip#111, syear#112, cnt#113, s1#114, s2#115, s3#116, s1#150, s2#151, s3#152, syear#148, cnt#149] -Arguments: rangepartitioning(product_name#100 ASC NULLS FIRST, store_name#102 ASC NULLS FIRST, cnt#149 ASC NULLS FIRST, 5), true, [id=#154] - -(170) Sort [codegen id : 43] -Input [21]: [product_name#100, store_name#102, store_zip#103, b_street_number#104, b_streen_name#105, b_city#106, b_zip#107, c_street_number#108, c_street_name#109, c_city#110, c_zip#111, syear#112, cnt#113, s1#114, s2#115, s3#116, s1#150, s2#151, s3#152, syear#148, cnt#149] -Arguments: [product_name#100 ASC NULLS FIRST, store_name#102 ASC NULLS FIRST, cnt#149 ASC NULLS FIRST], true, 0 - +*(43) Sort [product_name#1 ASC NULLS FIRST, store_name#2 ASC NULLS FIRST, cnt#3 ASC NULLS FIRST], true, 0 ++- Exchange rangepartitioning(product_name#1 ASC NULLS FIRST, store_name#2 ASC NULLS FIRST, cnt#3 ASC NULLS FIRST, 5) + +- *(42) Project [product_name#1, store_name#2, store_zip#4, b_street_number#5, b_streen_name#6, b_city#7, b_zip#8, c_street_number#9, c_street_name#10, c_city#11, c_zip#12, syear#13, cnt#14, s1#15, s2#16, s3#17, s1#18, s2#19, s3#20, syear#21, cnt#3] + +- *(42) BroadcastHashJoin [item_sk#22, store_name#2, store_zip#4], [item_sk#23, store_name#24, store_zip#25], Inner, BuildRight, (cnt#3 <= cnt#14) + :- *(42) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[count(1), sum(UnscaledValue(ss_wholesale_cost#41)), sum(UnscaledValue(ss_list_price#42)), sum(UnscaledValue(ss_coupon_amt#43))]) + : +- Exchange hashpartitioning(i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40, 5) + : +- *(20) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#41)), partial_sum(UnscaledValue(ss_list_price#42)), partial_sum(UnscaledValue(ss_coupon_amt#43))]) + : +- *(20) Project [ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, d_year#39, d_year#40, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, i_item_sk#27, i_product_name#26] + : +- *(20) BroadcastHashJoin [ss_item_sk#44], [i_item_sk#27], Inner, BuildRight + : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] + : : +- *(20) BroadcastHashJoin [hd_income_band_sk#45], [ib_income_band_sk#46], Inner, BuildRight + : : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] + : : : +- *(20) BroadcastHashJoin [hd_income_band_sk#47], [ib_income_band_sk#48], Inner, BuildRight + : : : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] + : : : : +- *(20) BroadcastHashJoin [c_current_addr_sk#49], [ca_address_sk#50], Inner, BuildRight + : : : : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33] + : : : : : +- *(20) BroadcastHashJoin [ss_addr_sk#51], [ca_address_sk#52], Inner, BuildRight + : : : : : :- *(20) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45] + : : : : : : +- *(20) BroadcastHashJoin [c_current_hdemo_sk#53], [hd_demo_sk#54], Inner, BuildRight + : : : : : : :- *(20) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47] + : : : : : : : +- *(20) BroadcastHashJoin [ss_hdemo_sk#55], [hd_demo_sk#56], Inner, BuildRight + : : : : : : : :- *(20) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] + : : : : : : : : +- *(20) BroadcastHashJoin [ss_promo_sk#57], [p_promo_sk#58], Inner, BuildRight + : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] + : : : : : : : : : +- *(20) BroadcastHashJoin [c_current_cdemo_sk#59], [cd_demo_sk#60], Inner, BuildRight, NOT (cd_marital_status#61 = cd_marital_status#62) + : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, cd_marital_status#61] + : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_cdemo_sk#63], [cd_demo_sk#64], Inner, BuildRight + : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] + : : : : : : : : : : : +- *(20) BroadcastHashJoin [c_first_shipto_date_sk#65], [d_date_sk#66], Inner, BuildRight + : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, d_year#39] + : : : : : : : : : : : : +- *(20) BroadcastHashJoin [c_first_sales_date_sk#67], [d_date_sk#68], Inner, BuildRight + : : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67] + : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_customer_sk#69], [c_customer_sk#70], Inner, BuildRight + : : : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29] + : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_store_sk#71], [s_store_sk#72], Inner, BuildRight + : : : : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38] + : : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_sold_date_sk#73], [d_date_sk#74], Inner, BuildRight + : : : : : : : : : : : : : : : :- *(20) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] + : : : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_item_sk#44], [cs_item_sk#75], Inner, BuildRight + : : : : : : : : : : : : : : : : :- *(20) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] + : : : : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [cast(ss_item_sk#44 as bigint), cast(ss_ticket_number#76 as bigint)], [sr_item_sk#77, sr_ticket_number#78], Inner, BuildRight + : : : : : : : : : : : : : : : : : :- *(20) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_ticket_number#76, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] + : : : : : : : : : : : : : : : : : : +- *(20) Filter ((((((((isnotnull(ss_item_sk#44) && isnotnull(ss_ticket_number#76)) && isnotnull(ss_sold_date_sk#73)) && isnotnull(ss_store_sk#71)) && isnotnull(ss_customer_sk#69)) && isnotnull(ss_cdemo_sk#63)) && isnotnull(ss_promo_sk#57)) && isnotnull(ss_hdemo_sk#55)) && isnotnull(ss_addr_sk#51)) + : : : : : : : : : : : : : : : : : : +- *(20) FileScan parquet default.store_sales[ss_sold_date_sk#73,ss_item_sk#44,ss_customer_sk#69,ss_cdemo_sk#63,ss_hdemo_sk#55,ss_addr_sk#51,ss_store_sk#71,ss_promo_sk#57,ss_ticket_number#76,ss_wholesale_cost#41,ss_list_price#42,ss_coupon_amt#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_sto..., ReadSchema: struct + : : : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : : : : : : : +- *(4) Project [cs_item_sk#75] + : : : : : : : : : : : : : : : : +- *(4) Filter (isnotnull(sum(cs_ext_list_price#79)#80) && (cast(sum(cs_ext_list_price#79)#80 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))#84)), DecimalType(21,2)))) + : : : : : : : : : : : : : : : : +- *(4) HashAggregate(keys=[cs_item_sk#75], functions=[sum(UnscaledValue(cs_ext_list_price#79)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))]) + : : : : : : : : : : : : : : : : +- Exchange hashpartitioning(cs_item_sk#75, 5) + : : : : : : : : : : : : : : : : +- *(3) HashAggregate(keys=[cs_item_sk#75], functions=[partial_sum(UnscaledValue(cs_ext_list_price#79)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))]) + : : : : : : : : : : : : : : : : +- *(3) Project [cs_item_sk#75, cs_ext_list_price#79, cr_refunded_cash#81, cr_reversed_charge#82, cr_store_credit#83] + : : : : : : : : : : : : : : : : +- *(3) BroadcastHashJoin [cs_item_sk#75, cs_order_number#85], [cr_item_sk#86, cr_order_number#87], Inner, BuildRight + : : : : : : : : : : : : : : : : :- *(3) Project [cs_item_sk#75, cs_order_number#85, cs_ext_list_price#79] + : : : : : : : : : : : : : : : : : +- *(3) Filter (isnotnull(cs_item_sk#75) && isnotnull(cs_order_number#85)) + : : : : : : : : : : : : : : : : : +- *(3) FileScan parquet default.catalog_sales[cs_item_sk#75,cs_order_number#85,cs_ext_list_price#79] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)], ReadSchema: struct + : : : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295)))) + : : : : : : : : : : : : : : : : +- *(2) Project [cr_item_sk#86, cr_order_number#87, cr_refunded_cash#81, cr_reversed_charge#82, cr_store_credit#83] + : : : : : : : : : : : : : : : : +- *(2) Filter (isnotnull(cr_item_sk#86) && isnotnull(cr_order_number#87)) + : : : : : : : : : : : : : : : : +- *(2) FileScan parquet default.catalog_returns[cr_item_sk#86,cr_order_number#87,cr_refunded_cash#81,cr_reversed_charge#82,cr_store_credit#83] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct + : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : : : : : +- *(6) Project [s_store_sk#72, s_store_name#28, s_zip#29] + : : : : : : : : : : : : : : +- *(6) Filter ((isnotnull(s_store_sk#72) && isnotnull(s_zip#29)) && isnotnull(s_store_name#28)) + : : : : : : : : : : : : : : +- *(6) FileScan parquet default.store[s_store_sk#72,s_store_name#28,s_zip#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip), IsNotNull(s_store_name)], ReadSchema: struct + : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : : : : +- *(7) Project [c_customer_sk#70, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67] + : : : : : : : : : : : : : +- *(7) Filter (((((isnotnull(c_customer_sk#70) && isnotnull(c_first_sales_date_sk#67)) && isnotnull(c_first_shipto_date_sk#65)) && isnotnull(c_current_cdemo_sk#59)) && isnotnull(c_current_hdemo_sk#53)) && isnotnull(c_current_addr_sk#49)) + : : : : : : : : : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#70,c_current_cdemo_sk#59,c_current_hdemo_sk#53,c_current_addr_sk#49,c_first_shipto_date_sk#65,c_first_sales_date_sk#67] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), I..., ReadSchema: struct + : : : : : : : : : : : +- ReusedExchange [d_date_sk#66, d_year#40], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : +- *(10) Project [cd_demo_sk#64, cd_marital_status#61] + : : : : : : : : : : +- *(10) Filter (isnotnull(cd_demo_sk#64) && isnotnull(cd_marital_status#61)) + : : : : : : : : : : +- *(10) FileScan parquet default.customer_demographics[cd_demo_sk#64,cd_marital_status#61] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)], ReadSchema: struct + : : : : : : : : : +- ReusedExchange [cd_demo_sk#60, cd_marital_status#62], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : +- *(12) Project [p_promo_sk#58] + : : : : : : : : +- *(12) Filter isnotnull(p_promo_sk#58) + : : : : : : : : +- *(12) FileScan parquet default.promotion[p_promo_sk#58] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_promo_sk)], ReadSchema: struct + : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : +- *(13) Project [hd_demo_sk#56, hd_income_band_sk#47] + : : : : : : : +- *(13) Filter (isnotnull(hd_demo_sk#56) && isnotnull(hd_income_band_sk#47)) + : : : : : : : +- *(13) FileScan parquet default.household_demographics[hd_demo_sk#56,hd_income_band_sk#47] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)], ReadSchema: struct + : : : : : : +- ReusedExchange [hd_demo_sk#54, hd_income_band_sk#45], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(15) Project [ca_address_sk#52, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33] + : : : : : +- *(15) Filter isnotnull(ca_address_sk#52) + : : : : : +- *(15) FileScan parquet default.customer_address[ca_address_sk#52,ca_street_number#30,ca_street_name#31,ca_city#32,ca_zip#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + : : : : +- ReusedExchange [ca_address_sk#50, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(17) Project [ib_income_band_sk#48] + : : : +- *(17) Filter isnotnull(ib_income_band_sk#48) + : : : +- *(17) FileScan parquet default.income_band[ib_income_band_sk#48] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/income_band], PartitionFilters: [], PushedFilters: [IsNotNull(ib_income_band_sk)], ReadSchema: struct + : : +- ReusedExchange [ib_income_band_sk#46], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(19) Project [i_item_sk#27, i_product_name#26] + : +- *(19) Filter ((((((isnotnull(i_current_price#88) && i_color#89 IN (purple,burlywood,indian,spring,floral,medium)) && (i_current_price#88 >= 64.00)) && (cast(i_current_price#88 as decimal(12,2)) <= 74.00)) && (cast(i_current_price#88 as decimal(12,2)) >= 65.00)) && (cast(i_current_price#88 as decimal(12,2)) <= 79.00)) && isnotnull(i_item_sk#27)) + : +- *(19) FileScan parquet default.item[i_item_sk#27,i_current_price#88,i_color#89,i_product_name#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), In(i_color, [purple,burlywood,indian,spring,floral,medium]), Greater..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, string, true], input[2, string, true])) + +- *(41) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[count(1), sum(UnscaledValue(ss_wholesale_cost#41)), sum(UnscaledValue(ss_list_price#42)), sum(UnscaledValue(ss_coupon_amt#43))]) + +- Exchange hashpartitioning(i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40, 5) + +- *(40) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#41)), partial_sum(UnscaledValue(ss_list_price#42)), partial_sum(UnscaledValue(ss_coupon_amt#43))]) + +- *(40) Project [ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, d_year#39, d_year#40, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, i_item_sk#27, i_product_name#26] + +- *(40) BroadcastHashJoin [ss_item_sk#44], [i_item_sk#27], Inner, BuildRight + :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] + : +- *(40) BroadcastHashJoin [hd_income_band_sk#45], [ib_income_band_sk#46], Inner, BuildRight + : :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] + : : +- *(40) BroadcastHashJoin [hd_income_band_sk#47], [ib_income_band_sk#48], Inner, BuildRight + : : :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] + : : : +- *(40) BroadcastHashJoin [c_current_addr_sk#49], [ca_address_sk#50], Inner, BuildRight + : : : :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33] + : : : : +- *(40) BroadcastHashJoin [ss_addr_sk#51], [ca_address_sk#52], Inner, BuildRight + : : : : :- *(40) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45] + : : : : : +- *(40) BroadcastHashJoin [c_current_hdemo_sk#53], [hd_demo_sk#54], Inner, BuildRight + : : : : : :- *(40) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47] + : : : : : : +- *(40) BroadcastHashJoin [ss_hdemo_sk#55], [hd_demo_sk#56], Inner, BuildRight + : : : : : : :- *(40) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] + : : : : : : : +- *(40) BroadcastHashJoin [ss_promo_sk#57], [p_promo_sk#58], Inner, BuildRight + : : : : : : : :- *(40) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] + : : : : : : : : +- *(40) BroadcastHashJoin [c_current_cdemo_sk#59], [cd_demo_sk#60], Inner, BuildRight, NOT (cd_marital_status#61 = cd_marital_status#62) + : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, cd_marital_status#61] + : : : : : : : : : +- *(40) BroadcastHashJoin [ss_cdemo_sk#63], [cd_demo_sk#64], Inner, BuildRight + : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] + : : : : : : : : : : +- *(40) BroadcastHashJoin [c_first_shipto_date_sk#65], [d_date_sk#66], Inner, BuildRight + : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, d_year#39] + : : : : : : : : : : : +- *(40) BroadcastHashJoin [c_first_sales_date_sk#67], [d_date_sk#68], Inner, BuildRight + : : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67] + : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_customer_sk#69], [c_customer_sk#70], Inner, BuildRight + : : : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29] + : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_store_sk#71], [s_store_sk#72], Inner, BuildRight + : : : : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38] + : : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_sold_date_sk#73], [d_date_sk#74], Inner, BuildRight + : : : : : : : : : : : : : : :- *(40) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] + : : : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_item_sk#44], [cs_item_sk#75], Inner, BuildRight + : : : : : : : : : : : : : : : :- *(40) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] + : : : : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [cast(ss_item_sk#44 as bigint), cast(ss_ticket_number#76 as bigint)], [sr_item_sk#77, sr_ticket_number#78], Inner, BuildRight + : : : : : : : : : : : : : : : : :- *(40) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_ticket_number#76, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] + : : : : : : : : : : : : : : : : : +- *(40) Filter ((((((((isnotnull(ss_item_sk#44) && isnotnull(ss_ticket_number#76)) && isnotnull(ss_sold_date_sk#73)) && isnotnull(ss_store_sk#71)) && isnotnull(ss_customer_sk#69)) && isnotnull(ss_cdemo_sk#63)) && isnotnull(ss_promo_sk#57)) && isnotnull(ss_hdemo_sk#55)) && isnotnull(ss_addr_sk#51)) + : : : : : : : : : : : : : : : : : +- *(40) FileScan parquet default.store_sales[ss_sold_date_sk#73,ss_item_sk#44,ss_customer_sk#69,ss_cdemo_sk#63,ss_hdemo_sk#55,ss_addr_sk#51,ss_store_sk#71,ss_promo_sk#57,ss_ticket_number#76,ss_wholesale_cost#41,ss_list_price#42,ss_coupon_amt#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_sto..., ReadSchema: struct + : : : : : : : : : : : : : +- ReusedExchange [s_store_sk#72, s_store_name#28, s_zip#29], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : : : +- ReusedExchange [c_customer_sk#70, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : : +- ReusedExchange [d_date_sk#68, d_year#39], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : : +- ReusedExchange [d_date_sk#66, d_year#40], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : : +- ReusedExchange [cd_demo_sk#64, cd_marital_status#61], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : +- ReusedExchange [cd_demo_sk#60, cd_marital_status#62], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : +- ReusedExchange [p_promo_sk#58], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : +- ReusedExchange [hd_demo_sk#56, hd_income_band_sk#47], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- ReusedExchange [hd_demo_sk#54, hd_income_band_sk#45], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- ReusedExchange [ca_address_sk#52, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- ReusedExchange [ca_address_sk#50, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- ReusedExchange [ib_income_band_sk#48], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [ib_income_band_sk#46], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [i_item_sk#27, i_product_name#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/simplified.txt index 4c40a359c..2955e0f78 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/simplified.txt @@ -1,246 +1,229 @@ -WholeStageCodegen (43) - Sort [product_name,store_name,cnt] +WholeStageCodegen + Sort [cnt,product_name,store_name] InputAdapter - Exchange [product_name,store_name,cnt] #1 - WholeStageCodegen (42) - Project [product_name,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,s1,s2,s3,syear,cnt] - BroadcastHashJoin [item_sk,store_name,store_zip,item_sk,store_name,store_zip,cnt,cnt] - HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum] [count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt)),product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,count,sum,sum,sum] + Exchange [cnt,product_name,store_name] #1 + WholeStageCodegen + Project [b_city,b_streen_name,b_street_number,b_zip,c_city,c_street_name,c_street_number,c_zip,cnt,cnt,product_name,s1,s1,s2,s2,s3,s3,store_name,store_zip,syear,syear] + BroadcastHashJoin [cnt,cnt,item_sk,item_sk,store_name,store_name,store_zip,store_zip] + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,count(1),d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost))] [b_city,b_streen_name,b_street_number,b_zip,c_city,c_street_name,c_street_number,c_zip,cnt,count,count(1),item_sk,product_name,s1,s2,s3,store_name,store_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost)),syear] InputAdapter - Exchange [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year] #2 - WholeStageCodegen (20) - HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [count,sum,sum,sum,count,sum,sum,sum] - Project [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + Exchange [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip] #2 + WholeStageCodegen + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,count,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost,sum,sum,sum,sum,sum,sum] [count,count,sum,sum,sum,sum,sum,sum] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + Project [c_current_addr_sk,ca_city,ca_street_name,ca_street_number,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [c_current_addr_sk,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] - Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] - BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cd_marital_status,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] - Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_shipto_date_sk,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] - Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastHashJoin [ss_item_sk,cs_item_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - Filter [ss_item_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_year,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] + Filter [ss_addr_sk,ss_cdemo_sk,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Filter [sr_item_sk,sr_ticket_number] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number] + Filter [sr_item_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] InputAdapter BroadcastExchange #4 - WholeStageCodegen (4) + WholeStageCodegen Project [cs_item_sk] - Filter [sum(cs_ext_list_price),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true))] - HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sum(cs_ext_list_price),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sum,sum,isEmpty] + Filter [sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(cs_ext_list_price)] + HashAggregate [cs_item_sk,sum,sum,sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(UnscaledValue(cs_ext_list_price))] [sum,sum,sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(UnscaledValue(cs_ext_list_price)),sum(cs_ext_list_price)] InputAdapter Exchange [cs_item_sk] #5 - WholeStageCodegen (3) - HashAggregate [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] [sum,sum,isEmpty,sum,sum,isEmpty] - Project [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] - BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] - Filter [cs_item_sk,cs_order_number] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_item_sk,cs_order_number,cs_ext_list_price] + WholeStageCodegen + HashAggregate [cr_refunded_cash,cr_reversed_charge,cr_store_credit,cs_ext_list_price,cs_item_sk,sum,sum,sum,sum] [sum,sum,sum,sum] + Project [cr_refunded_cash,cr_reversed_charge,cr_store_credit,cs_ext_list_price,cs_item_sk] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_ext_list_price,cs_item_sk,cs_order_number] + Filter [cs_item_sk,cs_order_number] + Scan parquet default.catalog_sales [cs_ext_list_price,cs_item_sk,cs_order_number] [cs_ext_list_price,cs_item_sk,cs_order_number] InputAdapter BroadcastExchange #6 - WholeStageCodegen (2) - Filter [cr_item_sk,cr_order_number] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + WholeStageCodegen + Project [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + Filter [cr_item_sk,cr_order_number] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] InputAdapter BroadcastExchange #7 - WholeStageCodegen (5) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #8 - WholeStageCodegen (6) - Filter [s_store_sk,s_store_name,s_zip] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_name,s_zip] + WholeStageCodegen + Project [s_store_name,s_store_sk,s_zip] + Filter [s_store_name,s_store_sk,s_zip] + Scan parquet default.store [s_store_name,s_store_sk,s_zip] [s_store_name,s_store_sk,s_zip] InputAdapter BroadcastExchange #9 - WholeStageCodegen (7) - Filter [c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + WholeStageCodegen + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] InputAdapter BroadcastExchange #10 - WholeStageCodegen (8) - Filter [d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter - ReusedExchange [d_date_sk,d_year] #10 + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #10 InputAdapter BroadcastExchange #11 - WholeStageCodegen (10) - Filter [cd_demo_sk,cd_marital_status] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + WholeStageCodegen + Project [cd_demo_sk,cd_marital_status] + Filter [cd_demo_sk,cd_marital_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] InputAdapter - ReusedExchange [cd_demo_sk,cd_marital_status] #11 + ReusedExchange [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] #11 InputAdapter BroadcastExchange #12 - WholeStageCodegen (12) - Filter [p_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.promotion [p_promo_sk] + WholeStageCodegen + Project [p_promo_sk] + Filter [p_promo_sk] + Scan parquet default.promotion [p_promo_sk] [p_promo_sk] InputAdapter BroadcastExchange #13 - WholeStageCodegen (13) - Filter [hd_demo_sk,hd_income_band_sk] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] + WholeStageCodegen + Project [hd_demo_sk,hd_income_band_sk] + Filter [hd_demo_sk,hd_income_band_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] InputAdapter - ReusedExchange [hd_demo_sk,hd_income_band_sk] #13 + ReusedExchange [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] #13 InputAdapter BroadcastExchange #14 - WholeStageCodegen (15) - Filter [ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + WholeStageCodegen + Project [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] + Filter [ca_address_sk] + Scan parquet default.customer_address [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] InputAdapter - ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #14 + ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #14 InputAdapter BroadcastExchange #15 - WholeStageCodegen (17) - Filter [ib_income_band_sk] - ColumnarToRow - InputAdapter - Scan parquet default.income_band [ib_income_band_sk] + WholeStageCodegen + Project [ib_income_band_sk] + Filter [ib_income_band_sk] + Scan parquet default.income_band [ib_income_band_sk] [ib_income_band_sk] InputAdapter - ReusedExchange [ib_income_band_sk] #15 + ReusedExchange [ib_income_band_sk] [ib_income_band_sk] #15 InputAdapter BroadcastExchange #16 - WholeStageCodegen (19) + WholeStageCodegen Project [i_item_sk,i_product_name] - Filter [i_current_price,i_color,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_current_price,i_color,i_product_name] + Filter [i_color,i_current_price,i_item_sk] + Scan parquet default.item [i_color,i_current_price,i_item_sk,i_product_name] [i_color,i_current_price,i_item_sk,i_product_name] InputAdapter BroadcastExchange #17 - WholeStageCodegen (41) - HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum] [count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt)),item_sk,store_name,store_zip,syear,cnt,s1,s2,s3,count,sum,sum,sum] + WholeStageCodegen + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,count(1),d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost))] [cnt,count,count(1),item_sk,s1,s2,s3,store_name,store_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost)),syear] InputAdapter - Exchange [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year] #18 - WholeStageCodegen (40) - HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [count,sum,sum,sum,count,sum,sum,sum] - Project [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + Exchange [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip] #18 + WholeStageCodegen + HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,count,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost,sum,sum,sum,sum,sum,sum] [count,count,sum,sum,sum,sum,sum,sum] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] + Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] + Project [c_current_addr_sk,ca_city,ca_street_name,ca_street_number,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [c_current_addr_sk,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_wholesale_cost] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] - Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] - BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cd_marital_status,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] - Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_shipto_date_sk,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] - Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastHashJoin [ss_item_sk,cs_item_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - Filter [ss_item_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_year,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] + Filter [ss_addr_sk,ss_cdemo_sk,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] InputAdapter - ReusedExchange [sr_item_sk,sr_ticket_number] #3 + ReusedExchange [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] #3 InputAdapter - ReusedExchange [cs_item_sk] #4 + ReusedExchange [cs_item_sk] [cs_item_sk] #4 InputAdapter BroadcastExchange #19 - WholeStageCodegen (25) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter - ReusedExchange [s_store_sk,s_store_name,s_zip] #8 + ReusedExchange [s_store_name,s_store_sk,s_zip] [s_store_name,s_store_sk,s_zip] #8 InputAdapter - ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] #9 + ReusedExchange [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] #9 InputAdapter - ReusedExchange [d_date_sk,d_year] #10 + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #10 InputAdapter - ReusedExchange [d_date_sk,d_year] #10 + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #10 InputAdapter - ReusedExchange [cd_demo_sk,cd_marital_status] #11 + ReusedExchange [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] #11 InputAdapter - ReusedExchange [cd_demo_sk,cd_marital_status] #11 + ReusedExchange [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] #11 InputAdapter - ReusedExchange [p_promo_sk] #12 + ReusedExchange [p_promo_sk] [p_promo_sk] #12 InputAdapter - ReusedExchange [hd_demo_sk,hd_income_band_sk] #13 + ReusedExchange [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] #13 InputAdapter - ReusedExchange [hd_demo_sk,hd_income_band_sk] #13 + ReusedExchange [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] #13 InputAdapter - ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #14 + ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #14 InputAdapter - ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #14 + ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #14 InputAdapter - ReusedExchange [ib_income_band_sk] #15 + ReusedExchange [ib_income_band_sk] [ib_income_band_sk] #15 InputAdapter - ReusedExchange [ib_income_band_sk] #15 + ReusedExchange [ib_income_band_sk] [ib_income_band_sk] #15 InputAdapter - ReusedExchange [i_item_sk,i_product_name] #16 + ReusedExchange [i_item_sk,i_product_name] [i_item_sk,i_product_name] #16 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/explain.txt index ab87816b8..90e6d0221 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/explain.txt @@ -1,245 +1,42 @@ == Physical Plan == -TakeOrderedAndProject (42) -+- * Project (41) - +- * BroadcastHashJoin Inner BuildRight (40) - :- * Project (26) - : +- * BroadcastHashJoin Inner BuildRight (25) - : :- * Project (20) - : : +- * BroadcastHashJoin Inner BuildRight (19) - : : :- * Filter (3) - : : : +- * ColumnarToRow (2) - : : : +- Scan parquet default.store (1) - : : +- BroadcastExchange (18) - : : +- * Filter (17) - : : +- * HashAggregate (16) - : : +- Exchange (15) - : : +- * HashAggregate (14) - : : +- * Project (13) - : : +- * BroadcastHashJoin Inner BuildRight (12) - : : :- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.store_sales (4) - : : +- BroadcastExchange (11) - : : +- * Project (10) - : : +- * Filter (9) - : : +- * ColumnarToRow (8) - : : +- Scan parquet default.date_dim (7) - : +- BroadcastExchange (24) - : +- * Filter (23) - : +- * ColumnarToRow (22) - : +- Scan parquet default.item (21) - +- BroadcastExchange (39) - +- * HashAggregate (38) - +- Exchange (37) - +- * HashAggregate (36) - +- * HashAggregate (35) - +- Exchange (34) - +- * HashAggregate (33) - +- * Project (32) - +- * BroadcastHashJoin Inner BuildRight (31) - :- * Filter (29) - : +- * ColumnarToRow (28) - : +- Scan parquet default.store_sales (27) - +- ReusedExchange (30) - - -(1) Scan parquet default.store -Output [2]: [s_store_sk#1, s_store_name#2] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 9] -Input [2]: [s_store_sk#1, s_store_name#2] - -(3) Filter [codegen id : 9] -Input [2]: [s_store_sk#1, s_store_name#2] -Condition : isnotnull(s_store_sk#1) - -(4) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 2] -Input [4]: [ss_sold_date_sk#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] - -(6) Filter [codegen id : 2] -Input [4]: [ss_sold_date_sk#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] -Condition : ((isnotnull(ss_sold_date_sk#3) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_item_sk#4)) - -(7) Scan parquet default.date_dim -Output [2]: [d_date_sk#7, d_month_seq#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1176), LessThanOrEqual(d_month_seq,1187), IsNotNull(d_date_sk)] -ReadSchema: struct - -(8) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#7, d_month_seq#8] - -(9) Filter [codegen id : 1] -Input [2]: [d_date_sk#7, d_month_seq#8] -Condition : (((isnotnull(d_month_seq#8) AND (d_month_seq#8 >= 1176)) AND (d_month_seq#8 <= 1187)) AND isnotnull(d_date_sk#7)) - -(10) Project [codegen id : 1] -Output [1]: [d_date_sk#7] -Input [2]: [d_date_sk#7, d_month_seq#8] - -(11) BroadcastExchange -Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] - -(12) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#3] -Right keys [1]: [d_date_sk#7] -Join condition: None - -(13) Project [codegen id : 2] -Output [3]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] -Input [5]: [ss_sold_date_sk#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, d_date_sk#7] - -(14) HashAggregate [codegen id : 2] -Input [3]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] -Keys [2]: [ss_store_sk#5, ss_item_sk#4] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [1]: [sum#10] -Results [3]: [ss_store_sk#5, ss_item_sk#4, sum#11] - -(15) Exchange -Input [3]: [ss_store_sk#5, ss_item_sk#4, sum#11] -Arguments: hashpartitioning(ss_store_sk#5, ss_item_sk#4, 5), true, [id=#12] - -(16) HashAggregate [codegen id : 3] -Input [3]: [ss_store_sk#5, ss_item_sk#4, sum#11] -Keys [2]: [ss_store_sk#5, ss_item_sk#4] -Functions [1]: [sum(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#6))#13] -Results [3]: [ss_store_sk#5, ss_item_sk#4, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#13,17,2) AS revenue#14] - -(17) Filter [codegen id : 3] -Input [3]: [ss_store_sk#5, ss_item_sk#4, revenue#14] -Condition : isnotnull(revenue#14) - -(18) BroadcastExchange -Input [3]: [ss_store_sk#5, ss_item_sk#4, revenue#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] - -(19) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [s_store_sk#1] -Right keys [1]: [ss_store_sk#5] -Join condition: None - -(20) Project [codegen id : 9] -Output [4]: [s_store_name#2, ss_store_sk#5, ss_item_sk#4, revenue#14] -Input [5]: [s_store_sk#1, s_store_name#2, ss_store_sk#5, ss_item_sk#4, revenue#14] - -(21) Scan parquet default.item -Output [5]: [i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(22) ColumnarToRow [codegen id : 4] -Input [5]: [i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] - -(23) Filter [codegen id : 4] -Input [5]: [i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] -Condition : isnotnull(i_item_sk#16) - -(24) BroadcastExchange -Input [5]: [i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] - -(25) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_item_sk#4] -Right keys [1]: [i_item_sk#16] -Join condition: None - -(26) Project [codegen id : 9] -Output [7]: [s_store_name#2, ss_store_sk#5, revenue#14, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] -Input [9]: [s_store_name#2, ss_store_sk#5, ss_item_sk#4, revenue#14, i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] - -(27) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#22, ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(28) ColumnarToRow [codegen id : 6] -Input [4]: [ss_sold_date_sk#22, ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] - -(29) Filter [codegen id : 6] -Input [4]: [ss_sold_date_sk#22, ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] -Condition : (isnotnull(ss_sold_date_sk#22) AND isnotnull(ss_store_sk#24)) - -(30) ReusedExchange [Reuses operator id: 11] -Output [1]: [d_date_sk#7] - -(31) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#22] -Right keys [1]: [d_date_sk#7] -Join condition: None - -(32) Project [codegen id : 6] -Output [3]: [ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] -Input [5]: [ss_sold_date_sk#22, ss_item_sk#23, ss_store_sk#24, ss_sales_price#25, d_date_sk#7] - -(33) HashAggregate [codegen id : 6] -Input [3]: [ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] -Keys [2]: [ss_store_sk#24, ss_item_sk#23] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#25))] -Aggregate Attributes [1]: [sum#26] -Results [3]: [ss_store_sk#24, ss_item_sk#23, sum#27] - -(34) Exchange -Input [3]: [ss_store_sk#24, ss_item_sk#23, sum#27] -Arguments: hashpartitioning(ss_store_sk#24, ss_item_sk#23, 5), true, [id=#28] - -(35) HashAggregate [codegen id : 7] -Input [3]: [ss_store_sk#24, ss_item_sk#23, sum#27] -Keys [2]: [ss_store_sk#24, ss_item_sk#23] -Functions [1]: [sum(UnscaledValue(ss_sales_price#25))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#25))#29] -Results [2]: [ss_store_sk#24, MakeDecimal(sum(UnscaledValue(ss_sales_price#25))#29,17,2) AS revenue#30] - -(36) HashAggregate [codegen id : 7] -Input [2]: [ss_store_sk#24, revenue#30] -Keys [1]: [ss_store_sk#24] -Functions [1]: [partial_avg(revenue#30)] -Aggregate Attributes [2]: [sum#31, count#32] -Results [3]: [ss_store_sk#24, sum#33, count#34] - -(37) Exchange -Input [3]: [ss_store_sk#24, sum#33, count#34] -Arguments: hashpartitioning(ss_store_sk#24, 5), true, [id=#35] - -(38) HashAggregate [codegen id : 8] -Input [3]: [ss_store_sk#24, sum#33, count#34] -Keys [1]: [ss_store_sk#24] -Functions [1]: [avg(revenue#30)] -Aggregate Attributes [1]: [avg(revenue#30)#36] -Results [2]: [ss_store_sk#24, avg(revenue#30)#36 AS ave#37] - -(39) BroadcastExchange -Input [2]: [ss_store_sk#24, ave#37] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#38] - -(40) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_store_sk#5] -Right keys [1]: [ss_store_sk#24] -Join condition: (cast(revenue#14 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#37)), DecimalType(23,7), true)) - -(41) Project [codegen id : 9] -Output [6]: [s_store_name#2, i_item_desc#17, revenue#14, i_current_price#18, i_wholesale_cost#19, i_brand#20] -Input [9]: [s_store_name#2, ss_store_sk#5, revenue#14, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20, ss_store_sk#24, ave#37] - -(42) TakeOrderedAndProject -Input [6]: [s_store_name#2, i_item_desc#17, revenue#14, i_current_price#18, i_wholesale_cost#19, i_brand#20] -Arguments: 100, [s_store_name#2 ASC NULLS FIRST, i_item_desc#17 ASC NULLS FIRST], [s_store_name#2, i_item_desc#17, revenue#14, i_current_price#18, i_wholesale_cost#19, i_brand#20] - +TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST], output=[s_store_name#1,i_item_desc#2,revenue#3,i_current_price#4,i_wholesale_cost#5,i_brand#6]) ++- *(9) Project [s_store_name#1, i_item_desc#2, revenue#3, i_current_price#4, i_wholesale_cost#5, i_brand#6] + +- *(9) BroadcastHashJoin [ss_store_sk#7], [ss_store_sk#8], Inner, BuildRight, (cast(revenue#3 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#9)), DecimalType(23,7))) + :- *(9) Project [s_store_name#1, ss_store_sk#7, revenue#3, i_item_desc#2, i_current_price#4, i_wholesale_cost#5, i_brand#6] + : +- *(9) BroadcastHashJoin [ss_item_sk#10], [i_item_sk#11], Inner, BuildRight + : :- *(9) Project [s_store_name#1, ss_store_sk#7, ss_item_sk#10, revenue#3] + : : +- *(9) BroadcastHashJoin [s_store_sk#12], [ss_store_sk#7], Inner, BuildRight + : : :- *(9) Project [s_store_sk#12, s_store_name#1] + : : : +- *(9) Filter isnotnull(s_store_sk#12) + : : : +- *(9) FileScan parquet default.store[s_store_sk#12,s_store_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Filter isnotnull(revenue#3) + : : +- *(3) HashAggregate(keys=[ss_store_sk#7, ss_item_sk#10], functions=[sum(UnscaledValue(ss_sales_price#13))]) + : : +- Exchange hashpartitioning(ss_store_sk#7, ss_item_sk#10, 5) + : : +- *(2) HashAggregate(keys=[ss_store_sk#7, ss_item_sk#10], functions=[partial_sum(UnscaledValue(ss_sales_price#13))]) + : : +- *(2) Project [ss_item_sk#10, ss_store_sk#7, ss_sales_price#13] + : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight + : : :- *(2) Project [ss_sold_date_sk#14, ss_item_sk#10, ss_store_sk#7, ss_sales_price#13] + : : : +- *(2) Filter ((isnotnull(ss_sold_date_sk#14) && isnotnull(ss_store_sk#7)) && isnotnull(ss_item_sk#10)) + : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#14,ss_item_sk#10,ss_store_sk#7,ss_sales_price#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [d_date_sk#15] + : : +- *(1) Filter (((isnotnull(d_month_seq#16) && (d_month_seq#16 >= 1176)) && (d_month_seq#16 <= 1187)) && isnotnull(d_date_sk#15)) + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#15,d_month_seq#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1176), LessThanOrEqual(d_month_seq,1187),..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [i_item_sk#11, i_item_desc#2, i_current_price#4, i_wholesale_cost#5, i_brand#6] + : +- *(4) Filter isnotnull(i_item_sk#11) + : +- *(4) FileScan parquet default.item[i_item_sk#11,i_item_desc#2,i_current_price#4,i_wholesale_cost#5,i_brand#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/simplified.txt index a4b468ffe..2250a433c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/simplified.txt @@ -1,63 +1,57 @@ -TakeOrderedAndProject [s_store_name,i_item_desc,revenue,i_current_price,i_wholesale_cost,i_brand] - WholeStageCodegen (9) - Project [s_store_name,i_item_desc,revenue,i_current_price,i_wholesale_cost,i_brand] - BroadcastHashJoin [ss_store_sk,ss_store_sk,revenue,ave] - Project [s_store_name,ss_store_sk,revenue,i_item_desc,i_current_price,i_wholesale_cost,i_brand] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [s_store_name,ss_store_sk,ss_item_sk,revenue] +TakeOrderedAndProject [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name] + WholeStageCodegen + Project [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name] + BroadcastHashJoin [ave,revenue,ss_store_sk,ss_store_sk] + Project [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [revenue,s_store_name,ss_item_sk,ss_store_sk] BroadcastHashJoin [s_store_sk,ss_store_sk] - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_name] + Project [s_store_name,s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_name,s_store_sk] [s_store_name,s_store_sk] InputAdapter BroadcastExchange #1 - WholeStageCodegen (3) + WholeStageCodegen Filter [revenue] - HashAggregate [ss_store_sk,ss_item_sk,sum] [sum(UnscaledValue(ss_sales_price)),revenue,sum] + HashAggregate [ss_item_sk,ss_store_sk,sum,sum(UnscaledValue(ss_sales_price))] [revenue,sum,sum(UnscaledValue(ss_sales_price))] InputAdapter - Exchange [ss_store_sk,ss_item_sk] #2 - WholeStageCodegen (2) - HashAggregate [ss_store_sk,ss_item_sk,ss_sales_price] [sum,sum] - Project [ss_item_sk,ss_store_sk,ss_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Exchange [ss_item_sk,ss_store_sk] #2 + WholeStageCodegen + HashAggregate [ss_item_sk,ss_sales_price,ss_store_sk,sum,sum] [sum,sum] + Project [ss_item_sk,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) + WholeStageCodegen Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + Filter [d_date_sk,d_month_seq] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] InputAdapter BroadcastExchange #4 - WholeStageCodegen (4) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc,i_current_price,i_wholesale_cost,i_brand] + WholeStageCodegen + Project [i_brand,i_current_price,i_item_desc,i_item_sk,i_wholesale_cost] + Filter [i_item_sk] + Scan parquet default.item [i_brand,i_current_price,i_item_desc,i_item_sk,i_wholesale_cost] [i_brand,i_current_price,i_item_desc,i_item_sk,i_wholesale_cost] InputAdapter BroadcastExchange #5 - WholeStageCodegen (8) - HashAggregate [ss_store_sk,sum,count] [avg(revenue),ave,sum,count] + WholeStageCodegen + HashAggregate [avg(revenue),count,ss_store_sk,sum] [ave,avg(revenue),count,sum] InputAdapter Exchange [ss_store_sk] #6 - WholeStageCodegen (7) - HashAggregate [ss_store_sk,revenue] [sum,count,sum,count] - HashAggregate [ss_store_sk,ss_item_sk,sum] [sum(UnscaledValue(ss_sales_price)),revenue,sum] + WholeStageCodegen + HashAggregate [count,count,revenue,ss_store_sk,sum,sum] [count,count,sum,sum] + HashAggregate [ss_item_sk,ss_store_sk,sum,sum(UnscaledValue(ss_sales_price))] [revenue,sum,sum(UnscaledValue(ss_sales_price))] InputAdapter - Exchange [ss_store_sk,ss_item_sk] #7 - WholeStageCodegen (6) - HashAggregate [ss_store_sk,ss_item_sk,ss_sales_price] [sum,sum] - Project [ss_item_sk,ss_store_sk,ss_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + Exchange [ss_item_sk,ss_store_sk] #7 + WholeStageCodegen + HashAggregate [ss_item_sk,ss_sales_price,ss_store_sk,sum,sum] [sum,sum] + Project [ss_item_sk,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter - ReusedExchange [d_date_sk] #3 + ReusedExchange [d_date_sk] [d_date_sk] #3 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/explain.txt index fc18efd3d..fb220ece8 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/explain.txt @@ -1,310 +1,54 @@ == Physical Plan == -TakeOrderedAndProject (55) -+- * HashAggregate (54) - +- Exchange (53) - +- * HashAggregate (52) - +- Union (51) - :- * HashAggregate (32) - : +- Exchange (31) - : +- * HashAggregate (30) - : +- * Project (29) - : +- * BroadcastHashJoin Inner BuildRight (28) - : :- * Project (22) - : : +- * BroadcastHashJoin Inner BuildRight (21) - : : :- * Project (15) - : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : :- * Project (9) - : : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.web_sales (1) - : : : : +- BroadcastExchange (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.warehouse (4) - : : : +- BroadcastExchange (13) - : : : +- * Filter (12) - : : : +- * ColumnarToRow (11) - : : : +- Scan parquet default.date_dim (10) - : : +- BroadcastExchange (20) - : : +- * Project (19) - : : +- * Filter (18) - : : +- * ColumnarToRow (17) - : : +- Scan parquet default.time_dim (16) - : +- BroadcastExchange (27) - : +- * Project (26) - : +- * Filter (25) - : +- * ColumnarToRow (24) - : +- Scan parquet default.ship_mode (23) - +- * HashAggregate (50) - +- Exchange (49) - +- * HashAggregate (48) - +- * Project (47) - +- * BroadcastHashJoin Inner BuildRight (46) - :- * Project (44) - : +- * BroadcastHashJoin Inner BuildRight (43) - : :- * Project (41) - : : +- * BroadcastHashJoin Inner BuildRight (40) - : : :- * Project (38) - : : : +- * BroadcastHashJoin Inner BuildRight (37) - : : : :- * Filter (35) - : : : : +- * ColumnarToRow (34) - : : : : +- Scan parquet default.catalog_sales (33) - : : : +- ReusedExchange (36) - : : +- ReusedExchange (39) - : +- ReusedExchange (42) - +- ReusedExchange (45) - - -(1) Scan parquet default.web_sales -Output [7]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_ship_mode_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 5] -Input [7]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7] - -(3) Filter [codegen id : 5] -Input [7]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7] -Condition : (((isnotnull(ws_warehouse_sk#4) AND isnotnull(ws_sold_date_sk#1)) AND isnotnull(ws_sold_time_sk#2)) AND isnotnull(ws_ship_mode_sk#3)) - -(4) Scan parquet default.warehouse -Output [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/warehouse] -PushedFilters: [IsNotNull(w_warehouse_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] - -(6) Filter [codegen id : 1] -Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] -Condition : isnotnull(w_warehouse_sk#8) - -(7) BroadcastExchange -Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] - -(8) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_warehouse_sk#4] -Right keys [1]: [w_warehouse_sk#8] -Join condition: None - -(9) Project [codegen id : 5] -Output [12]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] -Input [14]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] - -(10) Scan parquet default.date_dim -Output [3]: [d_date_sk#16, d_year#17, d_moy#18] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [3]: [d_date_sk#16, d_year#17, d_moy#18] - -(12) Filter [codegen id : 2] -Input [3]: [d_date_sk#16, d_year#17, d_moy#18] -Condition : ((isnotnull(d_year#17) AND (d_year#17 = 2001)) AND isnotnull(d_date_sk#16)) - -(13) BroadcastExchange -Input [3]: [d_date_sk#16, d_year#17, d_moy#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] - -(14) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_sold_date_sk#1] -Right keys [1]: [d_date_sk#16] -Join condition: None - -(15) Project [codegen id : 5] -Output [13]: [ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] -Input [15]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_date_sk#16, d_year#17, d_moy#18] - -(16) Scan parquet default.time_dim -Output [2]: [t_time_sk#20, t_time#21] -Batched: true -Location [not included in comparison]/{warehouse_dir}/time_dim] -PushedFilters: [IsNotNull(t_time), GreaterThanOrEqual(t_time,30838), LessThanOrEqual(t_time,59638), IsNotNull(t_time_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 3] -Input [2]: [t_time_sk#20, t_time#21] - -(18) Filter [codegen id : 3] -Input [2]: [t_time_sk#20, t_time#21] -Condition : (((isnotnull(t_time#21) AND (t_time#21 >= 30838)) AND (t_time#21 <= 59638)) AND isnotnull(t_time_sk#20)) - -(19) Project [codegen id : 3] -Output [1]: [t_time_sk#20] -Input [2]: [t_time_sk#20, t_time#21] - -(20) BroadcastExchange -Input [1]: [t_time_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] - -(21) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_sold_time_sk#2] -Right keys [1]: [t_time_sk#20] -Join condition: None - -(22) Project [codegen id : 5] -Output [12]: [ws_ship_mode_sk#3, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] -Input [14]: [ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18, t_time_sk#20] - -(23) Scan parquet default.ship_mode -Output [2]: [sm_ship_mode_sk#23, sm_carrier#24] -Batched: true -Location [not included in comparison]/{warehouse_dir}/ship_mode] -PushedFilters: [In(sm_carrier, [DHL,BARIAN]), IsNotNull(sm_ship_mode_sk)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 4] -Input [2]: [sm_ship_mode_sk#23, sm_carrier#24] - -(25) Filter [codegen id : 4] -Input [2]: [sm_ship_mode_sk#23, sm_carrier#24] -Condition : (sm_carrier#24 IN (DHL,BARIAN) AND isnotnull(sm_ship_mode_sk#23)) - -(26) Project [codegen id : 4] -Output [1]: [sm_ship_mode_sk#23] -Input [2]: [sm_ship_mode_sk#23, sm_carrier#24] - -(27) BroadcastExchange -Input [1]: [sm_ship_mode_sk#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] - -(28) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_ship_mode_sk#3] -Right keys [1]: [sm_ship_mode_sk#23] -Join condition: None - -(29) Project [codegen id : 5] -Output [11]: [ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] -Input [13]: [ws_ship_mode_sk#3, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18, sm_ship_mode_sk#23] - -(30) HashAggregate [codegen id : 5] -Input [11]: [ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] -Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17] -Functions [24]: [partial_sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] -Aggregate Attributes [48]: [sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69, sum#70, isEmpty#71, sum#72, isEmpty#73] -Results [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] - -(31) Exchange -Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] -Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, 5), true, [id=#122] - -(32) HashAggregate [codegen id : 6] -Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] -Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17] -Functions [24]: [sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] -Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146] -Results [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, DHL,BARIAN AS ship_carriers#147, d_year#17 AS year#148, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123 AS jan_sales#149, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124 AS feb_sales#150, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125 AS mar_sales#151, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126 AS apr_sales#152, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127 AS may_sales#153, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128 AS jun_sales#154, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129 AS jul_sales#155, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130 AS aug_sales#156, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131 AS sep_sales#157, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132 AS oct_sales#158, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133 AS nov_sales#159, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134 AS dec_sales#160, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135 AS jan_net#161, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136 AS feb_net#162, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137 AS mar_net#163, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138 AS apr_net#164, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139 AS may_net#165, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140 AS jun_net#166, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141 AS jul_net#167, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142 AS aug_net#168, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143 AS sep_net#169, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144 AS oct_net#170, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145 AS nov_net#171, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146 AS dec_net#172] - -(33) Scan parquet default.catalog_sales -Output [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_sold_time_sk), IsNotNull(cs_ship_mode_sk)] -ReadSchema: struct - -(34) ColumnarToRow [codegen id : 11] -Input [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] - -(35) Filter [codegen id : 11] -Input [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] -Condition : (((isnotnull(cs_warehouse_sk#176) AND isnotnull(cs_sold_date_sk#173)) AND isnotnull(cs_sold_time_sk#174)) AND isnotnull(cs_ship_mode_sk#175)) - -(36) ReusedExchange [Reuses operator id: 7] -Output [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] - -(37) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_warehouse_sk#176] -Right keys [1]: [w_warehouse_sk#8] -Join condition: None - -(38) Project [codegen id : 11] -Output [12]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] -Input [14]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] - -(39) ReusedExchange [Reuses operator id: 13] -Output [3]: [d_date_sk#16, d_year#17, d_moy#18] - -(40) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_sold_date_sk#173] -Right keys [1]: [d_date_sk#16] -Join condition: None - -(41) Project [codegen id : 11] -Output [13]: [cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] -Input [15]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_date_sk#16, d_year#17, d_moy#18] - -(42) ReusedExchange [Reuses operator id: 20] -Output [1]: [t_time_sk#20] - -(43) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_sold_time_sk#174] -Right keys [1]: [t_time_sk#20] -Join condition: None - -(44) Project [codegen id : 11] -Output [12]: [cs_ship_mode_sk#175, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] -Input [14]: [cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18, t_time_sk#20] - -(45) ReusedExchange [Reuses operator id: 27] -Output [1]: [sm_ship_mode_sk#23] - -(46) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_ship_mode_sk#175] -Right keys [1]: [sm_ship_mode_sk#23] -Join condition: None - -(47) Project [codegen id : 11] -Output [11]: [cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] -Input [13]: [cs_ship_mode_sk#175, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18, sm_ship_mode_sk#23] - -(48) HashAggregate [codegen id : 11] -Input [11]: [cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] -Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17] -Functions [24]: [partial_sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] -Aggregate Attributes [48]: [sum#180, isEmpty#181, sum#182, isEmpty#183, sum#184, isEmpty#185, sum#186, isEmpty#187, sum#188, isEmpty#189, sum#190, isEmpty#191, sum#192, isEmpty#193, sum#194, isEmpty#195, sum#196, isEmpty#197, sum#198, isEmpty#199, sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205, sum#206, isEmpty#207, sum#208, isEmpty#209, sum#210, isEmpty#211, sum#212, isEmpty#213, sum#214, isEmpty#215, sum#216, isEmpty#217, sum#218, isEmpty#219, sum#220, isEmpty#221, sum#222, isEmpty#223, sum#224, isEmpty#225, sum#226, isEmpty#227] -Results [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237, sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275] - -(49) Exchange -Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237, sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275] -Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, 5), true, [id=#276] - -(50) HashAggregate [codegen id : 12] -Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237, sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275] -Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17] -Functions [24]: [sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] -Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#277, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#278, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#279, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#280, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#281, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#282, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#283, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#284, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#285, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#286, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#287, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#288, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#295, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300] -Results [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, DHL,BARIAN AS ship_carriers#301, d_year#17 AS year#302, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#277 AS jan_sales#303, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#278 AS feb_sales#304, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#279 AS mar_sales#305, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#280 AS apr_sales#306, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#281 AS may_sales#307, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#282 AS jun_sales#308, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#283 AS jul_sales#309, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#284 AS aug_sales#310, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#285 AS sep_sales#311, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#286 AS oct_sales#312, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#287 AS nov_sales#313, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#288 AS dec_sales#314, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289 AS jan_net#315, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290 AS feb_net#316, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291 AS mar_net#317, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292 AS apr_net#318, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293 AS may_net#319, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294 AS jun_net#320, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#295 AS jul_net#321, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296 AS aug_net#322, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297 AS sep_net#323, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298 AS oct_net#324, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299 AS nov_net#325, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300 AS dec_net#326] - -(51) Union - -(52) HashAggregate [codegen id : 13] -Input [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, jan_sales#149, feb_sales#150, mar_sales#151, apr_sales#152, may_sales#153, jun_sales#154, jul_sales#155, aug_sales#156, sep_sales#157, oct_sales#158, nov_sales#159, dec_sales#160, jan_net#161, feb_net#162, mar_net#163, apr_net#164, may_net#165, jun_net#166, jul_net#167, aug_net#168, sep_net#169, oct_net#170, nov_net#171, dec_net#172] -Keys [8]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148] -Functions [36]: [partial_sum(jan_sales#149), partial_sum(feb_sales#150), partial_sum(mar_sales#151), partial_sum(apr_sales#152), partial_sum(may_sales#153), partial_sum(jun_sales#154), partial_sum(jul_sales#155), partial_sum(aug_sales#156), partial_sum(sep_sales#157), partial_sum(oct_sales#158), partial_sum(nov_sales#159), partial_sum(dec_sales#160), partial_sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(jan_net#161), partial_sum(feb_net#162), partial_sum(mar_net#163), partial_sum(apr_net#164), partial_sum(may_net#165), partial_sum(jun_net#166), partial_sum(jul_net#167), partial_sum(aug_net#168), partial_sum(sep_net#169), partial_sum(oct_net#170), partial_sum(nov_net#171), partial_sum(dec_net#172)] -Aggregate Attributes [72]: [sum#327, isEmpty#328, sum#329, isEmpty#330, sum#331, isEmpty#332, sum#333, isEmpty#334, sum#335, isEmpty#336, sum#337, isEmpty#338, sum#339, isEmpty#340, sum#341, isEmpty#342, sum#343, isEmpty#344, sum#345, isEmpty#346, sum#347, isEmpty#348, sum#349, isEmpty#350, sum#351, isEmpty#352, sum#353, isEmpty#354, sum#355, isEmpty#356, sum#357, isEmpty#358, sum#359, isEmpty#360, sum#361, isEmpty#362, sum#363, isEmpty#364, sum#365, isEmpty#366, sum#367, isEmpty#368, sum#369, isEmpty#370, sum#371, isEmpty#372, sum#373, isEmpty#374, sum#375, isEmpty#376, sum#377, isEmpty#378, sum#379, isEmpty#380, sum#381, isEmpty#382, sum#383, isEmpty#384, sum#385, isEmpty#386, sum#387, isEmpty#388, sum#389, isEmpty#390, sum#391, isEmpty#392, sum#393, isEmpty#394, sum#395, isEmpty#396, sum#397, isEmpty#398] -Results [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470] - -(53) Exchange -Input [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470] -Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, 5), true, [id=#471] - -(54) HashAggregate [codegen id : 14] -Input [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470] -Keys [8]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148] -Functions [36]: [sum(jan_sales#149), sum(feb_sales#150), sum(mar_sales#151), sum(apr_sales#152), sum(may_sales#153), sum(jun_sales#154), sum(jul_sales#155), sum(aug_sales#156), sum(sep_sales#157), sum(oct_sales#158), sum(nov_sales#159), sum(dec_sales#160), sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(jan_net#161), sum(feb_net#162), sum(mar_net#163), sum(apr_net#164), sum(may_net#165), sum(jun_net#166), sum(jul_net#167), sum(aug_net#168), sum(sep_net#169), sum(oct_net#170), sum(nov_net#171), sum(dec_net#172)] -Aggregate Attributes [36]: [sum(jan_sales#149)#472, sum(feb_sales#150)#473, sum(mar_sales#151)#474, sum(apr_sales#152)#475, sum(may_sales#153)#476, sum(jun_sales#154)#477, sum(jul_sales#155)#478, sum(aug_sales#156)#479, sum(sep_sales#157)#480, sum(oct_sales#158)#481, sum(nov_sales#159)#482, sum(dec_sales#160)#483, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#484, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#485, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#486, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#487, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#488, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#489, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#490, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#491, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#492, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#493, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#494, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#495, sum(jan_net#161)#496, sum(feb_net#162)#497, sum(mar_net#163)#498, sum(apr_net#164)#499, sum(may_net#165)#500, sum(jun_net#166)#501, sum(jul_net#167)#502, sum(aug_net#168)#503, sum(sep_net#169)#504, sum(oct_net#170)#505, sum(nov_net#171)#506, sum(dec_net#172)#507] -Results [44]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, sum(jan_sales#149)#472 AS jan_sales#508, sum(feb_sales#150)#473 AS feb_sales#509, sum(mar_sales#151)#474 AS mar_sales#510, sum(apr_sales#152)#475 AS apr_sales#511, sum(may_sales#153)#476 AS may_sales#512, sum(jun_sales#154)#477 AS jun_sales#513, sum(jul_sales#155)#478 AS jul_sales#514, sum(aug_sales#156)#479 AS aug_sales#515, sum(sep_sales#157)#480 AS sep_sales#516, sum(oct_sales#158)#481 AS oct_sales#517, sum(nov_sales#159)#482 AS nov_sales#518, sum(dec_sales#160)#483 AS dec_sales#519, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#484 AS jan_sales_per_sq_foot#520, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#485 AS feb_sales_per_sq_foot#521, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#486 AS mar_sales_per_sq_foot#522, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#487 AS apr_sales_per_sq_foot#523, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#488 AS may_sales_per_sq_foot#524, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#489 AS jun_sales_per_sq_foot#525, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#490 AS jul_sales_per_sq_foot#526, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#491 AS aug_sales_per_sq_foot#527, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#492 AS sep_sales_per_sq_foot#528, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#493 AS oct_sales_per_sq_foot#529, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#494 AS nov_sales_per_sq_foot#530, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#495 AS dec_sales_per_sq_foot#531, sum(jan_net#161)#496 AS jan_net#532, sum(feb_net#162)#497 AS feb_net#533, sum(mar_net#163)#498 AS mar_net#534, sum(apr_net#164)#499 AS apr_net#535, sum(may_net#165)#500 AS may_net#536, sum(jun_net#166)#501 AS jun_net#537, sum(jul_net#167)#502 AS jul_net#538, sum(aug_net#168)#503 AS aug_net#539, sum(sep_net#169)#504 AS sep_net#540, sum(oct_net#170)#505 AS oct_net#541, sum(nov_net#171)#506 AS nov_net#542, sum(dec_net#172)#507 AS dec_net#543] - -(55) TakeOrderedAndProject -Input [44]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, jan_sales#508, feb_sales#509, mar_sales#510, apr_sales#511, may_sales#512, jun_sales#513, jul_sales#514, aug_sales#515, sep_sales#516, oct_sales#517, nov_sales#518, dec_sales#519, jan_sales_per_sq_foot#520, feb_sales_per_sq_foot#521, mar_sales_per_sq_foot#522, apr_sales_per_sq_foot#523, may_sales_per_sq_foot#524, jun_sales_per_sq_foot#525, jul_sales_per_sq_foot#526, aug_sales_per_sq_foot#527, sep_sales_per_sq_foot#528, oct_sales_per_sq_foot#529, nov_sales_per_sq_foot#530, dec_sales_per_sq_foot#531, jan_net#532, feb_net#533, mar_net#534, apr_net#535, may_net#536, jun_net#537, jul_net#538, aug_net#539, sep_net#540, oct_net#541, nov_net#542, dec_net#543] -Arguments: 100, [w_warehouse_name#9 ASC NULLS FIRST], [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, jan_sales#508, feb_sales#509, mar_sales#510, apr_sales#511, may_sales#512, jun_sales#513, jul_sales#514, aug_sales#515, sep_sales#516, oct_sales#517, nov_sales#518, dec_sales#519, jan_sales_per_sq_foot#520, feb_sales_per_sq_foot#521, mar_sales_per_sq_foot#522, apr_sales_per_sq_foot#523, ... 20 more fields] - +TakeOrderedAndProject(limit=100, orderBy=[w_warehouse_name#1 ASC NULLS FIRST], output=[w_warehouse_name#1,w_warehouse_sq_ft#2,w_city#3,w_county#4,w_state#5,w_country#6,ship_carriers#7,year#8,jan_sales#9,feb_sales#10,mar_sales#11,apr_sales#12,may_sales#13,jun_sales#14,jul_sales#15,aug_sales#16,sep_sales#17,oct_sales#18,nov_sales#19,dec_sales#20,jan_sales_per_sq_foot#21,feb_sales_per_sq_foot#22,mar_sales_per_sq_foot#23,apr_sales_per_sq_foot#24,... 20 more fields]) ++- *(14) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, ship_carriers#7, year#8], functions=[sum(jan_sales#25), sum(feb_sales#26), sum(mar_sales#27), sum(apr_sales#28), sum(may_sales#29), sum(jun_sales#30), sum(jul_sales#31), sum(aug_sales#32), sum(sep_sales#33), sum(oct_sales#34), sum(nov_sales#35), sum(dec_sales#36), sum(CheckOverflow((promote_precision(jan_sales#25) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(feb_sales#26) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(mar_sales#27) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(apr_sales#28) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(may_sales#29) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jun_sales#30) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jul_sales#31) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(aug_sales#32) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(sep_sales#33) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(oct_sales#34) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(nov_sales#35) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(dec_sales#36) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), ... 12 more fields]) + +- Exchange hashpartitioning(w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, ship_carriers#7, year#8, 5) + +- *(13) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, ship_carriers#7, year#8], functions=[partial_sum(jan_sales#25), partial_sum(feb_sales#26), partial_sum(mar_sales#27), partial_sum(apr_sales#28), partial_sum(may_sales#29), partial_sum(jun_sales#30), partial_sum(jul_sales#31), partial_sum(aug_sales#32), partial_sum(sep_sales#33), partial_sum(oct_sales#34), partial_sum(nov_sales#35), partial_sum(dec_sales#36), partial_sum(CheckOverflow((promote_precision(jan_sales#25) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(feb_sales#26) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(mar_sales#27) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(apr_sales#28) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(may_sales#29) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jun_sales#30) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jul_sales#31) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(aug_sales#32) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(sep_sales#33) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(oct_sales#34) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(nov_sales#35) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(dec_sales#36) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), ... 12 more fields]) + +- Union + :- *(6) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) + : +- Exchange hashpartitioning(w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, 5) + : +- *(5) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) + : +- *(5) Project [ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] + : +- *(5) BroadcastHashJoin [ws_ship_mode_sk#42], [sm_ship_mode_sk#43], Inner, BuildRight + : :- *(5) Project [ws_ship_mode_sk#42, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] + : : +- *(5) BroadcastHashJoin [ws_sold_time_sk#44], [t_time_sk#45], Inner, BuildRight + : : :- *(5) Project [ws_sold_time_sk#44, ws_ship_mode_sk#42, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] + : : : +- *(5) BroadcastHashJoin [ws_sold_date_sk#46], [d_date_sk#47], Inner, BuildRight + : : : :- *(5) Project [ws_sold_date_sk#46, ws_sold_time_sk#44, ws_ship_mode_sk#42, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6] + : : : : +- *(5) BroadcastHashJoin [ws_warehouse_sk#48], [w_warehouse_sk#49], Inner, BuildRight + : : : : :- *(5) Project [ws_sold_date_sk#46, ws_sold_time_sk#44, ws_ship_mode_sk#42, ws_warehouse_sk#48, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41] + : : : : : +- *(5) Filter (((isnotnull(ws_warehouse_sk#48) && isnotnull(ws_sold_date_sk#46)) && isnotnull(ws_sold_time_sk#44)) && isnotnull(ws_ship_mode_sk#42)) + : : : : : +- *(5) FileScan parquet default.web_sales[ws_sold_date_sk#46,ws_sold_time_sk#44,ws_ship_mode_sk#42,ws_warehouse_sk#48,ws_quantity#40,ws_ext_sales_price#39,ws_net_paid#41] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [t_time_sk#45] + : : +- *(3) Filter (((isnotnull(t_time#50) && (t_time#50 >= 30838)) && (t_time#50 <= 59638)) && isnotnull(t_time_sk#45)) + : : +- *(3) FileScan parquet default.time_dim[t_time_sk#45,t_time#50] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_time), GreaterThanOrEqual(t_time,30838), LessThanOrEqual(t_time,59638), IsNotNull(t_..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [sm_ship_mode_sk#43] + : +- *(4) Filter (sm_carrier#51 IN (DHL,BARIAN) && isnotnull(sm_ship_mode_sk#43)) + : +- *(4) FileScan parquet default.ship_mode[sm_ship_mode_sk#43,sm_carrier#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/ship_mode], PartitionFilters: [], PushedFilters: [In(sm_carrier, [DHL,BARIAN]), IsNotNull(sm_ship_mode_sk)], ReadSchema: struct + +- *(12) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) + +- Exchange hashpartitioning(w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, 5) + +- *(11) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) + +- *(11) Project [cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] + +- *(11) BroadcastHashJoin [cs_ship_mode_sk#55], [sm_ship_mode_sk#43], Inner, BuildRight + :- *(11) Project [cs_ship_mode_sk#55, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] + : +- *(11) BroadcastHashJoin [cs_sold_time_sk#56], [t_time_sk#45], Inner, BuildRight + : :- *(11) Project [cs_sold_time_sk#56, cs_ship_mode_sk#55, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] + : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#57], [d_date_sk#47], Inner, BuildRight + : : :- *(11) Project [cs_sold_date_sk#57, cs_sold_time_sk#56, cs_ship_mode_sk#55, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6] + : : : +- *(11) BroadcastHashJoin [cs_warehouse_sk#58], [w_warehouse_sk#49], Inner, BuildRight + : : : :- *(11) Project [cs_sold_date_sk#57, cs_sold_time_sk#56, cs_ship_mode_sk#55, cs_warehouse_sk#58, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54] + : : : : +- *(11) Filter (((isnotnull(cs_warehouse_sk#58) && isnotnull(cs_sold_date_sk#57)) && isnotnull(cs_sold_time_sk#56)) && isnotnull(cs_ship_mode_sk#55)) + : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#57,cs_sold_time_sk#56,cs_ship_mode_sk#55,cs_warehouse_sk#58,cs_quantity#53,cs_sales_price#52,cs_net_paid_inc_tax#54] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_sold_time_sk), IsNotNull(cs..., ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5] - -(3) Filter [codegen id : 4] -Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5] -Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#2)) - -(4) Scan parquet default.date_dim -Output [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] - -(6) Filter [codegen id : 1] -Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] -Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 1200)) AND (d_month_seq#7 <= 1211)) AND isnotnull(d_date_sk#6)) - -(7) Project [codegen id : 1] -Output [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] -Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] - -(8) BroadcastExchange -Input [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] - -(9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#6] -Join condition: None - -(10) Project [codegen id : 4] -Output [7]: [ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10] -Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] - -(11) Scan parquet default.store -Output [2]: [s_store_sk#12, s_store_id#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [2]: [s_store_sk#12, s_store_id#13] - -(13) Filter [codegen id : 2] -Input [2]: [s_store_sk#12, s_store_id#13] -Condition : isnotnull(s_store_sk#12) - -(14) BroadcastExchange -Input [2]: [s_store_sk#12, s_store_id#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] - -(15) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#12] -Join condition: None - -(16) Project [codegen id : 4] -Output [7]: [ss_item_sk#2, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_id#13] -Input [9]: [ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_sk#12, s_store_id#13] - -(17) Scan parquet default.item -Output [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(18) ColumnarToRow [codegen id : 3] -Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] - -(19) Filter [codegen id : 3] -Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] -Condition : isnotnull(i_item_sk#15) - -(20) BroadcastExchange -Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] - -(21) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#15] -Join condition: None - -(22) Project [codegen id : 4] -Output [10]: [ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] -Input [12]: [ss_item_sk#2, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_id#13, i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] - -(23) Expand [codegen id : 4] -Input [10]: [ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] -Arguments: [List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, 0), List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, null, 1), List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, null, null, 3), List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, null, null, null, 7), List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, null, null, null, null, 15), List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, null, null, null, null, null, 31), List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, null, null, null, null, null, null, 63), List(ss_quantity#4, ss_sales_price#5, i_category#18, null, null, null, null, null, null, null, 127), List(ss_quantity#4, ss_sales_price#5, null, null, null, null, null, null, null, null, 255)], [ss_quantity#4, ss_sales_price#5, i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] - -(24) HashAggregate [codegen id : 4] -Input [11]: [ss_quantity#4, ss_sales_price#5, i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] -Keys [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] -Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [2]: [sum#30, isEmpty#31] -Results [11]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, sum#32, isEmpty#33] - -(25) Exchange -Input [11]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, sum#32, isEmpty#33] -Arguments: hashpartitioning(i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, 5), true, [id=#34] - -(26) HashAggregate [codegen id : 5] -Input [11]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, sum#32, isEmpty#33] -Keys [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] -Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#35] -Results [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#35 AS sumsales#36] - -(27) Exchange -Input [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36] -Arguments: hashpartitioning(i_category#21, 5), true, [id=#37] - -(28) Sort [codegen id : 6] -Input [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36] -Arguments: [i_category#21 ASC NULLS FIRST, sumsales#36 DESC NULLS LAST], false, 0 - -(29) Window -Input [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36] -Arguments: [rank(sumsales#36) windowspecdefinition(i_category#21, sumsales#36 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#38], [i_category#21], [sumsales#36 DESC NULLS LAST] - -(30) Filter [codegen id : 7] -Input [10]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36, rk#38] -Condition : (isnotnull(rk#38) AND (rk#38 <= 100)) - -(31) TakeOrderedAndProject -Input [10]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36, rk#38] -Arguments: 100, [i_category#21 ASC NULLS FIRST, i_class#22 ASC NULLS FIRST, i_brand#23 ASC NULLS FIRST, i_product_name#24 ASC NULLS FIRST, d_year#25 ASC NULLS FIRST, d_qoy#26 ASC NULLS FIRST, d_moy#27 ASC NULLS FIRST, s_store_id#28 ASC NULLS FIRST, sumsales#36 ASC NULLS FIRST, rk#38 ASC NULLS FIRST], [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36, rk#38] - +TakeOrderedAndProject(limit=100, orderBy=[i_category#1 ASC NULLS FIRST,i_class#2 ASC NULLS FIRST,i_brand#3 ASC NULLS FIRST,i_product_name#4 ASC NULLS FIRST,d_year#5 ASC NULLS FIRST,d_qoy#6 ASC NULLS FIRST,d_moy#7 ASC NULLS FIRST,s_store_id#8 ASC NULLS FIRST,sumsales#9 ASC NULLS FIRST,rk#10 ASC NULLS FIRST], output=[i_category#1,i_class#2,i_brand#3,i_product_name#4,d_year#5,d_qoy#6,d_moy#7,s_store_id#8,sumsales#9,rk#10]) ++- *(7) Filter (isnotnull(rk#10) && (rk#10 <= 100)) + +- Window [rank(sumsales#9) windowspecdefinition(i_category#1, sumsales#9 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#10], [i_category#1], [sumsales#9 DESC NULLS LAST] + +- *(6) Sort [i_category#1 ASC NULLS FIRST, sumsales#9 DESC NULLS LAST], false, 0 + +- Exchange hashpartitioning(i_category#1, 5) + +- *(5) HashAggregate(keys=[i_category#1, i_class#2, i_brand#3, i_product_name#4, d_year#5, d_qoy#6, d_moy#7, s_store_id#8, spark_grouping_id#11], functions=[sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#12 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00))]) + +- Exchange hashpartitioning(i_category#1, i_class#2, i_brand#3, i_product_name#4, d_year#5, d_qoy#6, d_moy#7, s_store_id#8, spark_grouping_id#11, 5) + +- *(4) HashAggregate(keys=[i_category#1, i_class#2, i_brand#3, i_product_name#4, d_year#5, d_qoy#6, d_moy#7, s_store_id#8, spark_grouping_id#11], functions=[partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#12 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00))]) + +- *(4) Expand [List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, i_brand#16, i_product_name#17, d_year#18, d_qoy#19, d_moy#20, s_store_id#21, 0), List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, i_brand#16, i_product_name#17, d_year#18, d_qoy#19, d_moy#20, null, 1), List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, i_brand#16, i_product_name#17, d_year#18, d_qoy#19, null, null, 3), List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, i_brand#16, i_product_name#17, d_year#18, null, null, null, 7), List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, i_brand#16, i_product_name#17, null, null, null, null, 15), List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, i_brand#16, null, null, null, null, null, 31), List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, null, null, null, null, null, null, 63), List(ss_quantity#13, ss_sales_price#12, i_category#14, null, null, null, null, null, null, null, 127), List(ss_quantity#13, ss_sales_price#12, null, null, null, null, null, null, null, null, 255)], [ss_quantity#13, ss_sales_price#12, i_category#1, i_class#2, i_brand#3, i_product_name#4, d_year#5, d_qoy#6, d_moy#7, s_store_id#8, spark_grouping_id#11] + +- *(4) Project [ss_quantity#13, ss_sales_price#12, d_year#18, d_moy#20, d_qoy#19, i_category#22 AS i_category#14, i_class#23 AS i_class#15, i_brand#24 AS i_brand#16, i_product_name#25 AS i_product_name#17, d_year#18, d_qoy#19, d_moy#20, s_store_id#26 AS s_store_id#21] + +- *(4) BroadcastHashJoin [ss_item_sk#27], [i_item_sk#28], Inner, BuildRight + :- *(4) Project [ss_item_sk#27, ss_quantity#13, ss_sales_price#12, d_year#18, d_moy#20, d_qoy#19, s_store_id#26] + : +- *(4) BroadcastHashJoin [ss_store_sk#29], [s_store_sk#30], Inner, BuildRight + : :- *(4) Project [ss_item_sk#27, ss_store_sk#29, ss_quantity#13, ss_sales_price#12, d_year#18, d_moy#20, d_qoy#19] + : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight + : : :- *(4) Project [ss_sold_date_sk#31, ss_item_sk#27, ss_store_sk#29, ss_quantity#13, ss_sales_price#12] + : : : +- *(4) Filter ((isnotnull(ss_sold_date_sk#31) && isnotnull(ss_store_sk#29)) && isnotnull(ss_item_sk#27)) + : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#31,ss_item_sk#27,ss_store_sk#29,ss_quantity#13,ss_sales_price#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)], ReadSchema: struct= 1200)) && (d_month_seq#33 <= 1211)) && isnotnull(d_date_sk#32)) + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#32,d_month_seq#33,d_year#18,d_moy#20,d_qoy#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [s_store_sk#30, s_store_id#26] + : +- *(2) Filter isnotnull(s_store_sk#30) + : +- *(2) FileScan parquet default.store[s_store_sk#30,s_store_id#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [i_item_sk#28, i_brand#24, i_class#23, i_category#22, i_product_name#25] + +- *(3) Filter isnotnull(i_item_sk#28) + +- *(3) FileScan parquet default.item[i_item_sk#28,i_brand#24,i_class#23,i_category#22,i_product_name#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/simplified.txt index b343c2d02..49dac167f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/simplified.txt @@ -1,48 +1,43 @@ -TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,rk] - WholeStageCodegen (7) +TakeOrderedAndProject [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,rk,s_store_id,sumsales] + WholeStageCodegen Filter [rk] InputAdapter - Window [sumsales,i_category] - WholeStageCodegen (6) + Window [i_category,sumsales] + WholeStageCodegen Sort [i_category,sumsales] InputAdapter Exchange [i_category] #1 - WholeStageCodegen (5) - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] + WholeStageCodegen + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,spark_grouping_id,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00))] [sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales] InputAdapter - Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id] #2 - WholeStageCodegen (4) - HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,ss_sales_price,ss_quantity] [sum,isEmpty,sum,isEmpty] - Expand [ss_quantity,ss_sales_price,i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] - Project [ss_quantity,ss_sales_price,i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price] + Exchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,spark_grouping_id] #2 + WholeStageCodegen + HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,spark_grouping_id,ss_quantity,ss_sales_price,sum,sum] [sum,sum] + Expand [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,ss_quantity,ss_sales_price] + Project [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [d_moy,d_qoy,d_year,s_store_id,ss_item_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,d_qoy,d_year,ss_item_sk,ss_quantity,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk,d_year,d_moy,d_qoy] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq,d_year,d_moy,d_qoy] + WholeStageCodegen + Project [d_date_sk,d_moy,d_qoy,d_year] + Filter [d_date_sk,d_month_seq] + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy,d_qoy,d_year] [d_date_sk,d_month_seq,d_moy,d_qoy,d_year] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id] + WholeStageCodegen + Project [s_store_id,s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_id,s_store_sk] [s_store_id,s_store_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (3) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] + WholeStageCodegen + Project [i_brand,i_category,i_class,i_item_sk,i_product_name] + Filter [i_item_sk] + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_product_name] [i_brand,i_category,i_class,i_item_sk,i_product_name] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/explain.txt index d0c618bdb..3d2fcd498 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/explain.txt @@ -1,241 +1,41 @@ == Physical Plan == -TakeOrderedAndProject (43) -+- * Project (42) - +- * BroadcastHashJoin Inner BuildRight (41) - :- * Project (39) - : +- * BroadcastHashJoin Inner BuildRight (38) - : :- * HashAggregate (33) - : : +- Exchange (32) - : : +- * HashAggregate (31) - : : +- * Project (30) - : : +- * BroadcastHashJoin Inner BuildRight (29) - : : :- * Project (24) - : : : +- * BroadcastHashJoin Inner BuildRight (23) - : : : :- * Project (17) - : : : : +- * BroadcastHashJoin Inner BuildRight (16) - : : : : :- * Project (10) - : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.store_sales (1) - : : : : : +- BroadcastExchange (8) - : : : : : +- * Project (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.date_dim (4) - : : : : +- BroadcastExchange (15) - : : : : +- * Project (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.store (11) - : : : +- BroadcastExchange (22) - : : : +- * Project (21) - : : : +- * Filter (20) - : : : +- * ColumnarToRow (19) - : : : +- Scan parquet default.household_demographics (18) - : : +- BroadcastExchange (28) - : : +- * Filter (27) - : : +- * ColumnarToRow (26) - : : +- Scan parquet default.customer_address (25) - : +- BroadcastExchange (37) - : +- * Filter (36) - : +- * ColumnarToRow (35) - : +- Scan parquet default.customer (34) - +- ReusedExchange (40) - - -(1) Scan parquet default.store_sales -Output [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 5] -Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] - -(3) Filter [codegen id : 5] -Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] -Condition : ((((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_addr_sk#4)) AND isnotnull(ss_customer_sk#2)) - -(4) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#11, d_dom#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#11, d_dom#12] - -(6) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#11, d_dom#12] -Condition : ((((isnotnull(d_dom#12) AND (d_dom#12 >= 1)) AND (d_dom#12 <= 2)) AND d_year#11 IN (1999,2000,2001)) AND isnotnull(d_date_sk#10)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#10] -Input [3]: [d_date_sk#10, d_year#11, d_dom#12] - -(8) BroadcastExchange -Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] - -(9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(10) Project [codegen id : 5] -Output [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] -Input [10]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, d_date_sk#10] - -(11) Scan parquet default.store -Output [2]: [s_store_sk#14, s_city#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [In(s_city, [Midway,Fairview]), IsNotNull(s_store_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [2]: [s_store_sk#14, s_city#15] - -(13) Filter [codegen id : 2] -Input [2]: [s_store_sk#14, s_city#15] -Condition : (s_city#15 IN (Midway,Fairview) AND isnotnull(s_store_sk#14)) - -(14) Project [codegen id : 2] -Output [1]: [s_store_sk#14] -Input [2]: [s_store_sk#14, s_city#15] - -(15) BroadcastExchange -Input [1]: [s_store_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] - -(16) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_store_sk#5] -Right keys [1]: [s_store_sk#14] -Join condition: None - -(17) Project [codegen id : 5] -Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] -Input [9]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, s_store_sk#14] - -(18) Scan parquet default.household_demographics -Output [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] -Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 3] -Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] - -(20) Filter [codegen id : 3] -Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] -Condition : (((hd_dep_count#18 = 4) OR (hd_vehicle_count#19 = 3)) AND isnotnull(hd_demo_sk#17)) - -(21) Project [codegen id : 3] -Output [1]: [hd_demo_sk#17] -Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] - -(22) BroadcastExchange -Input [1]: [hd_demo_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] - -(23) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_hdemo_sk#3] -Right keys [1]: [hd_demo_sk#17] -Join condition: None - -(24) Project [codegen id : 5] -Output [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] -Input [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, hd_demo_sk#17] - -(25) Scan parquet default.customer_address -Output [2]: [ca_address_sk#21, ca_city#22] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] -ReadSchema: struct - -(26) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#21, ca_city#22] - -(27) Filter [codegen id : 4] -Input [2]: [ca_address_sk#21, ca_city#22] -Condition : (isnotnull(ca_address_sk#21) AND isnotnull(ca_city#22)) - -(28) BroadcastExchange -Input [2]: [ca_address_sk#21, ca_city#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] - -(29) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_addr_sk#4] -Right keys [1]: [ca_address_sk#21] -Join condition: None - -(30) Project [codegen id : 5] -Output [7]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, ca_city#22] -Input [8]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, ca_address_sk#21, ca_city#22] - -(31) HashAggregate [codegen id : 5] -Input [7]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, ca_city#22] -Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22] -Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#7)), partial_sum(UnscaledValue(ss_ext_list_price#8)), partial_sum(UnscaledValue(ss_ext_tax#9))] -Aggregate Attributes [3]: [sum#24, sum#25, sum#26] -Results [7]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22, sum#27, sum#28, sum#29] - -(32) Exchange -Input [7]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22, sum#27, sum#28, sum#29] -Arguments: hashpartitioning(ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22, 5), true, [id=#30] - -(33) HashAggregate [codegen id : 8] -Input [7]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22, sum#27, sum#28, sum#29] -Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22] -Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#7)), sum(UnscaledValue(ss_ext_list_price#8)), sum(UnscaledValue(ss_ext_tax#9))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#7))#31, sum(UnscaledValue(ss_ext_list_price#8))#32, sum(UnscaledValue(ss_ext_tax#9))#33] -Results [6]: [ss_ticket_number#6, ss_customer_sk#2, ca_city#22 AS bought_city#34, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#31,17,2) AS extended_price#35, MakeDecimal(sum(UnscaledValue(ss_ext_list_price#8))#32,17,2) AS list_price#36, MakeDecimal(sum(UnscaledValue(ss_ext_tax#9))#33,17,2) AS extended_tax#37] - -(34) Scan parquet default.customer -Output [4]: [c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct - -(35) ColumnarToRow [codegen id : 6] -Input [4]: [c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] - -(36) Filter [codegen id : 6] -Input [4]: [c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] -Condition : (isnotnull(c_customer_sk#38) AND isnotnull(c_current_addr_sk#39)) - -(37) BroadcastExchange -Input [4]: [c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#42] - -(38) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#38] -Join condition: None - -(39) Project [codegen id : 8] -Output [8]: [ss_ticket_number#6, bought_city#34, extended_price#35, list_price#36, extended_tax#37, c_current_addr_sk#39, c_first_name#40, c_last_name#41] -Input [10]: [ss_ticket_number#6, ss_customer_sk#2, bought_city#34, extended_price#35, list_price#36, extended_tax#37, c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] - -(40) ReusedExchange [Reuses operator id: 28] -Output [2]: [ca_address_sk#21, ca_city#22] - -(41) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [c_current_addr_sk#39] -Right keys [1]: [ca_address_sk#21] -Join condition: NOT (ca_city#22 = bought_city#34) - -(42) Project [codegen id : 8] -Output [8]: [c_last_name#41, c_first_name#40, ca_city#22, bought_city#34, ss_ticket_number#6, extended_price#35, extended_tax#37, list_price#36] -Input [10]: [ss_ticket_number#6, bought_city#34, extended_price#35, list_price#36, extended_tax#37, c_current_addr_sk#39, c_first_name#40, c_last_name#41, ca_address_sk#21, ca_city#22] - -(43) TakeOrderedAndProject -Input [8]: [c_last_name#41, c_first_name#40, ca_city#22, bought_city#34, ss_ticket_number#6, extended_price#35, extended_tax#37, list_price#36] -Arguments: 100, [c_last_name#41 ASC NULLS FIRST, ss_ticket_number#6 ASC NULLS FIRST], [c_last_name#41, c_first_name#40, ca_city#22, bought_city#34, ss_ticket_number#6, extended_price#35, extended_tax#37, list_price#36] - +TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,ss_ticket_number#2 ASC NULLS FIRST], output=[c_last_name#1,c_first_name#3,ca_city#4,bought_city#5,ss_ticket_number#2,extended_price#6,extended_tax#7,list_price#8]) ++- *(8) Project [c_last_name#1, c_first_name#3, ca_city#4, bought_city#5, ss_ticket_number#2, extended_price#6, extended_tax#7, list_price#8] + +- *(8) BroadcastHashJoin [c_current_addr_sk#9], [ca_address_sk#10], Inner, BuildRight, NOT (ca_city#4 = bought_city#5) + :- *(8) Project [ss_ticket_number#2, bought_city#5, extended_price#6, list_price#8, extended_tax#7, c_current_addr_sk#9, c_first_name#3, c_last_name#1] + : +- *(8) BroadcastHashJoin [ss_customer_sk#11], [c_customer_sk#12], Inner, BuildRight + : :- *(8) HashAggregate(keys=[ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4], functions=[sum(UnscaledValue(ss_ext_sales_price#14)), sum(UnscaledValue(ss_ext_list_price#15)), sum(UnscaledValue(ss_ext_tax#16))]) + : : +- Exchange hashpartitioning(ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4, 5) + : : +- *(5) HashAggregate(keys=[ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#14)), partial_sum(UnscaledValue(ss_ext_list_price#15)), partial_sum(UnscaledValue(ss_ext_tax#16))]) + : : +- *(5) Project [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16, ca_city#4] + : : +- *(5) BroadcastHashJoin [ss_addr_sk#13], [ca_address_sk#10], Inner, BuildRight + : : :- *(5) Project [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] + : : : +- *(5) BroadcastHashJoin [ss_hdemo_sk#17], [hd_demo_sk#18], Inner, BuildRight + : : : :- *(5) Project [ss_customer_sk#11, ss_hdemo_sk#17, ss_addr_sk#13, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] + : : : : +- *(5) BroadcastHashJoin [ss_store_sk#19], [s_store_sk#20], Inner, BuildRight + : : : : :- *(5) Project [ss_customer_sk#11, ss_hdemo_sk#17, ss_addr_sk#13, ss_store_sk#19, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] + : : : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight + : : : : : :- *(5) Project [ss_sold_date_sk#21, ss_customer_sk#11, ss_hdemo_sk#17, ss_addr_sk#13, ss_store_sk#19, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] + : : : : : : +- *(5) Filter ((((isnotnull(ss_sold_date_sk#21) && isnotnull(ss_store_sk#19)) && isnotnull(ss_hdemo_sk#17)) && isnotnull(ss_addr_sk#13)) && isnotnull(ss_customer_sk#11)) + : : : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#21,ss_customer_sk#11,ss_hdemo_sk#17,ss_addr_sk#13,ss_store_sk#19,ss_ticket_number#2,ss_ext_sales_price#14,ss_ext_list_price#15,ss_ext_tax#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk..., ReadSchema: struct= 1)) && (d_dom#23 <= 2)) && d_year#24 IN (1999,2000,2001)) && isnotnull(d_date_sk#22)) + : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#22,d_year#24,d_dom#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [s_store_sk#20] + : : : : +- *(2) Filter (s_city#25 IN (Midway,Fairview) && isnotnull(s_store_sk#20)) + : : : : +- *(2) FileScan parquet default.store[s_store_sk#20,s_city#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [In(s_city, [Midway,Fairview]), IsNotNull(s_store_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(3) Project [hd_demo_sk#18] + : : : +- *(3) Filter (((hd_dep_count#26 = 4) || (hd_vehicle_count#27 = 3)) && isnotnull(hd_demo_sk#18)) + : : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#18,hd_dep_count#26,hd_vehicle_count#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(4) Project [ca_address_sk#10, ca_city#4] + : : +- *(4) Filter (isnotnull(ca_address_sk#10) && isnotnull(ca_city#4)) + : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_city#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [c_customer_sk#12, c_current_addr_sk#9, c_first_name#3, c_last_name#1] + : +- *(6) Filter (isnotnull(c_customer_sk#12) && isnotnull(c_current_addr_sk#9)) + : +- *(6) FileScan parquet default.customer[c_customer_sk#12,c_current_addr_sk#9,c_first_name#3,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + +- ReusedExchange [ca_address_sk#10, ca_city#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/simplified.txt index 819e5504f..579970f83 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/simplified.txt @@ -1,63 +1,54 @@ -TakeOrderedAndProject [c_last_name,ss_ticket_number,c_first_name,ca_city,bought_city,extended_price,extended_tax,list_price] - WholeStageCodegen (8) - Project [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,extended_price,extended_tax,list_price] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] - Project [ss_ticket_number,bought_city,extended_price,list_price,extended_tax,c_current_addr_sk,c_first_name,c_last_name] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,sum,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_tax)),bought_city,extended_price,list_price,extended_tax,sum,sum,sum] +TakeOrderedAndProject [bought_city,c_first_name,c_last_name,ca_city,extended_price,extended_tax,list_price,ss_ticket_number] + WholeStageCodegen + Project [bought_city,c_first_name,c_last_name,ca_city,extended_price,extended_tax,list_price,ss_ticket_number] + BroadcastHashJoin [bought_city,c_current_addr_sk,ca_address_sk,ca_city] + Project [bought_city,c_current_addr_sk,c_first_name,c_last_name,extended_price,extended_tax,list_price,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum,sum,sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_tax))] [bought_city,extended_price,extended_tax,list_price,sum,sum,sum,sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_tax))] InputAdapter - Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 - WholeStageCodegen (5) - HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] [sum,sum,sum,sum,sum,sum] - Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ca_city] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_store_sk,ss_hdemo_sk,ss_addr_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + Exchange [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #1 + WholeStageCodegen + HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [ca_city,ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] + BroadcastHashJoin [ca_address_sk,ss_addr_sk] + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Filter [ss_addr_sk,ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) + WholeStageCodegen Project [d_date_sk] - Filter [d_dom,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_dom] + Filter [d_date_sk,d_dom,d_year] + Scan parquet default.date_dim [d_date_sk,d_dom,d_year] [d_date_sk,d_dom,d_year] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) + WholeStageCodegen Project [s_store_sk] Filter [s_city,s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_city] + Scan parquet default.store [s_city,s_store_sk] [s_city,s_store_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) + WholeStageCodegen Project [hd_demo_sk] - Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) - Filter [ca_address_sk,ca_city] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_city] + WholeStageCodegen + Project [ca_address_sk,ca_city] + Filter [ca_address_sk,ca_city] + Scan parquet default.customer_address [ca_address_sk,ca_city] [ca_address_sk,ca_city] InputAdapter BroadcastExchange #6 - WholeStageCodegen (6) - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] + WholeStageCodegen + Project [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] + Filter [c_current_addr_sk,c_customer_sk] + Scan parquet default.customer [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] InputAdapter - ReusedExchange [ca_address_sk,ca_city] #5 + ReusedExchange [ca_address_sk,ca_city] [ca_address_sk,ca_city] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/explain.txt index b21892546..ad4a155f0 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/explain.txt @@ -1,274 +1,48 @@ == Physical Plan == -TakeOrderedAndProject (49) -+- * HashAggregate (48) - +- Exchange (47) - +- * HashAggregate (46) - +- * Project (45) - +- * BroadcastHashJoin Inner BuildRight (44) - :- * Project (39) - : +- * BroadcastHashJoin Inner BuildRight (38) - : :- * Project (32) - : : +- * BroadcastHashJoin LeftAnti BuildRight (31) - : : :- * BroadcastHashJoin LeftAnti BuildRight (23) - : : : :- * BroadcastHashJoin LeftSemi BuildRight (15) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.customer (1) - : : : : +- BroadcastExchange (14) - : : : : +- * Project (13) - : : : : +- * BroadcastHashJoin Inner BuildRight (12) - : : : : :- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.store_sales (4) - : : : : +- BroadcastExchange (11) - : : : : +- * Project (10) - : : : : +- * Filter (9) - : : : : +- * ColumnarToRow (8) - : : : : +- Scan parquet default.date_dim (7) - : : : +- BroadcastExchange (22) - : : : +- * Project (21) - : : : +- * BroadcastHashJoin Inner BuildRight (20) - : : : :- * Filter (18) - : : : : +- * ColumnarToRow (17) - : : : : +- Scan parquet default.web_sales (16) - : : : +- ReusedExchange (19) - : : +- BroadcastExchange (30) - : : +- * Project (29) - : : +- * BroadcastHashJoin Inner BuildRight (28) - : : :- * Filter (26) - : : : +- * ColumnarToRow (25) - : : : +- Scan parquet default.catalog_sales (24) - : : +- ReusedExchange (27) - : +- BroadcastExchange (37) - : +- * Project (36) - : +- * Filter (35) - : +- * ColumnarToRow (34) - : +- Scan parquet default.customer_address (33) - +- BroadcastExchange (43) - +- * Filter (42) - +- * ColumnarToRow (41) - +- Scan parquet default.customer_demographics (40) - - -(1) Scan parquet default.customer -Output [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 9] -Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] - -(3) Filter [codegen id : 9] -Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] -Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) - -(4) Scan parquet default.store_sales -Output [2]: [ss_sold_date_sk#4, ss_customer_sk#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 2] -Input [2]: [ss_sold_date_sk#4, ss_customer_sk#5] - -(6) Filter [codegen id : 2] -Input [2]: [ss_sold_date_sk#4, ss_customer_sk#5] -Condition : isnotnull(ss_sold_date_sk#4) - -(7) Scan parquet default.date_dim -Output [3]: [d_date_sk#6, d_year#7, d_moy#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,6), IsNotNull(d_date_sk)] -ReadSchema: struct - -(8) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#7, d_moy#8] - -(9) Filter [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#7, d_moy#8] -Condition : (((((isnotnull(d_year#7) AND isnotnull(d_moy#8)) AND (d_year#7 = 2001)) AND (d_moy#8 >= 4)) AND (d_moy#8 <= 6)) AND isnotnull(d_date_sk#6)) - -(10) Project [codegen id : 1] -Output [1]: [d_date_sk#6] -Input [3]: [d_date_sk#6, d_year#7, d_moy#8] - -(11) BroadcastExchange -Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] - -(12) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#4] -Right keys [1]: [d_date_sk#6] -Join condition: None - -(13) Project [codegen id : 2] -Output [1]: [ss_customer_sk#5] -Input [3]: [ss_sold_date_sk#4, ss_customer_sk#5, d_date_sk#6] - -(14) BroadcastExchange -Input [1]: [ss_customer_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] - -(15) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ss_customer_sk#5] -Join condition: None - -(16) Scan parquet default.web_sales -Output [2]: [ws_sold_date_sk#11, ws_bill_customer_sk#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 4] -Input [2]: [ws_sold_date_sk#11, ws_bill_customer_sk#12] - -(18) Filter [codegen id : 4] -Input [2]: [ws_sold_date_sk#11, ws_bill_customer_sk#12] -Condition : isnotnull(ws_sold_date_sk#11) - -(19) ReusedExchange [Reuses operator id: 11] -Output [1]: [d_date_sk#6] - -(20) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ws_sold_date_sk#11] -Right keys [1]: [d_date_sk#6] -Join condition: None - -(21) Project [codegen id : 4] -Output [1]: [ws_bill_customer_sk#12] -Input [3]: [ws_sold_date_sk#11, ws_bill_customer_sk#12, d_date_sk#6] - -(22) BroadcastExchange -Input [1]: [ws_bill_customer_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] - -(23) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ws_bill_customer_sk#12] -Join condition: None - -(24) Scan parquet default.catalog_sales -Output [2]: [cs_sold_date_sk#14, cs_ship_customer_sk#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(25) ColumnarToRow [codegen id : 6] -Input [2]: [cs_sold_date_sk#14, cs_ship_customer_sk#15] - -(26) Filter [codegen id : 6] -Input [2]: [cs_sold_date_sk#14, cs_ship_customer_sk#15] -Condition : isnotnull(cs_sold_date_sk#14) - -(27) ReusedExchange [Reuses operator id: 11] -Output [1]: [d_date_sk#6] - -(28) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_sold_date_sk#14] -Right keys [1]: [d_date_sk#6] -Join condition: None - -(29) Project [codegen id : 6] -Output [1]: [cs_ship_customer_sk#15] -Input [3]: [cs_sold_date_sk#14, cs_ship_customer_sk#15, d_date_sk#6] - -(30) BroadcastExchange -Input [1]: [cs_ship_customer_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] - -(31) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [cs_ship_customer_sk#15] -Join condition: None - -(32) Project [codegen id : 9] -Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] -Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] - -(33) Scan parquet default.customer_address -Output [2]: [ca_address_sk#17, ca_state#18] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [In(ca_state, [KY,GA,NM]), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(34) ColumnarToRow [codegen id : 7] -Input [2]: [ca_address_sk#17, ca_state#18] - -(35) Filter [codegen id : 7] -Input [2]: [ca_address_sk#17, ca_state#18] -Condition : (ca_state#18 IN (KY,GA,NM) AND isnotnull(ca_address_sk#17)) - -(36) Project [codegen id : 7] -Output [1]: [ca_address_sk#17] -Input [2]: [ca_address_sk#17, ca_state#18] - -(37) BroadcastExchange -Input [1]: [ca_address_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] - -(38) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [c_current_addr_sk#3] -Right keys [1]: [ca_address_sk#17] -Join condition: None - -(39) Project [codegen id : 9] -Output [1]: [c_current_cdemo_sk#2] -Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#17] - -(40) Scan parquet default.customer_demographics -Output [6]: [cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_demo_sk)] -ReadSchema: struct - -(41) ColumnarToRow [codegen id : 8] -Input [6]: [cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] - -(42) Filter [codegen id : 8] -Input [6]: [cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] -Condition : isnotnull(cd_demo_sk#20) - -(43) BroadcastExchange -Input [6]: [cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] - -(44) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [c_current_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#20] -Join condition: None - -(45) Project [codegen id : 9] -Output [5]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] -Input [7]: [c_current_cdemo_sk#2, cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] - -(46) HashAggregate [codegen id : 9] -Input [5]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] -Keys [5]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#27] -Results [6]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25, count#28] - -(47) Exchange -Input [6]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25, count#28] -Arguments: hashpartitioning(cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25, 5), true, [id=#29] - -(48) HashAggregate [codegen id : 10] -Input [6]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25, count#28] -Keys [5]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#30] -Results [8]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, count(1)#30 AS cnt1#31, cd_purchase_estimate#24, count(1)#30 AS cnt2#32, cd_credit_rating#25, count(1)#30 AS cnt3#33] - -(49) TakeOrderedAndProject -Input [8]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cnt1#31, cd_purchase_estimate#24, cnt2#32, cd_credit_rating#25, cnt3#33] -Arguments: 100, [cd_gender#21 ASC NULLS FIRST, cd_marital_status#22 ASC NULLS FIRST, cd_education_status#23 ASC NULLS FIRST, cd_purchase_estimate#24 ASC NULLS FIRST, cd_credit_rating#25 ASC NULLS FIRST], [cd_gender#21, cd_marital_status#22, cd_education_status#23, cnt1#31, cd_purchase_estimate#24, cnt2#32, cd_credit_rating#25, cnt3#33] - +TakeOrderedAndProject(limit=100, orderBy=[cd_gender#1 ASC NULLS FIRST,cd_marital_status#2 ASC NULLS FIRST,cd_education_status#3 ASC NULLS FIRST,cd_purchase_estimate#4 ASC NULLS FIRST,cd_credit_rating#5 ASC NULLS FIRST], output=[cd_gender#1,cd_marital_status#2,cd_education_status#3,cnt1#6,cd_purchase_estimate#4,cnt2#7,cd_credit_rating#5,cnt3#8]) ++- *(10) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5], functions=[count(1)]) + +- Exchange hashpartitioning(cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, 5) + +- *(9) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5], functions=[partial_count(1)]) + +- *(9) Project [cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5] + +- *(9) BroadcastHashJoin [c_current_cdemo_sk#9], [cd_demo_sk#10], Inner, BuildRight + :- *(9) Project [c_current_cdemo_sk#9] + : +- *(9) BroadcastHashJoin [c_current_addr_sk#11], [ca_address_sk#12], Inner, BuildRight + : :- *(9) Project [c_current_cdemo_sk#9, c_current_addr_sk#11] + : : +- *(9) BroadcastHashJoin [c_customer_sk#13], [cs_ship_customer_sk#14], LeftAnti, BuildRight + : : :- *(9) BroadcastHashJoin [c_customer_sk#13], [ws_bill_customer_sk#15], LeftAnti, BuildRight + : : : :- *(9) BroadcastHashJoin [c_customer_sk#13], [ss_customer_sk#16], LeftSemi, BuildRight + : : : : :- *(9) Project [c_customer_sk#13, c_current_cdemo_sk#9, c_current_addr_sk#11] + : : : : : +- *(9) Filter (isnotnull(c_current_addr_sk#11) && isnotnull(c_current_cdemo_sk#9)) + : : : : : +- *(9) FileScan parquet default.customer[c_customer_sk#13,c_current_cdemo_sk#9,c_current_addr_sk#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [ss_customer_sk#16] + : : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + : : : : :- *(2) Project [ss_sold_date_sk#17, ss_customer_sk#16] + : : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#17) + : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_customer_sk#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(1) Project [d_date_sk#18] + : : : : +- *(1) Filter (((((isnotnull(d_year#19) && isnotnull(d_moy#20)) && (d_year#19 = 2001)) && (d_moy#20 >= 4)) && (d_moy#20 <= 6)) && isnotnull(d_date_sk#18)) + : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_year#19,d_moy#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), GreaterThanOrEqual(d_moy,4), LessThan..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [ws_bill_customer_sk#15] + : : : +- *(4) BroadcastHashJoin [ws_sold_date_sk#21], [d_date_sk#18], Inner, BuildRight + : : : :- *(4) Project [ws_sold_date_sk#21, ws_bill_customer_sk#15] + : : : : +- *(4) Filter isnotnull(ws_sold_date_sk#21) + : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [cs_ship_customer_sk#14] + : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#22], [d_date_sk#18], Inner, BuildRight + : : :- *(6) Project [cs_sold_date_sk#22, cs_ship_customer_sk#14] + : : : +- *(6) Filter isnotnull(cs_sold_date_sk#22) + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#22,cs_ship_customer_sk#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [ca_address_sk#12] + : +- *(7) Filter (ca_state#23 IN (KY,GA,NM) && isnotnull(ca_address_sk#12)) + : +- *(7) FileScan parquet default.customer_address[ca_address_sk#12,ca_state#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [In(ca_state, [KY,GA,NM]), IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [cd_demo_sk#10, cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5] + +- *(8) Filter isnotnull(cd_demo_sk#10) + +- *(8) FileScan parquet default.customer_demographics[cd_demo_sk#10,cd_gender#1,cd_marital_status#2,cd_education_status#3,cd_purchase_estimate#4,cd_credit_rating#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct - -(2) ColumnarToRow [codegen id : 5] -Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] - -(3) Filter [codegen id : 5] -Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] -Condition : (((isnotnull(ss_cdemo_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_promo_sk#4)) - -(4) Scan parquet default.customer_demographics -Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College), IsNotNull(cd_demo_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] - -(6) Filter [codegen id : 1] -Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] -Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College)) AND isnotnull(cd_demo_sk#9)) - -(7) Project [codegen id : 1] -Output [1]: [cd_demo_sk#9] -Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] - -(8) BroadcastExchange -Input [1]: [cd_demo_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] - -(9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_cdemo_sk#3] -Right keys [1]: [cd_demo_sk#9] -Join condition: None - -(10) Project [codegen id : 5] -Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] -Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#9] - -(11) Scan parquet default.date_dim -Output [2]: [d_date_sk#14, d_year#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#14, d_year#15] - -(13) Filter [codegen id : 2] -Input [2]: [d_date_sk#14, d_year#15] -Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2000)) AND isnotnull(d_date_sk#14)) - -(14) Project [codegen id : 2] -Output [1]: [d_date_sk#14] -Input [2]: [d_date_sk#14, d_year#15] - -(15) BroadcastExchange -Input [1]: [d_date_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] - -(16) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#14] -Join condition: None - -(17) Project [codegen id : 5] -Output [6]: [ss_item_sk#2, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] -Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#14] - -(18) Scan parquet default.item -Output [2]: [i_item_sk#17, i_item_id#18] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 3] -Input [2]: [i_item_sk#17, i_item_id#18] - -(20) Filter [codegen id : 3] -Input [2]: [i_item_sk#17, i_item_id#18] -Condition : isnotnull(i_item_sk#17) - -(21) BroadcastExchange -Input [2]: [i_item_sk#17, i_item_id#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] - -(22) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#17] -Join condition: None - -(23) Project [codegen id : 5] -Output [6]: [ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#18] -Input [8]: [ss_item_sk#2, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_sk#17, i_item_id#18] - -(24) Scan parquet default.promotion -Output [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] -Batched: true -Location [not included in comparison]/{warehouse_dir}/promotion] -PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] -ReadSchema: struct - -(25) ColumnarToRow [codegen id : 4] -Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] - -(26) Filter [codegen id : 4] -Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] -Condition : (((p_channel_email#21 = N) OR (p_channel_event#22 = N)) AND isnotnull(p_promo_sk#20)) - -(27) Project [codegen id : 4] -Output [1]: [p_promo_sk#20] -Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] - -(28) BroadcastExchange -Input [1]: [p_promo_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] - -(29) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_promo_sk#4] -Right keys [1]: [p_promo_sk#20] -Join condition: None - -(30) Project [codegen id : 5] -Output [5]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#18] -Input [7]: [ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#18, p_promo_sk#20] - -(31) HashAggregate [codegen id : 5] -Input [5]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#18] -Keys [1]: [i_item_id#18] -Functions [4]: [partial_avg(cast(ss_quantity#5 as bigint)), partial_avg(UnscaledValue(ss_list_price#6)), partial_avg(UnscaledValue(ss_coupon_amt#8)), partial_avg(UnscaledValue(ss_sales_price#7))] -Aggregate Attributes [8]: [sum#24, count#25, sum#26, count#27, sum#28, count#29, sum#30, count#31] -Results [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] - -(32) Exchange -Input [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] -Arguments: hashpartitioning(i_item_id#18, 5), true, [id=#40] - -(33) HashAggregate [codegen id : 6] -Input [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] -Keys [1]: [i_item_id#18] -Functions [4]: [avg(cast(ss_quantity#5 as bigint)), avg(UnscaledValue(ss_list_price#6)), avg(UnscaledValue(ss_coupon_amt#8)), avg(UnscaledValue(ss_sales_price#7))] -Aggregate Attributes [4]: [avg(cast(ss_quantity#5 as bigint))#41, avg(UnscaledValue(ss_list_price#6))#42, avg(UnscaledValue(ss_coupon_amt#8))#43, avg(UnscaledValue(ss_sales_price#7))#44] -Results [5]: [i_item_id#18, avg(cast(ss_quantity#5 as bigint))#41 AS agg1#45, cast((avg(UnscaledValue(ss_list_price#6))#42 / 100.0) as decimal(11,6)) AS agg2#46, cast((avg(UnscaledValue(ss_coupon_amt#8))#43 / 100.0) as decimal(11,6)) AS agg3#47, cast((avg(UnscaledValue(ss_sales_price#7))#44 / 100.0) as decimal(11,6)) AS agg4#48] - -(34) TakeOrderedAndProject -Input [5]: [i_item_id#18, agg1#45, agg2#46, agg3#47, agg4#48] -Arguments: 100, [i_item_id#18 ASC NULLS FIRST], [i_item_id#18, agg1#45, agg2#46, agg3#47, agg4#48] - +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[i_item_id#1,agg1#2,agg2#3,agg3#4,agg4#5]) ++- *(6) HashAggregate(keys=[i_item_id#1], functions=[avg(cast(ss_quantity#6 as bigint)), avg(UnscaledValue(ss_list_price#7)), avg(UnscaledValue(ss_coupon_amt#8)), avg(UnscaledValue(ss_sales_price#9))]) + +- Exchange hashpartitioning(i_item_id#1, 5) + +- *(5) HashAggregate(keys=[i_item_id#1], functions=[partial_avg(cast(ss_quantity#6 as bigint)), partial_avg(UnscaledValue(ss_list_price#7)), partial_avg(UnscaledValue(ss_coupon_amt#8)), partial_avg(UnscaledValue(ss_sales_price#9))]) + +- *(5) Project [ss_quantity#6, ss_list_price#7, ss_sales_price#9, ss_coupon_amt#8, i_item_id#1] + +- *(5) BroadcastHashJoin [ss_promo_sk#10], [p_promo_sk#11], Inner, BuildRight + :- *(5) Project [ss_promo_sk#10, ss_quantity#6, ss_list_price#7, ss_sales_price#9, ss_coupon_amt#8, i_item_id#1] + : +- *(5) BroadcastHashJoin [ss_item_sk#12], [i_item_sk#13], Inner, BuildRight + : :- *(5) Project [ss_item_sk#12, ss_promo_sk#10, ss_quantity#6, ss_list_price#7, ss_sales_price#9, ss_coupon_amt#8] + : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight + : : :- *(5) Project [ss_sold_date_sk#14, ss_item_sk#12, ss_promo_sk#10, ss_quantity#6, ss_list_price#7, ss_sales_price#9, ss_coupon_amt#8] + : : : +- *(5) BroadcastHashJoin [ss_cdemo_sk#16], [cd_demo_sk#17], Inner, BuildRight + : : : :- *(5) Project [ss_sold_date_sk#14, ss_item_sk#12, ss_cdemo_sk#16, ss_promo_sk#10, ss_quantity#6, ss_list_price#7, ss_sales_price#9, ss_coupon_amt#8] + : : : : +- *(5) Filter (((isnotnull(ss_cdemo_sk#16) && isnotnull(ss_sold_date_sk#14)) && isnotnull(ss_item_sk#12)) && isnotnull(ss_promo_sk#10)) + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#14,ss_item_sk#12,ss_cdemo_sk#16,ss_promo_sk#10,ss_quantity#6,ss_list_price#7,ss_sales_price#9,ss_coupon_amt#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [d_date_sk#15] + : : +- *(2) Filter ((isnotnull(d_year#21) && (d_year#21 = 2000)) && isnotnull(d_date_sk#15)) + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#15,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [i_item_sk#13, i_item_id#1] + : +- *(3) Filter isnotnull(i_item_sk#13) + : +- *(3) FileScan parquet default.item[i_item_sk#13,i_item_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [p_promo_sk#11] + +- *(4) Filter (((p_channel_email#22 = N) || (p_channel_event#23 = N)) && isnotnull(p_promo_sk#11)) + +- *(4) FileScan parquet default.promotion[p_promo_sk#11,p_channel_email#22,p_channel_event#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/simplified.txt index 075c6d15c..2cf9df4fd 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/simplified.txt @@ -1,50 +1,42 @@ -TakeOrderedAndProject [i_item_id,agg1,agg2,agg3,agg4] - WholeStageCodegen (6) - HashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count] [avg(cast(ss_quantity as bigint)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] +TakeOrderedAndProject [agg1,agg2,agg3,agg4,i_item_id] + WholeStageCodegen + HashAggregate [avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,i_item_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,sum,sum,sum,sum] InputAdapter Exchange [i_item_id] #1 - WholeStageCodegen (5) - HashAggregate [i_item_id,ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] - Project [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] - BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] - Filter [ss_cdemo_sk,ss_sold_date_sk,ss_item_sk,ss_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + WholeStageCodegen + HashAggregate [count,count,count,count,count,count,count,count,i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] + Project [i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [i_item_id,ss_coupon_amt,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] + Project [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + Filter [ss_cdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) + WholeStageCodegen Project [cd_demo_sk] - Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] + Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id] + WholeStageCodegen + Project [i_item_id,i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) + WholeStageCodegen Project [p_promo_sk] Filter [p_channel_email,p_channel_event,p_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.promotion [p_promo_sk,p_channel_email,p_channel_event] + Scan parquet default.promotion [p_channel_email,p_channel_event,p_promo_sk] [p_channel_email,p_channel_event,p_promo_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/explain.txt index 05b533aa6..e0113e54d 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/explain.txt @@ -1,264 +1,46 @@ == Physical Plan == -TakeOrderedAndProject (47) -+- * Project (46) - +- Window (45) - +- * Sort (44) - +- Exchange (43) - +- * HashAggregate (42) - +- Exchange (41) - +- * HashAggregate (40) - +- * Expand (39) - +- * Project (38) - +- * BroadcastHashJoin Inner BuildRight (37) - :- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.store_sales (1) - : +- BroadcastExchange (8) - : +- * Project (7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.date_dim (4) - +- BroadcastExchange (36) - +- * BroadcastHashJoin LeftSemi BuildRight (35) - :- * Filter (13) - : +- * ColumnarToRow (12) - : +- Scan parquet default.store (11) - +- BroadcastExchange (34) - +- * Project (33) - +- * Filter (32) - +- Window (31) - +- * Sort (30) - +- Exchange (29) - +- * HashAggregate (28) - +- Exchange (27) - +- * HashAggregate (26) - +- * Project (25) - +- * BroadcastHashJoin Inner BuildRight (24) - :- * Project (22) - : +- * BroadcastHashJoin Inner BuildRight (21) - : :- * Filter (16) - : : +- * ColumnarToRow (15) - : : +- Scan parquet default.store_sales (14) - : +- BroadcastExchange (20) - : +- * Filter (19) - : +- * ColumnarToRow (18) - : +- Scan parquet default.store (17) - +- ReusedExchange (23) - - -(1) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 9] -Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] - -(3) Filter [codegen id : 9] -Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] -Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) - -(4) Scan parquet default.date_dim -Output [2]: [d_date_sk#4, d_month_seq#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#4, d_month_seq#5] - -(6) Filter [codegen id : 1] -Input [2]: [d_date_sk#4, d_month_seq#5] -Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#4] -Input [2]: [d_date_sk#4, d_month_seq#5] - -(8) BroadcastExchange -Input [1]: [d_date_sk#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] - -(9) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#4] -Join condition: None - -(10) Project [codegen id : 9] -Output [2]: [ss_store_sk#2, ss_net_profit#3] -Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3, d_date_sk#4] - -(11) Scan parquet default.store -Output [3]: [s_store_sk#7, s_county#8, s_state#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 8] -Input [3]: [s_store_sk#7, s_county#8, s_state#9] - -(13) Filter [codegen id : 8] -Input [3]: [s_store_sk#7, s_county#8, s_state#9] -Condition : isnotnull(s_store_sk#7) - -(14) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(15) ColumnarToRow [codegen id : 4] -Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] - -(16) Filter [codegen id : 4] -Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] -Condition : (isnotnull(ss_store_sk#2) AND isnotnull(ss_sold_date_sk#1)) - -(17) Scan parquet default.store -Output [2]: [s_store_sk#7, s_state#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct - -(18) ColumnarToRow [codegen id : 2] -Input [2]: [s_store_sk#7, s_state#9] - -(19) Filter [codegen id : 2] -Input [2]: [s_store_sk#7, s_state#9] -Condition : isnotnull(s_store_sk#7) - -(20) BroadcastExchange -Input [2]: [s_store_sk#7, s_state#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] - -(21) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#2] -Right keys [1]: [s_store_sk#7] -Join condition: None - -(22) Project [codegen id : 4] -Output [3]: [ss_sold_date_sk#1, ss_net_profit#3, s_state#9] -Input [5]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3, s_store_sk#7, s_state#9] - -(23) ReusedExchange [Reuses operator id: 8] -Output [1]: [d_date_sk#4] - -(24) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#4] -Join condition: None - -(25) Project [codegen id : 4] -Output [2]: [ss_net_profit#3, s_state#9] -Input [4]: [ss_sold_date_sk#1, ss_net_profit#3, s_state#9, d_date_sk#4] - -(26) HashAggregate [codegen id : 4] -Input [2]: [ss_net_profit#3, s_state#9] -Keys [1]: [s_state#9] -Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [1]: [sum#11] -Results [2]: [s_state#9, sum#12] - -(27) Exchange -Input [2]: [s_state#9, sum#12] -Arguments: hashpartitioning(s_state#9, 5), true, [id=#13] - -(28) HashAggregate [codegen id : 5] -Input [2]: [s_state#9, sum#12] -Keys [1]: [s_state#9] -Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#14] -Results [3]: [s_state#9 AS s_state#15, s_state#9, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#14,17,2) AS _w2#16] - -(29) Exchange -Input [3]: [s_state#15, s_state#9, _w2#16] -Arguments: hashpartitioning(s_state#9, 5), true, [id=#17] - -(30) Sort [codegen id : 6] -Input [3]: [s_state#15, s_state#9, _w2#16] -Arguments: [s_state#9 ASC NULLS FIRST, _w2#16 DESC NULLS LAST], false, 0 - -(31) Window -Input [3]: [s_state#15, s_state#9, _w2#16] -Arguments: [rank(_w2#16) windowspecdefinition(s_state#9, _w2#16 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#18], [s_state#9], [_w2#16 DESC NULLS LAST] - -(32) Filter [codegen id : 7] -Input [4]: [s_state#15, s_state#9, _w2#16, ranking#18] -Condition : (isnotnull(ranking#18) AND (ranking#18 <= 5)) - -(33) Project [codegen id : 7] -Output [1]: [s_state#15] -Input [4]: [s_state#15, s_state#9, _w2#16, ranking#18] - -(34) BroadcastExchange -Input [1]: [s_state#15] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#19] - -(35) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [s_state#9] -Right keys [1]: [s_state#15] -Join condition: None - -(36) BroadcastExchange -Input [3]: [s_store_sk#7, s_county#8, s_state#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] - -(37) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_store_sk#2] -Right keys [1]: [s_store_sk#7] -Join condition: None - -(38) Project [codegen id : 9] -Output [3]: [ss_net_profit#3, s_state#9, s_county#8] -Input [5]: [ss_store_sk#2, ss_net_profit#3, s_store_sk#7, s_county#8, s_state#9] - -(39) Expand [codegen id : 9] -Input [3]: [ss_net_profit#3, s_state#9, s_county#8] -Arguments: [List(ss_net_profit#3, s_state#9, s_county#8, 0), List(ss_net_profit#3, s_state#9, null, 1), List(ss_net_profit#3, null, null, 3)], [ss_net_profit#3, s_state#21, s_county#22, spark_grouping_id#23] - -(40) HashAggregate [codegen id : 9] -Input [4]: [ss_net_profit#3, s_state#21, s_county#22, spark_grouping_id#23] -Keys [3]: [s_state#21, s_county#22, spark_grouping_id#23] -Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [1]: [sum#24] -Results [4]: [s_state#21, s_county#22, spark_grouping_id#23, sum#25] - -(41) Exchange -Input [4]: [s_state#21, s_county#22, spark_grouping_id#23, sum#25] -Arguments: hashpartitioning(s_state#21, s_county#22, spark_grouping_id#23, 5), true, [id=#26] - -(42) HashAggregate [codegen id : 10] -Input [4]: [s_state#21, s_county#22, spark_grouping_id#23, sum#25] -Keys [3]: [s_state#21, s_county#22, spark_grouping_id#23] -Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#27] -Results [7]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#27,17,2) AS total_sum#28, s_state#21, s_county#22, (cast((shiftright(spark_grouping_id#23, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#23, 0) & 1) as tinyint)) AS lochierarchy#29, (cast((shiftright(spark_grouping_id#23, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#23, 0) & 1) as tinyint)) AS _w1#30, CASE WHEN (cast(cast((shiftright(spark_grouping_id#23, 0) & 1) as tinyint) as int) = 0) THEN s_state#21 END AS _w2#31, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#27,17,2) AS _w3#32] - -(43) Exchange -Input [7]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, _w1#30, _w2#31, _w3#32] -Arguments: hashpartitioning(_w1#30, _w2#31, 5), true, [id=#33] - -(44) Sort [codegen id : 11] -Input [7]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, _w1#30, _w2#31, _w3#32] -Arguments: [_w1#30 ASC NULLS FIRST, _w2#31 ASC NULLS FIRST, _w3#32 DESC NULLS LAST], false, 0 - -(45) Window -Input [7]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, _w1#30, _w2#31, _w3#32] -Arguments: [rank(_w3#32) windowspecdefinition(_w1#30, _w2#31, _w3#32 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#34], [_w1#30, _w2#31], [_w3#32 DESC NULLS LAST] - -(46) Project [codegen id : 12] -Output [5]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, rank_within_parent#34] -Input [8]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, _w1#30, _w2#31, _w3#32, rank_within_parent#34] - -(47) TakeOrderedAndProject -Input [5]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, rank_within_parent#34] -Arguments: 100, [lochierarchy#29 DESC NULLS LAST, CASE WHEN (cast(lochierarchy#29 as int) = 0) THEN s_state#21 END ASC NULLS FIRST, rank_within_parent#34 ASC NULLS FIRST], [total_sum#28, s_state#21, s_county#22, lochierarchy#29, rank_within_parent#34] - +TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WHEN (cast(lochierarchy#1 as int) = 0) THEN s_state#2 END ASC NULLS FIRST,rank_within_parent#3 ASC NULLS FIRST], output=[total_sum#4,s_state#2,s_county#5,lochierarchy#1,rank_within_parent#3]) ++- *(11) Project [total_sum#4, s_state#2, s_county#5, lochierarchy#1, rank_within_parent#3] + +- Window [rank(_w3#6) windowspecdefinition(_w1#7, _w2#8, _w3#6 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#3], [_w1#7, _w2#8], [_w3#6 DESC NULLS LAST] + +- *(10) Sort [_w1#7 ASC NULLS FIRST, _w2#8 ASC NULLS FIRST, _w3#6 DESC NULLS LAST], false, 0 + +- Exchange hashpartitioning(_w1#7, _w2#8, 5) + +- *(9) HashAggregate(keys=[s_state#2, s_county#5, spark_grouping_id#9], functions=[sum(UnscaledValue(ss_net_profit#10))]) + +- Exchange hashpartitioning(s_state#2, s_county#5, spark_grouping_id#9, 5) + +- *(8) HashAggregate(keys=[s_state#2, s_county#5, spark_grouping_id#9], functions=[partial_sum(UnscaledValue(ss_net_profit#10))]) + +- *(8) Expand [List(ss_net_profit#10, s_state#11, s_county#12, 0), List(ss_net_profit#10, s_state#11, null, 1), List(ss_net_profit#10, null, null, 3)], [ss_net_profit#10, s_state#2, s_county#5, spark_grouping_id#9] + +- *(8) Project [ss_net_profit#10, s_state#13 AS s_state#11, s_county#14 AS s_county#12] + +- *(8) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight + :- *(8) Project [ss_store_sk#15, ss_net_profit#10] + : +- *(8) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + : :- *(8) Project [ss_sold_date_sk#17, ss_store_sk#15, ss_net_profit#10] + : : +- *(8) Filter (isnotnull(ss_sold_date_sk#17) && isnotnull(ss_store_sk#15)) + : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_store_sk#15,ss_net_profit#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [d_date_sk#18] + : +- *(1) Filter (((isnotnull(d_month_seq#19) && (d_month_seq#19 >= 1200)) && (d_month_seq#19 <= 1211)) && isnotnull(d_date_sk#18)) + : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_month_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) BroadcastHashJoin [s_state#13], [s_state#20], LeftSemi, BuildRight + :- *(7) Project [s_store_sk#16, s_county#14, s_state#13] + : +- *(7) Filter isnotnull(s_store_sk#16) + : +- *(7) FileScan parquet default.store[s_store_sk#16,s_county#14,s_state#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- *(6) Project [s_state#20] + +- *(6) Filter (isnotnull(ranking#21) && (ranking#21 <= 5)) + +- Window [rank(_w2#22) windowspecdefinition(s_state#13, _w2#22 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#21], [s_state#13], [_w2#22 DESC NULLS LAST] + +- *(5) Sort [s_state#13 ASC NULLS FIRST, _w2#22 DESC NULLS LAST], false, 0 + +- *(5) HashAggregate(keys=[s_state#13], functions=[sum(UnscaledValue(ss_net_profit#10))]) + +- Exchange hashpartitioning(s_state#13, 5) + +- *(4) HashAggregate(keys=[s_state#13], functions=[partial_sum(UnscaledValue(ss_net_profit#10))]) + +- *(4) Project [ss_net_profit#10, s_state#13] + +- *(4) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + :- *(4) Project [ss_sold_date_sk#17, ss_net_profit#10, s_state#13] + : +- *(4) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight + : :- *(4) Project [ss_sold_date_sk#17, ss_store_sk#15, ss_net_profit#10] + : : +- *(4) Filter (isnotnull(ss_store_sk#15) && isnotnull(ss_sold_date_sk#17)) + : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_store_sk#15,ss_net_profit#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [s_store_sk#16, s_state#13] + : +- *(2) Filter isnotnull(s_store_sk#16) + : +- *(2) FileScan parquet default.store[s_store_sk#16,s_state#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/simplified.txt index 5d01429ad..d2c001785 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/simplified.txt @@ -1,74 +1,65 @@ -TakeOrderedAndProject [lochierarchy,s_state,rank_within_parent,total_sum,s_county] - WholeStageCodegen (12) - Project [total_sum,s_state,s_county,lochierarchy,rank_within_parent] +TakeOrderedAndProject [lochierarchy,rank_within_parent,s_county,s_state,total_sum] + WholeStageCodegen + Project [lochierarchy,rank_within_parent,s_county,s_state,total_sum] InputAdapter - Window [_w3,_w1,_w2] - WholeStageCodegen (11) + Window [_w1,_w2,_w3] + WholeStageCodegen Sort [_w1,_w2,_w3] InputAdapter Exchange [_w1,_w2] #1 - WholeStageCodegen (10) - HashAggregate [s_state,s_county,spark_grouping_id,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,lochierarchy,_w1,_w2,_w3,sum] + WholeStageCodegen + HashAggregate [s_county,s_state,spark_grouping_id,sum,sum(UnscaledValue(ss_net_profit))] [_w1,_w2,_w3,lochierarchy,sum,sum(UnscaledValue(ss_net_profit)),total_sum] InputAdapter - Exchange [s_state,s_county,spark_grouping_id] #2 - WholeStageCodegen (9) - HashAggregate [s_state,s_county,spark_grouping_id,ss_net_profit] [sum,sum] - Expand [ss_net_profit,s_state,s_county] - Project [ss_net_profit,s_state,s_county] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] + Exchange [s_county,s_state,spark_grouping_id] #2 + WholeStageCodegen + HashAggregate [s_county,s_state,spark_grouping_id,ss_net_profit,sum,sum] [sum,sum] + Expand [s_county,s_state,ss_net_profit] + Project [s_county,s_state,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_net_profit,ss_sold_date_sk,ss_store_sk] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_net_profit,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) + WholeStageCodegen Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + Filter [d_date_sk,d_month_seq] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] InputAdapter BroadcastExchange #4 - WholeStageCodegen (8) + WholeStageCodegen BroadcastHashJoin [s_state,s_state] - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_county,s_state] + Project [s_county,s_state,s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_county,s_state,s_store_sk] [s_county,s_state,s_store_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (7) + WholeStageCodegen Project [s_state] Filter [ranking] InputAdapter Window [_w2,s_state] - WholeStageCodegen (6) - Sort [s_state,_w2] - InputAdapter - Exchange [s_state] #6 - WholeStageCodegen (5) - HashAggregate [s_state,sum] [sum(UnscaledValue(ss_net_profit)),s_state,_w2,sum] - InputAdapter - Exchange [s_state] #7 - WholeStageCodegen (4) - HashAggregate [s_state,ss_net_profit] [sum,sum] - Project [ss_net_profit,s_state] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_net_profit,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Filter [ss_store_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (2) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_state] - InputAdapter - ReusedExchange [d_date_sk] #3 + WholeStageCodegen + Sort [_w2,s_state] + HashAggregate [s_state,sum,sum(UnscaledValue(ss_net_profit))] [_w2,s_state,sum,sum(UnscaledValue(ss_net_profit))] + InputAdapter + Exchange [s_state] #6 + WholeStageCodegen + HashAggregate [s_state,ss_net_profit,sum,sum] [sum,sum] + Project [s_state,ss_net_profit] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [s_state,ss_net_profit,ss_sold_date_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_net_profit,ss_sold_date_sk,ss_store_sk] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_net_profit,ss_sold_date_sk,ss_store_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + Project [s_state,s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #3 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/explain.txt index 9471377a1..f11328994 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/explain.txt @@ -1,232 +1,40 @@ == Physical Plan == -* Sort (42) -+- Exchange (41) - +- * HashAggregate (40) - +- Exchange (39) - +- * HashAggregate (38) - +- * Project (37) - +- * BroadcastHashJoin Inner BuildRight (36) - :- * Project (30) - : +- * BroadcastHashJoin Inner BuildLeft (29) - : :- BroadcastExchange (5) - : : +- * Project (4) - : : +- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.item (1) - : +- Union (28) - : :- * Project (15) - : : +- * BroadcastHashJoin Inner BuildRight (14) - : : :- * Filter (8) - : : : +- * ColumnarToRow (7) - : : : +- Scan parquet default.web_sales (6) - : : +- BroadcastExchange (13) - : : +- * Project (12) - : : +- * Filter (11) - : : +- * ColumnarToRow (10) - : : +- Scan parquet default.date_dim (9) - : :- * Project (21) - : : +- * BroadcastHashJoin Inner BuildRight (20) - : : :- * Filter (18) - : : : +- * ColumnarToRow (17) - : : : +- Scan parquet default.catalog_sales (16) - : : +- ReusedExchange (19) - : +- * Project (27) - : +- * BroadcastHashJoin Inner BuildRight (26) - : :- * Filter (24) - : : +- * ColumnarToRow (23) - : : +- Scan parquet default.store_sales (22) - : +- ReusedExchange (25) - +- BroadcastExchange (35) - +- * Project (34) - +- * Filter (33) - +- * ColumnarToRow (32) - +- Scan parquet default.time_dim (31) - - -(1) Scan parquet default.item -Output [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 1] -Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] - -(3) Filter [codegen id : 1] -Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] -Condition : ((isnotnull(i_manager_id#4) AND (i_manager_id#4 = 1)) AND isnotnull(i_item_sk#1)) - -(4) Project [codegen id : 1] -Output [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] -Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] - -(5) BroadcastExchange -Input [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#5] - -(6) Scan parquet default.web_sales -Output [4]: [ws_sold_date_sk#6, ws_sold_time_sk#7, ws_item_sk#8, ws_ext_sales_price#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_time_sk)] -ReadSchema: struct - -(7) ColumnarToRow [codegen id : 3] -Input [4]: [ws_sold_date_sk#6, ws_sold_time_sk#7, ws_item_sk#8, ws_ext_sales_price#9] - -(8) Filter [codegen id : 3] -Input [4]: [ws_sold_date_sk#6, ws_sold_time_sk#7, ws_item_sk#8, ws_ext_sales_price#9] -Condition : ((isnotnull(ws_sold_date_sk#6) AND isnotnull(ws_item_sk#8)) AND isnotnull(ws_sold_time_sk#7)) - -(9) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#11, d_moy#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] -ReadSchema: struct - -(10) ColumnarToRow [codegen id : 2] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] - -(11) Filter [codegen id : 2] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] -Condition : ((((isnotnull(d_moy#12) AND isnotnull(d_year#11)) AND (d_moy#12 = 11)) AND (d_year#11 = 1999)) AND isnotnull(d_date_sk#10)) - -(12) Project [codegen id : 2] -Output [1]: [d_date_sk#10] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] - -(13) BroadcastExchange -Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] - -(14) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ws_sold_date_sk#6] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(15) Project [codegen id : 3] -Output [3]: [ws_ext_sales_price#9 AS ext_price#14, ws_item_sk#8 AS sold_item_sk#15, ws_sold_time_sk#7 AS time_sk#16] -Input [5]: [ws_sold_date_sk#6, ws_sold_time_sk#7, ws_item_sk#8, ws_ext_sales_price#9, d_date_sk#10] - -(16) Scan parquet default.catalog_sales -Output [4]: [cs_sold_date_sk#17, cs_sold_time_sk#18, cs_item_sk#19, cs_ext_sales_price#20] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_time_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 5] -Input [4]: [cs_sold_date_sk#17, cs_sold_time_sk#18, cs_item_sk#19, cs_ext_sales_price#20] - -(18) Filter [codegen id : 5] -Input [4]: [cs_sold_date_sk#17, cs_sold_time_sk#18, cs_item_sk#19, cs_ext_sales_price#20] -Condition : ((isnotnull(cs_sold_date_sk#17) AND isnotnull(cs_item_sk#19)) AND isnotnull(cs_sold_time_sk#18)) - -(19) ReusedExchange [Reuses operator id: 13] -Output [1]: [d_date_sk#10] - -(20) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_sold_date_sk#17] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(21) Project [codegen id : 5] -Output [3]: [cs_ext_sales_price#20 AS ext_price#21, cs_item_sk#19 AS sold_item_sk#22, cs_sold_time_sk#18 AS time_sk#23] -Input [5]: [cs_sold_date_sk#17, cs_sold_time_sk#18, cs_item_sk#19, cs_ext_sales_price#20, d_date_sk#10] - -(22) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#24, ss_sold_time_sk#25, ss_item_sk#26, ss_ext_sales_price#27] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_sold_time_sk)] -ReadSchema: struct - -(23) ColumnarToRow [codegen id : 7] -Input [4]: [ss_sold_date_sk#24, ss_sold_time_sk#25, ss_item_sk#26, ss_ext_sales_price#27] - -(24) Filter [codegen id : 7] -Input [4]: [ss_sold_date_sk#24, ss_sold_time_sk#25, ss_item_sk#26, ss_ext_sales_price#27] -Condition : ((isnotnull(ss_sold_date_sk#24) AND isnotnull(ss_item_sk#26)) AND isnotnull(ss_sold_time_sk#25)) - -(25) ReusedExchange [Reuses operator id: 13] -Output [1]: [d_date_sk#10] - -(26) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [ss_sold_date_sk#24] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(27) Project [codegen id : 7] -Output [3]: [ss_ext_sales_price#27 AS ext_price#28, ss_item_sk#26 AS sold_item_sk#29, ss_sold_time_sk#25 AS time_sk#30] -Input [5]: [ss_sold_date_sk#24, ss_sold_time_sk#25, ss_item_sk#26, ss_ext_sales_price#27, d_date_sk#10] - -(28) Union - -(29) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [i_item_sk#1] -Right keys [1]: [sold_item_sk#15] -Join condition: None - -(30) Project [codegen id : 9] -Output [4]: [i_brand_id#2, i_brand#3, ext_price#14, time_sk#16] -Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, ext_price#14, sold_item_sk#15, time_sk#16] - -(31) Scan parquet default.time_dim -Output [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] -Batched: true -Location [not included in comparison]/{warehouse_dir}/time_dim] -PushedFilters: [Or(EqualTo(t_meal_time,breakfast),EqualTo(t_meal_time,dinner)), IsNotNull(t_time_sk)] -ReadSchema: struct - -(32) ColumnarToRow [codegen id : 8] -Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] - -(33) Filter [codegen id : 8] -Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] -Condition : (((t_meal_time#34 = breakfast) OR (t_meal_time#34 = dinner)) AND isnotnull(t_time_sk#31)) - -(34) Project [codegen id : 8] -Output [3]: [t_time_sk#31, t_hour#32, t_minute#33] -Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] - -(35) BroadcastExchange -Input [3]: [t_time_sk#31, t_hour#32, t_minute#33] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#35] - -(36) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [time_sk#16] -Right keys [1]: [t_time_sk#31] -Join condition: None - -(37) Project [codegen id : 9] -Output [5]: [i_brand_id#2, i_brand#3, ext_price#14, t_hour#32, t_minute#33] -Input [7]: [i_brand_id#2, i_brand#3, ext_price#14, time_sk#16, t_time_sk#31, t_hour#32, t_minute#33] - -(38) HashAggregate [codegen id : 9] -Input [5]: [i_brand_id#2, i_brand#3, ext_price#14, t_hour#32, t_minute#33] -Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] -Functions [1]: [partial_sum(UnscaledValue(ext_price#14))] -Aggregate Attributes [1]: [sum#36] -Results [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#37] - -(39) Exchange -Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#37] -Arguments: hashpartitioning(i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, 5), true, [id=#38] - -(40) HashAggregate [codegen id : 10] -Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#37] -Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] -Functions [1]: [sum(UnscaledValue(ext_price#14))] -Aggregate Attributes [1]: [sum(UnscaledValue(ext_price#14))#39] -Results [5]: [i_brand_id#2 AS brand_id#40, i_brand#3 AS brand#41, t_hour#32, t_minute#33, MakeDecimal(sum(UnscaledValue(ext_price#14))#39,17,2) AS ext_price#42] - -(41) Exchange -Input [5]: [brand_id#40, brand#41, t_hour#32, t_minute#33, ext_price#42] -Arguments: rangepartitioning(ext_price#42 DESC NULLS LAST, brand_id#40 ASC NULLS FIRST, 5), true, [id=#43] - -(42) Sort [codegen id : 11] -Input [5]: [brand_id#40, brand#41, t_hour#32, t_minute#33, ext_price#42] -Arguments: [ext_price#42 DESC NULLS LAST, brand_id#40 ASC NULLS FIRST], true, 0 - +*(11) Sort [ext_price#1 DESC NULLS LAST, brand_id#2 ASC NULLS FIRST], true, 0 ++- Exchange rangepartitioning(ext_price#1 DESC NULLS LAST, brand_id#2 ASC NULLS FIRST, 5) + +- *(10) HashAggregate(keys=[i_brand#3, i_brand_id#4, t_hour#5, t_minute#6], functions=[sum(UnscaledValue(ext_price#7))]) + +- Exchange hashpartitioning(i_brand#3, i_brand_id#4, t_hour#5, t_minute#6, 5) + +- *(9) HashAggregate(keys=[i_brand#3, i_brand_id#4, t_hour#5, t_minute#6], functions=[partial_sum(UnscaledValue(ext_price#7))]) + +- *(9) Project [i_brand_id#4, i_brand#3, ext_price#7, t_hour#5, t_minute#6] + +- *(9) BroadcastHashJoin [time_sk#8], [t_time_sk#9], Inner, BuildRight + :- *(9) Project [i_brand_id#4, i_brand#3, ext_price#7, time_sk#8] + : +- *(9) BroadcastHashJoin [i_item_sk#10], [sold_item_sk#11], Inner, BuildLeft + : :- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [i_item_sk#10, i_brand_id#4, i_brand#3] + : : +- *(1) Filter ((isnotnull(i_manager_id#12) && (i_manager_id#12 = 1)) && isnotnull(i_item_sk#10)) + : : +- *(1) FileScan parquet default.item[i_item_sk#10,i_brand_id#4,i_brand#3,i_manager_id#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct + : +- Union + : :- *(3) Project [ws_ext_sales_price#13 AS ext_price#7, ws_item_sk#14 AS sold_item_sk#11, ws_sold_time_sk#15 AS time_sk#8] + : : +- *(3) BroadcastHashJoin [ws_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight + : : :- *(3) Project [ws_sold_date_sk#16, ws_sold_time_sk#15, ws_item_sk#14, ws_ext_sales_price#13] + : : : +- *(3) Filter ((isnotnull(ws_sold_date_sk#16) && isnotnull(ws_item_sk#14)) && isnotnull(ws_sold_time_sk#15)) + : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#16,ws_sold_time_sk#15,ws_item_sk#14,ws_ext_sales_price#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_time_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [d_date_sk#17] + : : +- *(2) Filter ((((isnotnull(d_moy#18) && isnotnull(d_year#19)) && (d_moy#18 = 11)) && (d_year#19 = 1999)) && isnotnull(d_date_sk#17)) + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#17,d_year#19,d_moy#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct + : :- *(5) Project [cs_ext_sales_price#20 AS ext_price#21, cs_item_sk#22 AS sold_item_sk#23, cs_sold_time_sk#24 AS time_sk#25] + : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#17], Inner, BuildRight + : : :- *(5) Project [cs_sold_date_sk#26, cs_sold_time_sk#24, cs_item_sk#22, cs_ext_sales_price#20] + : : : +- *(5) Filter ((isnotnull(cs_sold_date_sk#26) && isnotnull(cs_item_sk#22)) && isnotnull(cs_sold_time_sk#24)) + : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_sold_time_sk#24,cs_item_sk#22,cs_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_time_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [ss_ext_sales_price#27 AS ext_price#28, ss_item_sk#29 AS sold_item_sk#30, ss_sold_time_sk#31 AS time_sk#32] + : +- *(7) BroadcastHashJoin [ss_sold_date_sk#33], [d_date_sk#17], Inner, BuildRight + : :- *(7) Project [ss_sold_date_sk#33, ss_sold_time_sk#31, ss_item_sk#29, ss_ext_sales_price#27] + : : +- *(7) Filter ((isnotnull(ss_sold_date_sk#33) && isnotnull(ss_item_sk#29)) && isnotnull(ss_sold_time_sk#31)) + : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#33,ss_sold_time_sk#31,ss_item_sk#29,ss_ext_sales_price#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_sold_time_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [t_time_sk#9, t_hour#5, t_minute#6] + +- *(8) Filter (((t_meal_time#34 = breakfast) || (t_meal_time#34 = dinner)) && isnotnull(t_time_sk#9)) + +- *(8) FileScan parquet default.time_dim[t_time_sk#9,t_hour#5,t_minute#6,t_meal_time#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [Or(EqualTo(t_meal_time,breakfast),EqualTo(t_meal_time,dinner)), IsNotNull(t_time_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/simplified.txt index 7fb18bbd6..c905e2e5d 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/simplified.txt @@ -1,65 +1,56 @@ -WholeStageCodegen (11) - Sort [ext_price,brand_id] +WholeStageCodegen + Sort [brand_id,ext_price] InputAdapter - Exchange [ext_price,brand_id] #1 - WholeStageCodegen (10) - HashAggregate [i_brand,i_brand_id,t_hour,t_minute,sum] [sum(UnscaledValue(ext_price)),brand_id,brand,ext_price,sum] + Exchange [brand_id,ext_price] #1 + WholeStageCodegen + HashAggregate [i_brand,i_brand_id,sum,sum(UnscaledValue(ext_price)),t_hour,t_minute] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ext_price))] InputAdapter Exchange [i_brand,i_brand_id,t_hour,t_minute] #2 - WholeStageCodegen (9) - HashAggregate [i_brand,i_brand_id,t_hour,t_minute,ext_price] [sum,sum] - Project [i_brand_id,i_brand,ext_price,t_hour,t_minute] - BroadcastHashJoin [time_sk,t_time_sk] - Project [i_brand_id,i_brand,ext_price,time_sk] + WholeStageCodegen + HashAggregate [ext_price,i_brand,i_brand_id,sum,sum,t_hour,t_minute] [sum,sum] + Project [ext_price,i_brand,i_brand_id,t_hour,t_minute] + BroadcastHashJoin [t_time_sk,time_sk] + Project [ext_price,i_brand,i_brand_id,time_sk] BroadcastHashJoin [i_item_sk,sold_item_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Project [i_item_sk,i_brand_id,i_brand] - Filter [i_manager_id,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] + WholeStageCodegen + Project [i_brand,i_brand_id,i_item_sk] + Filter [i_item_sk,i_manager_id] + Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] [i_brand,i_brand_id,i_item_sk,i_manager_id] InputAdapter Union - WholeStageCodegen (3) + WholeStageCodegen Project [ws_ext_sales_price,ws_item_sk,ws_sold_time_sk] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_item_sk,ws_sold_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_sold_time_sk,ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] + Filter [ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - WholeStageCodegen (5) + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + WholeStageCodegen Project [cs_ext_sales_price,cs_item_sk,cs_sold_time_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk,cs_item_sk,cs_sold_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_sold_time_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] + Filter [cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] InputAdapter - ReusedExchange [d_date_sk] #4 - WholeStageCodegen (7) + ReusedExchange [d_date_sk] [d_date_sk] #4 + WholeStageCodegen Project [ss_ext_sales_price,ss_item_sk,ss_sold_time_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_item_sk,ss_sold_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_sold_time_sk,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] InputAdapter - ReusedExchange [d_date_sk] #4 + ReusedExchange [d_date_sk] [d_date_sk] #4 InputAdapter BroadcastExchange #5 - WholeStageCodegen (8) - Project [t_time_sk,t_hour,t_minute] + WholeStageCodegen + Project [t_hour,t_minute,t_time_sk] Filter [t_meal_time,t_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute,t_meal_time] + Scan parquet default.time_dim [t_hour,t_meal_time,t_minute,t_time_sk] [t_hour,t_meal_time,t_minute,t_time_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt index 5ca28c3e3..40feaedbd 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt @@ -1,391 +1,68 @@ == Physical Plan == -TakeOrderedAndProject (70) -+- * HashAggregate (69) - +- Exchange (68) - +- * HashAggregate (67) - +- * Project (66) - +- * BroadcastHashJoin LeftOuter BuildRight (65) - :- * Project (60) - : +- * BroadcastHashJoin LeftOuter BuildRight (59) - : :- * Project (54) - : : +- * BroadcastHashJoin Inner BuildRight (53) - : : :- * Project (48) - : : : +- * BroadcastHashJoin Inner BuildRight (47) - : : : :- * Project (42) - : : : : +- * BroadcastHashJoin Inner BuildRight (41) - : : : : :- * Project (35) - : : : : : +- * BroadcastHashJoin Inner BuildRight (34) - : : : : : :- * Project (28) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (27) - : : : : : : :- * Project (21) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) - : : : : : : : :- * Project (15) - : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : : : : : : :- * Project (9) - : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : : : : : : : :- * Filter (3) - : : : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : : : : : +- BroadcastExchange (7) - : : : : : : : : : +- * Filter (6) - : : : : : : : : : +- * ColumnarToRow (5) - : : : : : : : : : +- Scan parquet default.inventory (4) - : : : : : : : : +- BroadcastExchange (13) - : : : : : : : : +- * Filter (12) - : : : : : : : : +- * ColumnarToRow (11) - : : : : : : : : +- Scan parquet default.warehouse (10) - : : : : : : : +- BroadcastExchange (19) - : : : : : : : +- * Filter (18) - : : : : : : : +- * ColumnarToRow (17) - : : : : : : : +- Scan parquet default.item (16) - : : : : : : +- BroadcastExchange (26) - : : : : : : +- * Project (25) - : : : : : : +- * Filter (24) - : : : : : : +- * ColumnarToRow (23) - : : : : : : +- Scan parquet default.customer_demographics (22) - : : : : : +- BroadcastExchange (33) - : : : : : +- * Project (32) - : : : : : +- * Filter (31) - : : : : : +- * ColumnarToRow (30) - : : : : : +- Scan parquet default.household_demographics (29) - : : : : +- BroadcastExchange (40) - : : : : +- * Project (39) - : : : : +- * Filter (38) - : : : : +- * ColumnarToRow (37) - : : : : +- Scan parquet default.date_dim (36) - : : : +- BroadcastExchange (46) - : : : +- * Filter (45) - : : : +- * ColumnarToRow (44) - : : : +- Scan parquet default.date_dim (43) - : : +- BroadcastExchange (52) - : : +- * Filter (51) - : : +- * ColumnarToRow (50) - : : +- Scan parquet default.date_dim (49) - : +- BroadcastExchange (58) - : +- * Filter (57) - : +- * ColumnarToRow (56) - : +- Scan parquet default.promotion (55) - +- BroadcastExchange (64) - +- * Filter (63) - +- * ColumnarToRow (62) - +- Scan parquet default.catalog_returns (61) - - -(1) Scan parquet default.catalog_sales -Output [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_ship_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 11] -Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] - -(3) Filter [codegen id : 11] -Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] -Condition : (((((isnotnull(cs_quantity#8) AND isnotnull(cs_item_sk#5)) AND isnotnull(cs_bill_cdemo_sk#3)) AND isnotnull(cs_bill_hdemo_sk#4)) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_ship_date_sk#2)) - -(4) Scan parquet default.inventory -Output [4]: [inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/inventory] -PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [4]: [inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] - -(6) Filter [codegen id : 1] -Input [4]: [inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] -Condition : (((isnotnull(inv_quantity_on_hand#12) AND isnotnull(inv_item_sk#10)) AND isnotnull(inv_warehouse_sk#11)) AND isnotnull(inv_date_sk#9)) - -(7) BroadcastExchange -Input [4]: [inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#13] - -(8) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_item_sk#5] -Right keys [1]: [inv_item_sk#10] -Join condition: (inv_quantity_on_hand#12 < cs_quantity#8) - -(9) Project [codegen id : 11] -Output [9]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, inv_warehouse_sk#11] -Input [12]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] - -(10) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#14, w_warehouse_name#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/warehouse] -PushedFilters: [IsNotNull(w_warehouse_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [2]: [w_warehouse_sk#14, w_warehouse_name#15] - -(12) Filter [codegen id : 2] -Input [2]: [w_warehouse_sk#14, w_warehouse_name#15] -Condition : isnotnull(w_warehouse_sk#14) - -(13) BroadcastExchange -Input [2]: [w_warehouse_sk#14, w_warehouse_name#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] - -(14) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [inv_warehouse_sk#11] -Right keys [1]: [w_warehouse_sk#14] -Join condition: None - -(15) Project [codegen id : 11] -Output [9]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15] -Input [11]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, inv_warehouse_sk#11, w_warehouse_sk#14, w_warehouse_name#15] - -(16) Scan parquet default.item -Output [2]: [i_item_sk#17, i_item_desc#18] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 3] -Input [2]: [i_item_sk#17, i_item_desc#18] - -(18) Filter [codegen id : 3] -Input [2]: [i_item_sk#17, i_item_desc#18] -Condition : isnotnull(i_item_sk#17) - -(19) BroadcastExchange -Input [2]: [i_item_sk#17, i_item_desc#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] - -(20) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_item_sk#5] -Right keys [1]: [i_item_sk#17] -Join condition: None - -(21) Project [codegen id : 11] -Output [10]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18] -Input [11]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_sk#17, i_item_desc#18] - -(22) Scan parquet default.customer_demographics -Output [2]: [cd_demo_sk#20, cd_marital_status#21] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,D), IsNotNull(cd_demo_sk)] -ReadSchema: struct - -(23) ColumnarToRow [codegen id : 4] -Input [2]: [cd_demo_sk#20, cd_marital_status#21] - -(24) Filter [codegen id : 4] -Input [2]: [cd_demo_sk#20, cd_marital_status#21] -Condition : ((isnotnull(cd_marital_status#21) AND (cd_marital_status#21 = D)) AND isnotnull(cd_demo_sk#20)) - -(25) Project [codegen id : 4] -Output [1]: [cd_demo_sk#20] -Input [2]: [cd_demo_sk#20, cd_marital_status#21] - -(26) BroadcastExchange -Input [1]: [cd_demo_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] - -(27) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_bill_cdemo_sk#3] -Right keys [1]: [cd_demo_sk#20] -Join condition: None - -(28) Project [codegen id : 11] -Output [9]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18] -Input [11]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, cd_demo_sk#20] - -(29) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#23, hd_buy_potential#24] -Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000), IsNotNull(hd_demo_sk)] -ReadSchema: struct - -(30) ColumnarToRow [codegen id : 5] -Input [2]: [hd_demo_sk#23, hd_buy_potential#24] - -(31) Filter [codegen id : 5] -Input [2]: [hd_demo_sk#23, hd_buy_potential#24] -Condition : ((isnotnull(hd_buy_potential#24) AND (hd_buy_potential#24 = >10000)) AND isnotnull(hd_demo_sk#23)) - -(32) Project [codegen id : 5] -Output [1]: [hd_demo_sk#23] -Input [2]: [hd_demo_sk#23, hd_buy_potential#24] - -(33) BroadcastExchange -Input [1]: [hd_demo_sk#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] - -(34) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_bill_hdemo_sk#4] -Right keys [1]: [hd_demo_sk#23] -Join condition: None - -(35) Project [codegen id : 11] -Output [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18] -Input [10]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, hd_demo_sk#23] - -(36) Scan parquet default.date_dim -Output [4]: [d_date_sk#26, d_date#27, d_week_seq#28, d_year#29] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] -ReadSchema: struct - -(37) ColumnarToRow [codegen id : 6] -Input [4]: [d_date_sk#26, d_date#27, d_week_seq#28, d_year#29] - -(38) Filter [codegen id : 6] -Input [4]: [d_date_sk#26, d_date#27, d_week_seq#28, d_year#29] -Condition : ((((isnotnull(d_year#29) AND (d_year#29 = 1999)) AND isnotnull(d_date_sk#26)) AND isnotnull(d_week_seq#28)) AND isnotnull(d_date#27)) - -(39) Project [codegen id : 6] -Output [3]: [d_date_sk#26, d_date#27, d_week_seq#28] -Input [4]: [d_date_sk#26, d_date#27, d_week_seq#28, d_year#29] - -(40) BroadcastExchange -Input [3]: [d_date_sk#26, d_date#27, d_week_seq#28] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] - -(41) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#26] -Join condition: None - -(42) Project [codegen id : 11] -Output [9]: [cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, d_date#27, d_week_seq#28] -Input [11]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, d_date_sk#26, d_date#27, d_week_seq#28] - -(43) Scan parquet default.date_dim -Output [2]: [d_date_sk#31, d_week_seq#32] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] -ReadSchema: struct - -(44) ColumnarToRow [codegen id : 7] -Input [2]: [d_date_sk#31, d_week_seq#32] - -(45) Filter [codegen id : 7] -Input [2]: [d_date_sk#31, d_week_seq#32] -Condition : (isnotnull(d_week_seq#32) AND isnotnull(d_date_sk#31)) - -(46) BroadcastExchange -Input [2]: [d_date_sk#31, d_week_seq#32] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#33] - -(47) BroadcastHashJoin [codegen id : 11] -Left keys [2]: [d_week_seq#28, inv_date_sk#9] -Right keys [2]: [d_week_seq#32, d_date_sk#31] -Join condition: None - -(48) Project [codegen id : 11] -Output [8]: [cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_date#27, d_week_seq#28] -Input [11]: [cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, d_date#27, d_week_seq#28, d_date_sk#31, d_week_seq#32] - -(49) Scan parquet default.date_dim -Output [2]: [d_date_sk#34, d_date#35] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)] -ReadSchema: struct - -(50) ColumnarToRow [codegen id : 8] -Input [2]: [d_date_sk#34, d_date#35] - -(51) Filter [codegen id : 8] -Input [2]: [d_date_sk#34, d_date#35] -Condition : (isnotnull(d_date#35) AND isnotnull(d_date_sk#34)) - -(52) BroadcastExchange -Input [2]: [d_date_sk#34, d_date#35] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#36] - -(53) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_ship_date_sk#2] -Right keys [1]: [d_date_sk#34] -Join condition: (d_date#35 > d_date#27 + 5 days) - -(54) Project [codegen id : 11] -Output [6]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_week_seq#28] -Input [10]: [cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_date#27, d_week_seq#28, d_date_sk#34, d_date#35] - -(55) Scan parquet default.promotion -Output [1]: [p_promo_sk#37] -Batched: true -Location [not included in comparison]/{warehouse_dir}/promotion] -PushedFilters: [IsNotNull(p_promo_sk)] -ReadSchema: struct - -(56) ColumnarToRow [codegen id : 9] -Input [1]: [p_promo_sk#37] - -(57) Filter [codegen id : 9] -Input [1]: [p_promo_sk#37] -Condition : isnotnull(p_promo_sk#37) - -(58) BroadcastExchange -Input [1]: [p_promo_sk#37] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#38] - -(59) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_promo_sk#6] -Right keys [1]: [p_promo_sk#37] -Join condition: None - -(60) Project [codegen id : 11] -Output [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_week_seq#28] -Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_week_seq#28, p_promo_sk#37] - -(61) Scan parquet default.catalog_returns -Output [2]: [cr_item_sk#39, cr_order_number#40] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] -ReadSchema: struct - -(62) ColumnarToRow [codegen id : 10] -Input [2]: [cr_item_sk#39, cr_order_number#40] - -(63) Filter [codegen id : 10] -Input [2]: [cr_item_sk#39, cr_order_number#40] -Condition : (isnotnull(cr_item_sk#39) AND isnotnull(cr_order_number#40)) - -(64) BroadcastExchange -Input [2]: [cr_item_sk#39, cr_order_number#40] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [id=#41] - -(65) BroadcastHashJoin [codegen id : 11] -Left keys [2]: [cs_item_sk#5, cs_order_number#7] -Right keys [2]: [cr_item_sk#39, cr_order_number#40] -Join condition: None - -(66) Project [codegen id : 11] -Output [3]: [w_warehouse_name#15, i_item_desc#18, d_week_seq#28] -Input [7]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_week_seq#28, cr_item_sk#39, cr_order_number#40] - -(67) HashAggregate [codegen id : 11] -Input [3]: [w_warehouse_name#15, i_item_desc#18, d_week_seq#28] -Keys [3]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#42] -Results [4]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, count#43] - -(68) Exchange -Input [4]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, count#43] -Arguments: hashpartitioning(i_item_desc#18, w_warehouse_name#15, d_week_seq#28, 5), true, [id=#44] - -(69) HashAggregate [codegen id : 12] -Input [4]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, count#43] -Keys [3]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#45] -Results [6]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, count(1)#45 AS no_promo#46, count(1)#45 AS promo#47, count(1)#45 AS total_cnt#48] - -(70) TakeOrderedAndProject -Input [6]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, no_promo#46, promo#47, total_cnt#48] -Arguments: 100, [total_cnt#48 DESC NULLS LAST, i_item_desc#18 ASC NULLS FIRST, w_warehouse_name#15 ASC NULLS FIRST, d_week_seq#28 ASC NULLS FIRST], [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, no_promo#46, promo#47, total_cnt#48] - +TakeOrderedAndProject(limit=100, orderBy=[total_cnt#1 DESC NULLS LAST,i_item_desc#2 ASC NULLS FIRST,w_warehouse_name#3 ASC NULLS FIRST,d_week_seq#4 ASC NULLS FIRST], output=[i_item_desc#2,w_warehouse_name#3,d_week_seq#4,no_promo#5,promo#6,total_cnt#1]) ++- *(12) HashAggregate(keys=[i_item_desc#2, w_warehouse_name#3, d_week_seq#4], functions=[count(1)]) + +- Exchange hashpartitioning(i_item_desc#2, w_warehouse_name#3, d_week_seq#4, 5) + +- *(11) HashAggregate(keys=[i_item_desc#2, w_warehouse_name#3, d_week_seq#4], functions=[partial_count(1)]) + +- *(11) Project [w_warehouse_name#3, i_item_desc#2, d_week_seq#4] + +- *(11) BroadcastHashJoin [cs_item_sk#7, cs_order_number#8], [cr_item_sk#9, cr_order_number#10], LeftOuter, BuildRight + :- *(11) Project [cs_item_sk#7, cs_order_number#8, w_warehouse_name#3, i_item_desc#2, d_week_seq#4] + : +- *(11) BroadcastHashJoin [cs_promo_sk#11], [p_promo_sk#12], LeftOuter, BuildRight + : :- *(11) Project [cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, w_warehouse_name#3, i_item_desc#2, d_week_seq#4] + : : +- *(11) BroadcastHashJoin [cs_ship_date_sk#13], [d_date_sk#14], Inner, BuildRight, (d_date#15 > cast(cast(d_date#16 as timestamp) + interval 5 days as date)) + : : :- *(11) Project [cs_ship_date_sk#13, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, w_warehouse_name#3, i_item_desc#2, d_date#16, d_week_seq#4] + : : : +- *(11) BroadcastHashJoin [d_week_seq#4, inv_date_sk#17], [d_week_seq#18, d_date_sk#19], Inner, BuildRight + : : : :- *(11) Project [cs_ship_date_sk#13, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2, d_date#16, d_week_seq#4] + : : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight + : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2] + : : : : : +- *(11) BroadcastHashJoin [cs_bill_hdemo_sk#22], [hd_demo_sk#23], Inner, BuildRight + : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2] + : : : : : : +- *(11) BroadcastHashJoin [cs_bill_cdemo_sk#24], [cd_demo_sk#25], Inner, BuildRight + : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2] + : : : : : : : +- *(11) BroadcastHashJoin [cs_item_sk#7], [i_item_sk#26], Inner, BuildRight + : : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3] + : : : : : : : : +- *(11) BroadcastHashJoin [inv_warehouse_sk#27], [w_warehouse_sk#28], Inner, BuildRight + : : : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, inv_warehouse_sk#27] + : : : : : : : : : +- *(11) BroadcastHashJoin [cs_item_sk#7], [inv_item_sk#29], Inner, BuildRight, (inv_quantity_on_hand#30 < cs_quantity#31) + : : : : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, cs_quantity#31] + : : : : : : : : : : +- *(11) Filter (((((isnotnull(cs_quantity#31) && isnotnull(cs_item_sk#7)) && isnotnull(cs_bill_cdemo_sk#24)) && isnotnull(cs_bill_hdemo_sk#22)) && isnotnull(cs_sold_date_sk#20)) && isnotnull(cs_ship_date_sk#13)) + : : : : : : : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#20,cs_ship_date_sk#13,cs_bill_cdemo_sk#24,cs_bill_hdemo_sk#22,cs_item_sk#7,cs_promo_sk#11,cs_order_number#8,cs_quantity#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hd..., ReadSchema: struct + : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : : +- *(2) Project [w_warehouse_sk#28, w_warehouse_name#3] + : : : : : : : : +- *(2) Filter isnotnull(w_warehouse_sk#28) + : : : : : : : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#28,w_warehouse_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : : +- *(3) Project [i_item_sk#26, i_item_desc#2] + : : : : : : : +- *(3) Filter isnotnull(i_item_sk#26) + : : : : : : : +- *(3) FileScan parquet default.item[i_item_sk#26,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : : +- *(4) Project [cd_demo_sk#25] + : : : : : : +- *(4) Filter ((isnotnull(cd_marital_status#32) && (cd_marital_status#32 = D)) && isnotnull(cd_demo_sk#25)) + : : : : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#25,cd_marital_status#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,D), IsNotNull(cd_demo_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : : +- *(5) Project [hd_demo_sk#23] + : : : : : +- *(5) Filter ((isnotnull(hd_buy_potential#33) && (hd_buy_potential#33 = >10000)) && isnotnull(hd_demo_sk#23)) + : : : : : +- *(5) FileScan parquet default.household_demographics[hd_demo_sk#23,hd_buy_potential#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000), IsNotNull(hd_demo_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(6) Project [d_date_sk#21, d_date#16, d_week_seq#4] + : : : : +- *(6) Filter (((isnotnull(d_year#34) && (d_year#34 = 1999)) && isnotnull(d_date_sk#21)) && isnotnull(d_week_seq#4)) + : : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#21,d_date#16,d_week_seq#4,d_year#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) + : : : +- *(7) Project [d_date_sk#19, d_week_seq#18] + : : : +- *(7) Filter (isnotnull(d_week_seq#18) && isnotnull(d_date_sk#19)) + : : : +- *(7) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(8) Project [d_date_sk#14, d_date#15] + : : +- *(8) Filter (isnotnull(d_date_sk#14) && isnotnull(d_date#15)) + : : +- *(8) FileScan parquet default.date_dim[d_date_sk#14,d_date#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_date)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(9) Project [p_promo_sk#12] + : +- *(9) Filter isnotnull(p_promo_sk#12) + : +- *(9) FileScan parquet default.promotion[p_promo_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_promo_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295)))) + +- *(10) Project [cr_item_sk#9, cr_order_number#10] + +- *(10) Filter (isnotnull(cr_item_sk#9) && isnotnull(cr_order_number#10)) + +- *(10) FileScan parquet default.catalog_returns[cr_item_sk#9,cr_order_number#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/simplified.txt index 25f03cbac..5a0b8c54a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/simplified.txt @@ -1,104 +1,90 @@ -TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_promo,promo] - WholeStageCodegen (12) - HashAggregate [i_item_desc,w_warehouse_name,d_week_seq,count] [count(1),no_promo,promo,total_cnt,count] +TakeOrderedAndProject [d_week_seq,i_item_desc,no_promo,promo,total_cnt,w_warehouse_name] + WholeStageCodegen + HashAggregate [count,count(1),d_week_seq,i_item_desc,w_warehouse_name] [count,count(1),no_promo,promo,total_cnt] InputAdapter - Exchange [i_item_desc,w_warehouse_name,d_week_seq] #1 - WholeStageCodegen (11) - HashAggregate [i_item_desc,w_warehouse_name,d_week_seq] [count,count] - Project [w_warehouse_name,i_item_desc,d_week_seq] - BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] - Project [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + Exchange [d_week_seq,i_item_desc,w_warehouse_name] #1 + WholeStageCodegen + HashAggregate [count,count,d_week_seq,i_item_desc,w_warehouse_name] [count,count] + Project [d_week_seq,i_item_desc,w_warehouse_name] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_item_sk,cs_order_number,d_week_seq,i_item_desc,w_warehouse_name] BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] - BroadcastHashJoin [cs_ship_date_sk,d_date_sk,d_date,d_date] - Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_date,d_week_seq] - BroadcastHashJoin [d_week_seq,inv_date_sk,d_week_seq,d_date_sk] - Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc,d_date,d_week_seq] + Project [cs_item_sk,cs_order_number,cs_promo_sk,d_week_seq,i_item_desc,w_warehouse_name] + BroadcastHashJoin [cs_ship_date_sk,d_date,d_date,d_date_sk] + Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,d_date,d_week_seq,i_item_desc,w_warehouse_name] + BroadcastHashJoin [d_date_sk,d_week_seq,d_week_seq,inv_date_sk] + Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,d_date,d_week_seq,i_item_desc,inv_date_sk,w_warehouse_name] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc] + Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,i_item_desc,inv_date_sk,w_warehouse_name] BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] - Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc] - BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] - Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc] + Project [cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,i_item_desc,inv_date_sk,w_warehouse_name] + BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] + Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,i_item_desc,inv_date_sk,w_warehouse_name] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name] + Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,w_warehouse_name] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,inv_warehouse_sk] - BroadcastHashJoin [cs_item_sk,inv_item_sk,inv_quantity_on_hand,cs_quantity] - Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_sold_date_sk,cs_ship_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] + Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,inv_warehouse_sk] + BroadcastHashJoin [cs_item_sk,cs_quantity,inv_item_sk,inv_quantity_on_hand] + Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] + Filter [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + WholeStageCodegen + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Filter [w_warehouse_sk] - ColumnarToRow - InputAdapter - Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] + WholeStageCodegen + Project [w_warehouse_name,w_warehouse_sk] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] + WholeStageCodegen + Project [i_item_desc,i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_item_desc,i_item_sk] [i_item_desc,i_item_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) + WholeStageCodegen Project [cd_demo_sk] - Filter [cd_marital_status,cd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + Filter [cd_demo_sk,cd_marital_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] InputAdapter BroadcastExchange #6 - WholeStageCodegen (5) + WholeStageCodegen Project [hd_demo_sk] Filter [hd_buy_potential,hd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk] [hd_buy_potential,hd_demo_sk] InputAdapter BroadcastExchange #7 - WholeStageCodegen (6) - Project [d_date_sk,d_date,d_week_seq] - Filter [d_year,d_date_sk,d_week_seq,d_date] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] + WholeStageCodegen + Project [d_date,d_date_sk,d_week_seq] + Filter [d_date_sk,d_week_seq,d_year] + Scan parquet default.date_dim [d_date,d_date_sk,d_week_seq,d_year] [d_date,d_date_sk,d_week_seq,d_year] InputAdapter BroadcastExchange #8 - WholeStageCodegen (7) - Filter [d_week_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_week_seq] + WholeStageCodegen + Project [d_date_sk,d_week_seq] + Filter [d_date_sk,d_week_seq] + Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] InputAdapter BroadcastExchange #9 - WholeStageCodegen (8) - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + WholeStageCodegen + Project [d_date,d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #10 - WholeStageCodegen (9) - Filter [p_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.promotion [p_promo_sk] + WholeStageCodegen + Project [p_promo_sk] + Filter [p_promo_sk] + Scan parquet default.promotion [p_promo_sk] [p_promo_sk] InputAdapter BroadcastExchange #11 - WholeStageCodegen (10) - Filter [cr_item_sk,cr_order_number] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] + WholeStageCodegen + Project [cr_item_sk,cr_order_number] + Filter [cr_item_sk,cr_order_number] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] [cr_item_sk,cr_order_number] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/explain.txt index f4565c3ed..898ff3075 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/explain.txt @@ -1,203 +1,34 @@ == Physical Plan == -* Sort (36) -+- Exchange (35) - +- * Project (34) - +- * BroadcastHashJoin Inner BuildRight (33) - :- * Filter (28) - : +- * HashAggregate (27) - : +- Exchange (26) - : +- * HashAggregate (25) - : +- * Project (24) - : +- * BroadcastHashJoin Inner BuildRight (23) - : :- * Project (17) - : : +- * BroadcastHashJoin Inner BuildRight (16) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (15) - : : +- * Project (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.store (11) - : +- BroadcastExchange (22) - : +- * Project (21) - : +- * Filter (20) - : +- * ColumnarToRow (19) - : +- Scan parquet default.household_demographics (18) - +- BroadcastExchange (32) - +- * Filter (31) - +- * ColumnarToRow (30) - +- Scan parquet default.customer (29) - - -(1) Scan parquet default.store_sales -Output [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] - -(3) Filter [codegen id : 4] -Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] -Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) - -(4) Scan parquet default.date_dim -Output [3]: [d_date_sk#6, d_year#7, d_dom#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#7, d_dom#8] - -(6) Filter [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#7, d_dom#8] -Condition : ((((isnotnull(d_dom#8) AND (d_dom#8 >= 1)) AND (d_dom#8 <= 2)) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#6] -Input [3]: [d_date_sk#6, d_year#7, d_dom#8] - -(8) BroadcastExchange -Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] - -(9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#6] -Join condition: None - -(10) Project [codegen id : 4] -Output [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] -Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, d_date_sk#6] - -(11) Scan parquet default.store -Output [2]: [s_store_sk#10, s_county#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [In(s_county, [Williamson County,Franklin Parish,Bronx County,Orange County]), IsNotNull(s_store_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [2]: [s_store_sk#10, s_county#11] - -(13) Filter [codegen id : 2] -Input [2]: [s_store_sk#10, s_county#11] -Condition : (s_county#11 IN (Williamson County,Franklin Parish,Bronx County,Orange County) AND isnotnull(s_store_sk#10)) - -(14) Project [codegen id : 2] -Output [1]: [s_store_sk#10] -Input [2]: [s_store_sk#10, s_county#11] - -(15) BroadcastExchange -Input [1]: [s_store_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(16) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#4] -Right keys [1]: [s_store_sk#10] -Join condition: None - -(17) Project [codegen id : 4] -Output [3]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5] -Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, s_store_sk#10] - -(18) Scan parquet default.household_demographics -Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 3] -Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] - -(20) Filter [codegen id : 3] -Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.0)) AND isnotnull(hd_demo_sk#13)) - -(21) Project [codegen id : 3] -Output [1]: [hd_demo_sk#13] -Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] - -(22) BroadcastExchange -Input [1]: [hd_demo_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] - -(23) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_hdemo_sk#3] -Right keys [1]: [hd_demo_sk#13] -Join condition: None - -(24) Project [codegen id : 4] -Output [2]: [ss_customer_sk#2, ss_ticket_number#5] -Input [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5, hd_demo_sk#13] - -(25) HashAggregate [codegen id : 4] -Input [2]: [ss_customer_sk#2, ss_ticket_number#5] -Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#18] -Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] - -(26) Exchange -Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] - -(27) HashAggregate [codegen id : 6] -Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#21] -Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count(1)#21 AS cnt#22] - -(28) Filter [codegen id : 6] -Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] -Condition : ((cnt#22 >= 1) AND (cnt#22 <= 5)) - -(29) Scan parquet default.customer -Output [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk)] -ReadSchema: struct - -(30) ColumnarToRow [codegen id : 5] -Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] - -(31) Filter [codegen id : 5] -Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] -Condition : isnotnull(c_customer_sk#23) - -(32) BroadcastExchange -Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] - -(33) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#23] -Join condition: None - -(34) Project [codegen id : 6] -Output [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] - -(35) Exchange -Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), true, [id=#29] - -(36) Sort [codegen id : 7] -Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Arguments: [cnt#22 DESC NULLS LAST], true, 0 - +*(7) Sort [cnt#1 DESC NULLS LAST], true, 0 ++- Exchange rangepartitioning(cnt#1 DESC NULLS LAST, 5) + +- *(6) Project [c_last_name#2, c_first_name#3, c_salutation#4, c_preferred_cust_flag#5, ss_ticket_number#6, cnt#1] + +- *(6) BroadcastHashJoin [ss_customer_sk#7], [c_customer_sk#8], Inner, BuildRight + :- *(6) Filter ((cnt#1 >= 1) && (cnt#1 <= 5)) + : +- *(6) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#7], functions=[count(1)]) + : +- Exchange hashpartitioning(ss_ticket_number#6, ss_customer_sk#7, 5) + : +- *(4) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#7], functions=[partial_count(1)]) + : +- *(4) Project [ss_customer_sk#7, ss_ticket_number#6] + : +- *(4) BroadcastHashJoin [ss_hdemo_sk#9], [hd_demo_sk#10], Inner, BuildRight + : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_ticket_number#6] + : : +- *(4) BroadcastHashJoin [ss_store_sk#11], [s_store_sk#12], Inner, BuildRight + : : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#6] + : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + : : : :- *(4) Project [ss_sold_date_sk#13, ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#6] + : : : : +- *(4) Filter (((isnotnull(ss_sold_date_sk#13) && isnotnull(ss_store_sk#11)) && isnotnull(ss_hdemo_sk#9)) && isnotnull(ss_customer_sk#7)) + : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_customer_sk#7,ss_hdemo_sk#9,ss_store_sk#11,ss_ticket_number#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#14] + : : : +- *(1) Filter ((((isnotnull(d_dom#15) && (d_dom#15 >= 1)) && (d_dom#15 <= 2)) && d_year#16 IN (1999,2000,2001)) && isnotnull(d_date_sk#14)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_dom#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [s_store_sk#12] + : : +- *(2) Filter (s_county#17 IN (Williamson County,Franklin Parish,Bronx County,Orange County) && isnotnull(s_store_sk#12)) + : : +- *(2) FileScan parquet default.store[s_store_sk#12,s_county#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [In(s_county, [Williamson County,Franklin Parish,Bronx County,Orange County]), IsNotNull(s_store_..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [hd_demo_sk#10] + : +- *(3) Filter ((((isnotnull(hd_vehicle_count#18) && ((hd_buy_potential#19 = >10000) || (hd_buy_potential#19 = unknown))) && (hd_vehicle_count#18 > 0)) && (CASE WHEN (hd_vehicle_count#18 > 0) THEN (cast(hd_dep_count#20 as double) / cast(hd_vehicle_count#18 as double)) ELSE null END > 1.0)) && isnotnull(hd_demo_sk#10)) + : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#10,hd_buy_potential#19,hd_dep_count#20,hd_vehicle_count#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknow..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [c_customer_sk#8, c_salutation#4, c_first_name#3, c_last_name#2, c_preferred_cust_flag#5] + +- *(5) Filter isnotnull(c_customer_sk#8) + +- *(5) FileScan parquet default.customer[c_customer_sk#8,c_salutation#4,c_first_name#3,c_last_name#2,c_preferred_cust_flag#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] - -(3) Filter [codegen id : 3] -Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] -Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) - -(4) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] - -(6) Filter [codegen id : 1] -Input [3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] -Condition : (isnotnull(ss_customer_sk#6) AND isnotnull(ss_sold_date_sk#5)) - -(7) BroadcastExchange -Input [3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#8] - -(8) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ss_customer_sk#6] -Join condition: None - -(9) Project [codegen id : 3] -Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_net_paid#7] -Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] - -(10) Scan parquet default.date_dim -Output [2]: [d_date_sk#9, d_year#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#9, d_year#10] - -(12) Filter [codegen id : 2] -Input [2]: [d_date_sk#9, d_year#10] -Condition : (((isnotnull(d_year#10) AND (d_year#10 = 2001)) AND d_year#10 IN (2001,2002)) AND isnotnull(d_date_sk#9)) - -(13) BroadcastExchange -Input [2]: [d_date_sk#9, d_year#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] - -(14) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#5] -Right keys [1]: [d_date_sk#9] -Join condition: None - -(15) Project [codegen id : 3] -Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#7, d_year#10] -Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_net_paid#7, d_date_sk#9, d_year#10] - -(16) HashAggregate [codegen id : 3] -Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#7, d_year#10] -Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] -Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#7))] -Aggregate Attributes [1]: [sum#12] -Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#13] - -(17) Exchange -Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#13] -Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, 5), true, [id=#14] - -(18) HashAggregate [codegen id : 16] -Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#13] -Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] -Functions [1]: [sum(UnscaledValue(ss_net_paid#7))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#7))#15] -Results [2]: [c_customer_id#2 AS customer_id#16, MakeDecimal(sum(UnscaledValue(ss_net_paid#7))#15,17,2) AS year_total#17] - -(19) Filter [codegen id : 16] -Input [2]: [customer_id#16, year_total#17] -Condition : (isnotnull(year_total#17) AND (year_total#17 > 0.00)) - -(20) Scan parquet default.customer -Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] -ReadSchema: struct - -(21) ColumnarToRow [codegen id : 6] -Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] - -(22) Filter [codegen id : 6] -Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] -Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) - -(23) ReusedExchange [Reuses operator id: 7] -Output [3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] - -(24) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ss_customer_sk#6] -Join condition: None - -(25) Project [codegen id : 6] -Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_net_paid#7] -Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] - -(26) Scan parquet default.date_dim -Output [2]: [d_date_sk#9, d_year#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] -ReadSchema: struct - -(27) ColumnarToRow [codegen id : 5] -Input [2]: [d_date_sk#9, d_year#10] - -(28) Filter [codegen id : 5] -Input [2]: [d_date_sk#9, d_year#10] -Condition : (((isnotnull(d_year#10) AND (d_year#10 = 2002)) AND d_year#10 IN (2001,2002)) AND isnotnull(d_date_sk#9)) - -(29) BroadcastExchange -Input [2]: [d_date_sk#9, d_year#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] - -(30) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#5] -Right keys [1]: [d_date_sk#9] -Join condition: None - -(31) Project [codegen id : 6] -Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#7, d_year#10] -Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_net_paid#7, d_date_sk#9, d_year#10] - -(32) HashAggregate [codegen id : 6] -Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#7, d_year#10] -Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] -Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#7))] -Aggregate Attributes [1]: [sum#19] -Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#20] - -(33) Exchange -Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#20] -Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, 5), true, [id=#21] - -(34) HashAggregate [codegen id : 7] -Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#20] -Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] -Functions [1]: [sum(UnscaledValue(ss_net_paid#7))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#7))#22] -Results [4]: [c_customer_id#2 AS customer_id#23, c_first_name#3 AS customer_first_name#24, c_last_name#4 AS customer_last_name#25, MakeDecimal(sum(UnscaledValue(ss_net_paid#7))#22,17,2) AS year_total#26] - -(35) BroadcastExchange -Input [4]: [customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#27] - -(36) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [customer_id#16] -Right keys [1]: [customer_id#23] -Join condition: None - -(37) Scan parquet default.customer -Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] -ReadSchema: struct - -(38) ColumnarToRow [codegen id : 10] -Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] - -(39) Filter [codegen id : 10] -Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] -Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) - -(40) Scan parquet default.web_sales -Output [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(41) ColumnarToRow [codegen id : 8] -Input [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] - -(42) Filter [codegen id : 8] -Input [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] -Condition : (isnotnull(ws_bill_customer_sk#29) AND isnotnull(ws_sold_date_sk#28)) - -(43) BroadcastExchange -Input [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#31] - -(44) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ws_bill_customer_sk#29] -Join condition: None - -(45) Project [codegen id : 10] -Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_net_paid#30] -Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] - -(46) ReusedExchange [Reuses operator id: 13] -Output [2]: [d_date_sk#9, d_year#10] - -(47) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ws_sold_date_sk#28] -Right keys [1]: [d_date_sk#9] -Join condition: None - -(48) Project [codegen id : 10] -Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_net_paid#30, d_year#10] -Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_net_paid#30, d_date_sk#9, d_year#10] - -(49) HashAggregate [codegen id : 10] -Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_net_paid#30, d_year#10] -Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] -Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#30))] -Aggregate Attributes [1]: [sum#32] -Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#33] - -(50) Exchange -Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#33] -Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, 5), true, [id=#34] - -(51) HashAggregate [codegen id : 11] -Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#33] -Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] -Functions [1]: [sum(UnscaledValue(ws_net_paid#30))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#30))#35] -Results [2]: [c_customer_id#2 AS customer_id#36, MakeDecimal(sum(UnscaledValue(ws_net_paid#30))#35,17,2) AS year_total#37] - -(52) Filter [codegen id : 11] -Input [2]: [customer_id#36, year_total#37] -Condition : (isnotnull(year_total#37) AND (year_total#37 > 0.00)) - -(53) Project [codegen id : 11] -Output [2]: [customer_id#36 AS customer_id#38, year_total#37 AS year_total#39] -Input [2]: [customer_id#36, year_total#37] - -(54) BroadcastExchange -Input [2]: [customer_id#38, year_total#39] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#40] - -(55) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [customer_id#16] -Right keys [1]: [customer_id#38] -Join condition: None - -(56) Project [codegen id : 16] -Output [7]: [customer_id#16, year_total#17, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#39] -Input [8]: [customer_id#16, year_total#17, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, customer_id#38, year_total#39] - -(57) Scan parquet default.customer -Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] -ReadSchema: struct - -(58) ColumnarToRow [codegen id : 14] -Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] - -(59) Filter [codegen id : 14] -Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] -Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) - -(60) ReusedExchange [Reuses operator id: 43] -Output [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] - -(61) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ws_bill_customer_sk#29] -Join condition: None - -(62) Project [codegen id : 14] -Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_net_paid#30] -Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] - -(63) ReusedExchange [Reuses operator id: 29] -Output [2]: [d_date_sk#9, d_year#10] - -(64) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_sold_date_sk#28] -Right keys [1]: [d_date_sk#9] -Join condition: None - -(65) Project [codegen id : 14] -Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_net_paid#30, d_year#10] -Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_net_paid#30, d_date_sk#9, d_year#10] - -(66) HashAggregate [codegen id : 14] -Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_net_paid#30, d_year#10] -Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] -Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#30))] -Aggregate Attributes [1]: [sum#41] -Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#42] - -(67) Exchange -Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#42] -Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, 5), true, [id=#43] - -(68) HashAggregate [codegen id : 15] -Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#42] -Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] -Functions [1]: [sum(UnscaledValue(ws_net_paid#30))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#30))#44] -Results [2]: [c_customer_id#2 AS customer_id#45, MakeDecimal(sum(UnscaledValue(ws_net_paid#30))#44,17,2) AS year_total#46] - -(69) BroadcastExchange -Input [2]: [customer_id#45, year_total#46] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#47] - -(70) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [customer_id#16] -Right keys [1]: [customer_id#45] -Join condition: (CASE WHEN (year_total#39 > 0.00) THEN CheckOverflow((promote_precision(year_total#46) / promote_precision(year_total#39)), DecimalType(37,20), true) ELSE null END > CASE WHEN (year_total#17 > 0.00) THEN CheckOverflow((promote_precision(year_total#26) / promote_precision(year_total#17)), DecimalType(37,20), true) ELSE null END) - -(71) Project [codegen id : 16] -Output [3]: [customer_id#23, customer_first_name#24, customer_last_name#25] -Input [9]: [customer_id#16, year_total#17, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#39, customer_id#45, year_total#46] - -(72) TakeOrderedAndProject -Input [3]: [customer_id#23, customer_first_name#24, customer_last_name#25] -Arguments: 100, [customer_id#23 ASC NULLS FIRST, customer_id#23 ASC NULLS FIRST, customer_id#23 ASC NULLS FIRST], [customer_id#23, customer_first_name#24, customer_last_name#25] - +TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer_id#1 ASC NULLS FIRST,customer_id#1 ASC NULLS FIRST], output=[customer_id#1,customer_first_name#2,customer_last_name#3]) ++- *(17) Project [customer_id#1, customer_first_name#2, customer_last_name#3] + +- *(17) BroadcastHashJoin [customer_id#4], [customer_id#5], Inner, BuildRight, (CASE WHEN (year_total#6 > 0.00) THEN CheckOverflow((promote_precision(year_total#7) / promote_precision(year_total#6)), DecimalType(37,20)) ELSE null END > CASE WHEN (year_total#8 > 0.00) THEN CheckOverflow((promote_precision(year_total#9) / promote_precision(year_total#8)), DecimalType(37,20)) ELSE null END) + :- *(17) Project [customer_id#4, year_total#8, customer_id#1, customer_first_name#2, customer_last_name#3, year_total#9, year_total#6] + : +- *(17) BroadcastHashJoin [customer_id#4], [customer_id#10], Inner, BuildRight + : :- *(17) BroadcastHashJoin [customer_id#4], [customer_id#1], Inner, BuildRight + : : :- Union + : : : :- *(4) Filter (isnotnull(year_total#8) && (year_total#8 > 0.00)) + : : : : +- *(4) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ss_net_paid#15))]) + : : : : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 5) + : : : : +- *(3) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ss_net_paid#15))]) + : : : : +- *(3) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_net_paid#15, d_year#14] + : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight + : : : : :- *(3) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_sold_date_sk#16, ss_net_paid#15] + : : : : : +- *(3) BroadcastHashJoin [c_customer_sk#18], [ss_customer_sk#19], Inner, BuildRight + : : : : : :- *(3) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] + : : : : : : +- *(3) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) + : : : : : : +- *(3) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : : : +- *(1) Project [ss_sold_date_sk#16, ss_customer_sk#19, ss_net_paid#15] + : : : : : +- *(1) Filter (isnotnull(ss_customer_sk#19) && isnotnull(ss_sold_date_sk#16)) + : : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_customer_sk#19,ss_net_paid#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(2) Project [d_date_sk#17, d_year#14] + : : : : +- *(2) Filter (((isnotnull(d_year#14) && d_year#14 IN (2001,2002)) && (d_year#14 = 2001)) && isnotnull(d_date_sk#17)) + : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#17,d_year#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), In(d_year, [2001,2002]), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- LocalTableScan , [customer_id#20, year_total#21] + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : : +- Union + : : :- *(8) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ss_net_paid#15))]) + : : : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 5) + : : : +- *(7) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ss_net_paid#15))]) + : : : +- *(7) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_net_paid#15, d_year#14] + : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight + : : : :- *(7) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_sold_date_sk#16, ss_net_paid#15] + : : : : +- *(7) BroadcastHashJoin [c_customer_sk#18], [ss_customer_sk#19], Inner, BuildRight + : : : : :- *(7) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] + : : : : : +- *(7) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) + : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : : +- ReusedExchange [ss_sold_date_sk#16, ss_customer_sk#19, ss_net_paid#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(6) Project [d_date_sk#17, d_year#14] + : : : +- *(6) Filter (((isnotnull(d_year#14) && d_year#14 IN (2001,2002)) && (d_year#14 = 2002)) && isnotnull(d_date_sk#17)) + : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#17,d_year#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), In(d_year, [2001,2002]), EqualTo(d_year,2002), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- LocalTableScan , [customer_id#20, customer_first_name#22, customer_last_name#23, year_total#21] + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- Union + : :- LocalTableScan , [customer_id#10, year_total#6] + : +- *(12) Filter (isnotnull(year_total#21) && (year_total#21 > 0.00)) + : +- *(12) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ws_net_paid#24))]) + : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 5) + : +- *(11) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ws_net_paid#24))]) + : +- *(11) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_net_paid#24, d_year#14] + : +- *(11) BroadcastHashJoin [ws_sold_date_sk#25], [d_date_sk#17], Inner, BuildRight + : :- *(11) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_sold_date_sk#25, ws_net_paid#24] + : : +- *(11) BroadcastHashJoin [c_customer_sk#18], [ws_bill_customer_sk#26], Inner, BuildRight + : : :- *(11) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] + : : : +- *(11) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) + : : : +- *(11) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : +- *(9) Project [ws_sold_date_sk#25, ws_bill_customer_sk#26, ws_net_paid#24] + : : +- *(9) Filter (isnotnull(ws_bill_customer_sk#26) && isnotnull(ws_sold_date_sk#25)) + : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#25,ws_bill_customer_sk#26,ws_net_paid#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#17, d_year#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- Union + :- LocalTableScan , [customer_id#5, year_total#7] + +- *(16) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ws_net_paid#24))]) + +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 5) + +- *(15) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ws_net_paid#24))]) + +- *(15) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_net_paid#24, d_year#14] + +- *(15) BroadcastHashJoin [ws_sold_date_sk#25], [d_date_sk#17], Inner, BuildRight + :- *(15) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_sold_date_sk#25, ws_net_paid#24] + : +- *(15) BroadcastHashJoin [c_customer_sk#18], [ws_bill_customer_sk#26], Inner, BuildRight + : :- *(15) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] + : : +- *(15) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) + : : +- *(15) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : +- ReusedExchange [ws_sold_date_sk#25, ws_bill_customer_sk#26, ws_net_paid#24], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + +- ReusedExchange [d_date_sk#17, d_year#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/simplified.txt index add2d43fc..5167a92d5 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/simplified.txt @@ -1,107 +1,108 @@ -TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] - WholeStageCodegen (16) - Project [customer_id,customer_first_name,customer_last_name] +TakeOrderedAndProject [customer_first_name,customer_id,customer_last_name] + WholeStageCodegen + Project [customer_first_name,customer_id,customer_last_name] BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] - Project [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,year_total] + Project [customer_first_name,customer_id,customer_id,customer_last_name,year_total,year_total,year_total] BroadcastHashJoin [customer_id,customer_id] BroadcastHashJoin [customer_id,customer_id] - Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ss_net_paid)),customer_id,year_total,sum] - InputAdapter - Exchange [c_customer_id,c_first_name,c_last_name,d_year] #1 - WholeStageCodegen (3) - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] - Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_customer_id,c_first_name,c_last_name,ss_sold_date_sk,ss_net_paid] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Filter [c_customer_sk,c_customer_id] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen (1) - Filter [ss_customer_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_net_paid] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (2) - Filter [d_year,d_date_sk] - ColumnarToRow + InputAdapter + Union + WholeStageCodegen + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ss_net_paid))] [customer_id,sum,sum(UnscaledValue(ss_net_paid)),year_total] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #1 + WholeStageCodegen + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid,sum,sum] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [c_customer_id,c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_id,c_customer_sk] + Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] [c_customer_id,c_customer_sk,c_first_name,c_last_name] InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + BroadcastExchange #2 + WholeStageCodegen + Project [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_net_paid,ss_sold_date_sk] [ss_customer_sk,ss_net_paid,ss_sold_date_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + LocalTableScan [customer_id,year_total] [customer_id,year_total] InputAdapter BroadcastExchange #4 - WholeStageCodegen (7) - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ss_net_paid)),customer_id,customer_first_name,customer_last_name,year_total,sum] - InputAdapter - Exchange [c_customer_id,c_first_name,c_last_name,d_year] #5 - WholeStageCodegen (6) - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] - Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_customer_id,c_first_name,c_last_name,ss_sold_date_sk,ss_net_paid] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Filter [c_customer_sk,c_customer_id] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] - InputAdapter - ReusedExchange [ss_sold_date_sk,ss_customer_sk,ss_net_paid] #2 - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (5) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Union + WholeStageCodegen + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ss_net_paid))] [customer_first_name,customer_id,customer_last_name,sum,sum(UnscaledValue(ss_net_paid)),year_total] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #5 + WholeStageCodegen + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid,sum,sum] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [c_customer_id,c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_id,c_customer_sk] + Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] [c_customer_id,c_customer_sk,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + LocalTableScan [customer_first_name,customer_id,customer_last_name,year_total] [customer_first_name,customer_id,customer_last_name,year_total] InputAdapter BroadcastExchange #7 - WholeStageCodegen (11) - Project [customer_id,year_total] + Union + LocalTableScan [customer_id,year_total] [customer_id,year_total] + WholeStageCodegen Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ws_net_paid))] [customer_id,sum,sum(UnscaledValue(ws_net_paid)),year_total] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year] #8 - WholeStageCodegen (10) - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] - Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,d_year] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [c_customer_id,c_first_name,c_last_name,ws_sold_date_sk,ws_net_paid] + WholeStageCodegen + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Filter [c_customer_sk,c_customer_id] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + Project [c_customer_id,c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_id,c_customer_sk] + Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] [c_customer_id,c_customer_sk,c_first_name,c_last_name] InputAdapter BroadcastExchange #9 - WholeStageCodegen (8) - Filter [ws_bill_customer_sk,ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] + WholeStageCodegen + Project [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk,d_year] #3 + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 InputAdapter BroadcastExchange #10 - WholeStageCodegen (15) - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] - InputAdapter - Exchange [c_customer_id,c_first_name,c_last_name,d_year] #11 - WholeStageCodegen (14) - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] - Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,d_year] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [c_customer_id,c_first_name,c_last_name,ws_sold_date_sk,ws_net_paid] - BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Filter [c_customer_sk,c_customer_id] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] - InputAdapter - ReusedExchange [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] #9 - InputAdapter - ReusedExchange [d_date_sk,d_year] #6 + Union + LocalTableScan [customer_id,year_total] [customer_id,year_total] + WholeStageCodegen + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ws_net_paid))] [customer_id,sum,sum(UnscaledValue(ws_net_paid)),year_total] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #11 + WholeStageCodegen + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Project [c_customer_id,c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_id,c_customer_sk] + Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] [c_customer_id,c_customer_sk,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #6 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/explain.txt index 292a44930..5f9929f2d 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/explain.txt @@ -1,647 +1,117 @@ == Physical Plan == -TakeOrderedAndProject (117) -+- * Project (116) - +- * BroadcastHashJoin Inner BuildRight (115) - :- * HashAggregate (63) - : +- Exchange (62) - : +- * HashAggregate (61) - : +- * HashAggregate (60) - : +- Exchange (59) - : +- * HashAggregate (58) - : +- Union (57) - : :- * HashAggregate (41) - : : +- Exchange (40) - : : +- * HashAggregate (39) - : : +- Union (38) - : : :- * Project (22) - : : : +- * BroadcastHashJoin LeftOuter BuildRight (21) - : : : :- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- * Project (10) - : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : +- BroadcastExchange (8) - : : : : : +- * Project (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.item (4) - : : : : +- BroadcastExchange (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.date_dim (11) - : : : +- BroadcastExchange (20) - : : : +- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.catalog_returns (17) - : : +- * Project (37) - : : +- * BroadcastHashJoin LeftOuter BuildRight (36) - : : :- * Project (31) - : : : +- * BroadcastHashJoin Inner BuildRight (30) - : : : :- * Project (28) - : : : : +- * BroadcastHashJoin Inner BuildRight (27) - : : : : :- * Filter (25) - : : : : : +- * ColumnarToRow (24) - : : : : : +- Scan parquet default.store_sales (23) - : : : : +- ReusedExchange (26) - : : : +- ReusedExchange (29) - : : +- BroadcastExchange (35) - : : +- * Filter (34) - : : +- * ColumnarToRow (33) - : : +- Scan parquet default.store_returns (32) - : +- * Project (56) - : +- * BroadcastHashJoin LeftOuter BuildRight (55) - : :- * Project (50) - : : +- * BroadcastHashJoin Inner BuildRight (49) - : : :- * Project (47) - : : : +- * BroadcastHashJoin Inner BuildRight (46) - : : : :- * Filter (44) - : : : : +- * ColumnarToRow (43) - : : : : +- Scan parquet default.web_sales (42) - : : : +- ReusedExchange (45) - : : +- ReusedExchange (48) - : +- BroadcastExchange (54) - : +- * Filter (53) - : +- * ColumnarToRow (52) - : +- Scan parquet default.web_returns (51) - +- BroadcastExchange (114) - +- * HashAggregate (113) - +- Exchange (112) - +- * HashAggregate (111) - +- * HashAggregate (110) - +- Exchange (109) - +- * HashAggregate (108) - +- Union (107) - :- * HashAggregate (94) - : +- Exchange (93) - : +- * HashAggregate (92) - : +- Union (91) - : :- * Project (78) - : : +- * BroadcastHashJoin LeftOuter BuildRight (77) - : : :- * Project (75) - : : : +- * BroadcastHashJoin Inner BuildRight (74) - : : : :- * Project (69) - : : : : +- * BroadcastHashJoin Inner BuildRight (68) - : : : : :- * Filter (66) - : : : : : +- * ColumnarToRow (65) - : : : : : +- Scan parquet default.catalog_sales (64) - : : : : +- ReusedExchange (67) - : : : +- BroadcastExchange (73) - : : : +- * Filter (72) - : : : +- * ColumnarToRow (71) - : : : +- Scan parquet default.date_dim (70) - : : +- ReusedExchange (76) - : +- * Project (90) - : +- * BroadcastHashJoin LeftOuter BuildRight (89) - : :- * Project (87) - : : +- * BroadcastHashJoin Inner BuildRight (86) - : : :- * Project (84) - : : : +- * BroadcastHashJoin Inner BuildRight (83) - : : : :- * Filter (81) - : : : : +- * ColumnarToRow (80) - : : : : +- Scan parquet default.store_sales (79) - : : : +- ReusedExchange (82) - : : +- ReusedExchange (85) - : +- ReusedExchange (88) - +- * Project (106) - +- * BroadcastHashJoin LeftOuter BuildRight (105) - :- * Project (103) - : +- * BroadcastHashJoin Inner BuildRight (102) - : :- * Project (100) - : : +- * BroadcastHashJoin Inner BuildRight (99) - : : :- * Filter (97) - : : : +- * ColumnarToRow (96) - : : : +- Scan parquet default.web_sales (95) - : : +- ReusedExchange (98) - : +- ReusedExchange (101) - +- ReusedExchange (104) - - -(1) Scan parquet default.catalog_sales -Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] - -(3) Filter [codegen id : 4] -Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] -Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) - -(4) Scan parquet default.item -Output [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Books), IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_manufact_id)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] - -(6) Filter [codegen id : 1] -Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] -Condition : ((((((isnotnull(i_category#10) AND (i_category#10 = Books)) AND isnotnull(i_item_sk#6)) AND isnotnull(i_brand_id#7)) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) AND isnotnull(i_manufact_id#11)) - -(7) Project [codegen id : 1] -Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] - -(8) BroadcastExchange -Input [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_item_sk#2] -Right keys [1]: [i_item_sk#6] -Join condition: None - -(10) Project [codegen id : 4] -Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] - -(11) Scan parquet default.date_dim -Output [2]: [d_date_sk#13, d_year#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#13, d_year#14] - -(13) Filter [codegen id : 2] -Input [2]: [d_date_sk#13, d_year#14] -Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2002)) AND isnotnull(d_date_sk#13)) - -(14) BroadcastExchange -Input [2]: [d_date_sk#13, d_year#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] - -(15) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#13] -Join condition: None - -(16) Project [codegen id : 4] -Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] - -(17) Scan parquet default.catalog_returns -Output [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] -ReadSchema: struct - -(18) ColumnarToRow [codegen id : 3] -Input [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] - -(19) Filter [codegen id : 3] -Input [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] -Condition : (isnotnull(cr_order_number#17) AND isnotnull(cr_item_sk#16)) - -(20) BroadcastExchange -Input [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#20] - -(21) BroadcastHashJoin [codegen id : 4] -Left keys [2]: [cs_order_number#3, cs_item_sk#2] -Right keys [2]: [cr_order_number#17, cr_item_sk#16] -Join condition: None - -(22) Project [codegen id : 4] -Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (cs_quantity#4 - coalesce(cr_return_quantity#18, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#19, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#22] -Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] - -(23) Scan parquet default.store_sales -Output [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 8] -Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] - -(25) Filter [codegen id : 8] -Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] -Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_date_sk#23)) - -(26) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] - -(27) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_item_sk#24] -Right keys [1]: [i_item_sk#6] -Join condition: None - -(28) Project [codegen id : 8] -Output [9]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Input [10]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] - -(29) ReusedExchange [Reuses operator id: 14] -Output [2]: [d_date_sk#13, d_year#14] - -(30) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_sold_date_sk#23] -Right keys [1]: [d_date_sk#13] -Join condition: None - -(31) Project [codegen id : 8] -Output [9]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Input [11]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] - -(32) Scan parquet default.store_returns -Output [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] -ReadSchema: struct - -(33) ColumnarToRow [codegen id : 7] -Input [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] - -(34) Filter [codegen id : 7] -Input [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] -Condition : (isnotnull(sr_ticket_number#29) AND isnotnull(sr_item_sk#28)) - -(35) BroadcastExchange -Input [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#32] - -(36) BroadcastHashJoin [codegen id : 8] -Left keys [2]: [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#24 as bigint)] -Right keys [2]: [sr_ticket_number#29, sr_item_sk#28] -Join condition: None - -(37) Project [codegen id : 8] -Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#33, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#34] -Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] - -(38) Union - -(39) HashAggregate [codegen id : 9] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] - -(40) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), true, [id=#35] - -(41) HashAggregate [codegen id : 10] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] - -(42) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(43) ColumnarToRow [codegen id : 14] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] - -(44) Filter [codegen id : 14] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] -Condition : (isnotnull(ws_item_sk#37) AND isnotnull(ws_sold_date_sk#36)) - -(45) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] - -(46) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_item_sk#37] -Right keys [1]: [i_item_sk#6] -Join condition: None - -(47) Project [codegen id : 14] -Output [9]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Input [10]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] - -(48) ReusedExchange [Reuses operator id: 14] -Output [2]: [d_date_sk#13, d_year#14] - -(49) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_sold_date_sk#36] -Right keys [1]: [d_date_sk#13] -Join condition: None - -(50) Project [codegen id : 14] -Output [9]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Input [11]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] - -(51) Scan parquet default.web_returns -Output [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_returns] -PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] -ReadSchema: struct - -(52) ColumnarToRow [codegen id : 13] -Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] - -(53) Filter [codegen id : 13] -Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] -Condition : (isnotnull(wr_order_number#42) AND isnotnull(wr_item_sk#41)) - -(54) BroadcastExchange -Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#45] - -(55) BroadcastHashJoin [codegen id : 14] -Left keys [2]: [cast(ws_order_number#38 as bigint), cast(ws_item_sk#37 as bigint)] -Right keys [2]: [wr_order_number#42, wr_item_sk#41] -Join condition: None - -(56) Project [codegen id : 14] -Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#39 - coalesce(wr_return_quantity#43, 0)) AS sales_cnt#46, CheckOverflow((promote_precision(cast(ws_ext_sales_price#40 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#44, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#47] -Input [13]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] - -(57) Union - -(58) HashAggregate [codegen id : 15] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] - -(59) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), true, [id=#48] - -(60) HashAggregate [codegen id : 16] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] - -(61) HashAggregate [codegen id : 16] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Functions [2]: [partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum#49, sum#50] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] - -(62) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#53] - -(63) HashAggregate [codegen id : 34] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] -Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Functions [2]: [sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#54, sum(UnscaledValue(sales_amt#22))#55] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#21 as bigint))#54 AS sales_cnt#56, MakeDecimal(sum(UnscaledValue(sales_amt#22))#55,18,2) AS sales_amt#57] - -(64) Scan parquet default.catalog_sales -Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(65) ColumnarToRow [codegen id : 20] -Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] - -(66) Filter [codegen id : 20] -Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] -Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) - -(67) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] - -(68) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [cs_item_sk#2] -Right keys [1]: [i_item_sk#58] -Join condition: None - -(69) Project [codegen id : 20] -Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] - -(70) Scan parquet default.date_dim -Output [2]: [d_date_sk#63, d_year#64] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct - -(71) ColumnarToRow [codegen id : 18] -Input [2]: [d_date_sk#63, d_year#64] - -(72) Filter [codegen id : 18] -Input [2]: [d_date_sk#63, d_year#64] -Condition : ((isnotnull(d_year#64) AND (d_year#64 = 2001)) AND isnotnull(d_date_sk#63)) - -(73) BroadcastExchange -Input [2]: [d_date_sk#63, d_year#64] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#65] - -(74) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#63] -Join condition: None - -(75) Project [codegen id : 20] -Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] -Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] - -(76) ReusedExchange [Reuses operator id: 20] -Output [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] - -(77) BroadcastHashJoin [codegen id : 20] -Left keys [2]: [cs_order_number#3, cs_item_sk#2] -Right keys [2]: [cr_order_number#17, cr_item_sk#16] -Join condition: None - -(78) Project [codegen id : 20] -Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (cs_quantity#4 - coalesce(cr_return_quantity#18, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#19, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#22] -Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] - -(79) Scan parquet default.store_sales -Output [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(80) ColumnarToRow [codegen id : 24] -Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] - -(81) Filter [codegen id : 24] -Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] -Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_date_sk#23)) - -(82) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] - -(83) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [ss_item_sk#24] -Right keys [1]: [i_item_sk#58] -Join condition: None - -(84) Project [codegen id : 24] -Output [9]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Input [10]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] - -(85) ReusedExchange [Reuses operator id: 73] -Output [2]: [d_date_sk#63, d_year#64] - -(86) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [ss_sold_date_sk#23] -Right keys [1]: [d_date_sk#63] -Join condition: None - -(87) Project [codegen id : 24] -Output [9]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] -Input [11]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] - -(88) ReusedExchange [Reuses operator id: 35] -Output [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] - -(89) BroadcastHashJoin [codegen id : 24] -Left keys [2]: [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#24 as bigint)] -Right keys [2]: [sr_ticket_number#29, sr_item_sk#28] -Join condition: None - -(90) Project [codegen id : 24] -Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#66, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#67] -Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] - -(91) Union - -(92) HashAggregate [codegen id : 25] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] - -(93) Exchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22, 5), true, [id=#68] - -(94) HashAggregate [codegen id : 26] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] - -(95) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(96) ColumnarToRow [codegen id : 30] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] - -(97) Filter [codegen id : 30] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] -Condition : (isnotnull(ws_item_sk#37) AND isnotnull(ws_sold_date_sk#36)) - -(98) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] - -(99) BroadcastHashJoin [codegen id : 30] -Left keys [1]: [ws_item_sk#37] -Right keys [1]: [i_item_sk#58] -Join condition: None - -(100) Project [codegen id : 30] -Output [9]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Input [10]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] - -(101) ReusedExchange [Reuses operator id: 73] -Output [2]: [d_date_sk#63, d_year#64] - -(102) BroadcastHashJoin [codegen id : 30] -Left keys [1]: [ws_sold_date_sk#36] -Right keys [1]: [d_date_sk#63] -Join condition: None - -(103) Project [codegen id : 30] -Output [9]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] -Input [11]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] - -(104) ReusedExchange [Reuses operator id: 54] -Output [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] - -(105) BroadcastHashJoin [codegen id : 30] -Left keys [2]: [cast(ws_order_number#38 as bigint), cast(ws_item_sk#37 as bigint)] -Right keys [2]: [wr_order_number#42, wr_item_sk#41] -Join condition: None - -(106) Project [codegen id : 30] -Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (ws_quantity#39 - coalesce(wr_return_quantity#43, 0)) AS sales_cnt#69, CheckOverflow((promote_precision(cast(ws_ext_sales_price#40 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#44, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#70] -Input [13]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] - -(107) Union - -(108) HashAggregate [codegen id : 31] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] - -(109) Exchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22, 5), true, [id=#71] - -(110) HashAggregate [codegen id : 32] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] - -(111) HashAggregate [codegen id : 32] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [5]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Functions [2]: [partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum#72, sum#73] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] - -(112) Exchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] -Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, 5), true, [id=#76] - -(113) HashAggregate [codegen id : 33] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] -Keys [5]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Functions [2]: [sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#77, sum(UnscaledValue(sales_amt#22))#78] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum(cast(sales_cnt#21 as bigint))#77 AS sales_cnt#79, MakeDecimal(sum(UnscaledValue(sales_amt#22))#78,18,2) AS sales_amt#80] - -(114) BroadcastExchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#79, sales_amt#80] -Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true], input[4, int, true]),false), [id=#81] - -(115) BroadcastHashJoin [codegen id : 34] -Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Right keys [4]: [i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#56 as decimal(17,2))) / promote_precision(cast(sales_cnt#79 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) - -(116) Project [codegen id : 34] -Output [10]: [d_year#64 AS prev_year#82, d_year#14 AS year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#79 AS prev_yr_cnt#84, sales_cnt#56 AS curr_yr_cnt#85, (sales_cnt#56 - sales_cnt#79) AS sales_cnt_diff#86, CheckOverflow((promote_precision(cast(sales_amt#57 as decimal(19,2))) - promote_precision(cast(sales_amt#80 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#87] -Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#56, sales_amt#57, d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#79, sales_amt#80] - -(117) TakeOrderedAndProject -Input [10]: [prev_year#82, year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#84, curr_yr_cnt#85, sales_cnt_diff#86, sales_amt_diff#87] -Arguments: 100, [sales_cnt_diff#86 ASC NULLS FIRST], [prev_year#82, year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#84, curr_yr_cnt#85, sales_cnt_diff#86, sales_amt_diff#87] - +TakeOrderedAndProject(limit=100, orderBy=[sales_cnt_diff#1 ASC NULLS FIRST], output=[prev_year#2,year#3,i_brand_id#4,i_class_id#5,i_category_id#6,i_manufact_id#7,prev_yr_cnt#8,curr_yr_cnt#9,sales_cnt_diff#1,sales_amt_diff#10]) ++- *(34) Project [d_year#11 AS prev_year#2, d_year#12 AS year#3, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#13 AS prev_yr_cnt#8, sales_cnt#14 AS curr_yr_cnt#9, (sales_cnt#14 - sales_cnt#13) AS sales_cnt_diff#1, CheckOverflow((promote_precision(cast(sales_amt#15 as decimal(19,2))) - promote_precision(cast(sales_amt#16 as decimal(19,2)))), DecimalType(19,2)) AS sales_amt_diff#10] + +- *(34) BroadcastHashJoin [i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7], [i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], Inner, BuildRight, (CheckOverflow((promote_precision(cast(sales_cnt#14 as decimal(17,2))) / promote_precision(cast(sales_cnt#13 as decimal(17,2)))), DecimalType(37,20)) < 0.90000000000000000000) + :- *(34) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7], functions=[sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))]) + : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, 5) + : +- *(16) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7], functions=[partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))]) + : +- *(16) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) + : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22, 5) + : +- *(15) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) + : +- Union + : :- *(10) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) + : : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22, 5) + : : +- *(9) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) + : : +- Union + : : :- *(4) Project [d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, (cs_quantity#23 - coalesce(cr_return_quantity#24, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#25 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#26, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#22] + : : : +- *(4) BroadcastHashJoin [cs_order_number#27, cs_item_sk#28], [cr_order_number#29, cr_item_sk#30], LeftOuter, BuildRight + : : : :- *(4) Project [cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, d_year#12] + : : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight + : : : : :- *(4) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7] + : : : : : +- *(4) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#33], Inner, BuildRight + : : : : : :- *(4) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25] + : : : : : : +- *(4) Filter (isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#31)) + : : : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#28,cs_order_number#27,cs_quantity#23,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) + : : : +- *(3) Project [cr_item_sk#30, cr_order_number#29, cr_return_quantity#24, cr_return_amount#26] + : : : +- *(3) Filter (isnotnull(cr_item_sk#30) && isnotnull(cr_order_number#29)) + : : : +- *(3) FileScan parquet default.catalog_returns[cr_item_sk#30,cr_order_number#29,cr_return_quantity#24,cr_return_amount#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct + : : +- *(8) Project [d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, (ss_quantity#35 - coalesce(sr_return_quantity#36, 0)) AS sales_cnt#37, CheckOverflow((promote_precision(cast(ss_ext_sales_price#38 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#39, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#40] + : : +- *(8) BroadcastHashJoin [cast(ss_ticket_number#41 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#43, sr_item_sk#44], LeftOuter, BuildRight + : : :- *(8) Project [ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, d_year#12] + : : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#45], [d_date_sk#32], Inner, BuildRight + : : : :- *(8) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7] + : : : : +- *(8) BroadcastHashJoin [ss_item_sk#42], [i_item_sk#33], Inner, BuildRight + : : : : :- *(8) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38] + : : : : : +- *(8) Filter (isnotnull(ss_item_sk#42) && isnotnull(ss_sold_date_sk#45)) + : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#45,ss_item_sk#42,ss_ticket_number#41,ss_quantity#35,ss_ext_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : +- *(14) Project [d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, (ws_quantity#46 - coalesce(wr_return_quantity#47, 0)) AS sales_cnt#48, CheckOverflow((promote_precision(cast(ws_ext_sales_price#49 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#50, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#51] + : +- *(14) BroadcastHashJoin [cast(ws_order_number#52 as bigint), cast(ws_item_sk#53 as bigint)], [wr_order_number#54, wr_item_sk#55], LeftOuter, BuildRight + : :- *(14) Project [ws_item_sk#53, ws_order_number#52, ws_quantity#46, ws_ext_sales_price#49, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, d_year#12] + : : +- *(14) BroadcastHashJoin [ws_sold_date_sk#56], [d_date_sk#32], Inner, BuildRight + : : :- *(14) Project [ws_sold_date_sk#56, ws_item_sk#53, ws_order_number#52, ws_quantity#46, ws_ext_sales_price#49, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7] + : : : +- *(14) BroadcastHashJoin [ws_item_sk#53], [i_item_sk#33], Inner, BuildRight + : : : :- *(14) Project [ws_sold_date_sk#56, ws_item_sk#53, ws_order_number#52, ws_quantity#46, ws_ext_sales_price#49] + : : : : +- *(14) Filter (isnotnull(ws_item_sk#53) && isnotnull(ws_sold_date_sk#56)) + : : : : +- *(14) FileScan parquet default.web_sales[ws_sold_date_sk#56,ws_item_sk#53,ws_order_number#52,ws_quantity#46,ws_ext_sales_price#49] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true], input[4, int, true])) + +- *(33) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], functions=[sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))]) + +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, 5) + +- *(32) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], functions=[partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))]) + +- *(32) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) + +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22, 5) + +- *(31) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) + +- Union + :- *(26) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) + : +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22, 5) + : +- *(25) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) + : +- Union + : :- *(20) Project [d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, (cs_quantity#23 - coalesce(cr_return_quantity#24, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#25 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#26, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#22] + : : +- *(20) BroadcastHashJoin [cs_order_number#27, cs_item_sk#28], [cr_order_number#29, cr_item_sk#30], LeftOuter, BuildRight + : : :- *(20) Project [cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, d_year#11] + : : : +- *(20) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#57], Inner, BuildRight + : : : :- *(20) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20] + : : : : +- *(20) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#58], Inner, BuildRight + : : : : :- *(20) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25] + : : : : : +- *(20) Filter (isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#31)) + : : : : : +- *(20) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#28,cs_order_number#27,cs_quantity#23,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [cr_item_sk#30, cr_order_number#29, cr_return_quantity#24, cr_return_amount#26], BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) + : +- *(24) Project [d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, (ss_quantity#35 - coalesce(sr_return_quantity#36, 0)) AS sales_cnt#37, CheckOverflow((promote_precision(cast(ss_ext_sales_price#38 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#39, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#40] + : +- *(24) BroadcastHashJoin [cast(ss_ticket_number#41 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#43, sr_item_sk#44], LeftOuter, BuildRight + : :- *(24) Project [ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, d_year#11] + : : +- *(24) BroadcastHashJoin [ss_sold_date_sk#45], [d_date_sk#57], Inner, BuildRight + : : :- *(24) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20] + : : : +- *(24) BroadcastHashJoin [ss_item_sk#42], [i_item_sk#58], Inner, BuildRight + : : : :- *(24) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38] + : : : : +- *(24) Filter (isnotnull(ss_item_sk#42) && isnotnull(ss_sold_date_sk#45)) + : : : : +- *(24) FileScan parquet default.store_sales[ss_sold_date_sk#45,ss_item_sk#42,ss_ticket_number#41,ss_quantity#35,ss_ext_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4] - -(3) Filter [codegen id : 3] -Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4] -Condition : ((isnull(ss_store_sk#3) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_sold_date_sk#1)) - -(4) Scan parquet default.item -Output [2]: [i_item_sk#5, i_category#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [i_item_sk#5, i_category#6] - -(6) Filter [codegen id : 1] -Input [2]: [i_item_sk#5, i_category#6] -Condition : isnotnull(i_item_sk#5) - -(7) BroadcastExchange -Input [2]: [i_item_sk#5, i_category#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] - -(8) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#5] -Join condition: None - -(9) Project [codegen id : 3] -Output [4]: [ss_sold_date_sk#1, ss_store_sk#3, ss_ext_sales_price#4, i_category#6] -Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, i_item_sk#5, i_category#6] - -(10) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] - -(12) Filter [codegen id : 2] -Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] -Condition : isnotnull(d_date_sk#8) - -(13) BroadcastExchange -Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] - -(14) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#8] -Join condition: None - -(15) Project [codegen id : 3] -Output [6]: [store AS channel#12, ss_store_sk#3 AS col_name#13, d_year#9, d_qoy#10, i_category#6, ss_ext_sales_price#4 AS ext_sales_price#14] -Input [7]: [ss_sold_date_sk#1, ss_store_sk#3, ss_ext_sales_price#4, i_category#6, d_date_sk#8, d_year#9, d_qoy#10] - -(16) Scan parquet default.web_sales -Output [4]: [ws_sold_date_sk#15, ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNull(ws_ship_customer_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 6] -Input [4]: [ws_sold_date_sk#15, ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18] - -(18) Filter [codegen id : 6] -Input [4]: [ws_sold_date_sk#15, ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18] -Condition : ((isnull(ws_ship_customer_sk#17) AND isnotnull(ws_item_sk#16)) AND isnotnull(ws_sold_date_sk#15)) - -(19) ReusedExchange [Reuses operator id: 7] -Output [2]: [i_item_sk#5, i_category#6] - -(20) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_item_sk#16] -Right keys [1]: [i_item_sk#5] -Join condition: None - -(21) Project [codegen id : 6] -Output [4]: [ws_sold_date_sk#15, ws_ship_customer_sk#17, ws_ext_sales_price#18, i_category#6] -Input [6]: [ws_sold_date_sk#15, ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18, i_item_sk#5, i_category#6] - -(22) ReusedExchange [Reuses operator id: 13] -Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] - -(23) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#15] -Right keys [1]: [d_date_sk#8] -Join condition: None - -(24) Project [codegen id : 6] -Output [6]: [web AS channel#19, ws_ship_customer_sk#17 AS col_name#20, d_year#9, d_qoy#10, i_category#6, ws_ext_sales_price#18 AS ext_sales_price#21] -Input [7]: [ws_sold_date_sk#15, ws_ship_customer_sk#17, ws_ext_sales_price#18, i_category#6, d_date_sk#8, d_year#9, d_qoy#10] - -(25) Scan parquet default.catalog_sales -Output [4]: [cs_sold_date_sk#22, cs_ship_addr_sk#23, cs_item_sk#24, cs_ext_sales_price#25] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNull(cs_ship_addr_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(26) ColumnarToRow [codegen id : 9] -Input [4]: [cs_sold_date_sk#22, cs_ship_addr_sk#23, cs_item_sk#24, cs_ext_sales_price#25] - -(27) Filter [codegen id : 9] -Input [4]: [cs_sold_date_sk#22, cs_ship_addr_sk#23, cs_item_sk#24, cs_ext_sales_price#25] -Condition : ((isnull(cs_ship_addr_sk#23) AND isnotnull(cs_item_sk#24)) AND isnotnull(cs_sold_date_sk#22)) - -(28) ReusedExchange [Reuses operator id: 7] -Output [2]: [i_item_sk#5, i_category#6] - -(29) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [cs_item_sk#24] -Right keys [1]: [i_item_sk#5] -Join condition: None - -(30) Project [codegen id : 9] -Output [4]: [cs_sold_date_sk#22, cs_ship_addr_sk#23, cs_ext_sales_price#25, i_category#6] -Input [6]: [cs_sold_date_sk#22, cs_ship_addr_sk#23, cs_item_sk#24, cs_ext_sales_price#25, i_item_sk#5, i_category#6] - -(31) ReusedExchange [Reuses operator id: 13] -Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] - -(32) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [cs_sold_date_sk#22] -Right keys [1]: [d_date_sk#8] -Join condition: None - -(33) Project [codegen id : 9] -Output [6]: [catalog AS channel#26, cs_ship_addr_sk#23 AS col_name#27, d_year#9, d_qoy#10, i_category#6, cs_ext_sales_price#25 AS ext_sales_price#28] -Input [7]: [cs_sold_date_sk#22, cs_ship_addr_sk#23, cs_ext_sales_price#25, i_category#6, d_date_sk#8, d_year#9, d_qoy#10] - -(34) Union - -(35) HashAggregate [codegen id : 10] -Input [6]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, ext_sales_price#14] -Keys [5]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6] -Functions [2]: [partial_count(1), partial_sum(UnscaledValue(ext_sales_price#14))] -Aggregate Attributes [2]: [count#29, sum#30] -Results [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, count#31, sum#32] - -(36) Exchange -Input [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, count#31, sum#32] -Arguments: hashpartitioning(channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, 5), true, [id=#33] - -(37) HashAggregate [codegen id : 11] -Input [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, count#31, sum#32] -Keys [5]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6] -Functions [2]: [count(1), sum(UnscaledValue(ext_sales_price#14))] -Aggregate Attributes [2]: [count(1)#34, sum(UnscaledValue(ext_sales_price#14))#35] -Results [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, count(1)#34 AS sales_cnt#36, MakeDecimal(sum(UnscaledValue(ext_sales_price#14))#35,17,2) AS sales_amt#37] - -(38) TakeOrderedAndProject -Input [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, sales_cnt#36, sales_amt#37] -Arguments: 100, [channel#12 ASC NULLS FIRST, col_name#13 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_qoy#10 ASC NULLS FIRST, i_category#6 ASC NULLS FIRST], [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, sales_cnt#36, sales_amt#37] - +TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,col_name#2 ASC NULLS FIRST,d_year#3 ASC NULLS FIRST,d_qoy#4 ASC NULLS FIRST,i_category#5 ASC NULLS FIRST], output=[channel#1,col_name#2,d_year#3,d_qoy#4,i_category#5,sales_cnt#6,sales_amt#7]) ++- *(11) HashAggregate(keys=[channel#1, col_name#2, d_year#3, d_qoy#4, i_category#5], functions=[count(1), sum(UnscaledValue(ext_sales_price#8))]) + +- Exchange hashpartitioning(channel#1, col_name#2, d_year#3, d_qoy#4, i_category#5, 5) + +- *(10) HashAggregate(keys=[channel#1, col_name#2, d_year#3, d_qoy#4, i_category#5], functions=[partial_count(1), partial_sum(UnscaledValue(ext_sales_price#8))]) + +- Union + :- *(3) Project [store AS channel#1, ss_store_sk#9 AS col_name#2, d_year#3, d_qoy#4, i_category#5, ss_ext_sales_price#10 AS ext_sales_price#8] + : +- *(3) BroadcastHashJoin [ss_sold_date_sk#11], [d_date_sk#12], Inner, BuildRight + : :- *(3) Project [ss_sold_date_sk#11, ss_store_sk#9, ss_ext_sales_price#10, i_category#5] + : : +- *(3) BroadcastHashJoin [ss_item_sk#13], [i_item_sk#14], Inner, BuildRight + : : :- *(3) Project [ss_sold_date_sk#11, ss_item_sk#13, ss_store_sk#9, ss_ext_sales_price#10] + : : : +- *(3) Filter ((isnull(ss_store_sk#9) && isnotnull(ss_item_sk#13)) && isnotnull(ss_sold_date_sk#11)) + : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#13,ss_store_sk#9,ss_ext_sales_price#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [i_item_sk#14, i_category#5] + : : +- *(1) Filter isnotnull(i_item_sk#14) + : : +- *(1) FileScan parquet default.item[i_item_sk#14,i_category#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#12, d_year#3, d_qoy#4] + : +- *(2) Filter isnotnull(d_date_sk#12) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#12,d_year#3,d_qoy#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + :- *(6) Project [web AS channel#15, ws_ship_customer_sk#16 AS col_name#17, d_year#3, d_qoy#4, i_category#5, ws_ext_sales_price#18 AS ext_sales_price#19] + : +- *(6) BroadcastHashJoin [ws_sold_date_sk#20], [d_date_sk#12], Inner, BuildRight + : :- *(6) Project [ws_sold_date_sk#20, ws_ship_customer_sk#16, ws_ext_sales_price#18, i_category#5] + : : +- *(6) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#14], Inner, BuildRight + : : :- *(6) Project [ws_sold_date_sk#20, ws_item_sk#21, ws_ship_customer_sk#16, ws_ext_sales_price#18] + : : : +- *(6) Filter ((isnull(ws_ship_customer_sk#16) && isnotnull(ws_item_sk#21)) && isnotnull(ws_sold_date_sk#20)) + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#20,ws_item_sk#21,ws_ship_customer_sk#16,ws_ext_sales_price#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNull(ws_ship_customer_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- ReusedExchange [i_item_sk#14, i_category#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [d_date_sk#12, d_year#3, d_qoy#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(9) Project [catalog AS channel#22, cs_ship_addr_sk#23 AS col_name#24, d_year#3, d_qoy#4, i_category#5, cs_ext_sales_price#25 AS ext_sales_price#26] + +- *(9) BroadcastHashJoin [cs_sold_date_sk#27], [d_date_sk#12], Inner, BuildRight + :- *(9) Project [cs_sold_date_sk#27, cs_ship_addr_sk#23, cs_ext_sales_price#25, i_category#5] + : +- *(9) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#14], Inner, BuildRight + : :- *(9) Project [cs_sold_date_sk#27, cs_ship_addr_sk#23, cs_item_sk#28, cs_ext_sales_price#25] + : : +- *(9) Filter ((isnull(cs_ship_addr_sk#23) && isnotnull(cs_item_sk#28)) && isnotnull(cs_sold_date_sk#27)) + : : +- *(9) FileScan parquet default.catalog_sales[cs_sold_date_sk#27,cs_ship_addr_sk#23,cs_item_sk#28,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNull(cs_ship_addr_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : +- ReusedExchange [i_item_sk#14, i_category#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [d_date_sk#12, d_year#3, d_qoy#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/simplified.txt index f01916baa..5b7872ad7 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/simplified.txt @@ -1,58 +1,53 @@ -TakeOrderedAndProject [channel,col_name,d_year,d_qoy,i_category,sales_cnt,sales_amt] - WholeStageCodegen (11) - HashAggregate [channel,col_name,d_year,d_qoy,i_category,count,sum] [count(1),sum(UnscaledValue(ext_sales_price)),sales_cnt,sales_amt,count,sum] +TakeOrderedAndProject [channel,col_name,d_qoy,d_year,i_category,sales_amt,sales_cnt] + WholeStageCodegen + HashAggregate [channel,col_name,count,count(1),d_qoy,d_year,i_category,sum,sum(UnscaledValue(ext_sales_price))] [count,count(1),sales_amt,sales_cnt,sum,sum(UnscaledValue(ext_sales_price))] InputAdapter - Exchange [channel,col_name,d_year,d_qoy,i_category] #1 - WholeStageCodegen (10) - HashAggregate [channel,col_name,d_year,d_qoy,i_category,ext_sales_price] [count,sum,count,sum] + Exchange [channel,col_name,d_qoy,d_year,i_category] #1 + WholeStageCodegen + HashAggregate [channel,col_name,count,count,d_qoy,d_year,ext_sales_price,i_category,sum,sum] [count,count,sum,sum] InputAdapter Union - WholeStageCodegen (3) - Project [ss_store_sk,d_year,d_qoy,i_category,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,i_category] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_store_sk,ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price] + WholeStageCodegen + Project [d_qoy,d_year,i_category,ss_ext_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_category,ss_ext_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_category] + WholeStageCodegen + Project [i_category,i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_category,i_item_sk] [i_category,i_item_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Filter [d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] - WholeStageCodegen (6) - Project [ws_ship_customer_sk,d_year,d_qoy,i_category,ws_ext_sales_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_ship_customer_sk,ws_ext_sales_price,i_category] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Filter [ws_ship_customer_sk,ws_item_sk,ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price] + WholeStageCodegen + Project [d_date_sk,d_qoy,d_year] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] + WholeStageCodegen + Project [d_qoy,d_year,i_category,ws_ext_sales_price,ws_ship_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [i_category,ws_ext_sales_price,ws_ship_customer_sk,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_sales_price,ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] [ws_ext_sales_price,ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] InputAdapter - ReusedExchange [i_item_sk,i_category] #2 + ReusedExchange [i_category,i_item_sk] [i_category,i_item_sk] #2 InputAdapter - ReusedExchange [d_date_sk,d_year,d_qoy] #3 - WholeStageCodegen (9) - Project [cs_ship_addr_sk,d_year,d_qoy,i_category,cs_ext_sales_price] + ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #3 + WholeStageCodegen + Project [cs_ext_sales_price,cs_ship_addr_sk,d_qoy,d_year,i_category] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_ship_addr_sk,cs_ext_sales_price,i_category] + Project [cs_ext_sales_price,cs_ship_addr_sk,cs_sold_date_sk,i_category] BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_ship_addr_sk,cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price] + Project [cs_ext_sales_price,cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] + Filter [cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] [cs_ext_sales_price,cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] InputAdapter - ReusedExchange [i_item_sk,i_category] #2 + ReusedExchange [i_category,i_item_sk] [i_category,i_item_sk] #2 InputAdapter - ReusedExchange [d_date_sk,d_year,d_qoy] #3 + ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #3 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/explain.txt index c232055ba..0b9dd6982 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/explain.txt @@ -1,520 +1,100 @@ == Physical Plan == -TakeOrderedAndProject (91) -+- * HashAggregate (90) - +- Exchange (89) - +- * HashAggregate (88) - +- * Expand (87) - +- Union (86) - :- * Project (34) - : +- * BroadcastHashJoin LeftOuter BuildRight (33) - : :- * HashAggregate (19) - : : +- Exchange (18) - : : +- * HashAggregate (17) - : : +- * Project (16) - : : +- * BroadcastHashJoin Inner BuildRight (15) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.store (11) - : +- BroadcastExchange (32) - : +- * HashAggregate (31) - : +- Exchange (30) - : +- * HashAggregate (29) - : +- * Project (28) - : +- * BroadcastHashJoin Inner BuildRight (27) - : :- * Project (25) - : : +- * BroadcastHashJoin Inner BuildRight (24) - : : :- * Filter (22) - : : : +- * ColumnarToRow (21) - : : : +- Scan parquet default.store_returns (20) - : : +- ReusedExchange (23) - : +- ReusedExchange (26) - :- * Project (55) - : +- BroadcastNestedLoopJoin Inner BuildLeft (54) - : :- BroadcastExchange (44) - : : +- * HashAggregate (43) - : : +- Exchange (42) - : : +- * HashAggregate (41) - : : +- * Project (40) - : : +- * BroadcastHashJoin Inner BuildRight (39) - : : :- * Filter (37) - : : : +- * ColumnarToRow (36) - : : : +- Scan parquet default.catalog_sales (35) - : : +- ReusedExchange (38) - : +- * HashAggregate (53) - : +- Exchange (52) - : +- * HashAggregate (51) - : +- * Project (50) - : +- * BroadcastHashJoin Inner BuildRight (49) - : :- * Filter (47) - : : +- * ColumnarToRow (46) - : : +- Scan parquet default.catalog_returns (45) - : +- ReusedExchange (48) - +- * Project (85) - +- * BroadcastHashJoin LeftOuter BuildRight (84) - :- * HashAggregate (70) - : +- Exchange (69) - : +- * HashAggregate (68) - : +- * Project (67) - : +- * BroadcastHashJoin Inner BuildRight (66) - : :- * Project (61) - : : +- * BroadcastHashJoin Inner BuildRight (60) - : : :- * Filter (58) - : : : +- * ColumnarToRow (57) - : : : +- Scan parquet default.web_sales (56) - : : +- ReusedExchange (59) - : +- BroadcastExchange (65) - : +- * Filter (64) - : +- * ColumnarToRow (63) - : +- Scan parquet default.web_page (62) - +- BroadcastExchange (83) - +- * HashAggregate (82) - +- Exchange (81) - +- * HashAggregate (80) - +- * Project (79) - +- * BroadcastHashJoin Inner BuildRight (78) - :- * Project (76) - : +- * BroadcastHashJoin Inner BuildRight (75) - : :- * Filter (73) - : : +- * ColumnarToRow (72) - : : +- Scan parquet default.web_returns (71) - : +- ReusedExchange (74) - +- ReusedExchange (77) - - -(1) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] - -(3) Filter [codegen id : 3] -Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] -Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) - -(4) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_date#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#5, d_date#6] - -(6) Filter [codegen id : 1] -Input [2]: [d_date_sk#5, d_date#6] -Condition : (((isnotnull(d_date#6) AND (d_date#6 >= 11172)) AND (d_date#6 <= 11202)) AND isnotnull(d_date_sk#5)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_date#6] - -(8) BroadcastExchange -Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] - -(9) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#5] -Join condition: None - -(10) Project [codegen id : 3] -Output [3]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] -Input [5]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, d_date_sk#5] - -(11) Scan parquet default.store -Output [1]: [s_store_sk#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [1]: [s_store_sk#8] - -(13) Filter [codegen id : 2] -Input [1]: [s_store_sk#8] -Condition : isnotnull(s_store_sk#8) - -(14) BroadcastExchange -Input [1]: [s_store_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] - -(15) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_store_sk#2] -Right keys [1]: [s_store_sk#8] -Join condition: None - -(16) Project [codegen id : 3] -Output [3]: [ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] -Input [4]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] - -(17) HashAggregate [codegen id : 3] -Input [3]: [ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] -Keys [1]: [s_store_sk#8] -Functions [2]: [partial_sum(UnscaledValue(ss_ext_sales_price#3)), partial_sum(UnscaledValue(ss_net_profit#4))] -Aggregate Attributes [2]: [sum#10, sum#11] -Results [3]: [s_store_sk#8, sum#12, sum#13] - -(18) Exchange -Input [3]: [s_store_sk#8, sum#12, sum#13] -Arguments: hashpartitioning(s_store_sk#8, 5), true, [id=#14] - -(19) HashAggregate [codegen id : 8] -Input [3]: [s_store_sk#8, sum#12, sum#13] -Keys [1]: [s_store_sk#8] -Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#3)), sum(UnscaledValue(ss_net_profit#4))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#3))#15, sum(UnscaledValue(ss_net_profit#4))#16] -Results [3]: [s_store_sk#8, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#15,17,2) AS sales#17, MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#16,17,2) AS profit#18] - -(20) Scan parquet default.store_returns -Output [4]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)] -ReadSchema: struct - -(21) ColumnarToRow [codegen id : 6] -Input [4]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] - -(22) Filter [codegen id : 6] -Input [4]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] -Condition : (isnotnull(sr_returned_date_sk#19) AND isnotnull(sr_store_sk#20)) - -(23) ReusedExchange [Reuses operator id: 8] -Output [1]: [d_date_sk#5] - -(24) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [sr_returned_date_sk#19] -Right keys [1]: [cast(d_date_sk#5 as bigint)] -Join condition: None - -(25) Project [codegen id : 6] -Output [3]: [sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] -Input [5]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22, d_date_sk#5] - -(26) ReusedExchange [Reuses operator id: 14] -Output [1]: [s_store_sk#23] - -(27) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [sr_store_sk#20] -Right keys [1]: [cast(s_store_sk#23 as bigint)] -Join condition: None - -(28) Project [codegen id : 6] -Output [3]: [sr_return_amt#21, sr_net_loss#22, s_store_sk#23] -Input [4]: [sr_store_sk#20, sr_return_amt#21, sr_net_loss#22, s_store_sk#23] - -(29) HashAggregate [codegen id : 6] -Input [3]: [sr_return_amt#21, sr_net_loss#22, s_store_sk#23] -Keys [1]: [s_store_sk#23] -Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#21)), partial_sum(UnscaledValue(sr_net_loss#22))] -Aggregate Attributes [2]: [sum#24, sum#25] -Results [3]: [s_store_sk#23, sum#26, sum#27] - -(30) Exchange -Input [3]: [s_store_sk#23, sum#26, sum#27] -Arguments: hashpartitioning(s_store_sk#23, 5), true, [id=#28] - -(31) HashAggregate [codegen id : 7] -Input [3]: [s_store_sk#23, sum#26, sum#27] -Keys [1]: [s_store_sk#23] -Functions [2]: [sum(UnscaledValue(sr_return_amt#21)), sum(UnscaledValue(sr_net_loss#22))] -Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#21))#29, sum(UnscaledValue(sr_net_loss#22))#30] -Results [3]: [s_store_sk#23, MakeDecimal(sum(UnscaledValue(sr_return_amt#21))#29,17,2) AS returns#31, MakeDecimal(sum(UnscaledValue(sr_net_loss#22))#30,17,2) AS profit_loss#32] - -(32) BroadcastExchange -Input [3]: [s_store_sk#23, returns#31, profit_loss#32] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] - -(33) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [s_store_sk#8] -Right keys [1]: [s_store_sk#23] -Join condition: None - -(34) Project [codegen id : 8] -Output [5]: [sales#17, coalesce(returns#31, 0.00) AS returns#34, CheckOverflow((promote_precision(cast(profit#18 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#32, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#35, store channel AS channel#36, s_store_sk#8 AS id#37] -Input [6]: [s_store_sk#8, sales#17, profit#18, s_store_sk#23, returns#31, profit_loss#32] - -(35) Scan parquet default.catalog_sales -Output [4]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(36) ColumnarToRow [codegen id : 10] -Input [4]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] - -(37) Filter [codegen id : 10] -Input [4]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] -Condition : isnotnull(cs_sold_date_sk#38) - -(38) ReusedExchange [Reuses operator id: 8] -Output [1]: [d_date_sk#5] - -(39) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#38] -Right keys [1]: [d_date_sk#5] -Join condition: None - -(40) Project [codegen id : 10] -Output [3]: [cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] -Input [5]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41, d_date_sk#5] - -(41) HashAggregate [codegen id : 10] -Input [3]: [cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] -Keys [1]: [cs_call_center_sk#39] -Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#40)), partial_sum(UnscaledValue(cs_net_profit#41))] -Aggregate Attributes [2]: [sum#42, sum#43] -Results [3]: [cs_call_center_sk#39, sum#44, sum#45] - -(42) Exchange -Input [3]: [cs_call_center_sk#39, sum#44, sum#45] -Arguments: hashpartitioning(cs_call_center_sk#39, 5), true, [id=#46] - -(43) HashAggregate [codegen id : 11] -Input [3]: [cs_call_center_sk#39, sum#44, sum#45] -Keys [1]: [cs_call_center_sk#39] -Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#40)), sum(UnscaledValue(cs_net_profit#41))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#40))#47, sum(UnscaledValue(cs_net_profit#41))#48] -Results [3]: [cs_call_center_sk#39, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#40))#47,17,2) AS sales#49, MakeDecimal(sum(UnscaledValue(cs_net_profit#41))#48,17,2) AS profit#50] - -(44) BroadcastExchange -Input [3]: [cs_call_center_sk#39, sales#49, profit#50] -Arguments: IdentityBroadcastMode, [id=#51] - -(45) Scan parquet default.catalog_returns -Output [3]: [cr_returned_date_sk#52, cr_return_amount#53, cr_net_loss#54] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_returned_date_sk)] -ReadSchema: struct - -(46) ColumnarToRow [codegen id : 13] -Input [3]: [cr_returned_date_sk#52, cr_return_amount#53, cr_net_loss#54] - -(47) Filter [codegen id : 13] -Input [3]: [cr_returned_date_sk#52, cr_return_amount#53, cr_net_loss#54] -Condition : isnotnull(cr_returned_date_sk#52) - -(48) ReusedExchange [Reuses operator id: 8] -Output [1]: [d_date_sk#5] - -(49) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [cr_returned_date_sk#52] -Right keys [1]: [d_date_sk#5] -Join condition: None - -(50) Project [codegen id : 13] -Output [2]: [cr_return_amount#53, cr_net_loss#54] -Input [4]: [cr_returned_date_sk#52, cr_return_amount#53, cr_net_loss#54, d_date_sk#5] - -(51) HashAggregate [codegen id : 13] -Input [2]: [cr_return_amount#53, cr_net_loss#54] -Keys: [] -Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#53)), partial_sum(UnscaledValue(cr_net_loss#54))] -Aggregate Attributes [2]: [sum#55, sum#56] -Results [2]: [sum#57, sum#58] - -(52) Exchange -Input [2]: [sum#57, sum#58] -Arguments: SinglePartition, true, [id=#59] - -(53) HashAggregate [codegen id : 14] -Input [2]: [sum#57, sum#58] -Keys: [] -Functions [2]: [sum(UnscaledValue(cr_return_amount#53)), sum(UnscaledValue(cr_net_loss#54))] -Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#53))#60, sum(UnscaledValue(cr_net_loss#54))#61] -Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#53))#60,17,2) AS returns#62, MakeDecimal(sum(UnscaledValue(cr_net_loss#54))#61,17,2) AS profit_loss#63] - -(54) BroadcastNestedLoopJoin -Join condition: None - -(55) Project [codegen id : 15] -Output [5]: [sales#49, returns#62, CheckOverflow((promote_precision(cast(profit#50 as decimal(18,2))) - promote_precision(cast(profit_loss#63 as decimal(18,2)))), DecimalType(18,2), true) AS profit#64, catalog channel AS channel#65, cs_call_center_sk#39 AS id#66] -Input [5]: [cs_call_center_sk#39, sales#49, profit#50, returns#62, profit_loss#63] - -(56) Scan parquet default.web_sales -Output [4]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_page_sk)] -ReadSchema: struct - -(57) ColumnarToRow [codegen id : 18] -Input [4]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] - -(58) Filter [codegen id : 18] -Input [4]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] -Condition : (isnotnull(ws_sold_date_sk#67) AND isnotnull(ws_web_page_sk#68)) - -(59) ReusedExchange [Reuses operator id: 8] -Output [1]: [d_date_sk#5] - -(60) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ws_sold_date_sk#67] -Right keys [1]: [d_date_sk#5] -Join condition: None - -(61) Project [codegen id : 18] -Output [3]: [ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] -Input [5]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70, d_date_sk#5] - -(62) Scan parquet default.web_page -Output [1]: [wp_web_page_sk#71] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_page] -PushedFilters: [IsNotNull(wp_web_page_sk)] -ReadSchema: struct - -(63) ColumnarToRow [codegen id : 17] -Input [1]: [wp_web_page_sk#71] - -(64) Filter [codegen id : 17] -Input [1]: [wp_web_page_sk#71] -Condition : isnotnull(wp_web_page_sk#71) - -(65) BroadcastExchange -Input [1]: [wp_web_page_sk#71] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#72] - -(66) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ws_web_page_sk#68] -Right keys [1]: [wp_web_page_sk#71] -Join condition: None - -(67) Project [codegen id : 18] -Output [3]: [ws_ext_sales_price#69, ws_net_profit#70, wp_web_page_sk#71] -Input [4]: [ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70, wp_web_page_sk#71] - -(68) HashAggregate [codegen id : 18] -Input [3]: [ws_ext_sales_price#69, ws_net_profit#70, wp_web_page_sk#71] -Keys [1]: [wp_web_page_sk#71] -Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#69)), partial_sum(UnscaledValue(ws_net_profit#70))] -Aggregate Attributes [2]: [sum#73, sum#74] -Results [3]: [wp_web_page_sk#71, sum#75, sum#76] - -(69) Exchange -Input [3]: [wp_web_page_sk#71, sum#75, sum#76] -Arguments: hashpartitioning(wp_web_page_sk#71, 5), true, [id=#77] - -(70) HashAggregate [codegen id : 23] -Input [3]: [wp_web_page_sk#71, sum#75, sum#76] -Keys [1]: [wp_web_page_sk#71] -Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#69)), sum(UnscaledValue(ws_net_profit#70))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#69))#78, sum(UnscaledValue(ws_net_profit#70))#79] -Results [3]: [wp_web_page_sk#71, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#69))#78,17,2) AS sales#80, MakeDecimal(sum(UnscaledValue(ws_net_profit#70))#79,17,2) AS profit#81] - -(71) Scan parquet default.web_returns -Output [4]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_returns] -PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_web_page_sk)] -ReadSchema: struct - -(72) ColumnarToRow [codegen id : 21] -Input [4]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] - -(73) Filter [codegen id : 21] -Input [4]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] -Condition : (isnotnull(wr_returned_date_sk#82) AND isnotnull(wr_web_page_sk#83)) - -(74) ReusedExchange [Reuses operator id: 8] -Output [1]: [d_date_sk#5] - -(75) BroadcastHashJoin [codegen id : 21] -Left keys [1]: [wr_returned_date_sk#82] -Right keys [1]: [cast(d_date_sk#5 as bigint)] -Join condition: None - -(76) Project [codegen id : 21] -Output [3]: [wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] -Input [5]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85, d_date_sk#5] - -(77) ReusedExchange [Reuses operator id: 65] -Output [1]: [wp_web_page_sk#86] - -(78) BroadcastHashJoin [codegen id : 21] -Left keys [1]: [wr_web_page_sk#83] -Right keys [1]: [cast(wp_web_page_sk#86 as bigint)] -Join condition: None - -(79) Project [codegen id : 21] -Output [3]: [wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] -Input [4]: [wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] - -(80) HashAggregate [codegen id : 21] -Input [3]: [wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] -Keys [1]: [wp_web_page_sk#86] -Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#84)), partial_sum(UnscaledValue(wr_net_loss#85))] -Aggregate Attributes [2]: [sum#87, sum#88] -Results [3]: [wp_web_page_sk#86, sum#89, sum#90] - -(81) Exchange -Input [3]: [wp_web_page_sk#86, sum#89, sum#90] -Arguments: hashpartitioning(wp_web_page_sk#86, 5), true, [id=#91] - -(82) HashAggregate [codegen id : 22] -Input [3]: [wp_web_page_sk#86, sum#89, sum#90] -Keys [1]: [wp_web_page_sk#86] -Functions [2]: [sum(UnscaledValue(wr_return_amt#84)), sum(UnscaledValue(wr_net_loss#85))] -Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#84))#92, sum(UnscaledValue(wr_net_loss#85))#93] -Results [3]: [wp_web_page_sk#86, MakeDecimal(sum(UnscaledValue(wr_return_amt#84))#92,17,2) AS returns#94, MakeDecimal(sum(UnscaledValue(wr_net_loss#85))#93,17,2) AS profit_loss#95] - -(83) BroadcastExchange -Input [3]: [wp_web_page_sk#86, returns#94, profit_loss#95] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#96] - -(84) BroadcastHashJoin [codegen id : 23] -Left keys [1]: [wp_web_page_sk#71] -Right keys [1]: [wp_web_page_sk#86] -Join condition: None - -(85) Project [codegen id : 23] -Output [5]: [sales#80, coalesce(returns#94, 0.00) AS returns#97, CheckOverflow((promote_precision(cast(profit#81 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#95, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#98, web channel AS channel#99, wp_web_page_sk#71 AS id#100] -Input [6]: [wp_web_page_sk#71, sales#80, profit#81, wp_web_page_sk#86, returns#94, profit_loss#95] - -(86) Union - -(87) Expand [codegen id : 24] -Input [5]: [sales#17, returns#34, profit#35, channel#36, id#37] -Arguments: [List(sales#17, returns#34, profit#35, channel#36, id#37, 0), List(sales#17, returns#34, profit#35, channel#36, null, 1), List(sales#17, returns#34, profit#35, null, null, 3)], [sales#17, returns#34, profit#35, channel#101, id#102, spark_grouping_id#103] - -(88) HashAggregate [codegen id : 24] -Input [6]: [sales#17, returns#34, profit#35, channel#101, id#102, spark_grouping_id#103] -Keys [3]: [channel#101, id#102, spark_grouping_id#103] -Functions [3]: [partial_sum(sales#17), partial_sum(returns#34), partial_sum(profit#35)] -Aggregate Attributes [6]: [sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109] -Results [9]: [channel#101, id#102, spark_grouping_id#103, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115] - -(89) Exchange -Input [9]: [channel#101, id#102, spark_grouping_id#103, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115] -Arguments: hashpartitioning(channel#101, id#102, spark_grouping_id#103, 5), true, [id=#116] - -(90) HashAggregate [codegen id : 25] -Input [9]: [channel#101, id#102, spark_grouping_id#103, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115] -Keys [3]: [channel#101, id#102, spark_grouping_id#103] -Functions [3]: [sum(sales#17), sum(returns#34), sum(profit#35)] -Aggregate Attributes [3]: [sum(sales#17)#117, sum(returns#34)#118, sum(profit#35)#119] -Results [5]: [channel#101, id#102, sum(sales#17)#117 AS sales#120, sum(returns#34)#118 AS returns#121, sum(profit#35)#119 AS profit#122] - -(91) TakeOrderedAndProject -Input [5]: [channel#101, id#102, sales#120, returns#121, profit#122] -Arguments: 100, [channel#101 ASC NULLS FIRST, id#102 ASC NULLS FIRST], [channel#101, id#102, sales#120, returns#121, profit#122] - +TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NULLS FIRST], output=[channel#1,id#2,sales#3,returns#4,profit#5]) ++- *(25) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[sum(sales#7), sum(returns#8), sum(profit#9)]) + +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 5) + +- *(24) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[partial_sum(sales#7), partial_sum(returns#8), partial_sum(profit#9)]) + +- *(24) Expand [List(sales#7, returns#8, profit#9, channel#10, id#11, 0), List(sales#7, returns#8, profit#9, channel#10, null, 1), List(sales#7, returns#8, profit#9, null, null, 3)], [sales#7, returns#8, profit#9, channel#1, id#2, spark_grouping_id#6] + +- Union + :- *(8) Project [sales#7, coalesce(returns#12, 0.00) AS returns#8, CheckOverflow((promote_precision(cast(profit#13 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#14, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#9, store channel AS channel#10, s_store_sk#15 AS id#11] + : +- *(8) BroadcastHashJoin [s_store_sk#15], [s_store_sk#16], LeftOuter, BuildRight + : :- *(8) HashAggregate(keys=[s_store_sk#15], functions=[sum(UnscaledValue(ss_ext_sales_price#17)), sum(UnscaledValue(ss_net_profit#18))]) + : : +- Exchange hashpartitioning(s_store_sk#15, 5) + : : +- *(3) HashAggregate(keys=[s_store_sk#15], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#17)), partial_sum(UnscaledValue(ss_net_profit#18))]) + : : +- *(3) Project [ss_ext_sales_price#17, ss_net_profit#18, s_store_sk#15] + : : +- *(3) BroadcastHashJoin [ss_store_sk#19], [s_store_sk#15], Inner, BuildRight + : : :- *(3) Project [ss_store_sk#19, ss_ext_sales_price#17, ss_net_profit#18] + : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight + : : : :- *(3) Project [ss_sold_date_sk#20, ss_store_sk#19, ss_ext_sales_price#17, ss_net_profit#18] + : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#20) && isnotnull(ss_store_sk#19)) + : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#20,ss_store_sk#19,ss_ext_sales_price#17,ss_net_profit#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct= 11172)) && (d_date#22 <= 11202)) && isnotnull(d_date_sk#21)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#21,d_date#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), Is..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [s_store_sk#15] + : : +- *(2) Filter isnotnull(s_store_sk#15) + : : +- *(2) FileScan parquet default.store[s_store_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) HashAggregate(keys=[s_store_sk#16], functions=[sum(UnscaledValue(sr_return_amt#23)), sum(UnscaledValue(sr_net_loss#24))]) + : +- Exchange hashpartitioning(s_store_sk#16, 5) + : +- *(6) HashAggregate(keys=[s_store_sk#16], functions=[partial_sum(UnscaledValue(sr_return_amt#23)), partial_sum(UnscaledValue(sr_net_loss#24))]) + : +- *(6) Project [sr_return_amt#23, sr_net_loss#24, s_store_sk#16] + : +- *(6) BroadcastHashJoin [sr_store_sk#25], [cast(s_store_sk#16 as bigint)], Inner, BuildRight + : :- *(6) Project [sr_store_sk#25, sr_return_amt#23, sr_net_loss#24] + : : +- *(6) BroadcastHashJoin [sr_returned_date_sk#26], [cast(d_date_sk#21 as bigint)], Inner, BuildRight + : : :- *(6) Project [sr_returned_date_sk#26, sr_store_sk#25, sr_return_amt#23, sr_net_loss#24] + : : : +- *(6) Filter (isnotnull(sr_returned_date_sk#26) && isnotnull(sr_store_sk#25)) + : : : +- *(6) FileScan parquet default.store_returns[sr_returned_date_sk#26,sr_store_sk#25,sr_return_amt#23,sr_net_loss#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)], ReadSchema: struct= 11172)) && (d_date#22 <= 11202)) && isnotnull(d_date_sk#21)) + : : +- *(4) FileScan parquet default.date_dim[d_date_sk#21,d_date#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), Is..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(5) Project [s_store_sk#16] + : +- *(5) Filter isnotnull(s_store_sk#16) + : +- *(5) FileScan parquet default.store[s_store_sk#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + :- *(15) Project [sales#27, returns#28, CheckOverflow((promote_precision(cast(profit#29 as decimal(18,2))) - promote_precision(cast(profit_loss#30 as decimal(18,2)))), DecimalType(18,2)) AS profit#31, catalog channel AS channel#32, cs_call_center_sk#33 AS id#34] + : +- BroadcastNestedLoopJoin BuildLeft, Inner + : :- BroadcastExchange IdentityBroadcastMode + : : +- *(11) HashAggregate(keys=[cs_call_center_sk#33], functions=[sum(UnscaledValue(cs_ext_sales_price#35)), sum(UnscaledValue(cs_net_profit#36))]) + : : +- Exchange hashpartitioning(cs_call_center_sk#33, 5) + : : +- *(10) HashAggregate(keys=[cs_call_center_sk#33], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#35)), partial_sum(UnscaledValue(cs_net_profit#36))]) + : : +- *(10) Project [cs_call_center_sk#33, cs_ext_sales_price#35, cs_net_profit#36] + : : +- *(10) BroadcastHashJoin [cs_sold_date_sk#37], [d_date_sk#21], Inner, BuildRight + : : :- *(10) Project [cs_sold_date_sk#37, cs_call_center_sk#33, cs_ext_sales_price#35, cs_net_profit#36] + : : : +- *(10) Filter isnotnull(cs_sold_date_sk#37) + : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#37,cs_call_center_sk#33,cs_ext_sales_price#35,cs_net_profit#36] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#21], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(23) Project [sales#41, coalesce(returns#42, 0.00) AS returns#43, CheckOverflow((promote_precision(cast(profit#44 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#45, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#46, web channel AS channel#47, wp_web_page_sk#48 AS id#49] + +- *(23) BroadcastHashJoin [wp_web_page_sk#48], [wp_web_page_sk#50], LeftOuter, BuildRight + :- *(23) HashAggregate(keys=[wp_web_page_sk#48], functions=[sum(UnscaledValue(ws_ext_sales_price#51)), sum(UnscaledValue(ws_net_profit#52))]) + : +- Exchange hashpartitioning(wp_web_page_sk#48, 5) + : +- *(18) HashAggregate(keys=[wp_web_page_sk#48], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#51)), partial_sum(UnscaledValue(ws_net_profit#52))]) + : +- *(18) Project [ws_ext_sales_price#51, ws_net_profit#52, wp_web_page_sk#48] + : +- *(18) BroadcastHashJoin [ws_web_page_sk#53], [wp_web_page_sk#48], Inner, BuildRight + : :- *(18) Project [ws_web_page_sk#53, ws_ext_sales_price#51, ws_net_profit#52] + : : +- *(18) BroadcastHashJoin [ws_sold_date_sk#54], [d_date_sk#21], Inner, BuildRight + : : :- *(18) Project [ws_sold_date_sk#54, ws_web_page_sk#53, ws_ext_sales_price#51, ws_net_profit#52] + : : : +- *(18) Filter (isnotnull(ws_sold_date_sk#54) && isnotnull(ws_web_page_sk#53)) + : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#54,ws_web_page_sk#53,ws_ext_sales_price#51,ws_net_profit#52] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(22) HashAggregate(keys=[wp_web_page_sk#50], functions=[sum(UnscaledValue(wr_return_amt#55)), sum(UnscaledValue(wr_net_loss#56))]) + +- Exchange hashpartitioning(wp_web_page_sk#50, 5) + +- *(21) HashAggregate(keys=[wp_web_page_sk#50], functions=[partial_sum(UnscaledValue(wr_return_amt#55)), partial_sum(UnscaledValue(wr_net_loss#56))]) + +- *(21) Project [wr_return_amt#55, wr_net_loss#56, wp_web_page_sk#50] + +- *(21) BroadcastHashJoin [wr_web_page_sk#57], [cast(wp_web_page_sk#50 as bigint)], Inner, BuildRight + :- *(21) Project [wr_web_page_sk#57, wr_return_amt#55, wr_net_loss#56] + : +- *(21) BroadcastHashJoin [wr_returned_date_sk#58], [cast(d_date_sk#21 as bigint)], Inner, BuildRight + : :- *(21) Project [wr_returned_date_sk#58, wr_web_page_sk#57, wr_return_amt#55, wr_net_loss#56] + : : +- *(21) Filter (isnotnull(wr_returned_date_sk#58) && isnotnull(wr_web_page_sk#57)) + : : +- *(21) FileScan parquet default.web_returns[wr_returned_date_sk#58,wr_web_page_sk#57,wr_return_amt#55,wr_net_loss#56] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_web_page_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/simplified.txt index bfbeff02b..ef07e38e8 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/simplified.txt @@ -1,139 +1,141 @@ -TakeOrderedAndProject [channel,id,sales,returns,profit] - WholeStageCodegen (25) - HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] +TakeOrderedAndProject [channel,id,profit,returns,sales] + WholeStageCodegen + HashAggregate [channel,id,spark_grouping_id,sum,sum,sum,sum(profit),sum(returns),sum(sales)] [profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] InputAdapter Exchange [channel,id,spark_grouping_id] #1 - WholeStageCodegen (24) - HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - Expand [sales,returns,profit,channel,id] + WholeStageCodegen + HashAggregate [channel,id,profit,returns,sales,spark_grouping_id,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Expand [channel,id,profit,returns,sales] InputAdapter Union - WholeStageCodegen (8) - Project [sales,returns,profit,profit_loss,s_store_sk] + WholeStageCodegen + Project [profit,profit_loss,returns,s_store_sk,sales] BroadcastHashJoin [s_store_sk,s_store_sk] - HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),sales,profit,sum,sum] + HashAggregate [s_store_sk,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] [profit,sales,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] InputAdapter Exchange [s_store_sk] #2 - WholeStageCodegen (3) - HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] - Project [ss_ext_sales_price,ss_net_profit,s_store_sk] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + WholeStageCodegen + HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit,sum,sum,sum,sum] [sum,sum,sum,sum] + Project [s_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_ext_sales_price,ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) + WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk] + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk] [s_store_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (7) - HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),returns,profit_loss,sum,sum] + WholeStageCodegen + HashAggregate [s_store_sk,sum,sum,sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(sr_return_amt))] [profit_loss,returns,sum,sum,sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(sr_return_amt))] InputAdapter Exchange [s_store_sk] #6 - WholeStageCodegen (6) - HashAggregate [s_store_sk,sr_return_amt,sr_net_loss] [sum,sum,sum,sum] - Project [sr_return_amt,sr_net_loss,s_store_sk] - BroadcastHashJoin [sr_store_sk,s_store_sk] - Project [sr_store_sk,sr_return_amt,sr_net_loss] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Filter [sr_returned_date_sk,sr_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] + WholeStageCodegen + HashAggregate [s_store_sk,sr_net_loss,sr_return_amt,sum,sum,sum,sum] [sum,sum,sum,sum] + Project [s_store_sk,sr_net_loss,sr_return_amt] + BroadcastHashJoin [s_store_sk,sr_store_sk] + Project [sr_net_loss,sr_return_amt,sr_store_sk] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] + Filter [sr_returned_date_sk,sr_store_sk] + Scan parquet default.store_returns [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] InputAdapter - ReusedExchange [d_date_sk] #3 + BroadcastExchange #7 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date,d_date_sk] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter - ReusedExchange [s_store_sk] #4 - WholeStageCodegen (15) - Project [sales,returns,profit,profit_loss,cs_call_center_sk] + BroadcastExchange #8 + WholeStageCodegen + Project [s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_sk] [s_store_sk] + WholeStageCodegen + Project [cs_call_center_sk,profit,profit_loss,returns,sales] InputAdapter BroadcastNestedLoopJoin - BroadcastExchange #7 - WholeStageCodegen (11) - HashAggregate [cs_call_center_sk,sum,sum] [sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit)),sales,profit,sum,sum] + BroadcastExchange #9 + WholeStageCodegen + HashAggregate [cs_call_center_sk,sum,sum,sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit))] [profit,sales,sum,sum,sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit))] InputAdapter - Exchange [cs_call_center_sk] #8 - WholeStageCodegen (10) - HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] + Exchange [cs_call_center_sk] #10 + WholeStageCodegen + HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,sum,sum,sum,sum] [sum,sum,sum,sum] Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] InputAdapter - ReusedExchange [d_date_sk] #3 - WholeStageCodegen (14) - HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum] + ReusedExchange [d_date_sk] [d_date_sk] #3 + WholeStageCodegen + HashAggregate [sum,sum,sum(UnscaledValue(cr_net_loss)),sum(UnscaledValue(cr_return_amount))] [profit_loss,returns,sum,sum,sum(UnscaledValue(cr_net_loss)),sum(UnscaledValue(cr_return_amount))] InputAdapter - Exchange #9 - WholeStageCodegen (13) - HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum] - Project [cr_return_amount,cr_net_loss] + Exchange #11 + WholeStageCodegen + HashAggregate [cr_net_loss,cr_return_amount,sum,sum,sum,sum] [sum,sum,sum,sum] + Project [cr_net_loss,cr_return_amount] BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Filter [cr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_return_amount,cr_net_loss] + Project [cr_net_loss,cr_return_amount,cr_returned_date_sk] + Filter [cr_returned_date_sk] + Scan parquet default.catalog_returns [cr_net_loss,cr_return_amount,cr_returned_date_sk] [cr_net_loss,cr_return_amount,cr_returned_date_sk] InputAdapter - ReusedExchange [d_date_sk] #3 - WholeStageCodegen (23) - Project [sales,returns,profit,profit_loss,wp_web_page_sk] + ReusedExchange [d_date_sk] [d_date_sk] #3 + WholeStageCodegen + Project [profit,profit_loss,returns,sales,wp_web_page_sk] BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] - HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum] + HashAggregate [sum,sum,sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),wp_web_page_sk] [profit,sales,sum,sum,sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit))] InputAdapter - Exchange [wp_web_page_sk] #10 - WholeStageCodegen (18) - HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] - Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] - Project [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + Exchange [wp_web_page_sk] #12 + WholeStageCodegen + HashAggregate [sum,sum,sum,sum,wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] + Project [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] + Project [ws_ext_sales_price,ws_net_profit,ws_web_page_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_page_sk] + Filter [ws_sold_date_sk,ws_web_page_sk] + Scan parquet default.web_sales [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_page_sk] [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_page_sk] InputAdapter - ReusedExchange [d_date_sk] #3 + ReusedExchange [d_date_sk] [d_date_sk] #3 InputAdapter - BroadcastExchange #11 - WholeStageCodegen (17) - Filter [wp_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_page [wp_web_page_sk] + BroadcastExchange #13 + WholeStageCodegen + Project [wp_web_page_sk] + Filter [wp_web_page_sk] + Scan parquet default.web_page [wp_web_page_sk] [wp_web_page_sk] InputAdapter - BroadcastExchange #12 - WholeStageCodegen (22) - HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),returns,profit_loss,sum,sum] + BroadcastExchange #14 + WholeStageCodegen + HashAggregate [sum,sum,sum(UnscaledValue(wr_net_loss)),sum(UnscaledValue(wr_return_amt)),wp_web_page_sk] [profit_loss,returns,sum,sum,sum(UnscaledValue(wr_net_loss)),sum(UnscaledValue(wr_return_amt))] InputAdapter - Exchange [wp_web_page_sk] #13 - WholeStageCodegen (21) - HashAggregate [wp_web_page_sk,wr_return_amt,wr_net_loss] [sum,sum,sum,sum] - Project [wr_return_amt,wr_net_loss,wp_web_page_sk] - BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk] - Project [wr_web_page_sk,wr_return_amt,wr_net_loss] - BroadcastHashJoin [wr_returned_date_sk,d_date_sk] - Filter [wr_returned_date_sk,wr_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] + Exchange [wp_web_page_sk] #15 + WholeStageCodegen + HashAggregate [sum,sum,sum,sum,wp_web_page_sk,wr_net_loss,wr_return_amt] [sum,sum,sum,sum] + Project [wp_web_page_sk,wr_net_loss,wr_return_amt] + BroadcastHashJoin [wp_web_page_sk,wr_web_page_sk] + Project [wr_net_loss,wr_return_amt,wr_web_page_sk] + BroadcastHashJoin [d_date_sk,wr_returned_date_sk] + Project [wr_net_loss,wr_return_amt,wr_returned_date_sk,wr_web_page_sk] + Filter [wr_returned_date_sk,wr_web_page_sk] + Scan parquet default.web_returns [wr_net_loss,wr_return_amt,wr_returned_date_sk,wr_web_page_sk] [wr_net_loss,wr_return_amt,wr_returned_date_sk,wr_web_page_sk] InputAdapter - ReusedExchange [d_date_sk] #3 + ReusedExchange [d_date_sk] [d_date_sk] #7 InputAdapter - ReusedExchange [wp_web_page_sk] #11 + BroadcastExchange #16 + WholeStageCodegen + Project [wp_web_page_sk] + Filter [wp_web_page_sk] + Scan parquet default.web_page [wp_web_page_sk] [wp_web_page_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt index 1bcf039dd..348b4f499 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt @@ -1,341 +1,61 @@ == Physical Plan == -TakeOrderedAndProject (60) -+- * Project (59) - +- * BroadcastHashJoin Inner BuildRight (58) - :- * Project (39) - : +- * BroadcastHashJoin Inner BuildRight (38) - : :- * HashAggregate (19) - : : +- Exchange (18) - : : +- * HashAggregate (17) - : : +- * Project (16) - : : +- * BroadcastHashJoin Inner BuildRight (15) - : : :- * Project (10) - : : : +- * Filter (9) - : : : +- * BroadcastHashJoin LeftOuter BuildRight (8) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.store_returns (4) - : : +- BroadcastExchange (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.date_dim (11) - : +- BroadcastExchange (37) - : +- * Filter (36) - : +- * HashAggregate (35) - : +- Exchange (34) - : +- * HashAggregate (33) - : +- * Project (32) - : +- * BroadcastHashJoin Inner BuildRight (31) - : :- * Project (29) - : : +- * Filter (28) - : : +- * BroadcastHashJoin LeftOuter BuildRight (27) - : : :- * Filter (22) - : : : +- * ColumnarToRow (21) - : : : +- Scan parquet default.web_sales (20) - : : +- BroadcastExchange (26) - : : +- * Filter (25) - : : +- * ColumnarToRow (24) - : : +- Scan parquet default.web_returns (23) - : +- ReusedExchange (30) - +- BroadcastExchange (57) - +- * Filter (56) - +- * HashAggregate (55) - +- Exchange (54) - +- * HashAggregate (53) - +- * Project (52) - +- * BroadcastHashJoin Inner BuildRight (51) - :- * Project (49) - : +- * Filter (48) - : +- * BroadcastHashJoin LeftOuter BuildRight (47) - : :- * Filter (42) - : : +- * ColumnarToRow (41) - : : +- Scan parquet default.catalog_sales (40) - : +- BroadcastExchange (46) - : +- * Filter (45) - : +- * ColumnarToRow (44) - : +- Scan parquet default.catalog_returns (43) - +- ReusedExchange (50) - - -(1) Scan parquet default.store_sales -Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] - -(3) Filter [codegen id : 3] -Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] -Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_customer_sk#3)) - -(4) Scan parquet default.store_returns -Output [2]: [sr_item_sk#8, sr_ticket_number#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [sr_item_sk#8, sr_ticket_number#9] - -(6) Filter [codegen id : 1] -Input [2]: [sr_item_sk#8, sr_ticket_number#9] -Condition : (isnotnull(sr_ticket_number#9) AND isnotnull(sr_item_sk#8)) - -(7) BroadcastExchange -Input [2]: [sr_item_sk#8, sr_ticket_number#9] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#10] - -(8) BroadcastHashJoin [codegen id : 3] -Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#2 as bigint)] -Right keys [2]: [sr_ticket_number#9, sr_item_sk#8] -Join condition: None - -(9) Filter [codegen id : 3] -Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, sr_item_sk#8, sr_ticket_number#9] -Condition : isnull(sr_ticket_number#9) - -(10) Project [codegen id : 3] -Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] -Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, sr_item_sk#8, sr_ticket_number#9] - -(11) Scan parquet default.date_dim -Output [2]: [d_date_sk#11, d_year#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#11, d_year#12] - -(13) Filter [codegen id : 2] -Input [2]: [d_date_sk#11, d_year#12] -Condition : ((isnotnull(d_year#12) AND (d_year#12 = 2000)) AND isnotnull(d_date_sk#11)) - -(14) BroadcastExchange -Input [2]: [d_date_sk#11, d_year#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] - -(15) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#11] -Join condition: None - -(16) Project [codegen id : 3] -Output [6]: [ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, d_year#12] -Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, d_date_sk#11, d_year#12] - -(17) HashAggregate [codegen id : 3] -Input [6]: [ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, d_year#12] -Keys [3]: [d_year#12, ss_item_sk#2, ss_customer_sk#3] -Functions [3]: [partial_sum(cast(ss_quantity#5 as bigint)), partial_sum(UnscaledValue(ss_wholesale_cost#6)), partial_sum(UnscaledValue(ss_sales_price#7))] -Aggregate Attributes [3]: [sum#14, sum#15, sum#16] -Results [6]: [d_year#12, ss_item_sk#2, ss_customer_sk#3, sum#17, sum#18, sum#19] - -(18) Exchange -Input [6]: [d_year#12, ss_item_sk#2, ss_customer_sk#3, sum#17, sum#18, sum#19] -Arguments: hashpartitioning(d_year#12, ss_item_sk#2, ss_customer_sk#3, 5), true, [id=#20] - -(19) HashAggregate [codegen id : 12] -Input [6]: [d_year#12, ss_item_sk#2, ss_customer_sk#3, sum#17, sum#18, sum#19] -Keys [3]: [d_year#12, ss_item_sk#2, ss_customer_sk#3] -Functions [3]: [sum(cast(ss_quantity#5 as bigint)), sum(UnscaledValue(ss_wholesale_cost#6)), sum(UnscaledValue(ss_sales_price#7))] -Aggregate Attributes [3]: [sum(cast(ss_quantity#5 as bigint))#21, sum(UnscaledValue(ss_wholesale_cost#6))#22, sum(UnscaledValue(ss_sales_price#7))#23] -Results [6]: [d_year#12 AS ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3, sum(cast(ss_quantity#5 as bigint))#21 AS ss_qty#25, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#6))#22,17,2) AS ss_wc#26, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#23,17,2) AS ss_sp#27] - -(20) Scan parquet default.web_sales -Output [7]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)] -ReadSchema: struct - -(21) ColumnarToRow [codegen id : 6] -Input [7]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34] - -(22) Filter [codegen id : 6] -Input [7]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34] -Condition : ((isnotnull(ws_sold_date_sk#28) AND isnotnull(ws_item_sk#29)) AND isnotnull(ws_bill_customer_sk#30)) - -(23) Scan parquet default.web_returns -Output [2]: [wr_item_sk#35, wr_order_number#36] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_returns] -PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 4] -Input [2]: [wr_item_sk#35, wr_order_number#36] - -(25) Filter [codegen id : 4] -Input [2]: [wr_item_sk#35, wr_order_number#36] -Condition : (isnotnull(wr_order_number#36) AND isnotnull(wr_item_sk#35)) - -(26) BroadcastExchange -Input [2]: [wr_item_sk#35, wr_order_number#36] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#37] - -(27) BroadcastHashJoin [codegen id : 6] -Left keys [2]: [cast(ws_order_number#31 as bigint), cast(ws_item_sk#29 as bigint)] -Right keys [2]: [wr_order_number#36, wr_item_sk#35] -Join condition: None - -(28) Filter [codegen id : 6] -Input [9]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, wr_item_sk#35, wr_order_number#36] -Condition : isnull(wr_order_number#36) - -(29) Project [codegen id : 6] -Output [6]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34] -Input [9]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, wr_item_sk#35, wr_order_number#36] - -(30) ReusedExchange [Reuses operator id: 14] -Output [2]: [d_date_sk#11, d_year#12] - -(31) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#28] -Right keys [1]: [d_date_sk#11] -Join condition: None - -(32) Project [codegen id : 6] -Output [6]: [ws_item_sk#29, ws_bill_customer_sk#30, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, d_year#12] -Input [8]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, d_date_sk#11, d_year#12] - -(33) HashAggregate [codegen id : 6] -Input [6]: [ws_item_sk#29, ws_bill_customer_sk#30, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, d_year#12] -Keys [3]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30] -Functions [3]: [partial_sum(cast(ws_quantity#32 as bigint)), partial_sum(UnscaledValue(ws_wholesale_cost#33)), partial_sum(UnscaledValue(ws_sales_price#34))] -Aggregate Attributes [3]: [sum#38, sum#39, sum#40] -Results [6]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30, sum#41, sum#42, sum#43] - -(34) Exchange -Input [6]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30, sum#41, sum#42, sum#43] -Arguments: hashpartitioning(d_year#12, ws_item_sk#29, ws_bill_customer_sk#30, 5), true, [id=#44] - -(35) HashAggregate [codegen id : 7] -Input [6]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30, sum#41, sum#42, sum#43] -Keys [3]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30] -Functions [3]: [sum(cast(ws_quantity#32 as bigint)), sum(UnscaledValue(ws_wholesale_cost#33)), sum(UnscaledValue(ws_sales_price#34))] -Aggregate Attributes [3]: [sum(cast(ws_quantity#32 as bigint))#45, sum(UnscaledValue(ws_wholesale_cost#33))#46, sum(UnscaledValue(ws_sales_price#34))#47] -Results [6]: [d_year#12 AS ws_sold_year#48, ws_item_sk#29, ws_bill_customer_sk#30 AS ws_customer_sk#49, sum(cast(ws_quantity#32 as bigint))#45 AS ws_qty#50, MakeDecimal(sum(UnscaledValue(ws_wholesale_cost#33))#46,17,2) AS ws_wc#51, MakeDecimal(sum(UnscaledValue(ws_sales_price#34))#47,17,2) AS ws_sp#52] - -(36) Filter [codegen id : 7] -Input [6]: [ws_sold_year#48, ws_item_sk#29, ws_customer_sk#49, ws_qty#50, ws_wc#51, ws_sp#52] -Condition : (coalesce(ws_qty#50, 0) > 0) - -(37) BroadcastExchange -Input [6]: [ws_sold_year#48, ws_item_sk#29, ws_customer_sk#49, ws_qty#50, ws_wc#51, ws_sp#52] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#53] - -(38) BroadcastHashJoin [codegen id : 12] -Left keys [3]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3] -Right keys [3]: [ws_sold_year#48, ws_item_sk#29, ws_customer_sk#49] -Join condition: None - -(39) Project [codegen id : 12] -Output [9]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3, ss_qty#25, ss_wc#26, ss_sp#27, ws_qty#50, ws_wc#51, ws_sp#52] -Input [12]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3, ss_qty#25, ss_wc#26, ss_sp#27, ws_sold_year#48, ws_item_sk#29, ws_customer_sk#49, ws_qty#50, ws_wc#51, ws_sp#52] - -(40) Scan parquet default.catalog_sales -Output [7]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)] -ReadSchema: struct - -(41) ColumnarToRow [codegen id : 10] -Input [7]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60] - -(42) Filter [codegen id : 10] -Input [7]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60] -Condition : ((isnotnull(cs_sold_date_sk#54) AND isnotnull(cs_item_sk#56)) AND isnotnull(cs_bill_customer_sk#55)) - -(43) Scan parquet default.catalog_returns -Output [2]: [cr_item_sk#61, cr_order_number#62] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] -ReadSchema: struct - -(44) ColumnarToRow [codegen id : 8] -Input [2]: [cr_item_sk#61, cr_order_number#62] - -(45) Filter [codegen id : 8] -Input [2]: [cr_item_sk#61, cr_order_number#62] -Condition : (isnotnull(cr_order_number#62) AND isnotnull(cr_item_sk#61)) - -(46) BroadcastExchange -Input [2]: [cr_item_sk#61, cr_order_number#62] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#63] - -(47) BroadcastHashJoin [codegen id : 10] -Left keys [2]: [cs_order_number#57, cs_item_sk#56] -Right keys [2]: [cr_order_number#62, cr_item_sk#61] -Join condition: None - -(48) Filter [codegen id : 10] -Input [9]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, cr_item_sk#61, cr_order_number#62] -Condition : isnull(cr_order_number#62) - -(49) Project [codegen id : 10] -Output [6]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60] -Input [9]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, cr_item_sk#61, cr_order_number#62] - -(50) ReusedExchange [Reuses operator id: 14] -Output [2]: [d_date_sk#11, d_year#12] - -(51) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#54] -Right keys [1]: [d_date_sk#11] -Join condition: None - -(52) Project [codegen id : 10] -Output [6]: [cs_bill_customer_sk#55, cs_item_sk#56, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, d_year#12] -Input [8]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, d_date_sk#11, d_year#12] - -(53) HashAggregate [codegen id : 10] -Input [6]: [cs_bill_customer_sk#55, cs_item_sk#56, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, d_year#12] -Keys [3]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55] -Functions [3]: [partial_sum(cast(cs_quantity#58 as bigint)), partial_sum(UnscaledValue(cs_wholesale_cost#59)), partial_sum(UnscaledValue(cs_sales_price#60))] -Aggregate Attributes [3]: [sum#64, sum#65, sum#66] -Results [6]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55, sum#67, sum#68, sum#69] - -(54) Exchange -Input [6]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55, sum#67, sum#68, sum#69] -Arguments: hashpartitioning(d_year#12, cs_item_sk#56, cs_bill_customer_sk#55, 5), true, [id=#70] - -(55) HashAggregate [codegen id : 11] -Input [6]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55, sum#67, sum#68, sum#69] -Keys [3]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55] -Functions [3]: [sum(cast(cs_quantity#58 as bigint)), sum(UnscaledValue(cs_wholesale_cost#59)), sum(UnscaledValue(cs_sales_price#60))] -Aggregate Attributes [3]: [sum(cast(cs_quantity#58 as bigint))#71, sum(UnscaledValue(cs_wholesale_cost#59))#72, sum(UnscaledValue(cs_sales_price#60))#73] -Results [6]: [d_year#12 AS cs_sold_year#74, cs_item_sk#56, cs_bill_customer_sk#55 AS cs_customer_sk#75, sum(cast(cs_quantity#58 as bigint))#71 AS cs_qty#76, MakeDecimal(sum(UnscaledValue(cs_wholesale_cost#59))#72,17,2) AS cs_wc#77, MakeDecimal(sum(UnscaledValue(cs_sales_price#60))#73,17,2) AS cs_sp#78] - -(56) Filter [codegen id : 11] -Input [6]: [cs_sold_year#74, cs_item_sk#56, cs_customer_sk#75, cs_qty#76, cs_wc#77, cs_sp#78] -Condition : (coalesce(cs_qty#76, 0) > 0) - -(57) BroadcastExchange -Input [6]: [cs_sold_year#74, cs_item_sk#56, cs_customer_sk#75, cs_qty#76, cs_wc#77, cs_sp#78] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#79] - -(58) BroadcastHashJoin [codegen id : 12] -Left keys [3]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3] -Right keys [3]: [cs_sold_year#74, cs_item_sk#56, cs_customer_sk#75] -Join condition: None - -(59) Project [codegen id : 12] -Output [12]: [round((cast(ss_qty#25 as double) / cast(coalesce((ws_qty#50 + cs_qty#76), 1) as double)), 2) AS ratio#80, ss_qty#25 AS store_qty#81, ss_wc#26 AS store_wholesale_cost#82, ss_sp#27 AS store_sales_price#83, (coalesce(ws_qty#50, 0) + coalesce(cs_qty#76, 0)) AS other_chan_qty#84, CheckOverflow((promote_precision(cast(coalesce(ws_wc#51, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#77, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS other_chan_wholesale_cost#85, CheckOverflow((promote_precision(cast(coalesce(ws_sp#52, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#78, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS other_chan_sales_price#86, ss_qty#25, ss_wc#26, ss_sp#27, ws_qty#50, cs_qty#76] -Input [15]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3, ss_qty#25, ss_wc#26, ss_sp#27, ws_qty#50, ws_wc#51, ws_sp#52, cs_sold_year#74, cs_item_sk#56, cs_customer_sk#75, cs_qty#76, cs_wc#77, cs_sp#78] - -(60) TakeOrderedAndProject -Input [12]: [ratio#80, store_qty#81, store_wholesale_cost#82, store_sales_price#83, other_chan_qty#84, other_chan_wholesale_cost#85, other_chan_sales_price#86, ss_qty#25, ss_wc#26, ss_sp#27, ws_qty#50, cs_qty#76] -Arguments: 100, [ratio#80 ASC NULLS FIRST, ss_qty#25 DESC NULLS LAST, ss_wc#26 DESC NULLS LAST, ss_sp#27 DESC NULLS LAST, other_chan_qty#84 ASC NULLS FIRST, other_chan_wholesale_cost#85 ASC NULLS FIRST, other_chan_sales_price#86 ASC NULLS FIRST, round((cast(ss_qty#25 as double) / cast(coalesce((ws_qty#50 + cs_qty#76), 1) as double)), 2) ASC NULLS FIRST], [ratio#80, store_qty#81, store_wholesale_cost#82, store_sales_price#83, other_chan_qty#84, other_chan_wholesale_cost#85, other_chan_sales_price#86] - +TakeOrderedAndProject(limit=100, orderBy=[ratio#1 ASC NULLS FIRST,ss_qty#2 DESC NULLS LAST,ss_wc#3 DESC NULLS LAST,ss_sp#4 DESC NULLS LAST,other_chan_qty#5 ASC NULLS FIRST,other_chan_wholesale_cost#6 ASC NULLS FIRST,other_chan_sales_price#7 ASC NULLS FIRST,round((cast(ss_qty#2 as double) / cast(coalesce((ws_qty#8 + cs_qty#9), 1) as double)), 2) ASC NULLS FIRST], output=[ratio#1,store_qty#10,store_wholesale_cost#11,store_sales_price#12,other_chan_qty#5,other_chan_wholesale_cost#6,other_chan_sales_price#7]) ++- *(12) Project [round((cast(ss_qty#2 as double) / cast(coalesce((ws_qty#8 + cs_qty#9), 1) as double)), 2) AS ratio#1, ss_qty#2 AS store_qty#10, ss_wc#3 AS store_wholesale_cost#11, ss_sp#4 AS store_sales_price#12, (coalesce(ws_qty#8, 0) + coalesce(cs_qty#9, 0)) AS other_chan_qty#5, CheckOverflow((promote_precision(cast(coalesce(ws_wc#13, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#14, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_wholesale_cost#6, CheckOverflow((promote_precision(cast(coalesce(ws_sp#15, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#16, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_sales_price#7, ss_wc#3, cs_qty#9, ss_sp#4, ss_qty#2, ws_qty#8] + +- *(12) BroadcastHashJoin [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19], [cs_sold_year#20, cs_item_sk#21, cs_customer_sk#22], Inner, BuildRight + :- *(12) Project [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19, ss_qty#2, ss_wc#3, ss_sp#4, ws_qty#8, ws_wc#13, ws_sp#15] + : +- *(12) BroadcastHashJoin [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19], [ws_sold_year#23, ws_item_sk#24, ws_customer_sk#25], Inner, BuildRight + : :- *(12) HashAggregate(keys=[d_year#26, ss_item_sk#18, ss_customer_sk#19], functions=[sum(cast(ss_quantity#27 as bigint)), sum(UnscaledValue(ss_wholesale_cost#28)), sum(UnscaledValue(ss_sales_price#29))]) + : : +- Exchange hashpartitioning(d_year#26, ss_item_sk#18, ss_customer_sk#19, 5) + : : +- *(3) HashAggregate(keys=[d_year#26, ss_item_sk#18, ss_customer_sk#19], functions=[partial_sum(cast(ss_quantity#27 as bigint)), partial_sum(UnscaledValue(ss_wholesale_cost#28)), partial_sum(UnscaledValue(ss_sales_price#29))]) + : : +- *(3) Project [ss_item_sk#18, ss_customer_sk#19, ss_quantity#27, ss_wholesale_cost#28, ss_sales_price#29, d_year#26] + : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#30], [d_date_sk#31], Inner, BuildRight + : : :- *(3) Project [ss_sold_date_sk#30, ss_item_sk#18, ss_customer_sk#19, ss_quantity#27, ss_wholesale_cost#28, ss_sales_price#29] + : : : +- *(3) Filter isnull(sr_ticket_number#32) + : : : +- *(3) BroadcastHashJoin [cast(ss_ticket_number#33 as bigint), cast(ss_item_sk#18 as bigint)], [sr_ticket_number#32, sr_item_sk#34], LeftOuter, BuildRight + : : : :- *(3) Project [ss_sold_date_sk#30, ss_item_sk#18, ss_customer_sk#19, ss_ticket_number#33, ss_quantity#27, ss_wholesale_cost#28, ss_sales_price#29] + : : : : +- *(3) Filter ((isnotnull(ss_sold_date_sk#30) && isnotnull(ss_item_sk#18)) && isnotnull(ss_customer_sk#19)) + : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#30,ss_item_sk#18,ss_customer_sk#19,ss_ticket_number#33,ss_quantity#27,ss_wholesale_cost#28,ss_sales_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [d_date_sk#31, d_year#26] + : : +- *(2) Filter ((isnotnull(d_year#26) && (d_year#26 = 2000)) && isnotnull(d_date_sk#31)) + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#31,d_year#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) + : +- *(7) Filter (coalesce(ws_qty#8, 0) > 0) + : +- *(7) HashAggregate(keys=[d_year#26, ws_item_sk#24, ws_bill_customer_sk#35], functions=[sum(cast(ws_quantity#36 as bigint)), sum(UnscaledValue(ws_wholesale_cost#37)), sum(UnscaledValue(ws_sales_price#38))]) + : +- Exchange hashpartitioning(d_year#26, ws_item_sk#24, ws_bill_customer_sk#35, 5) + : +- *(6) HashAggregate(keys=[d_year#26, ws_item_sk#24, ws_bill_customer_sk#35], functions=[partial_sum(cast(ws_quantity#36 as bigint)), partial_sum(UnscaledValue(ws_wholesale_cost#37)), partial_sum(UnscaledValue(ws_sales_price#38))]) + : +- *(6) Project [ws_item_sk#24, ws_bill_customer_sk#35, ws_quantity#36, ws_wholesale_cost#37, ws_sales_price#38, d_year#26] + : +- *(6) BroadcastHashJoin [ws_sold_date_sk#39], [d_date_sk#31], Inner, BuildRight + : :- *(6) Project [ws_sold_date_sk#39, ws_item_sk#24, ws_bill_customer_sk#35, ws_quantity#36, ws_wholesale_cost#37, ws_sales_price#38] + : : +- *(6) Filter isnull(wr_order_number#40) + : : +- *(6) BroadcastHashJoin [cast(ws_order_number#41 as bigint), cast(ws_item_sk#24 as bigint)], [wr_order_number#40, wr_item_sk#42], LeftOuter, BuildRight + : : :- *(6) Project [ws_sold_date_sk#39, ws_item_sk#24, ws_bill_customer_sk#35, ws_order_number#41, ws_quantity#36, ws_wholesale_cost#37, ws_sales_price#38] + : : : +- *(6) Filter ((isnotnull(ws_sold_date_sk#39) && isnotnull(ws_item_sk#24)) && isnotnull(ws_bill_customer_sk#35)) + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#39,ws_item_sk#24,ws_bill_customer_sk#35,ws_order_number#41,ws_quantity#36,ws_wholesale_cost#37,ws_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#31, d_year#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) + +- *(11) Filter (coalesce(cs_qty#9, 0) > 0) + +- *(11) HashAggregate(keys=[d_year#26, cs_item_sk#21, cs_bill_customer_sk#43], functions=[sum(cast(cs_quantity#44 as bigint)), sum(UnscaledValue(cs_wholesale_cost#45)), sum(UnscaledValue(cs_sales_price#46))]) + +- Exchange hashpartitioning(d_year#26, cs_item_sk#21, cs_bill_customer_sk#43, 5) + +- *(10) HashAggregate(keys=[d_year#26, cs_item_sk#21, cs_bill_customer_sk#43], functions=[partial_sum(cast(cs_quantity#44 as bigint)), partial_sum(UnscaledValue(cs_wholesale_cost#45)), partial_sum(UnscaledValue(cs_sales_price#46))]) + +- *(10) Project [cs_bill_customer_sk#43, cs_item_sk#21, cs_quantity#44, cs_wholesale_cost#45, cs_sales_price#46, d_year#26] + +- *(10) BroadcastHashJoin [cs_sold_date_sk#47], [d_date_sk#31], Inner, BuildRight + :- *(10) Project [cs_sold_date_sk#47, cs_bill_customer_sk#43, cs_item_sk#21, cs_quantity#44, cs_wholesale_cost#45, cs_sales_price#46] + : +- *(10) Filter isnull(cr_order_number#48) + : +- *(10) BroadcastHashJoin [cs_order_number#49, cs_item_sk#21], [cr_order_number#48, cr_item_sk#50], LeftOuter, BuildRight + : :- *(10) Project [cs_sold_date_sk#47, cs_bill_customer_sk#43, cs_item_sk#21, cs_order_number#49, cs_quantity#44, cs_wholesale_cost#45, cs_sales_price#46] + : : +- *(10) Filter ((isnotnull(cs_sold_date_sk#47) && isnotnull(cs_item_sk#21)) && isnotnull(cs_bill_customer_sk#43)) + : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#47,cs_bill_customer_sk#43,cs_item_sk#21,cs_order_number#49,cs_quantity#44,cs_wholesale_cost#45,cs_sales_price#46] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#31, d_year#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/simplified.txt index 977070283..fc8bc4f3f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/simplified.txt @@ -1,88 +1,81 @@ -TakeOrderedAndProject [ratio,ss_qty,ss_wc,ss_sp,other_chan_qty,other_chan_wholesale_cost,other_chan_sales_price,ws_qty,cs_qty,store_qty,store_wholesale_cost,store_sales_price] - WholeStageCodegen (12) - Project [ss_qty,ws_qty,cs_qty,ss_wc,ss_sp,ws_wc,cs_wc,ws_sp,cs_sp] - BroadcastHashJoin [ss_sold_year,ss_item_sk,ss_customer_sk,cs_sold_year,cs_item_sk,cs_customer_sk] - Project [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_qty,ws_wc,ws_sp] - BroadcastHashJoin [ss_sold_year,ss_item_sk,ss_customer_sk,ws_sold_year,ws_item_sk,ws_customer_sk] - HashAggregate [d_year,ss_item_sk,ss_customer_sk,sum,sum,sum] [sum(cast(ss_quantity as bigint)),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_sales_price)),ss_sold_year,ss_qty,ss_wc,ss_sp,sum,sum,sum] +TakeOrderedAndProject [cs_qty,other_chan_qty,other_chan_sales_price,other_chan_wholesale_cost,ratio,ss_qty,ss_sp,ss_wc,store_qty,store_sales_price,store_wholesale_cost,ws_qty] + WholeStageCodegen + Project [cs_qty,cs_sp,cs_wc,ss_qty,ss_sp,ss_wc,ws_qty,ws_sp,ws_wc] + BroadcastHashJoin [cs_customer_sk,cs_item_sk,cs_sold_year,ss_customer_sk,ss_item_sk,ss_sold_year] + Project [ss_customer_sk,ss_item_sk,ss_qty,ss_sold_year,ss_sp,ss_wc,ws_qty,ws_sp,ws_wc] + BroadcastHashJoin [ss_customer_sk,ss_item_sk,ss_sold_year,ws_customer_sk,ws_item_sk,ws_sold_year] + HashAggregate [d_year,ss_customer_sk,ss_item_sk,sum,sum,sum,sum(UnscaledValue(ss_sales_price)),sum(UnscaledValue(ss_wholesale_cost)),sum(cast(ss_quantity as bigint))] [ss_qty,ss_sold_year,ss_sp,ss_wc,sum,sum,sum,sum(UnscaledValue(ss_sales_price)),sum(UnscaledValue(ss_wholesale_cost)),sum(cast(ss_quantity as bigint))] InputAdapter - Exchange [d_year,ss_item_sk,ss_customer_sk] #1 - WholeStageCodegen (3) - HashAggregate [d_year,ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price] [sum,sum,sum,sum,sum,sum] - Project [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price] + Exchange [d_year,ss_customer_sk,ss_item_sk] #1 + WholeStageCodegen + HashAggregate [d_year,ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_wholesale_cost,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [d_year,ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_wholesale_cost] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost] Filter [sr_ticket_number] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_ticket_number,ss_wholesale_cost] + Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_ticket_number,ss_wholesale_cost] [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_ticket_number,ss_wholesale_cost] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [sr_ticket_number,sr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] + WholeStageCodegen + Project [sr_item_sk,sr_ticket_number] + Filter [sr_item_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #4 - WholeStageCodegen (7) + WholeStageCodegen Filter [ws_qty] - HashAggregate [d_year,ws_item_sk,ws_bill_customer_sk,sum,sum,sum] [sum(cast(ws_quantity as bigint)),sum(UnscaledValue(ws_wholesale_cost)),sum(UnscaledValue(ws_sales_price)),ws_sold_year,ws_customer_sk,ws_qty,ws_wc,ws_sp,sum,sum,sum] + HashAggregate [d_year,sum,sum,sum,sum(UnscaledValue(ws_sales_price)),sum(UnscaledValue(ws_wholesale_cost)),sum(cast(ws_quantity as bigint)),ws_bill_customer_sk,ws_item_sk] [sum,sum,sum,sum(UnscaledValue(ws_sales_price)),sum(UnscaledValue(ws_wholesale_cost)),sum(cast(ws_quantity as bigint)),ws_customer_sk,ws_qty,ws_sold_year,ws_sp,ws_wc] InputAdapter - Exchange [d_year,ws_item_sk,ws_bill_customer_sk] #5 - WholeStageCodegen (6) - HashAggregate [d_year,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price] [sum,sum,sum,sum,sum,sum] - Project [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price,d_year] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price] + Exchange [d_year,ws_bill_customer_sk,ws_item_sk] #5 + WholeStageCodegen + HashAggregate [d_year,sum,sum,sum,sum,sum,sum,ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_wholesale_cost] [sum,sum,sum,sum,sum,sum] + Project [d_year,ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_wholesale_cost] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] Filter [wr_order_number] - BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - Filter [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_order_number,ws_quantity,ws_wholesale_cost,ws_sales_price] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [ws_bill_customer_sk,ws_item_sk,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] + Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] [ws_bill_customer_sk,ws_item_sk,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] InputAdapter BroadcastExchange #6 - WholeStageCodegen (4) - Filter [wr_order_number,wr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_item_sk,wr_order_number] + WholeStageCodegen + Project [wr_item_sk,wr_order_number] + Filter [wr_item_sk,wr_order_number] + Scan parquet default.web_returns [wr_item_sk,wr_order_number] [wr_item_sk,wr_order_number] InputAdapter - ReusedExchange [d_date_sk,d_year] #3 + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 InputAdapter BroadcastExchange #7 - WholeStageCodegen (11) + WholeStageCodegen Filter [cs_qty] - HashAggregate [d_year,cs_item_sk,cs_bill_customer_sk,sum,sum,sum] [sum(cast(cs_quantity as bigint)),sum(UnscaledValue(cs_wholesale_cost)),sum(UnscaledValue(cs_sales_price)),cs_sold_year,cs_customer_sk,cs_qty,cs_wc,cs_sp,sum,sum,sum] + HashAggregate [cs_bill_customer_sk,cs_item_sk,d_year,sum,sum,sum,sum(UnscaledValue(cs_sales_price)),sum(UnscaledValue(cs_wholesale_cost)),sum(cast(cs_quantity as bigint))] [cs_customer_sk,cs_qty,cs_sold_year,cs_sp,cs_wc,sum,sum,sum,sum(UnscaledValue(cs_sales_price)),sum(UnscaledValue(cs_wholesale_cost)),sum(cast(cs_quantity as bigint))] InputAdapter - Exchange [d_year,cs_item_sk,cs_bill_customer_sk] #8 - WholeStageCodegen (10) - HashAggregate [d_year,cs_item_sk,cs_bill_customer_sk,cs_quantity,cs_wholesale_cost,cs_sales_price] [sum,sum,sum,sum,sum,sum] - Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,d_year] + Exchange [cs_bill_customer_sk,cs_item_sk,d_year] #8 + WholeStageCodegen + HashAggregate [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_wholesale_cost,d_year,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_wholesale_cost,d_year] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] Filter [cr_order_number] - BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Filter [cs_sold_date_sk,cs_item_sk,cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] InputAdapter BroadcastExchange #9 - WholeStageCodegen (8) - Filter [cr_order_number,cr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] + WholeStageCodegen + Project [cr_item_sk,cr_order_number] + Filter [cr_item_sk,cr_order_number] + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] [cr_item_sk,cr_order_number] InputAdapter - ReusedExchange [d_date_sk,d_year] #3 + ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/explain.txt index 63cde7268..8e43f862c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/explain.txt @@ -1,193 +1,32 @@ == Physical Plan == -TakeOrderedAndProject (34) -+- * Project (33) - +- * BroadcastHashJoin Inner BuildRight (32) - :- * HashAggregate (27) - : +- Exchange (26) - : +- * HashAggregate (25) - : +- * Project (24) - : +- * BroadcastHashJoin Inner BuildRight (23) - : :- * Project (17) - : : +- * BroadcastHashJoin Inner BuildRight (16) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (15) - : : +- * Project (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.store (11) - : +- BroadcastExchange (22) - : +- * Project (21) - : +- * Filter (20) - : +- * ColumnarToRow (19) - : +- Scan parquet default.household_demographics (18) - +- BroadcastExchange (31) - +- * Filter (30) - +- * ColumnarToRow (29) - +- Scan parquet default.customer (28) - - -(1) Scan parquet default.store_sales -Output [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] - -(3) Filter [codegen id : 4] -Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] -Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) - -(4) Scan parquet default.date_dim -Output [3]: [d_date_sk#9, d_year#10, d_dow#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_dow), EqualTo(d_dow,1), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#9, d_year#10, d_dow#11] - -(6) Filter [codegen id : 1] -Input [3]: [d_date_sk#9, d_year#10, d_dow#11] -Condition : (((isnotnull(d_dow#11) AND (d_dow#11 = 1)) AND d_year#10 IN (1999,2000,2001)) AND isnotnull(d_date_sk#9)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#9] -Input [3]: [d_date_sk#9, d_year#10, d_dow#11] - -(8) BroadcastExchange -Input [1]: [d_date_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#9] -Join condition: None - -(10) Project [codegen id : 4] -Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] -Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, d_date_sk#9] - -(11) Scan parquet default.store -Output [3]: [s_store_sk#13, s_number_employees#14, s_city#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_number_employees), GreaterThanOrEqual(s_number_employees,200), LessThanOrEqual(s_number_employees,295), IsNotNull(s_store_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [3]: [s_store_sk#13, s_number_employees#14, s_city#15] - -(13) Filter [codegen id : 2] -Input [3]: [s_store_sk#13, s_number_employees#14, s_city#15] -Condition : (((isnotnull(s_number_employees#14) AND (s_number_employees#14 >= 200)) AND (s_number_employees#14 <= 295)) AND isnotnull(s_store_sk#13)) - -(14) Project [codegen id : 2] -Output [2]: [s_store_sk#13, s_city#15] -Input [3]: [s_store_sk#13, s_number_employees#14, s_city#15] - -(15) BroadcastExchange -Input [2]: [s_store_sk#13, s_city#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] - -(16) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#5] -Right keys [1]: [s_store_sk#13] -Join condition: None - -(17) Project [codegen id : 4] -Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#15] -Input [9]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_store_sk#13, s_city#15] - -(18) Scan parquet default.household_demographics -Output [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] -Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [Or(EqualTo(hd_dep_count,6),GreaterThan(hd_vehicle_count,2)), IsNotNull(hd_demo_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 3] -Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] - -(20) Filter [codegen id : 3] -Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] -Condition : (((hd_dep_count#18 = 6) OR (hd_vehicle_count#19 > 2)) AND isnotnull(hd_demo_sk#17)) - -(21) Project [codegen id : 3] -Output [1]: [hd_demo_sk#17] -Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] - -(22) BroadcastExchange -Input [1]: [hd_demo_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] - -(23) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_hdemo_sk#3] -Right keys [1]: [hd_demo_sk#17] -Join condition: None - -(24) Project [codegen id : 4] -Output [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#15] -Input [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#15, hd_demo_sk#17] - -(25) HashAggregate [codegen id : 4] -Input [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#15] -Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15] -Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#7)), partial_sum(UnscaledValue(ss_net_profit#8))] -Aggregate Attributes [2]: [sum#21, sum#22] -Results [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15, sum#23, sum#24] - -(26) Exchange -Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15, sum#23, sum#24] -Arguments: hashpartitioning(ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15, 5), true, [id=#25] - -(27) HashAggregate [codegen id : 6] -Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15, sum#23, sum#24] -Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15] -Functions [2]: [sum(UnscaledValue(ss_coupon_amt#7)), sum(UnscaledValue(ss_net_profit#8))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#7))#26, sum(UnscaledValue(ss_net_profit#8))#27] -Results [5]: [ss_ticket_number#6, ss_customer_sk#2, s_city#15, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#7))#26,17,2) AS amt#28, MakeDecimal(sum(UnscaledValue(ss_net_profit#8))#27,17,2) AS profit#29] - -(28) Scan parquet default.customer -Output [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk)] -ReadSchema: struct - -(29) ColumnarToRow [codegen id : 5] -Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] - -(30) Filter [codegen id : 5] -Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] -Condition : isnotnull(c_customer_sk#30) - -(31) BroadcastExchange -Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] - -(32) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#30] -Join condition: None - -(33) Project [codegen id : 6] -Output [7]: [c_last_name#32, c_first_name#31, substr(s_city#15, 1, 30) AS substr(s_city, 1, 30)#34, ss_ticket_number#6, amt#28, profit#29, s_city#15] -Input [8]: [ss_ticket_number#6, ss_customer_sk#2, s_city#15, amt#28, profit#29, c_customer_sk#30, c_first_name#31, c_last_name#32] - -(34) TakeOrderedAndProject -Input [7]: [c_last_name#32, c_first_name#31, substr(s_city, 1, 30)#34, ss_ticket_number#6, amt#28, profit#29, s_city#15] -Arguments: 100, [c_last_name#32 ASC NULLS FIRST, c_first_name#31 ASC NULLS FIRST, substr(s_city#15, 1, 30) ASC NULLS FIRST, profit#29 ASC NULLS FIRST], [c_last_name#32, c_first_name#31, substr(s_city, 1, 30)#34, ss_ticket_number#6, amt#28, profit#29] - +TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_name#2 ASC NULLS FIRST,substring(s_city#3, 1, 30) ASC NULLS FIRST,profit#4 ASC NULLS FIRST], output=[c_last_name#1,c_first_name#2,substring(s_city, 1, 30)#5,ss_ticket_number#6,amt#7,profit#4]) ++- *(6) Project [c_last_name#1, c_first_name#2, substring(s_city#3, 1, 30) AS substring(s_city, 1, 30)#5, ss_ticket_number#6, amt#7, profit#4, s_city#3] + +- *(6) BroadcastHashJoin [ss_customer_sk#8], [c_customer_sk#9], Inner, BuildRight + :- *(6) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3], functions=[sum(UnscaledValue(ss_coupon_amt#11)), sum(UnscaledValue(ss_net_profit#12))]) + : +- Exchange hashpartitioning(ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3, 5) + : +- *(4) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3], functions=[partial_sum(UnscaledValue(ss_coupon_amt#11)), partial_sum(UnscaledValue(ss_net_profit#12))]) + : +- *(4) Project [ss_customer_sk#8, ss_addr_sk#10, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12, s_city#3] + : +- *(4) BroadcastHashJoin [ss_hdemo_sk#13], [hd_demo_sk#14], Inner, BuildRight + : :- *(4) Project [ss_customer_sk#8, ss_hdemo_sk#13, ss_addr_sk#10, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12, s_city#3] + : : +- *(4) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight + : : :- *(4) Project [ss_customer_sk#8, ss_hdemo_sk#13, ss_addr_sk#10, ss_store_sk#15, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12] + : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + : : : :- *(4) Project [ss_sold_date_sk#17, ss_customer_sk#8, ss_hdemo_sk#13, ss_addr_sk#10, ss_store_sk#15, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12] + : : : : +- *(4) Filter (((isnotnull(ss_sold_date_sk#17) && isnotnull(ss_store_sk#15)) && isnotnull(ss_hdemo_sk#13)) && isnotnull(ss_customer_sk#8)) + : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_customer_sk#8,ss_hdemo_sk#13,ss_addr_sk#10,ss_store_sk#15,ss_ticket_number#6,ss_coupon_amt#11,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [s_store_sk#16, s_city#3] + : : +- *(2) Filter (((isnotnull(s_number_employees#21) && (s_number_employees#21 >= 200)) && (s_number_employees#21 <= 295)) && isnotnull(s_store_sk#16)) + : : +- *(2) FileScan parquet default.store[s_store_sk#16,s_number_employees#21,s_city#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_number_employees), GreaterThanOrEqual(s_number_employees,200), LessThanOrEqual(s_num..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [hd_demo_sk#14] + : +- *(3) Filter (((hd_dep_count#22 = 6) || (hd_vehicle_count#23 > 2)) && isnotnull(hd_demo_sk#14)) + : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#14,hd_dep_count#22,hd_vehicle_count#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,6),GreaterThan(hd_vehicle_count,2)), IsNotNull(hd_demo_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [c_customer_sk#9, c_first_name#2, c_last_name#1] + +- *(5) Filter isnotnull(c_customer_sk#9) + +- *(5) FileScan parquet default.customer[c_customer_sk#9,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/simplified.txt index 5b6177488..01ce92730 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/simplified.txt @@ -1,50 +1,42 @@ -TakeOrderedAndProject [c_last_name,c_first_name,s_city,profit,substr(s_city, 1, 30),ss_ticket_number,amt] - WholeStageCodegen (6) - Project [c_last_name,c_first_name,s_city,ss_ticket_number,amt,profit] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city,sum,sum] [sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit)),amt,profit,sum,sum] +TakeOrderedAndProject [amt,c_first_name,c_last_name,profit,s_city,ss_ticket_number,substring(s_city, 1, 30)] + WholeStageCodegen + Project [amt,c_first_name,c_last_name,profit,s_city,ss_ticket_number] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + HashAggregate [s_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] [amt,profit,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] InputAdapter - Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city] #1 - WholeStageCodegen (4) - HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city,ss_coupon_amt,ss_net_profit] [sum,sum,sum,sum] - Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_city] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_city] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_store_sk,ss_hdemo_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + Exchange [s_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #1 + WholeStageCodegen + HashAggregate [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number,sum,sum,sum,sum] [sum,sum,sum,sum] + Project [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_ticket_number] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) + WholeStageCodegen Project [d_date_sk] - Filter [d_dow,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_dow] + Filter [d_date_sk,d_dow,d_year] + Scan parquet default.date_dim [d_date_sk,d_dow,d_year] [d_date_sk,d_dow,d_year] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Project [s_store_sk,s_city] + WholeStageCodegen + Project [s_city,s_store_sk] Filter [s_number_employees,s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_number_employees,s_city] + Scan parquet default.store [s_city,s_number_employees,s_store_sk] [s_city,s_number_employees,s_store_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) + WholeStageCodegen Project [hd_demo_sk] - Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] InputAdapter BroadcastExchange #5 - WholeStageCodegen (5) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + WholeStageCodegen + Project [c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_sk] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/explain.txt index e9f9a92a0..482275fd7 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/explain.txt @@ -1,272 +1,45 @@ == Physical Plan == -TakeOrderedAndProject (47) -+- * HashAggregate (46) - +- Exchange (45) - +- * HashAggregate (44) - +- * Project (43) - +- * BroadcastHashJoin Inner BuildRight (42) - :- * Project (16) - : +- * BroadcastHashJoin Inner BuildRight (15) - : :- * Project (10) - : : +- * BroadcastHashJoin Inner BuildRight (9) - : : :- * Filter (3) - : : : +- * ColumnarToRow (2) - : : : +- Scan parquet default.store_sales (1) - : : +- BroadcastExchange (8) - : : +- * Project (7) - : : +- * Filter (6) - : : +- * ColumnarToRow (5) - : : +- Scan parquet default.date_dim (4) - : +- BroadcastExchange (14) - : +- * Filter (13) - : +- * ColumnarToRow (12) - : +- Scan parquet default.store (11) - +- BroadcastExchange (41) - +- * HashAggregate (40) - +- Exchange (39) - +- * HashAggregate (38) - +- * Project (37) - +- * BroadcastHashJoin LeftSemi BuildRight (36) - :- * Filter (19) - : +- * ColumnarToRow (18) - : +- Scan parquet default.customer_address (17) - +- BroadcastExchange (35) - +- * Project (34) - +- * Filter (33) - +- * HashAggregate (32) - +- Exchange (31) - +- * HashAggregate (30) - +- * Project (29) - +- * BroadcastHashJoin Inner BuildRight (28) - :- * Filter (22) - : +- * ColumnarToRow (21) - : +- Scan parquet default.customer_address (20) - +- BroadcastExchange (27) - +- * Project (26) - +- * Filter (25) - +- * ColumnarToRow (24) - +- Scan parquet default.customer (23) - - -(1) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 8] -Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] - -(3) Filter [codegen id : 8] -Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] -Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) - -(4) Scan parquet default.date_dim -Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,1998), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] - -(6) Filter [codegen id : 1] -Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] -Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 2)) AND (d_year#5 = 1998)) AND isnotnull(d_date_sk#4)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#4] -Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] - -(8) BroadcastExchange -Input [1]: [d_date_sk#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] - -(9) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#4] -Join condition: None - -(10) Project [codegen id : 8] -Output [2]: [ss_store_sk#2, ss_net_profit#3] -Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3, d_date_sk#4] - -(11) Scan parquet default.store -Output [3]: [s_store_sk#8, s_store_name#9, s_zip#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [3]: [s_store_sk#8, s_store_name#9, s_zip#10] - -(13) Filter [codegen id : 2] -Input [3]: [s_store_sk#8, s_store_name#9, s_zip#10] -Condition : (isnotnull(s_store_sk#8) AND isnotnull(s_zip#10)) - -(14) BroadcastExchange -Input [3]: [s_store_sk#8, s_store_name#9, s_zip#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] - -(15) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_store_sk#2] -Right keys [1]: [s_store_sk#8] -Join condition: None - -(16) Project [codegen id : 8] -Output [3]: [ss_net_profit#3, s_store_name#9, s_zip#10] -Input [5]: [ss_store_sk#2, ss_net_profit#3, s_store_sk#8, s_store_name#9, s_zip#10] - -(17) Scan parquet default.customer_address -Output [1]: [ca_zip#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -ReadSchema: struct - -(18) ColumnarToRow [codegen id : 6] -Input [1]: [ca_zip#12] - -(19) Filter [codegen id : 6] -Input [1]: [ca_zip#12] -Condition : (substr(ca_zip#12, 1, 5) INSET (56910,69952,63792,39371,74351,11101,25003,97189,57834,73134,62377,51200,32754,22752,86379,14171,91110,40162,98569,28709,13394,66162,25733,25782,26065,18383,51949,87343,50298,83849,33786,64528,23470,67030,46136,25280,46820,77721,99076,18426,31880,17871,98235,45748,49156,18652,72013,51622,43848,78567,41248,13695,44165,67853,54917,53179,64034,10567,71791,68908,55565,59402,64147,85816,57855,61547,27700,68100,28810,58263,15723,83933,51103,58058,90578,82276,81096,81426,96451,77556,38607,76638,18906,62971,57047,48425,35576,11928,30625,83444,73520,51650,57647,60099,30122,94983,24128,10445,41368,26233,26859,21756,24676,19849,36420,38193,58470,39127,13595,87501,24317,15455,69399,98025,81019,48033,11376,39516,67875,92712,14867,38122,29741,42961,30469,51211,56458,15559,16021,33123,33282,33515,72823,54601,76698,56240,72175,60279,20004,68806,72325,28488,43933,50412,45200,22246,78668,79777,96765,67301,73273,49448,82636,23932,47305,29839,39192,18799,61265,37125,58943,64457,88424,24610,84935,89360,68893,30431,28898,10336,90257,59166,46081,26105,96888,36634,86284,35258,39972,22927,73241,53268,24206,27385,99543,31671,14663,30903,39861,24996,63089,88086,83921,21076,67897,66708,45721,60576,25103,52867,30450,36233,30010,96576,73171,56571,56575,64544,13955,78451,43285,18119,16725,83041,76107,79994,54364,35942,56691,19769,63435,34102,18845,22744,13354,75691,45549,23968,31387,83144,13375,15765,28577,88190,19736,73650,37930,25989,83926,94898,51798,39736,22437,55253,38415,71256,18376,42029,25858,44438,19515,38935,51649,71954,15882,18767,63193,25486,49130,37126,40604,34425,17043,12305,11634,26653,94167,36446,10516,67473,66864,72425,63981,18842,22461,42666,47770,69035,70372,28587,45266,15371,15798,45375,90225,16807,31016,68014,21337,19505,50016,10144,84093,21286,19430,34322,91068,94945,72305,24671,58048,65084,28545,21195,20548,22245,77191,96976,48583,76231,15734,61810,11356,68621,68786,98359,41367,26689,69913,76614,68101,88885,50308,79077,18270,28915,29178,53672,62878,10390,14922,68341,56529,41766,68309,56616,15126,61860,97789,11489,45692,41918,72151,72550,27156,36495,70738,17879,53535,17920,68880,78890,35850,14089,58078,65164,27068,26231,13376,57665,32213,77610,87816,21309,15146,86198,91137,55307,67467,40558,94627,82136,22351,89091,20260,23006,91393,47537,62496,98294,18840,71286,81312,31029,70466,35458,14060,22685,28286,25631,19512,40081,63837,14328,35474,22152,76232,51061,86057,17183) AND isnotnull(substr(ca_zip#12, 1, 5))) - -(20) Scan parquet default.customer_address -Output [2]: [ca_address_sk#13, ca_zip#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk)] -ReadSchema: struct - -(21) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#13, ca_zip#12] - -(22) Filter [codegen id : 4] -Input [2]: [ca_address_sk#13, ca_zip#12] -Condition : isnotnull(ca_address_sk#13) - -(23) Scan parquet default.customer -Output [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_preferred_cust_flag), EqualTo(c_preferred_cust_flag,Y), IsNotNull(c_current_addr_sk)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 3] -Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] - -(25) Filter [codegen id : 3] -Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] -Condition : ((isnotnull(c_preferred_cust_flag#15) AND (c_preferred_cust_flag#15 = Y)) AND isnotnull(c_current_addr_sk#14)) - -(26) Project [codegen id : 3] -Output [1]: [c_current_addr_sk#14] -Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] - -(27) BroadcastExchange -Input [1]: [c_current_addr_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] - -(28) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ca_address_sk#13] -Right keys [1]: [c_current_addr_sk#14] -Join condition: None - -(29) Project [codegen id : 4] -Output [1]: [ca_zip#12] -Input [3]: [ca_address_sk#13, ca_zip#12, c_current_addr_sk#14] - -(30) HashAggregate [codegen id : 4] -Input [1]: [ca_zip#12] -Keys [1]: [ca_zip#12] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#17] -Results [2]: [ca_zip#12, count#18] - -(31) Exchange -Input [2]: [ca_zip#12, count#18] -Arguments: hashpartitioning(ca_zip#12, 5), true, [id=#19] - -(32) HashAggregate [codegen id : 5] -Input [2]: [ca_zip#12, count#18] -Keys [1]: [ca_zip#12] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#20] -Results [2]: [substr(ca_zip#12, 1, 5) AS ca_zip#21, count(1)#20 AS count(1)#22] - -(33) Filter [codegen id : 5] -Input [2]: [ca_zip#21, count(1)#22] -Condition : (count(1)#22 > 10) - -(34) Project [codegen id : 5] -Output [1]: [ca_zip#21] -Input [2]: [ca_zip#21, count(1)#22] - -(35) BroadcastExchange -Input [1]: [ca_zip#21] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true])),false), [id=#23] - -(36) BroadcastHashJoin [codegen id : 6] -Left keys [2]: [coalesce(substr(ca_zip#12, 1, 5), ), isnull(substr(ca_zip#12, 1, 5))] -Right keys [2]: [coalesce(ca_zip#21, ), isnull(ca_zip#21)] -Join condition: None - -(37) Project [codegen id : 6] -Output [1]: [substr(ca_zip#12, 1, 5) AS ca_zip#24] -Input [1]: [ca_zip#12] - -(38) HashAggregate [codegen id : 6] -Input [1]: [ca_zip#24] -Keys [1]: [ca_zip#24] -Functions: [] -Aggregate Attributes: [] -Results [1]: [ca_zip#24] - -(39) Exchange -Input [1]: [ca_zip#24] -Arguments: hashpartitioning(ca_zip#24, 5), true, [id=#25] - -(40) HashAggregate [codegen id : 7] -Input [1]: [ca_zip#24] -Keys [1]: [ca_zip#24] -Functions: [] -Aggregate Attributes: [] -Results [1]: [ca_zip#24] - -(41) BroadcastExchange -Input [1]: [ca_zip#24] -Arguments: HashedRelationBroadcastMode(List(substr(input[0, string, true], 1, 2)),false), [id=#26] - -(42) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [substr(s_zip#10, 1, 2)] -Right keys [1]: [substr(ca_zip#24, 1, 2)] -Join condition: None - -(43) Project [codegen id : 8] -Output [2]: [ss_net_profit#3, s_store_name#9] -Input [4]: [ss_net_profit#3, s_store_name#9, s_zip#10, ca_zip#24] - -(44) HashAggregate [codegen id : 8] -Input [2]: [ss_net_profit#3, s_store_name#9] -Keys [1]: [s_store_name#9] -Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [1]: [sum#27] -Results [2]: [s_store_name#9, sum#28] - -(45) Exchange -Input [2]: [s_store_name#9, sum#28] -Arguments: hashpartitioning(s_store_name#9, 5), true, [id=#29] - -(46) HashAggregate [codegen id : 9] -Input [2]: [s_store_name#9, sum#28] -Keys [1]: [s_store_name#9] -Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#30] -Results [2]: [s_store_name#9, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#30,17,2) AS sum(ss_net_profit)#31] - -(47) TakeOrderedAndProject -Input [2]: [s_store_name#9, sum(ss_net_profit)#31] -Arguments: 100, [s_store_name#9 ASC NULLS FIRST], [s_store_name#9, sum(ss_net_profit)#31] - +TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST], output=[s_store_name#1,sum(ss_net_profit)#2]) ++- *(9) HashAggregate(keys=[s_store_name#1], functions=[sum(UnscaledValue(ss_net_profit#3))]) + +- Exchange hashpartitioning(s_store_name#1, 5) + +- *(8) HashAggregate(keys=[s_store_name#1], functions=[partial_sum(UnscaledValue(ss_net_profit#3))]) + +- *(8) Project [ss_net_profit#3, s_store_name#1] + +- *(8) BroadcastHashJoin [substring(s_zip#4, 1, 2)], [substring(ca_zip#5, 1, 2)], Inner, BuildRight + :- *(8) Project [ss_net_profit#3, s_store_name#1, s_zip#4] + : +- *(8) BroadcastHashJoin [ss_store_sk#6], [s_store_sk#7], Inner, BuildRight + : :- *(8) Project [ss_store_sk#6, ss_net_profit#3] + : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight + : : :- *(8) Project [ss_sold_date_sk#8, ss_store_sk#6, ss_net_profit#3] + : : : +- *(8) Filter (isnotnull(ss_sold_date_sk#8) && isnotnull(ss_store_sk#6)) + : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#8,ss_store_sk#6,ss_net_profit#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [d_date_sk#9] + : : +- *(1) Filter ((((isnotnull(d_qoy#10) && isnotnull(d_year#11)) && (d_qoy#10 = 2)) && (d_year#11 = 1998)) && isnotnull(d_date_sk#9)) + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#9,d_year#11,d_qoy#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [s_store_sk#7, s_store_name#1, s_zip#4] + : +- *(2) Filter (isnotnull(s_store_sk#7) && isnotnull(s_zip#4)) + : +- *(2) FileScan parquet default.store[s_store_sk#7,s_store_name#1,s_zip#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(substring(input[0, string, true], 1, 2))) + +- *(7) HashAggregate(keys=[ca_zip#5], functions=[]) + +- Exchange hashpartitioning(ca_zip#5, 5) + +- *(6) HashAggregate(keys=[ca_zip#5], functions=[]) + +- *(6) BroadcastHashJoin [coalesce(ca_zip#5, )], [coalesce(ca_zip#12, )], LeftSemi, BuildRight, (ca_zip#5 <=> ca_zip#12) + :- *(6) Project [substring(ca_zip#13, 1, 5) AS ca_zip#5] + : +- *(6) Filter (substring(ca_zip#13, 1, 5) INSET (56910,69952,63792,39371,74351,11101,25003,97189,57834,73134,62377,51200,32754,22752,86379,14171,91110,40162,98569,28709,13394,66162,25733,25782,26065,18383,51949,87343,50298,83849,33786,64528,23470,67030,46136,25280,46820,77721,99076,18426,31880,17871,98235,45748,49156,18652,72013,51622,43848,78567,41248,13695,44165,67853,54917,53179,64034,10567,71791,68908,55565,59402,64147,85816,57855,61547,27700,68100,28810,58263,15723,83933,51103,58058,90578,82276,81096,81426,96451,77556,38607,76638,18906,62971,57047,48425,35576,11928,30625,83444,73520,51650,57647,60099,30122,94983,24128,10445,41368,26233,26859,21756,24676,19849,36420,38193,58470,39127,13595,87501,24317,15455,69399,98025,81019,48033,11376,39516,67875,92712,14867,38122,29741,42961,30469,51211,56458,15559,16021,33123,33282,33515,72823,54601,76698,56240,72175,60279,20004,68806,72325,28488,43933,50412,45200,22246,78668,79777,96765,67301,73273,49448,82636,23932,47305,29839,39192,18799,61265,37125,58943,64457,88424,24610,84935,89360,68893,30431,28898,10336,90257,59166,46081,26105,96888,36634,86284,35258,39972,22927,73241,53268,24206,27385,99543,31671,14663,30903,39861,24996,63089,88086,83921,21076,67897,66708,45721,60576,25103,52867,30450,36233,30010,96576,73171,56571,56575,64544,13955,78451,43285,18119,16725,83041,76107,79994,54364,35942,56691,19769,63435,34102,18845,22744,13354,75691,45549,23968,31387,83144,13375,15765,28577,88190,19736,73650,37930,25989,83926,94898,51798,39736,22437,55253,38415,71256,18376,42029,25858,44438,19515,38935,51649,71954,15882,18767,63193,25486,49130,37126,40604,34425,17043,12305,11634,26653,94167,36446,10516,67473,66864,72425,63981,18842,22461,42666,47770,69035,70372,28587,45266,15371,15798,45375,90225,16807,31016,68014,21337,19505,50016,10144,84093,21286,19430,34322,91068,94945,72305,24671,58048,65084,28545,21195,20548,22245,77191,96976,48583,76231,15734,61810,11356,68621,68786,98359,41367,26689,69913,76614,68101,88885,50308,79077,18270,28915,29178,53672,62878,10390,14922,68341,56529,41766,68309,56616,15126,61860,97789,11489,45692,41918,72151,72550,27156,36495,70738,17879,53535,17920,68880,78890,35850,14089,58078,65164,27068,26231,13376,57665,32213,77610,87816,21309,15146,86198,91137,55307,67467,40558,94627,82136,22351,89091,20260,23006,91393,47537,62496,98294,18840,71286,81312,31029,70466,35458,14060,22685,28286,25631,19512,40081,63837,14328,35474,22152,76232,51061,86057,17183) && isnotnull(substring(ca_zip#13, 1, 5))) + : +- *(6) FileScan parquet default.customer_address[ca_zip#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ))) + +- *(5) Project [ca_zip#12] + +- *(5) Filter (count(1)#14 > 10) + +- *(5) HashAggregate(keys=[ca_zip#13], functions=[count(1)]) + +- Exchange hashpartitioning(ca_zip#13, 5) + +- *(4) HashAggregate(keys=[ca_zip#13], functions=[partial_count(1)]) + +- *(4) Project [ca_zip#13] + +- *(4) BroadcastHashJoin [ca_address_sk#15], [c_current_addr_sk#16], Inner, BuildRight + :- *(4) Project [ca_address_sk#15, ca_zip#13] + : +- *(4) Filter isnotnull(ca_address_sk#15) + : +- *(4) FileScan parquet default.customer_address[ca_address_sk#15,ca_zip#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [c_current_addr_sk#16] + +- *(3) Filter ((isnotnull(c_preferred_cust_flag#17) && (c_preferred_cust_flag#17 = Y)) && isnotnull(c_current_addr_sk#16)) + +- *(3) FileScan parquet default.customer[c_current_addr_sk#16,c_preferred_cust_flag#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_preferred_cust_flag), EqualTo(c_preferred_cust_flag,Y), IsNotNull(c_current_addr_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/simplified.txt index cc62907dc..fd00e42e4 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/simplified.txt @@ -1,70 +1,61 @@ TakeOrderedAndProject [s_store_name,sum(ss_net_profit)] - WholeStageCodegen (9) - HashAggregate [s_store_name,sum] [sum(UnscaledValue(ss_net_profit)),sum(ss_net_profit),sum] + WholeStageCodegen + HashAggregate [s_store_name,sum,sum(UnscaledValue(ss_net_profit))] [sum,sum(UnscaledValue(ss_net_profit)),sum(ss_net_profit)] InputAdapter Exchange [s_store_name] #1 - WholeStageCodegen (8) - HashAggregate [s_store_name,ss_net_profit] [sum,sum] - Project [ss_net_profit,s_store_name] - BroadcastHashJoin [s_zip,ca_zip] - Project [ss_net_profit,s_store_name,s_zip] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] + WholeStageCodegen + HashAggregate [s_store_name,ss_net_profit,sum,sum] [sum,sum] + Project [s_store_name,ss_net_profit] + BroadcastHashJoin [ca_zip,s_zip] + Project [s_store_name,s_zip,ss_net_profit] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_net_profit,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_net_profit,ss_sold_date_sk,ss_store_sk] + Filter [ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_net_profit,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) + WholeStageCodegen Project [d_date_sk] - Filter [d_qoy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] + Filter [d_date_sk,d_qoy,d_year] + Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Filter [s_store_sk,s_zip] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_name,s_zip] + WholeStageCodegen + Project [s_store_name,s_store_sk,s_zip] + Filter [s_store_sk,s_zip] + Scan parquet default.store [s_store_name,s_store_sk,s_zip] [s_store_name,s_store_sk,s_zip] InputAdapter BroadcastExchange #4 - WholeStageCodegen (7) + WholeStageCodegen HashAggregate [ca_zip] InputAdapter Exchange [ca_zip] #5 - WholeStageCodegen (6) + WholeStageCodegen HashAggregate [ca_zip] - Project [ca_zip] - BroadcastHashJoin [ca_zip,ca_zip] + BroadcastHashJoin [ca_zip,ca_zip] + Project [ca_zip] Filter [ca_zip] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_zip] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (5) - Project [ca_zip] - Filter [count(1)] - HashAggregate [ca_zip,count] [count(1),ca_zip,count(1),count] - InputAdapter - Exchange [ca_zip] #7 - WholeStageCodegen (4) - HashAggregate [ca_zip] [count,count] - Project [ca_zip] - BroadcastHashJoin [ca_address_sk,c_current_addr_sk] + Scan parquet default.customer_address [ca_zip] [ca_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen + Project [ca_zip] + Filter [count(1)] + HashAggregate [ca_zip,count,count(1)] [ca_zip,count,count(1),count(1)] + InputAdapter + Exchange [ca_zip] #7 + WholeStageCodegen + HashAggregate [ca_zip,count,count] [count,count] + Project [ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ca_address_sk,ca_zip] Filter [ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_zip] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (3) - Project [c_current_addr_sk] - Filter [c_preferred_cust_flag,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_current_addr_sk,c_preferred_cust_flag] + Scan parquet default.customer_address [ca_address_sk,ca_zip] [ca_address_sk,ca_zip] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen + Project [c_current_addr_sk] + Filter [c_current_addr_sk,c_preferred_cust_flag] + Scan parquet default.customer [c_current_addr_sk,c_preferred_cust_flag] [c_current_addr_sk,c_preferred_cust_flag] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/explain.txt index 36b045bfd..41613515c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/explain.txt @@ -1,553 +1,97 @@ == Physical Plan == -TakeOrderedAndProject (99) -+- * HashAggregate (98) - +- Exchange (97) - +- * HashAggregate (96) - +- * Expand (95) - +- Union (94) - :- * HashAggregate (39) - : +- Exchange (38) - : +- * HashAggregate (37) - : +- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) - : :- * Project (29) - : : +- * BroadcastHashJoin Inner BuildRight (28) - : : :- * Project (22) - : : : +- * BroadcastHashJoin Inner BuildRight (21) - : : : :- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- * Project (9) - : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (8) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.store_sales (1) - : : : : : +- BroadcastExchange (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.store_returns (4) - : : : : +- BroadcastExchange (14) - : : : : +- * Project (13) - : : : : +- * Filter (12) - : : : : +- * ColumnarToRow (11) - : : : : +- Scan parquet default.date_dim (10) - : : : +- BroadcastExchange (20) - : : : +- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.store (17) - : : +- BroadcastExchange (27) - : : +- * Project (26) - : : +- * Filter (25) - : : +- * ColumnarToRow (24) - : : +- Scan parquet default.item (23) - : +- BroadcastExchange (34) - : +- * Project (33) - : +- * Filter (32) - : +- * ColumnarToRow (31) - : +- Scan parquet default.promotion (30) - :- * HashAggregate (66) - : +- Exchange (65) - : +- * HashAggregate (64) - : +- * Project (63) - : +- * BroadcastHashJoin Inner BuildRight (62) - : :- * Project (60) - : : +- * BroadcastHashJoin Inner BuildRight (59) - : : :- * Project (57) - : : : +- * BroadcastHashJoin Inner BuildRight (56) - : : : :- * Project (51) - : : : : +- * BroadcastHashJoin Inner BuildRight (50) - : : : : :- * Project (48) - : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (47) - : : : : : :- * Filter (42) - : : : : : : +- * ColumnarToRow (41) - : : : : : : +- Scan parquet default.catalog_sales (40) - : : : : : +- BroadcastExchange (46) - : : : : : +- * Filter (45) - : : : : : +- * ColumnarToRow (44) - : : : : : +- Scan parquet default.catalog_returns (43) - : : : : +- ReusedExchange (49) - : : : +- BroadcastExchange (55) - : : : +- * Filter (54) - : : : +- * ColumnarToRow (53) - : : : +- Scan parquet default.catalog_page (52) - : : +- ReusedExchange (58) - : +- ReusedExchange (61) - +- * HashAggregate (93) - +- Exchange (92) - +- * HashAggregate (91) - +- * Project (90) - +- * BroadcastHashJoin Inner BuildRight (89) - :- * Project (87) - : +- * BroadcastHashJoin Inner BuildRight (86) - : :- * Project (84) - : : +- * BroadcastHashJoin Inner BuildRight (83) - : : :- * Project (78) - : : : +- * BroadcastHashJoin Inner BuildRight (77) - : : : :- * Project (75) - : : : : +- * BroadcastHashJoin LeftOuter BuildRight (74) - : : : : :- * Filter (69) - : : : : : +- * ColumnarToRow (68) - : : : : : +- Scan parquet default.web_sales (67) - : : : : +- BroadcastExchange (73) - : : : : +- * Filter (72) - : : : : +- * ColumnarToRow (71) - : : : : +- Scan parquet default.web_returns (70) - : : : +- ReusedExchange (76) - : : +- BroadcastExchange (82) - : : +- * Filter (81) - : : +- * ColumnarToRow (80) - : : +- Scan parquet default.web_site (79) - : +- ReusedExchange (85) - +- ReusedExchange (88) - - -(1) Scan parquet default.store_sales -Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 6] -Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] - -(3) Filter [codegen id : 6] -Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] -Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_promo_sk#4)) - -(4) Scan parquet default.store_returns -Output [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] - -(6) Filter [codegen id : 1] -Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] -Condition : (isnotnull(sr_item_sk#8) AND isnotnull(sr_ticket_number#9)) - -(7) BroadcastExchange -Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [id=#12] - -(8) BroadcastHashJoin [codegen id : 6] -Left keys [2]: [cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] -Right keys [2]: [sr_item_sk#8, sr_ticket_number#9] -Join condition: None - -(9) Project [codegen id : 6] -Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11] -Input [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7, sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] - -(10) Scan parquet default.date_dim -Output [2]: [d_date_sk#13, d_date#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), IsNotNull(d_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#13, d_date#14] - -(12) Filter [codegen id : 2] -Input [2]: [d_date_sk#13, d_date#14] -Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 11192)) AND (d_date#14 <= 11222)) AND isnotnull(d_date_sk#13)) - -(13) Project [codegen id : 2] -Output [1]: [d_date_sk#13] -Input [2]: [d_date_sk#13, d_date#14] - -(14) BroadcastExchange -Input [1]: [d_date_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] - -(15) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#13] -Join condition: None - -(16) Project [codegen id : 6] -Output [7]: [ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11] -Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, d_date_sk#13] - -(17) Scan parquet default.store -Output [2]: [s_store_sk#16, s_store_id#17] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct - -(18) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#16, s_store_id#17] - -(19) Filter [codegen id : 3] -Input [2]: [s_store_sk#16, s_store_id#17] -Condition : isnotnull(s_store_sk#16) - -(20) BroadcastExchange -Input [2]: [s_store_sk#16, s_store_id#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] - -(21) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#16] -Join condition: None - -(22) Project [codegen id : 6] -Output [7]: [ss_item_sk#2, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17] -Input [9]: [ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_sk#16, s_store_id#17] - -(23) Scan parquet default.item -Output [2]: [i_item_sk#19, i_current_price#20] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#19, i_current_price#20] - -(25) Filter [codegen id : 4] -Input [2]: [i_item_sk#19, i_current_price#20] -Condition : ((isnotnull(i_current_price#20) AND (i_current_price#20 > 50.00)) AND isnotnull(i_item_sk#19)) - -(26) Project [codegen id : 4] -Output [1]: [i_item_sk#19] -Input [2]: [i_item_sk#19, i_current_price#20] - -(27) BroadcastExchange -Input [1]: [i_item_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] - -(28) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#19] -Join condition: None - -(29) Project [codegen id : 6] -Output [6]: [ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17] -Input [8]: [ss_item_sk#2, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17, i_item_sk#19] - -(30) Scan parquet default.promotion -Output [2]: [p_promo_sk#22, p_channel_tv#23] -Batched: true -Location [not included in comparison]/{warehouse_dir}/promotion] -PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] -ReadSchema: struct - -(31) ColumnarToRow [codegen id : 5] -Input [2]: [p_promo_sk#22, p_channel_tv#23] - -(32) Filter [codegen id : 5] -Input [2]: [p_promo_sk#22, p_channel_tv#23] -Condition : ((isnotnull(p_channel_tv#23) AND (p_channel_tv#23 = N)) AND isnotnull(p_promo_sk#22)) - -(33) Project [codegen id : 5] -Output [1]: [p_promo_sk#22] -Input [2]: [p_promo_sk#22, p_channel_tv#23] - -(34) BroadcastExchange -Input [1]: [p_promo_sk#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] - -(35) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_promo_sk#4] -Right keys [1]: [p_promo_sk#22] -Join condition: None - -(36) Project [codegen id : 6] -Output [5]: [ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17] -Input [7]: [ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17, p_promo_sk#22] - -(37) HashAggregate [codegen id : 6] -Input [5]: [ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17] -Keys [1]: [s_store_id#17] -Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#6)), partial_sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] -Aggregate Attributes [5]: [sum#25, sum#26, isEmpty#27, sum#28, isEmpty#29] -Results [6]: [s_store_id#17, sum#30, sum#31, isEmpty#32, sum#33, isEmpty#34] - -(38) Exchange -Input [6]: [s_store_id#17, sum#30, sum#31, isEmpty#32, sum#33, isEmpty#34] -Arguments: hashpartitioning(s_store_id#17, 5), true, [id=#35] - -(39) HashAggregate [codegen id : 7] -Input [6]: [s_store_id#17, sum#30, sum#31, isEmpty#32, sum#33, isEmpty#34] -Keys [1]: [s_store_id#17] -Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#6)), sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#6))#36, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#37, sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#38] -Results [5]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#36,17,2) AS sales#39, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#37 AS returns#40, sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#38 AS profit#41, store channel AS channel#42, concat(store, s_store_id#17) AS id#43] - -(40) Scan parquet default.catalog_sales -Output [7]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] -ReadSchema: struct - -(41) ColumnarToRow [codegen id : 13] -Input [7]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50] - -(42) Filter [codegen id : 13] -Input [7]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50] -Condition : (((isnotnull(cs_sold_date_sk#44) AND isnotnull(cs_catalog_page_sk#45)) AND isnotnull(cs_item_sk#46)) AND isnotnull(cs_promo_sk#47)) - -(43) Scan parquet default.catalog_returns -Output [4]: [cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] -ReadSchema: struct - -(44) ColumnarToRow [codegen id : 8] -Input [4]: [cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] - -(45) Filter [codegen id : 8] -Input [4]: [cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] -Condition : (isnotnull(cr_item_sk#51) AND isnotnull(cr_order_number#52)) - -(46) BroadcastExchange -Input [4]: [cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [id=#55] - -(47) BroadcastHashJoin [codegen id : 13] -Left keys [2]: [cs_item_sk#46, cs_order_number#48] -Right keys [2]: [cr_item_sk#51, cr_order_number#52] -Join condition: None - -(48) Project [codegen id : 13] -Output [8]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54] -Input [11]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] - -(49) ReusedExchange [Reuses operator id: 14] -Output [1]: [d_date_sk#13] - -(50) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [cs_sold_date_sk#44] -Right keys [1]: [d_date_sk#13] -Join condition: None - -(51) Project [codegen id : 13] -Output [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54] -Input [9]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, d_date_sk#13] - -(52) Scan parquet default.catalog_page -Output [2]: [cp_catalog_page_sk#56, cp_catalog_page_id#57] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_page] -PushedFilters: [IsNotNull(cp_catalog_page_sk)] -ReadSchema: struct - -(53) ColumnarToRow [codegen id : 10] -Input [2]: [cp_catalog_page_sk#56, cp_catalog_page_id#57] - -(54) Filter [codegen id : 10] -Input [2]: [cp_catalog_page_sk#56, cp_catalog_page_id#57] -Condition : isnotnull(cp_catalog_page_sk#56) - -(55) BroadcastExchange -Input [2]: [cp_catalog_page_sk#56, cp_catalog_page_id#57] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#58] - -(56) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [cs_catalog_page_sk#45] -Right keys [1]: [cp_catalog_page_sk#56] -Join condition: None - -(57) Project [codegen id : 13] -Output [7]: [cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57] -Input [9]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_sk#56, cp_catalog_page_id#57] - -(58) ReusedExchange [Reuses operator id: 27] -Output [1]: [i_item_sk#19] - -(59) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [cs_item_sk#46] -Right keys [1]: [i_item_sk#19] -Join condition: None - -(60) Project [codegen id : 13] -Output [6]: [cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57] -Input [8]: [cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57, i_item_sk#19] - -(61) ReusedExchange [Reuses operator id: 34] -Output [1]: [p_promo_sk#22] - -(62) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [cs_promo_sk#47] -Right keys [1]: [p_promo_sk#22] -Join condition: None - -(63) Project [codegen id : 13] -Output [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57] -Input [7]: [cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57, p_promo_sk#22] - -(64) HashAggregate [codegen id : 13] -Input [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57] -Keys [1]: [cp_catalog_page_id#57] -Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#49)), partial_sum(coalesce(cast(cr_return_amount#53 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#54 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] -Aggregate Attributes [5]: [sum#59, sum#60, isEmpty#61, sum#62, isEmpty#63] -Results [6]: [cp_catalog_page_id#57, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] - -(65) Exchange -Input [6]: [cp_catalog_page_id#57, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] -Arguments: hashpartitioning(cp_catalog_page_id#57, 5), true, [id=#69] - -(66) HashAggregate [codegen id : 14] -Input [6]: [cp_catalog_page_id#57, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] -Keys [1]: [cp_catalog_page_id#57] -Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#49)), sum(coalesce(cast(cr_return_amount#53 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#54 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] -Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#49))#70, sum(coalesce(cast(cr_return_amount#53 as decimal(12,2)), 0.00))#71, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#54 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#72] -Results [5]: [MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#49))#70,17,2) AS sales#73, sum(coalesce(cast(cr_return_amount#53 as decimal(12,2)), 0.00))#71 AS returns#74, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#54 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#72 AS profit#75, catalog channel AS channel#76, concat(catalog_page, cp_catalog_page_id#57) AS id#77] - -(67) Scan parquet default.web_sales -Output [7]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_order_number#82, ws_ext_sales_price#83, ws_net_profit#84] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo_sk)] -ReadSchema: struct - -(68) ColumnarToRow [codegen id : 20] -Input [7]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_order_number#82, ws_ext_sales_price#83, ws_net_profit#84] - -(69) Filter [codegen id : 20] -Input [7]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_order_number#82, ws_ext_sales_price#83, ws_net_profit#84] -Condition : (((isnotnull(ws_sold_date_sk#78) AND isnotnull(ws_web_site_sk#80)) AND isnotnull(ws_item_sk#79)) AND isnotnull(ws_promo_sk#81)) - -(70) Scan parquet default.web_returns -Output [4]: [wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_returns] -PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] -ReadSchema: struct - -(71) ColumnarToRow [codegen id : 15] -Input [4]: [wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] - -(72) Filter [codegen id : 15] -Input [4]: [wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] -Condition : (isnotnull(wr_item_sk#85) AND isnotnull(wr_order_number#86)) - -(73) BroadcastExchange -Input [4]: [wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [id=#89] - -(74) BroadcastHashJoin [codegen id : 20] -Left keys [2]: [cast(ws_item_sk#79 as bigint), cast(ws_order_number#82 as bigint)] -Right keys [2]: [wr_item_sk#85, wr_order_number#86] -Join condition: None - -(75) Project [codegen id : 20] -Output [8]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88] -Input [11]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_order_number#82, ws_ext_sales_price#83, ws_net_profit#84, wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] - -(76) ReusedExchange [Reuses operator id: 14] -Output [1]: [d_date_sk#13] - -(77) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [ws_sold_date_sk#78] -Right keys [1]: [d_date_sk#13] -Join condition: None - -(78) Project [codegen id : 20] -Output [7]: [ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88] -Input [9]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, d_date_sk#13] - -(79) Scan parquet default.web_site -Output [2]: [web_site_sk#90, web_site_id#91] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_site] -PushedFilters: [IsNotNull(web_site_sk)] -ReadSchema: struct - -(80) ColumnarToRow [codegen id : 17] -Input [2]: [web_site_sk#90, web_site_id#91] - -(81) Filter [codegen id : 17] -Input [2]: [web_site_sk#90, web_site_id#91] -Condition : isnotnull(web_site_sk#90) - -(82) BroadcastExchange -Input [2]: [web_site_sk#90, web_site_id#91] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#92] - -(83) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [ws_web_site_sk#80] -Right keys [1]: [web_site_sk#90] -Join condition: None - -(84) Project [codegen id : 20] -Output [7]: [ws_item_sk#79, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91] -Input [9]: [ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_sk#90, web_site_id#91] - -(85) ReusedExchange [Reuses operator id: 27] -Output [1]: [i_item_sk#19] - -(86) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [ws_item_sk#79] -Right keys [1]: [i_item_sk#19] -Join condition: None - -(87) Project [codegen id : 20] -Output [6]: [ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91] -Input [8]: [ws_item_sk#79, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91, i_item_sk#19] - -(88) ReusedExchange [Reuses operator id: 34] -Output [1]: [p_promo_sk#22] - -(89) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [ws_promo_sk#81] -Right keys [1]: [p_promo_sk#22] -Join condition: None - -(90) Project [codegen id : 20] -Output [5]: [ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91] -Input [7]: [ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91, p_promo_sk#22] - -(91) HashAggregate [codegen id : 20] -Input [5]: [ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91] -Keys [1]: [web_site_id#91] -Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#83)), partial_sum(coalesce(cast(wr_return_amt#87 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#84 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#88 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] -Aggregate Attributes [5]: [sum#93, sum#94, isEmpty#95, sum#96, isEmpty#97] -Results [6]: [web_site_id#91, sum#98, sum#99, isEmpty#100, sum#101, isEmpty#102] - -(92) Exchange -Input [6]: [web_site_id#91, sum#98, sum#99, isEmpty#100, sum#101, isEmpty#102] -Arguments: hashpartitioning(web_site_id#91, 5), true, [id=#103] - -(93) HashAggregate [codegen id : 21] -Input [6]: [web_site_id#91, sum#98, sum#99, isEmpty#100, sum#101, isEmpty#102] -Keys [1]: [web_site_id#91] -Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#83)), sum(coalesce(cast(wr_return_amt#87 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#84 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#88 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#83))#104, sum(coalesce(cast(wr_return_amt#87 as decimal(12,2)), 0.00))#105, sum(CheckOverflow((promote_precision(cast(ws_net_profit#84 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#88 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#106] -Results [5]: [MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#83))#104,17,2) AS sales#107, sum(coalesce(cast(wr_return_amt#87 as decimal(12,2)), 0.00))#105 AS returns#108, sum(CheckOverflow((promote_precision(cast(ws_net_profit#84 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#88 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#106 AS profit#109, web channel AS channel#110, concat(web_site, web_site_id#91) AS id#111] - -(94) Union - -(95) Expand [codegen id : 22] -Input [5]: [sales#39, returns#40, profit#41, channel#42, id#43] -Arguments: [List(sales#39, returns#40, profit#41, channel#42, id#43, 0), List(sales#39, returns#40, profit#41, channel#42, null, 1), List(sales#39, returns#40, profit#41, null, null, 3)], [sales#39, returns#40, profit#41, channel#112, id#113, spark_grouping_id#114] - -(96) HashAggregate [codegen id : 22] -Input [6]: [sales#39, returns#40, profit#41, channel#112, id#113, spark_grouping_id#114] -Keys [3]: [channel#112, id#113, spark_grouping_id#114] -Functions [3]: [partial_sum(sales#39), partial_sum(returns#40), partial_sum(profit#41)] -Aggregate Attributes [6]: [sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] -Results [9]: [channel#112, id#113, spark_grouping_id#114, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] - -(97) Exchange -Input [9]: [channel#112, id#113, spark_grouping_id#114, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] -Arguments: hashpartitioning(channel#112, id#113, spark_grouping_id#114, 5), true, [id=#127] - -(98) HashAggregate [codegen id : 23] -Input [9]: [channel#112, id#113, spark_grouping_id#114, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] -Keys [3]: [channel#112, id#113, spark_grouping_id#114] -Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] -Aggregate Attributes [3]: [sum(sales#39)#128, sum(returns#40)#129, sum(profit#41)#130] -Results [5]: [channel#112, id#113, sum(sales#39)#128 AS sales#131, sum(returns#40)#129 AS returns#132, sum(profit#41)#130 AS profit#133] - -(99) TakeOrderedAndProject -Input [5]: [channel#112, id#113, sales#131, returns#132, profit#133] -Arguments: 100, [channel#112 ASC NULLS FIRST, id#113 ASC NULLS FIRST], [channel#112, id#113, sales#131, returns#132, profit#133] - +TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NULLS FIRST], output=[channel#1,id#2,sales#3,returns#4,profit#5]) ++- *(23) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[sum(sales#7), sum(returns#8), sum(profit#9)]) + +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 5) + +- *(22) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[partial_sum(sales#7), partial_sum(returns#8), partial_sum(profit#9)]) + +- *(22) Expand [List(sales#7, returns#8, profit#9, channel#10, id#11, 0), List(sales#7, returns#8, profit#9, channel#10, null, 1), List(sales#7, returns#8, profit#9, null, null, 3)], [sales#7, returns#8, profit#9, channel#1, id#2, spark_grouping_id#6] + +- Union + :- *(7) HashAggregate(keys=[s_store_id#12], functions=[sum(UnscaledValue(ss_ext_sales_price#13)), sum(coalesce(cast(sr_return_amt#14 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#15 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#16 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) + : +- Exchange hashpartitioning(s_store_id#12, 5) + : +- *(6) HashAggregate(keys=[s_store_id#12], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#13)), partial_sum(coalesce(cast(sr_return_amt#14 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#15 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#16 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) + : +- *(6) Project [ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16, s_store_id#12] + : +- *(6) BroadcastHashJoin [ss_promo_sk#17], [p_promo_sk#18], Inner, BuildRight + : :- *(6) Project [ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16, s_store_id#12] + : : +- *(6) BroadcastHashJoin [ss_item_sk#19], [i_item_sk#20], Inner, BuildRight + : : :- *(6) Project [ss_item_sk#19, ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16, s_store_id#12] + : : : +- *(6) BroadcastHashJoin [ss_store_sk#21], [s_store_sk#22], Inner, BuildRight + : : : :- *(6) Project [ss_item_sk#19, ss_store_sk#21, ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16] + : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#23], [d_date_sk#24], Inner, BuildRight + : : : : :- *(6) Project [ss_sold_date_sk#23, ss_item_sk#19, ss_store_sk#21, ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16] + : : : : : +- *(6) BroadcastHashJoin [cast(ss_item_sk#19 as bigint), cast(ss_ticket_number#25 as bigint)], [sr_item_sk#26, sr_ticket_number#27], LeftOuter, BuildRight + : : : : : :- *(6) Project [ss_sold_date_sk#23, ss_item_sk#19, ss_store_sk#21, ss_promo_sk#17, ss_ticket_number#25, ss_ext_sales_price#13, ss_net_profit#15] + : : : : : : +- *(6) Filter (((isnotnull(ss_sold_date_sk#23) && isnotnull(ss_store_sk#21)) && isnotnull(ss_item_sk#19)) && isnotnull(ss_promo_sk#17)) + : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#23,ss_item_sk#19,ss_store_sk#21,ss_promo_sk#17,ss_ticket_number#25,ss_ext_sales_price#13,ss_net_profit#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)], ReadSchema: struct= 11192)) && (d_date#28 <= 11222)) && isnotnull(d_date_sk#24)) + : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#24,d_date#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), Is..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(3) Project [s_store_sk#22, s_store_id#12] + : : : +- *(3) Filter isnotnull(s_store_sk#22) + : : : +- *(3) FileScan parquet default.store[s_store_sk#22,s_store_id#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(4) Project [i_item_sk#20] + : : +- *(4) Filter ((isnotnull(i_current_price#29) && (i_current_price#29 > 50.00)) && isnotnull(i_item_sk#20)) + : : +- *(4) FileScan parquet default.item[i_item_sk#20,i_current_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(5) Project [p_promo_sk#18] + : +- *(5) Filter ((isnotnull(p_channel_tv#30) && (p_channel_tv#30 = N)) && isnotnull(p_promo_sk#18)) + : +- *(5) FileScan parquet default.promotion[p_promo_sk#18,p_channel_tv#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)], ReadSchema: struct + :- *(14) HashAggregate(keys=[cp_catalog_page_id#31], functions=[sum(UnscaledValue(cs_ext_sales_price#32)), sum(coalesce(cast(cr_return_amount#33 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#34 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#35 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) + : +- Exchange hashpartitioning(cp_catalog_page_id#31, 5) + : +- *(13) HashAggregate(keys=[cp_catalog_page_id#31], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#32)), partial_sum(coalesce(cast(cr_return_amount#33 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#34 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#35 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) + : +- *(13) Project [cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35, cp_catalog_page_id#31] + : +- *(13) BroadcastHashJoin [cs_promo_sk#36], [p_promo_sk#18], Inner, BuildRight + : :- *(13) Project [cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35, cp_catalog_page_id#31] + : : +- *(13) BroadcastHashJoin [cs_item_sk#37], [i_item_sk#20], Inner, BuildRight + : : :- *(13) Project [cs_item_sk#37, cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35, cp_catalog_page_id#31] + : : : +- *(13) BroadcastHashJoin [cs_catalog_page_sk#38], [cp_catalog_page_sk#39], Inner, BuildRight + : : : :- *(13) Project [cs_catalog_page_sk#38, cs_item_sk#37, cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35] + : : : : +- *(13) BroadcastHashJoin [cs_sold_date_sk#40], [d_date_sk#24], Inner, BuildRight + : : : : :- *(13) Project [cs_sold_date_sk#40, cs_catalog_page_sk#38, cs_item_sk#37, cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35] + : : : : : +- *(13) BroadcastHashJoin [cs_item_sk#37, cs_order_number#41], [cr_item_sk#42, cr_order_number#43], LeftOuter, BuildRight + : : : : : :- *(13) Project [cs_sold_date_sk#40, cs_catalog_page_sk#38, cs_item_sk#37, cs_promo_sk#36, cs_order_number#41, cs_ext_sales_price#32, cs_net_profit#34] + : : : : : : +- *(13) Filter (((isnotnull(cs_sold_date_sk#40) && isnotnull(cs_catalog_page_sk#38)) && isnotnull(cs_item_sk#37)) && isnotnull(cs_promo_sk#36)) + : : : : : : +- *(13) FileScan parquet default.catalog_sales[cs_sold_date_sk#40,cs_catalog_page_sk#38,cs_item_sk#37,cs_promo_sk#36,cs_order_number#41,cs_ext_sales_price#32,cs_net_profit#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_p..., ReadSchema: struct + : : : : +- ReusedExchange [d_date_sk#24], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(10) Project [cp_catalog_page_sk#39, cp_catalog_page_id#31] + : : : +- *(10) Filter isnotnull(cp_catalog_page_sk#39) + : : : +- *(10) FileScan parquet default.catalog_page[cp_catalog_page_sk#39,cp_catalog_page_id#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_page], PartitionFilters: [], PushedFilters: [IsNotNull(cp_catalog_page_sk)], ReadSchema: struct + : : +- ReusedExchange [i_item_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [p_promo_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(21) HashAggregate(keys=[web_site_id#44], functions=[sum(UnscaledValue(ws_ext_sales_price#45)), sum(coalesce(cast(wr_return_amt#46 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#47 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#48 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) + +- Exchange hashpartitioning(web_site_id#44, 5) + +- *(20) HashAggregate(keys=[web_site_id#44], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#45)), partial_sum(coalesce(cast(wr_return_amt#46 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#47 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#48 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) + +- *(20) Project [ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48, web_site_id#44] + +- *(20) BroadcastHashJoin [ws_promo_sk#49], [p_promo_sk#18], Inner, BuildRight + :- *(20) Project [ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48, web_site_id#44] + : +- *(20) BroadcastHashJoin [ws_item_sk#50], [i_item_sk#20], Inner, BuildRight + : :- *(20) Project [ws_item_sk#50, ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48, web_site_id#44] + : : +- *(20) BroadcastHashJoin [ws_web_site_sk#51], [web_site_sk#52], Inner, BuildRight + : : :- *(20) Project [ws_item_sk#50, ws_web_site_sk#51, ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48] + : : : +- *(20) BroadcastHashJoin [ws_sold_date_sk#53], [d_date_sk#24], Inner, BuildRight + : : : :- *(20) Project [ws_sold_date_sk#53, ws_item_sk#50, ws_web_site_sk#51, ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48] + : : : : +- *(20) BroadcastHashJoin [cast(ws_item_sk#50 as bigint), cast(ws_order_number#54 as bigint)], [wr_item_sk#55, wr_order_number#56], LeftOuter, BuildRight + : : : : :- *(20) Project [ws_sold_date_sk#53, ws_item_sk#50, ws_web_site_sk#51, ws_promo_sk#49, ws_order_number#54, ws_ext_sales_price#45, ws_net_profit#47] + : : : : : +- *(20) Filter (((isnotnull(ws_sold_date_sk#53) && isnotnull(ws_web_site_sk#51)) && isnotnull(ws_item_sk#50)) && isnotnull(ws_promo_sk#49)) + : : : : : +- *(20) FileScan parquet default.web_sales[ws_sold_date_sk#53,ws_item_sk#50,ws_web_site_sk#51,ws_promo_sk#49,ws_order_number#54,ws_ext_sales_price#45,ws_net_profit#47] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo..., ReadSchema: struct + : : : +- ReusedExchange [d_date_sk#24], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(17) Project [web_site_sk#52, web_site_id#44] + : : +- *(17) Filter isnotnull(web_site_sk#52) + : : +- *(17) FileScan parquet default.web_site[web_site_sk#52,web_site_id#44] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct + : +- ReusedExchange [i_item_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [p_promo_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/simplified.txt index 37f46f14e..28b1a009b 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/simplified.txt @@ -1,148 +1,133 @@ -TakeOrderedAndProject [channel,id,sales,returns,profit] - WholeStageCodegen (23) - HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] +TakeOrderedAndProject [channel,id,profit,returns,sales] + WholeStageCodegen + HashAggregate [channel,id,spark_grouping_id,sum,sum,sum,sum(profit),sum(returns),sum(sales)] [profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] InputAdapter Exchange [channel,id,spark_grouping_id] #1 - WholeStageCodegen (22) - HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - Expand [sales,returns,profit,channel,id] + WholeStageCodegen + HashAggregate [channel,id,profit,returns,sales,spark_grouping_id,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Expand [channel,id,profit,returns,sales] InputAdapter Union - WholeStageCodegen (7) - HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + WholeStageCodegen + HashAggregate [s_store_id,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00))] [channel,id,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00))] InputAdapter Exchange [s_store_id] #2 - WholeStageCodegen (6) - HashAggregate [s_store_id,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] - Project [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk,ss_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit] + WholeStageCodegen + HashAggregate [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [p_promo_sk,ss_promo_sk] + Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit,ss_promo_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Project [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Filter [ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Filter [sr_item_sk,sr_ticket_number] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + WholeStageCodegen + Project [sr_item_sk,sr_net_loss,sr_return_amt,sr_ticket_number] + Filter [sr_item_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_item_sk,sr_net_loss,sr_return_amt,sr_ticket_number] [sr_item_sk,sr_net_loss,sr_return_amt,sr_ticket_number] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (3) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id] + WholeStageCodegen + Project [s_store_id,s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_store_id,s_store_sk] [s_store_id,s_store_sk] InputAdapter BroadcastExchange #6 - WholeStageCodegen (4) + WholeStageCodegen Project [i_item_sk] Filter [i_current_price,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_current_price] + Scan parquet default.item [i_current_price,i_item_sk] [i_current_price,i_item_sk] InputAdapter BroadcastExchange #7 - WholeStageCodegen (5) + WholeStageCodegen Project [p_promo_sk] Filter [p_channel_tv,p_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.promotion [p_promo_sk,p_channel_tv] - WholeStageCodegen (14) - HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + Scan parquet default.promotion [p_channel_tv,p_promo_sk] [p_channel_tv,p_promo_sk] + WholeStageCodegen + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00))] [channel,id,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00))] InputAdapter Exchange [cp_catalog_page_id] #8 - WholeStageCodegen (13) - HashAggregate [cp_catalog_page_id,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] - Project [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + WholeStageCodegen + HashAggregate [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit] BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit,cs_promo_sk] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] - BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] - Project [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk] + BroadcastHashJoin [cp_catalog_page_sk,cs_catalog_page_sk] + Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] - Filter [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit] + Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk,cs_sold_date_sk] + BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] + Project [cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_order_number,cs_promo_sk,cs_sold_date_sk] + Filter [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_order_number,cs_promo_sk,cs_sold_date_sk] [cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_order_number,cs_promo_sk,cs_sold_date_sk] InputAdapter BroadcastExchange #9 - WholeStageCodegen (8) - Filter [cr_item_sk,cr_order_number] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + WholeStageCodegen + Project [cr_item_sk,cr_net_loss,cr_order_number,cr_return_amount] + Filter [cr_item_sk,cr_order_number] + Scan parquet default.catalog_returns [cr_item_sk,cr_net_loss,cr_order_number,cr_return_amount] [cr_item_sk,cr_net_loss,cr_order_number,cr_return_amount] InputAdapter - ReusedExchange [d_date_sk] #4 + ReusedExchange [d_date_sk] [d_date_sk] #4 InputAdapter BroadcastExchange #10 - WholeStageCodegen (10) - Filter [cp_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + WholeStageCodegen + Project [cp_catalog_page_id,cp_catalog_page_sk] + Filter [cp_catalog_page_sk] + Scan parquet default.catalog_page [cp_catalog_page_id,cp_catalog_page_sk] [cp_catalog_page_id,cp_catalog_page_sk] InputAdapter - ReusedExchange [i_item_sk] #6 + ReusedExchange [i_item_sk] [i_item_sk] #6 InputAdapter - ReusedExchange [p_promo_sk] #7 - WholeStageCodegen (21) - HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] + ReusedExchange [p_promo_sk] [p_promo_sk] #7 + WholeStageCodegen + HashAggregate [sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),web_site_id] [channel,id,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00))] InputAdapter Exchange [web_site_id] #11 - WholeStageCodegen (20) - HashAggregate [web_site_id,ws_ext_sales_price,wr_return_amt,ws_net_profit,wr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] - Project [ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] - BroadcastHashJoin [ws_promo_sk,p_promo_sk] - Project [ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] - BroadcastHashJoin [ws_web_site_sk,web_site_sk] - Project [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] - Filter [ws_sold_date_sk,ws_web_site_sk,ws_item_sk,ws_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit] + WholeStageCodegen + HashAggregate [sum,sum,sum,sum,sum,sum,web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum,sum,sum] + Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [p_promo_sk,ws_promo_sk] + Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit,ws_promo_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk] + BroadcastHashJoin [web_site_sk,ws_web_site_sk] + Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk,ws_web_site_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_order_number,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + Filter [ws_item_sk,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_order_number,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] [ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_order_number,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] InputAdapter BroadcastExchange #12 - WholeStageCodegen (15) - Filter [wr_item_sk,wr_order_number] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + WholeStageCodegen + Project [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt] + Filter [wr_item_sk,wr_order_number] + Scan parquet default.web_returns [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt] [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt] InputAdapter - ReusedExchange [d_date_sk] #4 + ReusedExchange [d_date_sk] [d_date_sk] #4 InputAdapter BroadcastExchange #13 - WholeStageCodegen (17) - Filter [web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_site_id] + WholeStageCodegen + Project [web_site_id,web_site_sk] + Filter [web_site_sk] + Scan parquet default.web_site [web_site_id,web_site_sk] [web_site_id,web_site_sk] InputAdapter - ReusedExchange [i_item_sk] #6 + ReusedExchange [i_item_sk] [i_item_sk] #6 InputAdapter - ReusedExchange [p_promo_sk] #7 + ReusedExchange [p_promo_sk] [p_promo_sk] #7 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/explain.txt index 2530fe9e8..93900040c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/explain.txt @@ -1,298 +1,52 @@ == Physical Plan == -TakeOrderedAndProject (52) -+- * Project (51) - +- * BroadcastHashJoin Inner BuildRight (50) - :- * Project (45) - : +- * BroadcastHashJoin Inner BuildRight (44) - : :- * Project (39) - : : +- * BroadcastHashJoin Inner BuildRight (38) - : : :- * Filter (20) - : : : +- * HashAggregate (19) - : : : +- Exchange (18) - : : : +- * HashAggregate (17) - : : : +- * Project (16) - : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : :- * Project (10) - : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.catalog_returns (1) - : : : : +- BroadcastExchange (8) - : : : : +- * Project (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.date_dim (4) - : : : +- BroadcastExchange (14) - : : : +- * Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.customer_address (11) - : : +- BroadcastExchange (37) - : : +- * Filter (36) - : : +- * HashAggregate (35) - : : +- Exchange (34) - : : +- * HashAggregate (33) - : : +- * HashAggregate (32) - : : +- Exchange (31) - : : +- * HashAggregate (30) - : : +- * Project (29) - : : +- * BroadcastHashJoin Inner BuildRight (28) - : : :- * Project (26) - : : : +- * BroadcastHashJoin Inner BuildRight (25) - : : : :- * Filter (23) - : : : : +- * ColumnarToRow (22) - : : : : +- Scan parquet default.catalog_returns (21) - : : : +- ReusedExchange (24) - : : +- ReusedExchange (27) - : +- BroadcastExchange (43) - : +- * Filter (42) - : +- * ColumnarToRow (41) - : +- Scan parquet default.customer (40) - +- BroadcastExchange (49) - +- * Filter (48) - +- * ColumnarToRow (47) - +- Scan parquet default.customer_address (46) - - -(1) Scan parquet default.catalog_returns -Output [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk), IsNotNull(cr_returning_customer_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] - -(3) Filter [codegen id : 3] -Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] -Condition : ((isnotnull(cr_returned_date_sk#1) AND isnotnull(cr_returning_addr_sk#3)) AND isnotnull(cr_returning_customer_sk#2)) - -(4) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_year#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#5, d_year#6] - -(6) Filter [codegen id : 1] -Input [2]: [d_date_sk#5, d_year#6] -Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND isnotnull(d_date_sk#5)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_year#6] - -(8) BroadcastExchange -Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] - -(9) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [cr_returned_date_sk#1] -Right keys [1]: [d_date_sk#5] -Join condition: None - -(10) Project [codegen id : 3] -Output [3]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] -Input [5]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, d_date_sk#5] - -(11) Scan parquet default.customer_address -Output [2]: [ca_address_sk#8, ca_state#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [2]: [ca_address_sk#8, ca_state#9] - -(13) Filter [codegen id : 2] -Input [2]: [ca_address_sk#8, ca_state#9] -Condition : (isnotnull(ca_address_sk#8) AND isnotnull(ca_state#9)) - -(14) BroadcastExchange -Input [2]: [ca_address_sk#8, ca_state#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] - -(15) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [cr_returning_addr_sk#3] -Right keys [1]: [ca_address_sk#8] -Join condition: None - -(16) Project [codegen id : 3] -Output [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#9] -Input [5]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, ca_address_sk#8, ca_state#9] - -(17) HashAggregate [codegen id : 3] -Input [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#9] -Keys [2]: [cr_returning_customer_sk#2, ca_state#9] -Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#4))] -Aggregate Attributes [1]: [sum#11] -Results [3]: [cr_returning_customer_sk#2, ca_state#9, sum#12] - -(18) Exchange -Input [3]: [cr_returning_customer_sk#2, ca_state#9, sum#12] -Arguments: hashpartitioning(cr_returning_customer_sk#2, ca_state#9, 5), true, [id=#13] - -(19) HashAggregate [codegen id : 11] -Input [3]: [cr_returning_customer_sk#2, ca_state#9, sum#12] -Keys [2]: [cr_returning_customer_sk#2, ca_state#9] -Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))] -Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))#14] -Results [3]: [cr_returning_customer_sk#2 AS ctr_customer_sk#15, ca_state#9 AS ctr_state#16, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#4))#14,17,2) AS ctr_total_return#17] - -(20) Filter [codegen id : 11] -Input [3]: [ctr_customer_sk#15, ctr_state#16, ctr_total_return#17] -Condition : isnotnull(ctr_total_return#17) - -(21) Scan parquet default.catalog_returns -Output [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk)] -ReadSchema: struct - -(22) ColumnarToRow [codegen id : 6] -Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] - -(23) Filter [codegen id : 6] -Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] -Condition : (isnotnull(cr_returned_date_sk#1) AND isnotnull(cr_returning_addr_sk#3)) - -(24) ReusedExchange [Reuses operator id: 8] -Output [1]: [d_date_sk#5] - -(25) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cr_returned_date_sk#1] -Right keys [1]: [d_date_sk#5] -Join condition: None - -(26) Project [codegen id : 6] -Output [3]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] -Input [5]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, d_date_sk#5] - -(27) ReusedExchange [Reuses operator id: 14] -Output [2]: [ca_address_sk#8, ca_state#9] - -(28) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cr_returning_addr_sk#3] -Right keys [1]: [ca_address_sk#8] -Join condition: None - -(29) Project [codegen id : 6] -Output [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#9] -Input [5]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, ca_address_sk#8, ca_state#9] - -(30) HashAggregate [codegen id : 6] -Input [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#9] -Keys [2]: [cr_returning_customer_sk#2, ca_state#9] -Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#4))] -Aggregate Attributes [1]: [sum#18] -Results [3]: [cr_returning_customer_sk#2, ca_state#9, sum#19] - -(31) Exchange -Input [3]: [cr_returning_customer_sk#2, ca_state#9, sum#19] -Arguments: hashpartitioning(cr_returning_customer_sk#2, ca_state#9, 5), true, [id=#20] - -(32) HashAggregate [codegen id : 7] -Input [3]: [cr_returning_customer_sk#2, ca_state#9, sum#19] -Keys [2]: [cr_returning_customer_sk#2, ca_state#9] -Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))] -Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))#21] -Results [2]: [ca_state#9 AS ctr_state#16, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#4))#21,17,2) AS ctr_total_return#17] - -(33) HashAggregate [codegen id : 7] -Input [2]: [ctr_state#16, ctr_total_return#17] -Keys [1]: [ctr_state#16] -Functions [1]: [partial_avg(ctr_total_return#17)] -Aggregate Attributes [2]: [sum#22, count#23] -Results [3]: [ctr_state#16, sum#24, count#25] - -(34) Exchange -Input [3]: [ctr_state#16, sum#24, count#25] -Arguments: hashpartitioning(ctr_state#16, 5), true, [id=#26] - -(35) HashAggregate [codegen id : 8] -Input [3]: [ctr_state#16, sum#24, count#25] -Keys [1]: [ctr_state#16] -Functions [1]: [avg(ctr_total_return#17)] -Aggregate Attributes [1]: [avg(ctr_total_return#17)#27] -Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#17)#27) * 1.200000), DecimalType(24,7), true) AS (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16 AS ctr_state#16#29] - -(36) Filter [codegen id : 8] -Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16#29] -Condition : isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28) - -(37) BroadcastExchange -Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16#29] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#30] - -(38) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ctr_state#16] -Right keys [1]: [ctr_state#16#29] -Join condition: (cast(ctr_total_return#17 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28) - -(39) Project [codegen id : 11] -Output [2]: [ctr_customer_sk#15, ctr_total_return#17] -Input [5]: [ctr_customer_sk#15, ctr_state#16, ctr_total_return#17, (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16#29] - -(40) Scan parquet default.customer -Output [6]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct - -(41) ColumnarToRow [codegen id : 9] -Input [6]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36] - -(42) Filter [codegen id : 9] -Input [6]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36] -Condition : (isnotnull(c_customer_sk#31) AND isnotnull(c_current_addr_sk#33)) - -(43) BroadcastExchange -Input [6]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#37] - -(44) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ctr_customer_sk#15] -Right keys [1]: [c_customer_sk#31] -Join condition: None - -(45) Project [codegen id : 11] -Output [6]: [ctr_total_return#17, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36] -Input [8]: [ctr_customer_sk#15, ctr_total_return#17, c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36] - -(46) Scan parquet default.customer_address -Output [12]: [ca_address_sk#8, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(47) ColumnarToRow [codegen id : 10] -Input [12]: [ca_address_sk#8, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] - -(48) Filter [codegen id : 10] -Input [12]: [ca_address_sk#8, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] -Condition : ((isnotnull(ca_state#9) AND (ca_state#9 = GA)) AND isnotnull(ca_address_sk#8)) - -(49) BroadcastExchange -Input [12]: [ca_address_sk#8, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#48] - -(50) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [c_current_addr_sk#33] -Right keys [1]: [ca_address_sk#8] -Join condition: None - -(51) Project [codegen id : 11] -Output [16]: [c_customer_id#32, c_salutation#34, c_first_name#35, c_last_name#36, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47, ctr_total_return#17] -Input [18]: [ctr_total_return#17, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, ca_address_sk#8, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] - -(52) TakeOrderedAndProject -Input [16]: [c_customer_id#32, c_salutation#34, c_first_name#35, c_last_name#36, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47, ctr_total_return#17] -Arguments: 100, [c_customer_id#32 ASC NULLS FIRST, c_salutation#34 ASC NULLS FIRST, c_first_name#35 ASC NULLS FIRST, c_last_name#36 ASC NULLS FIRST, ca_street_number#38 ASC NULLS FIRST, ca_street_name#39 ASC NULLS FIRST, ca_street_type#40 ASC NULLS FIRST, ca_suite_number#41 ASC NULLS FIRST, ca_city#42 ASC NULLS FIRST, ca_county#43 ASC NULLS FIRST, ca_state#9 ASC NULLS FIRST, ca_zip#44 ASC NULLS FIRST, ca_country#45 ASC NULLS FIRST, ca_gmt_offset#46 ASC NULLS FIRST, ca_location_type#47 ASC NULLS FIRST, ctr_total_return#17 ASC NULLS FIRST], [c_customer_id#32, c_salutation#34, c_first_name#35, c_last_name#36, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47, ctr_total_return#17] - +TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST,c_salutation#2 ASC NULLS FIRST,c_first_name#3 ASC NULLS FIRST,c_last_name#4 ASC NULLS FIRST,ca_street_number#5 ASC NULLS FIRST,ca_street_name#6 ASC NULLS FIRST,ca_street_type#7 ASC NULLS FIRST,ca_suite_number#8 ASC NULLS FIRST,ca_city#9 ASC NULLS FIRST,ca_county#10 ASC NULLS FIRST,ca_state#11 ASC NULLS FIRST,ca_zip#12 ASC NULLS FIRST,ca_country#13 ASC NULLS FIRST,ca_gmt_offset#14 ASC NULLS FIRST,ca_location_type#15 ASC NULLS FIRST,ctr_total_return#16 ASC NULLS FIRST], output=[c_customer_id#1,c_salutation#2,c_first_name#3,c_last_name#4,ca_street_number#5,ca_street_name#6,ca_street_type#7,ca_suite_number#8,ca_city#9,ca_county#10,ca_state#11,ca_zip#12,ca_country#13,ca_gmt_offset#14,ca_location_type#15,ctr_total_return#16]) ++- *(11) Project [c_customer_id#1, c_salutation#2, c_first_name#3, c_last_name#4, ca_street_number#5, ca_street_name#6, ca_street_type#7, ca_suite_number#8, ca_city#9, ca_county#10, ca_state#11, ca_zip#12, ca_country#13, ca_gmt_offset#14, ca_location_type#15, ctr_total_return#16] + +- *(11) BroadcastHashJoin [c_current_addr_sk#17], [ca_address_sk#18], Inner, BuildRight + :- *(11) Project [ctr_total_return#16, c_customer_id#1, c_current_addr_sk#17, c_salutation#2, c_first_name#3, c_last_name#4] + : +- *(11) BroadcastHashJoin [ctr_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + : :- *(11) Project [ctr_customer_sk#19, ctr_total_return#16] + : : +- *(11) BroadcastHashJoin [ctr_state#21], [ctr_state#21#22], Inner, BuildRight, (cast(ctr_total_return#16 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#23) + : : :- *(11) Filter isnotnull(ctr_total_return#16) + : : : +- *(11) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[sum(UnscaledValue(cr_return_amt_inc_tax#25))]) + : : : +- Exchange hashpartitioning(cr_returning_customer_sk#24, ca_state#11, 5) + : : : +- *(3) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[partial_sum(UnscaledValue(cr_return_amt_inc_tax#25))]) + : : : +- *(3) Project [cr_returning_customer_sk#24, cr_return_amt_inc_tax#25, ca_state#11] + : : : +- *(3) BroadcastHashJoin [cr_returning_addr_sk#26], [ca_address_sk#18], Inner, BuildRight + : : : :- *(3) Project [cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] + : : : : +- *(3) BroadcastHashJoin [cr_returned_date_sk#27], [d_date_sk#28], Inner, BuildRight + : : : : :- *(3) Project [cr_returned_date_sk#27, cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] + : : : : : +- *(3) Filter ((isnotnull(cr_returned_date_sk#27) && isnotnull(cr_returning_addr_sk#26)) && isnotnull(cr_returning_customer_sk#24)) + : : : : : +- *(3) FileScan parquet default.catalog_returns[cr_returned_date_sk#27,cr_returning_customer_sk#24,cr_returning_addr_sk#26,cr_return_amt_inc_tax#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk), IsNotNull(cr_returning_customer..., ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [ca_address_sk#18, ca_state#11] + : : : +- *(2) Filter (isnotnull(ca_address_sk#18) && isnotnull(ca_state#11)) + : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#18,ca_state#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) + : : +- *(8) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#23) + : : +- *(8) HashAggregate(keys=[ctr_state#21], functions=[avg(ctr_total_return#16)]) + : : +- Exchange hashpartitioning(ctr_state#21, 5) + : : +- *(7) HashAggregate(keys=[ctr_state#21], functions=[partial_avg(ctr_total_return#16)]) + : : +- *(7) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[sum(UnscaledValue(cr_return_amt_inc_tax#25))]) + : : +- Exchange hashpartitioning(cr_returning_customer_sk#24, ca_state#11, 5) + : : +- *(6) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[partial_sum(UnscaledValue(cr_return_amt_inc_tax#25))]) + : : +- *(6) Project [cr_returning_customer_sk#24, cr_return_amt_inc_tax#25, ca_state#11] + : : +- *(6) BroadcastHashJoin [cr_returning_addr_sk#26], [ca_address_sk#18], Inner, BuildRight + : : :- *(6) Project [cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] + : : : +- *(6) BroadcastHashJoin [cr_returned_date_sk#27], [d_date_sk#28], Inner, BuildRight + : : : :- *(6) Project [cr_returned_date_sk#27, cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] + : : : : +- *(6) Filter (isnotnull(cr_returned_date_sk#27) && isnotnull(cr_returning_addr_sk#26)) + : : : : +- *(6) FileScan parquet default.catalog_returns[cr_returned_date_sk#27,cr_returning_customer_sk#24,cr_returning_addr_sk#26,cr_return_amt_inc_tax#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk)], ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] - -(3) Filter [codegen id : 4] -Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] -Condition : ((((isnotnull(i_current_price#4) AND (i_current_price#4 >= 62.00)) AND (cast(i_current_price#4 as decimal(12,2)) <= 92.00)) AND i_manufact_id#5 IN (129,270,821,423)) AND isnotnull(i_item_sk#1)) - -(4) Project [codegen id : 4] -Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] -Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] - -(5) Scan parquet default.inventory -Output [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/inventory] -PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(inv_quantity_on_hand,500), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)] -ReadSchema: struct - -(6) ColumnarToRow [codegen id : 1] -Input [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] - -(7) Filter [codegen id : 1] -Input [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] -Condition : ((((isnotnull(inv_quantity_on_hand#8) AND (inv_quantity_on_hand#8 >= 100)) AND (inv_quantity_on_hand#8 <= 500)) AND isnotnull(inv_item_sk#7)) AND isnotnull(inv_date_sk#6)) - -(8) Project [codegen id : 1] -Output [2]: [inv_date_sk#6, inv_item_sk#7] -Input [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] - -(9) BroadcastExchange -Input [2]: [inv_date_sk#6, inv_item_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#9] - -(10) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_item_sk#1] -Right keys [1]: [inv_item_sk#7] -Join condition: None - -(11) Project [codegen id : 4] -Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#6] -Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#6, inv_item_sk#7] - -(12) Scan parquet default.date_dim -Output [2]: [d_date_sk#10, d_date#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-05-25), LessThanOrEqual(d_date,2000-07-24), IsNotNull(d_date_sk)] -ReadSchema: struct - -(13) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#10, d_date#11] - -(14) Filter [codegen id : 2] -Input [2]: [d_date_sk#10, d_date#11] -Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 11102)) AND (d_date#11 <= 11162)) AND isnotnull(d_date_sk#10)) - -(15) Project [codegen id : 2] -Output [1]: [d_date_sk#10] -Input [2]: [d_date_sk#10, d_date#11] - -(16) BroadcastExchange -Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(17) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [inv_date_sk#6] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(18) Project [codegen id : 4] -Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] -Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#6, d_date_sk#10] - -(19) Scan parquet default.store_sales -Output [1]: [ss_item_sk#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_item_sk)] -ReadSchema: struct - -(20) ColumnarToRow [codegen id : 3] -Input [1]: [ss_item_sk#13] - -(21) Filter [codegen id : 3] -Input [1]: [ss_item_sk#13] -Condition : isnotnull(ss_item_sk#13) - -(22) BroadcastExchange -Input [1]: [ss_item_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] - -(23) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_item_sk#1] -Right keys [1]: [ss_item_sk#13] -Join condition: None - -(24) Project [codegen id : 4] -Output [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, ss_item_sk#13] - -(25) HashAggregate [codegen id : 4] -Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Functions: [] -Aggregate Attributes: [] -Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] - -(26) Exchange -Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), true, [id=#15] - -(27) HashAggregate [codegen id : 5] -Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Functions: [] -Aggregate Attributes: [] -Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] - -(28) TakeOrderedAndProject -Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Arguments: 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_current_price#4] - +TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,i_current_price#3]) ++- *(5) HashAggregate(keys=[i_item_id#1, i_item_desc#2, i_current_price#3], functions=[]) + +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, i_current_price#3, 5) + +- *(4) HashAggregate(keys=[i_item_id#1, i_item_desc#2, i_current_price#3], functions=[]) + +- *(4) Project [i_item_id#1, i_item_desc#2, i_current_price#3] + +- *(4) BroadcastHashJoin [i_item_sk#4], [ss_item_sk#5], Inner, BuildRight + :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3] + : +- *(4) BroadcastHashJoin [inv_date_sk#6], [d_date_sk#7], Inner, BuildRight + : :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3, inv_date_sk#6] + : : +- *(4) BroadcastHashJoin [i_item_sk#4], [inv_item_sk#8], Inner, BuildRight + : : :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3] + : : : +- *(4) Filter ((((isnotnull(i_current_price#3) && (i_current_price#3 >= 62.00)) && (cast(i_current_price#3 as decimal(12,2)) <= 92.00)) && i_manufact_id#9 IN (129,270,821,423)) && isnotnull(i_item_sk#4)) + : : : +- *(4) FileScan parquet default.item[i_item_sk#4,i_item_id#1,i_item_desc#2,i_current_price#3,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,62.00), In(i_manufact_id, [129,27..., ReadSchema: struct= 100)) && (inv_quantity_on_hand#10 <= 500)) && isnotnull(inv_item_sk#8)) && isnotnull(inv_date_sk#6)) + : : +- *(1) FileScan parquet default.inventory[inv_date_sk#6,inv_item_sk#8,inv_quantity_on_hand#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(i..., ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#7] + : +- *(2) Filter (((isnotnull(d_date#11) && (d_date#11 >= 11102)) && (d_date#11 <= 11162)) && isnotnull(d_date_sk#7)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_date#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-05-25), LessThanOrEqual(d_date,2000-07-24), Is..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [ss_item_sk#5] + +- *(3) Filter isnotnull(ss_item_sk#5) + +- *(3) FileScan parquet default.store_sales[ss_item_sk#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/simplified.txt index 92f8729ca..e0e54c6c0 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/simplified.txt @@ -1,41 +1,34 @@ -TakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] - WholeStageCodegen (5) - HashAggregate [i_item_id,i_item_desc,i_current_price] +TakeOrderedAndProject [i_current_price,i_item_desc,i_item_id] + WholeStageCodegen + HashAggregate [i_current_price,i_item_desc,i_item_id] InputAdapter - Exchange [i_item_id,i_item_desc,i_current_price] #1 - WholeStageCodegen (4) - HashAggregate [i_item_id,i_item_desc,i_current_price] - Project [i_item_id,i_item_desc,i_current_price] + Exchange [i_current_price,i_item_desc,i_item_id] #1 + WholeStageCodegen + HashAggregate [i_current_price,i_item_desc,i_item_id] + Project [i_current_price,i_item_desc,i_item_id] BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_item_sk,i_item_id,i_item_desc,i_current_price] - BroadcastHashJoin [inv_date_sk,d_date_sk] - Project [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk] + BroadcastHashJoin [d_date_sk,inv_date_sk] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk,inv_date_sk] BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [i_item_sk,i_item_id,i_item_desc,i_current_price] - Filter [i_current_price,i_manufact_id,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] + Project [i_current_price,i_item_desc,i_item_id,i_item_sk] + Filter [i_current_price,i_item_sk,i_manufact_id] + Scan parquet default.item [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) + WholeStageCodegen Project [inv_date_sk,inv_item_sk] - Filter [inv_quantity_on_hand,inv_item_sk,inv_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) - Filter [ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk] + WholeStageCodegen + Project [ss_item_sk] + Filter [ss_item_sk] + Scan parquet default.store_sales [ss_item_sk] [ss_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/explain.txt index 0d44a0146..8b674b50e 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/explain.txt @@ -1,344 +1,69 @@ == Physical Plan == -TakeOrderedAndProject (61) -+- * Project (60) - +- * BroadcastHashJoin Inner BuildRight (59) - :- * Project (45) - : +- * BroadcastHashJoin Inner BuildRight (44) - : :- * HashAggregate (30) - : : +- Exchange (29) - : : +- * HashAggregate (28) - : : +- * Project (27) - : : +- * BroadcastHashJoin Inner BuildRight (26) - : : :- * Project (9) - : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_returns (1) - : : : +- BroadcastExchange (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.item (4) - : : +- BroadcastExchange (25) - : : +- * Project (24) - : : +- * BroadcastHashJoin LeftSemi BuildRight (23) - : : :- * Filter (12) - : : : +- * ColumnarToRow (11) - : : : +- Scan parquet default.date_dim (10) - : : +- BroadcastExchange (22) - : : +- * Project (21) - : : +- * BroadcastHashJoin LeftSemi BuildRight (20) - : : :- * ColumnarToRow (14) - : : : +- Scan parquet default.date_dim (13) - : : +- BroadcastExchange (19) - : : +- * Project (18) - : : +- * Filter (17) - : : +- * ColumnarToRow (16) - : : +- Scan parquet default.date_dim (15) - : +- BroadcastExchange (43) - : +- * HashAggregate (42) - : +- Exchange (41) - : +- * HashAggregate (40) - : +- * Project (39) - : +- * BroadcastHashJoin Inner BuildRight (38) - : :- * Project (36) - : : +- * BroadcastHashJoin Inner BuildRight (35) - : : :- * Filter (33) - : : : +- * ColumnarToRow (32) - : : : +- Scan parquet default.catalog_returns (31) - : : +- ReusedExchange (34) - : +- ReusedExchange (37) - +- BroadcastExchange (58) - +- * HashAggregate (57) - +- Exchange (56) - +- * HashAggregate (55) - +- * Project (54) - +- * BroadcastHashJoin Inner BuildRight (53) - :- * Project (51) - : +- * BroadcastHashJoin Inner BuildRight (50) - : :- * Filter (48) - : : +- * ColumnarToRow (47) - : : +- Scan parquet default.web_returns (46) - : +- ReusedExchange (49) - +- ReusedExchange (52) - - -(1) Scan parquet default.store_returns -Output [3]: [sr_returned_date_sk#1, sr_item_sk#2, sr_return_quantity#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 5] -Input [3]: [sr_returned_date_sk#1, sr_item_sk#2, sr_return_quantity#3] - -(3) Filter [codegen id : 5] -Input [3]: [sr_returned_date_sk#1, sr_item_sk#2, sr_return_quantity#3] -Condition : (isnotnull(sr_item_sk#2) AND isnotnull(sr_returned_date_sk#1)) - -(4) Scan parquet default.item -Output [2]: [i_item_sk#4, i_item_id#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [i_item_sk#4, i_item_id#5] - -(6) Filter [codegen id : 1] -Input [2]: [i_item_sk#4, i_item_id#5] -Condition : (isnotnull(i_item_sk#4) AND isnotnull(i_item_id#5)) - -(7) BroadcastExchange -Input [2]: [i_item_sk#4, i_item_id#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] - -(8) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [sr_item_sk#2] -Right keys [1]: [cast(i_item_sk#4 as bigint)] -Join condition: None - -(9) Project [codegen id : 5] -Output [3]: [sr_returned_date_sk#1, sr_return_quantity#3, i_item_id#5] -Input [5]: [sr_returned_date_sk#1, sr_item_sk#2, sr_return_quantity#3, i_item_sk#4, i_item_id#5] - -(10) Scan parquet default.date_dim -Output [2]: [d_date_sk#7, d_date#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 4] -Input [2]: [d_date_sk#7, d_date#8] - -(12) Filter [codegen id : 4] -Input [2]: [d_date_sk#7, d_date#8] -Condition : isnotnull(d_date_sk#7) - -(13) Scan parquet default.date_dim -Output [2]: [d_date#8, d_week_seq#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -ReadSchema: struct - -(14) ColumnarToRow [codegen id : 3] -Input [2]: [d_date#8, d_week_seq#9] - -(15) Scan parquet default.date_dim -Output [2]: [d_date#8, d_week_seq#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -ReadSchema: struct - -(16) ColumnarToRow [codegen id : 2] -Input [2]: [d_date#8, d_week_seq#9] - -(17) Filter [codegen id : 2] -Input [2]: [d_date#8, d_week_seq#9] -Condition : cast(d_date#8 as string) IN (2000-06-30,2000-09-27,2000-11-17) - -(18) Project [codegen id : 2] -Output [1]: [d_week_seq#9 AS d_week_seq#9#10] -Input [2]: [d_date#8, d_week_seq#9] - -(19) BroadcastExchange -Input [1]: [d_week_seq#9#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] - -(20) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [d_week_seq#9] -Right keys [1]: [d_week_seq#9#10] -Join condition: None - -(21) Project [codegen id : 3] -Output [1]: [d_date#8 AS d_date#8#12] -Input [2]: [d_date#8, d_week_seq#9] - -(22) BroadcastExchange -Input [1]: [d_date#8#12] -Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#13] - -(23) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [d_date#8] -Right keys [1]: [d_date#8#12] -Join condition: None - -(24) Project [codegen id : 4] -Output [1]: [d_date_sk#7] -Input [2]: [d_date_sk#7, d_date#8] - -(25) BroadcastExchange -Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] - -(26) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [sr_returned_date_sk#1] -Right keys [1]: [cast(d_date_sk#7 as bigint)] -Join condition: None - -(27) Project [codegen id : 5] -Output [2]: [sr_return_quantity#3, i_item_id#5] -Input [4]: [sr_returned_date_sk#1, sr_return_quantity#3, i_item_id#5, d_date_sk#7] - -(28) HashAggregate [codegen id : 5] -Input [2]: [sr_return_quantity#3, i_item_id#5] -Keys [1]: [i_item_id#5] -Functions [1]: [partial_sum(cast(sr_return_quantity#3 as bigint))] -Aggregate Attributes [1]: [sum#15] -Results [2]: [i_item_id#5, sum#16] - -(29) Exchange -Input [2]: [i_item_id#5, sum#16] -Arguments: hashpartitioning(i_item_id#5, 5), true, [id=#17] - -(30) HashAggregate [codegen id : 18] -Input [2]: [i_item_id#5, sum#16] -Keys [1]: [i_item_id#5] -Functions [1]: [sum(cast(sr_return_quantity#3 as bigint))] -Aggregate Attributes [1]: [sum(cast(sr_return_quantity#3 as bigint))#18] -Results [2]: [i_item_id#5 AS item_id#19, sum(cast(sr_return_quantity#3 as bigint))#18 AS sr_item_qty#20] - -(31) Scan parquet default.catalog_returns -Output [3]: [cr_returned_date_sk#21, cr_item_sk#22, cr_return_quantity#23] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_returned_date_sk)] -ReadSchema: struct - -(32) ColumnarToRow [codegen id : 10] -Input [3]: [cr_returned_date_sk#21, cr_item_sk#22, cr_return_quantity#23] - -(33) Filter [codegen id : 10] -Input [3]: [cr_returned_date_sk#21, cr_item_sk#22, cr_return_quantity#23] -Condition : (isnotnull(cr_item_sk#22) AND isnotnull(cr_returned_date_sk#21)) - -(34) ReusedExchange [Reuses operator id: 7] -Output [2]: [i_item_sk#4, i_item_id#5] - -(35) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cr_item_sk#22] -Right keys [1]: [i_item_sk#4] -Join condition: None - -(36) Project [codegen id : 10] -Output [3]: [cr_returned_date_sk#21, cr_return_quantity#23, i_item_id#5] -Input [5]: [cr_returned_date_sk#21, cr_item_sk#22, cr_return_quantity#23, i_item_sk#4, i_item_id#5] - -(37) ReusedExchange [Reuses operator id: 25] -Output [1]: [d_date_sk#7] - -(38) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cr_returned_date_sk#21] -Right keys [1]: [d_date_sk#7] -Join condition: None - -(39) Project [codegen id : 10] -Output [2]: [cr_return_quantity#23, i_item_id#5] -Input [4]: [cr_returned_date_sk#21, cr_return_quantity#23, i_item_id#5, d_date_sk#7] - -(40) HashAggregate [codegen id : 10] -Input [2]: [cr_return_quantity#23, i_item_id#5] -Keys [1]: [i_item_id#5] -Functions [1]: [partial_sum(cast(cr_return_quantity#23 as bigint))] -Aggregate Attributes [1]: [sum#24] -Results [2]: [i_item_id#5, sum#25] - -(41) Exchange -Input [2]: [i_item_id#5, sum#25] -Arguments: hashpartitioning(i_item_id#5, 5), true, [id=#26] - -(42) HashAggregate [codegen id : 11] -Input [2]: [i_item_id#5, sum#25] -Keys [1]: [i_item_id#5] -Functions [1]: [sum(cast(cr_return_quantity#23 as bigint))] -Aggregate Attributes [1]: [sum(cast(cr_return_quantity#23 as bigint))#27] -Results [2]: [i_item_id#5 AS item_id#28, sum(cast(cr_return_quantity#23 as bigint))#27 AS cr_item_qty#29] - -(43) BroadcastExchange -Input [2]: [item_id#28, cr_item_qty#29] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#30] - -(44) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [item_id#19] -Right keys [1]: [item_id#28] -Join condition: None - -(45) Project [codegen id : 18] -Output [3]: [item_id#19, sr_item_qty#20, cr_item_qty#29] -Input [4]: [item_id#19, sr_item_qty#20, item_id#28, cr_item_qty#29] - -(46) Scan parquet default.web_returns -Output [3]: [wr_returned_date_sk#31, wr_item_sk#32, wr_return_quantity#33] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_returns] -PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_returned_date_sk)] -ReadSchema: struct - -(47) ColumnarToRow [codegen id : 16] -Input [3]: [wr_returned_date_sk#31, wr_item_sk#32, wr_return_quantity#33] - -(48) Filter [codegen id : 16] -Input [3]: [wr_returned_date_sk#31, wr_item_sk#32, wr_return_quantity#33] -Condition : (isnotnull(wr_item_sk#32) AND isnotnull(wr_returned_date_sk#31)) - -(49) ReusedExchange [Reuses operator id: 7] -Output [2]: [i_item_sk#4, i_item_id#5] - -(50) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [wr_item_sk#32] -Right keys [1]: [cast(i_item_sk#4 as bigint)] -Join condition: None - -(51) Project [codegen id : 16] -Output [3]: [wr_returned_date_sk#31, wr_return_quantity#33, i_item_id#5] -Input [5]: [wr_returned_date_sk#31, wr_item_sk#32, wr_return_quantity#33, i_item_sk#4, i_item_id#5] - -(52) ReusedExchange [Reuses operator id: 25] -Output [1]: [d_date_sk#7] - -(53) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [wr_returned_date_sk#31] -Right keys [1]: [cast(d_date_sk#7 as bigint)] -Join condition: None - -(54) Project [codegen id : 16] -Output [2]: [wr_return_quantity#33, i_item_id#5] -Input [4]: [wr_returned_date_sk#31, wr_return_quantity#33, i_item_id#5, d_date_sk#7] - -(55) HashAggregate [codegen id : 16] -Input [2]: [wr_return_quantity#33, i_item_id#5] -Keys [1]: [i_item_id#5] -Functions [1]: [partial_sum(cast(wr_return_quantity#33 as bigint))] -Aggregate Attributes [1]: [sum#34] -Results [2]: [i_item_id#5, sum#35] - -(56) Exchange -Input [2]: [i_item_id#5, sum#35] -Arguments: hashpartitioning(i_item_id#5, 5), true, [id=#36] - -(57) HashAggregate [codegen id : 17] -Input [2]: [i_item_id#5, sum#35] -Keys [1]: [i_item_id#5] -Functions [1]: [sum(cast(wr_return_quantity#33 as bigint))] -Aggregate Attributes [1]: [sum(cast(wr_return_quantity#33 as bigint))#37] -Results [2]: [i_item_id#5 AS item_id#38, sum(cast(wr_return_quantity#33 as bigint))#37 AS wr_item_qty#39] - -(58) BroadcastExchange -Input [2]: [item_id#38, wr_item_qty#39] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#40] - -(59) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [item_id#19] -Right keys [1]: [item_id#38] -Join condition: None - -(60) Project [codegen id : 18] -Output [8]: [item_id#19, sr_item_qty#20, (((cast(sr_item_qty#20 as double) / cast(((sr_item_qty#20 + cr_item_qty#29) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#29, (((cast(cr_item_qty#29 as double) / cast(((sr_item_qty#20 + cr_item_qty#29) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#20 + cr_item_qty#29) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(cast(((sr_item_qty#20 + cr_item_qty#29) + wr_item_qty#39) as decimal(20,0)) as decimal(21,1))) / 3.0), DecimalType(27,6), true) AS average#44] -Input [5]: [item_id#19, sr_item_qty#20, cr_item_qty#29, item_id#38, wr_item_qty#39] - -(61) TakeOrderedAndProject -Input [8]: [item_id#19, sr_item_qty#20, sr_dev#41, cr_item_qty#29, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] -Arguments: 100, [item_id#19 ASC NULLS FIRST, sr_item_qty#20 ASC NULLS FIRST], [item_id#19, sr_item_qty#20, sr_dev#41, cr_item_qty#29, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] - +TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,sr_item_qty#2 ASC NULLS FIRST], output=[item_id#1,sr_item_qty#2,sr_dev#3,cr_item_qty#4,cr_dev#5,wr_item_qty#6,wr_dev#7,average#8]) ++- *(18) Project [item_id#1, sr_item_qty#2, (((cast(sr_item_qty#2 as double) / cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as double)) / 3.0) * 100.0) AS sr_dev#3, cr_item_qty#4, (((cast(cr_item_qty#4 as double) / cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as double)) / 3.0) * 100.0) AS cr_dev#5, wr_item_qty#6, (((cast(wr_item_qty#6 as double) / cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as double)) / 3.0) * 100.0) AS wr_dev#7, CheckOverflow((promote_precision(cast(cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as decimal(20,0)) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#8] + +- *(18) BroadcastHashJoin [item_id#1], [item_id#9], Inner, BuildRight + :- *(18) Project [item_id#1, sr_item_qty#2, cr_item_qty#4] + : +- *(18) BroadcastHashJoin [item_id#1], [item_id#10], Inner, BuildRight + : :- *(18) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(sr_return_quantity#12 as bigint))]) + : : +- Exchange hashpartitioning(i_item_id#11, 5) + : : +- *(5) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(cast(sr_return_quantity#12 as bigint))]) + : : +- *(5) Project [sr_return_quantity#12, i_item_id#11] + : : +- *(5) BroadcastHashJoin [sr_returned_date_sk#13], [cast(d_date_sk#14 as bigint)], Inner, BuildRight + : : :- *(5) Project [sr_returned_date_sk#13, sr_return_quantity#12, i_item_id#11] + : : : +- *(5) BroadcastHashJoin [sr_item_sk#15], [cast(i_item_sk#16 as bigint)], Inner, BuildRight + : : : :- *(5) Project [sr_returned_date_sk#13, sr_item_sk#15, sr_return_quantity#12] + : : : : +- *(5) Filter (isnotnull(sr_item_sk#15) && isnotnull(sr_returned_date_sk#13)) + : : : : +- *(5) FileScan parquet default.store_returns[sr_returned_date_sk#13,sr_item_sk#15,sr_return_quantity#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [i_item_sk#16, i_item_id#11] + : : : +- *(1) Filter (isnotnull(i_item_sk#16) && isnotnull(i_item_id#11)) + : : : +- *(1) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(4) Project [d_date_sk#14] + : : +- *(4) BroadcastHashJoin [d_date#17], [d_date#17#18], LeftSemi, BuildRight + : : :- *(4) Project [d_date_sk#14, d_date#17] + : : : +- *(4) Filter isnotnull(d_date_sk#14) + : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) + : : +- *(3) Project [d_date#17 AS d_date#17#18] + : : +- *(3) BroadcastHashJoin [d_week_seq#19], [d_week_seq#19#20], LeftSemi, BuildRight + : : :- *(3) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [d_week_seq#19 AS d_week_seq#19#20] + : : +- *(2) Filter cast(d_date#17 as string) IN (2000-06-30,2000-09-27,2000-11-17) + : : +- *(2) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + : +- *(11) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(cr_return_quantity#21 as bigint))]) + : +- Exchange hashpartitioning(i_item_id#11, 5) + : +- *(10) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(cast(cr_return_quantity#21 as bigint))]) + : +- *(10) Project [cr_return_quantity#21, i_item_id#11] + : +- *(10) BroadcastHashJoin [cr_returned_date_sk#22], [d_date_sk#14], Inner, BuildRight + : :- *(10) Project [cr_returned_date_sk#22, cr_return_quantity#21, i_item_id#11] + : : +- *(10) BroadcastHashJoin [cr_item_sk#23], [i_item_sk#16], Inner, BuildRight + : : :- *(10) Project [cr_returned_date_sk#22, cr_item_sk#23, cr_return_quantity#21] + : : : +- *(10) Filter (isnotnull(cr_item_sk#23) && isnotnull(cr_returned_date_sk#22)) + : : : +- *(10) FileScan parquet default.catalog_returns[cr_returned_date_sk#22,cr_item_sk#23,cr_return_quantity#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_returned_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [i_item_sk#16, i_item_id#11] + : : +- *(6) Filter (isnotnull(i_item_sk#16) && isnotnull(i_item_id#11)) + : : +- *(6) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(9) Project [d_date_sk#14] + : +- *(9) BroadcastHashJoin [d_date#17], [d_date#17#24], LeftSemi, BuildRight + : :- *(9) Project [d_date_sk#14, d_date#17] + : : +- *(9) Filter isnotnull(d_date_sk#14) + : : +- *(9) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + : +- ReusedExchange [d_date#17#24], BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) + +- *(17) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(wr_return_quantity#25 as bigint))]) + +- Exchange hashpartitioning(i_item_id#11, 5) + +- *(16) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(cast(wr_return_quantity#25 as bigint))]) + +- *(16) Project [wr_return_quantity#25, i_item_id#11] + +- *(16) BroadcastHashJoin [wr_returned_date_sk#26], [cast(d_date_sk#14 as bigint)], Inner, BuildRight + :- *(16) Project [wr_returned_date_sk#26, wr_return_quantity#25, i_item_id#11] + : +- *(16) BroadcastHashJoin [wr_item_sk#27], [cast(i_item_sk#16 as bigint)], Inner, BuildRight + : :- *(16) Project [wr_returned_date_sk#26, wr_item_sk#27, wr_return_quantity#25] + : : +- *(16) Filter (isnotnull(wr_item_sk#27) && isnotnull(wr_returned_date_sk#26)) + : : +- *(16) FileScan parquet default.web_returns[wr_returned_date_sk#26,wr_item_sk#27,wr_return_quantity#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_returned_date_sk)], ReadSchema: struct + : +- ReusedExchange [i_item_sk#16, i_item_id#11], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [d_date_sk#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/simplified.txt index c12d5ffaa..94ffc5da9 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/simplified.txt @@ -1,91 +1,94 @@ -TakeOrderedAndProject [item_id,sr_item_qty,sr_dev,cr_item_qty,cr_dev,wr_item_qty,wr_dev,average] - WholeStageCodegen (18) - Project [item_id,sr_item_qty,cr_item_qty,wr_item_qty] +TakeOrderedAndProject [average,cr_dev,cr_item_qty,item_id,sr_dev,sr_item_qty,wr_dev,wr_item_qty] + WholeStageCodegen + Project [cr_item_qty,item_id,sr_item_qty,wr_item_qty] BroadcastHashJoin [item_id,item_id] - Project [item_id,sr_item_qty,cr_item_qty] + Project [cr_item_qty,item_id,sr_item_qty] BroadcastHashJoin [item_id,item_id] - HashAggregate [i_item_id,sum] [sum(cast(sr_return_quantity as bigint)),item_id,sr_item_qty,sum] + HashAggregate [i_item_id,sum,sum(cast(sr_return_quantity as bigint))] [item_id,sr_item_qty,sum,sum(cast(sr_return_quantity as bigint))] InputAdapter Exchange [i_item_id] #1 - WholeStageCodegen (5) - HashAggregate [i_item_id,sr_return_quantity] [sum,sum] - Project [sr_return_quantity,i_item_id] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [sr_returned_date_sk,sr_return_quantity,i_item_id] - BroadcastHashJoin [sr_item_sk,i_item_sk] - Filter [sr_item_sk,sr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_return_quantity] + WholeStageCodegen + HashAggregate [i_item_id,sr_return_quantity,sum,sum] [sum,sum] + Project [i_item_id,sr_return_quantity] + BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + Project [i_item_id,sr_return_quantity,sr_returned_date_sk] + BroadcastHashJoin [i_item_sk,sr_item_sk] + Project [sr_item_sk,sr_return_quantity,sr_returned_date_sk] + Filter [sr_item_sk,sr_returned_date_sk] + Scan parquet default.store_returns [sr_item_sk,sr_return_quantity,sr_returned_date_sk] [sr_item_sk,sr_return_quantity,sr_returned_date_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [i_item_sk,i_item_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id] + WholeStageCodegen + Project [i_item_id,i_item_sk] + Filter [i_item_id,i_item_sk] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (4) + WholeStageCodegen Project [d_date_sk] BroadcastHashJoin [d_date,d_date] - Filter [d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Project [d_date,d_date_sk] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) + WholeStageCodegen Project [d_date] BroadcastHashJoin [d_week_seq,d_week_seq] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date,d_week_seq] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] InputAdapter BroadcastExchange #5 - WholeStageCodegen (2) + WholeStageCodegen Project [d_week_seq] Filter [d_date] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date,d_week_seq] + Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] InputAdapter BroadcastExchange #6 - WholeStageCodegen (11) - HashAggregate [i_item_id,sum] [sum(cast(cr_return_quantity as bigint)),item_id,cr_item_qty,sum] + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(cast(cr_return_quantity as bigint))] [cr_item_qty,item_id,sum,sum(cast(cr_return_quantity as bigint))] InputAdapter Exchange [i_item_id] #7 - WholeStageCodegen (10) - HashAggregate [i_item_id,cr_return_quantity] [sum,sum] + WholeStageCodegen + HashAggregate [cr_return_quantity,i_item_id,sum,sum] [sum,sum] Project [cr_return_quantity,i_item_id] BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Project [cr_returned_date_sk,cr_return_quantity,i_item_id] + Project [cr_return_quantity,cr_returned_date_sk,i_item_id] BroadcastHashJoin [cr_item_sk,i_item_sk] - Filter [cr_item_sk,cr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_item_sk,cr_return_quantity] + Project [cr_item_sk,cr_return_quantity,cr_returned_date_sk] + Filter [cr_item_sk,cr_returned_date_sk] + Scan parquet default.catalog_returns [cr_item_sk,cr_return_quantity,cr_returned_date_sk] [cr_item_sk,cr_return_quantity,cr_returned_date_sk] InputAdapter - ReusedExchange [i_item_sk,i_item_id] #2 + BroadcastExchange #8 + WholeStageCodegen + Project [i_item_id,i_item_sk] + Filter [i_item_id,i_item_sk] + Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] InputAdapter - ReusedExchange [d_date_sk] #3 + BroadcastExchange #9 + WholeStageCodegen + Project [d_date_sk] + BroadcastHashJoin [d_date,d_date] + Project [d_date,d_date_sk] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + InputAdapter + ReusedExchange [d_date] [d_date] #4 InputAdapter - BroadcastExchange #8 - WholeStageCodegen (17) - HashAggregate [i_item_id,sum] [sum(cast(wr_return_quantity as bigint)),item_id,wr_item_qty,sum] + BroadcastExchange #10 + WholeStageCodegen + HashAggregate [i_item_id,sum,sum(cast(wr_return_quantity as bigint))] [item_id,sum,sum(cast(wr_return_quantity as bigint)),wr_item_qty] InputAdapter - Exchange [i_item_id] #9 - WholeStageCodegen (16) - HashAggregate [i_item_id,wr_return_quantity] [sum,sum] - Project [wr_return_quantity,i_item_id] - BroadcastHashJoin [wr_returned_date_sk,d_date_sk] - Project [wr_returned_date_sk,wr_return_quantity,i_item_id] - BroadcastHashJoin [wr_item_sk,i_item_sk] - Filter [wr_item_sk,wr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_returned_date_sk,wr_item_sk,wr_return_quantity] + Exchange [i_item_id] #11 + WholeStageCodegen + HashAggregate [i_item_id,sum,sum,wr_return_quantity] [sum,sum] + Project [i_item_id,wr_return_quantity] + BroadcastHashJoin [d_date_sk,wr_returned_date_sk] + Project [i_item_id,wr_return_quantity,wr_returned_date_sk] + BroadcastHashJoin [i_item_sk,wr_item_sk] + Project [wr_item_sk,wr_return_quantity,wr_returned_date_sk] + Filter [wr_item_sk,wr_returned_date_sk] + Scan parquet default.web_returns [wr_item_sk,wr_return_quantity,wr_returned_date_sk] [wr_item_sk,wr_return_quantity,wr_returned_date_sk] InputAdapter - ReusedExchange [i_item_sk,i_item_id] #2 + ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #2 InputAdapter - ReusedExchange [d_date_sk] #3 + ReusedExchange [d_date_sk] [d_date_sk] #3 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/explain.txt index 9e5fb4386..c0cde7a5d 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/explain.txt @@ -1,200 +1,35 @@ == Physical Plan == -TakeOrderedAndProject (36) -+- * Project (35) - +- * BroadcastHashJoin Inner BuildRight (34) - :- * Project (29) - : +- * BroadcastHashJoin Inner BuildRight (28) - : :- * Project (22) - : : +- * BroadcastHashJoin Inner BuildRight (21) - : : :- * Project (16) - : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : :- * Project (10) - : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.customer (1) - : : : : +- BroadcastExchange (8) - : : : : +- * Project (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.customer_address (4) - : : : +- BroadcastExchange (14) - : : : +- * Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.customer_demographics (11) - : : +- BroadcastExchange (20) - : : +- * Filter (19) - : : +- * ColumnarToRow (18) - : : +- Scan parquet default.household_demographics (17) - : +- BroadcastExchange (27) - : +- * Project (26) - : +- * Filter (25) - : +- * ColumnarToRow (24) - : +- Scan parquet default.income_band (23) - +- BroadcastExchange (33) - +- * Filter (32) - +- * ColumnarToRow (31) - +- Scan parquet default.store_returns (30) - - -(1) Scan parquet default.customer -Output [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 6] -Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] - -(3) Filter [codegen id : 6] -Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] -Condition : ((isnotnull(c_current_addr_sk#4) AND isnotnull(c_current_cdemo_sk#2)) AND isnotnull(c_current_hdemo_sk#3)) - -(4) Scan parquet default.customer_address -Output [2]: [ca_address_sk#7, ca_city#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_city), EqualTo(ca_city,Edgewood), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [ca_address_sk#7, ca_city#8] - -(6) Filter [codegen id : 1] -Input [2]: [ca_address_sk#7, ca_city#8] -Condition : ((isnotnull(ca_city#8) AND (ca_city#8 = Edgewood)) AND isnotnull(ca_address_sk#7)) - -(7) Project [codegen id : 1] -Output [1]: [ca_address_sk#7] -Input [2]: [ca_address_sk#7, ca_city#8] - -(8) BroadcastExchange -Input [1]: [ca_address_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] - -(9) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [c_current_addr_sk#4] -Right keys [1]: [ca_address_sk#7] -Join condition: None - -(10) Project [codegen id : 6] -Output [5]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6] -Input [7]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6, ca_address_sk#7] - -(11) Scan parquet default.customer_demographics -Output [1]: [cd_demo_sk#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_demo_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [1]: [cd_demo_sk#10] - -(13) Filter [codegen id : 2] -Input [1]: [cd_demo_sk#10] -Condition : isnotnull(cd_demo_sk#10) - -(14) BroadcastExchange -Input [1]: [cd_demo_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] - -(15) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [c_current_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#10] -Join condition: None - -(16) Project [codegen id : 6] -Output [5]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#10] -Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#10] - -(17) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#12, hd_income_band_sk#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] -ReadSchema: struct - -(18) ColumnarToRow [codegen id : 3] -Input [2]: [hd_demo_sk#12, hd_income_band_sk#13] - -(19) Filter [codegen id : 3] -Input [2]: [hd_demo_sk#12, hd_income_band_sk#13] -Condition : (isnotnull(hd_demo_sk#12) AND isnotnull(hd_income_band_sk#13)) - -(20) BroadcastExchange -Input [2]: [hd_demo_sk#12, hd_income_band_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] - -(21) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [c_current_hdemo_sk#3] -Right keys [1]: [hd_demo_sk#12] -Join condition: None - -(22) Project [codegen id : 6] -Output [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#10, hd_income_band_sk#13] -Input [7]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#10, hd_demo_sk#12, hd_income_band_sk#13] - -(23) Scan parquet default.income_band -Output [3]: [ib_income_band_sk#15, ib_lower_bound#16, ib_upper_bound#17] -Batched: true -Location [not included in comparison]/{warehouse_dir}/income_band] -PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), LessThanOrEqual(ib_upper_bound,88128), IsNotNull(ib_income_band_sk)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 4] -Input [3]: [ib_income_band_sk#15, ib_lower_bound#16, ib_upper_bound#17] - -(25) Filter [codegen id : 4] -Input [3]: [ib_income_band_sk#15, ib_lower_bound#16, ib_upper_bound#17] -Condition : ((((isnotnull(ib_lower_bound#16) AND isnotnull(ib_upper_bound#17)) AND (ib_lower_bound#16 >= 38128)) AND (ib_upper_bound#17 <= 88128)) AND isnotnull(ib_income_band_sk#15)) - -(26) Project [codegen id : 4] -Output [1]: [ib_income_band_sk#15] -Input [3]: [ib_income_band_sk#15, ib_lower_bound#16, ib_upper_bound#17] - -(27) BroadcastExchange -Input [1]: [ib_income_band_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] - -(28) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [hd_income_band_sk#13] -Right keys [1]: [ib_income_band_sk#15] -Join condition: None - -(29) Project [codegen id : 6] -Output [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#10] -Input [6]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#10, hd_income_band_sk#13, ib_income_band_sk#15] - -(30) Scan parquet default.store_returns -Output [1]: [sr_cdemo_sk#19] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_cdemo_sk)] -ReadSchema: struct - -(31) ColumnarToRow [codegen id : 5] -Input [1]: [sr_cdemo_sk#19] - -(32) Filter [codegen id : 5] -Input [1]: [sr_cdemo_sk#19] -Condition : isnotnull(sr_cdemo_sk#19) - -(33) BroadcastExchange -Input [1]: [sr_cdemo_sk#19] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#20] - -(34) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cast(cd_demo_sk#10 as bigint)] -Right keys [1]: [sr_cdemo_sk#19] -Join condition: None - -(35) Project [codegen id : 6] -Output [3]: [c_customer_id#1 AS customer_id#21, concat(c_last_name#6, , , c_first_name#5) AS customername#22, c_customer_id#1] -Input [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#10, sr_cdemo_sk#19] - -(36) TakeOrderedAndProject -Input [3]: [customer_id#21, customername#22, c_customer_id#1] -Arguments: 100, [c_customer_id#1 ASC NULLS FIRST], [customer_id#21, customername#22] - +TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST], output=[customer_id#2,customername#3]) ++- *(6) Project [c_customer_id#1 AS customer_id#2, concat(c_last_name#4, , , c_first_name#5) AS customername#3, c_customer_id#1] + +- *(6) BroadcastHashJoin [cast(cd_demo_sk#6 as bigint)], [sr_cdemo_sk#7], Inner, BuildRight + :- *(6) Project [c_customer_id#1, c_first_name#5, c_last_name#4, cd_demo_sk#6] + : +- *(6) BroadcastHashJoin [hd_income_band_sk#8], [ib_income_band_sk#9], Inner, BuildRight + : :- *(6) Project [c_customer_id#1, c_first_name#5, c_last_name#4, cd_demo_sk#6, hd_income_band_sk#8] + : : +- *(6) BroadcastHashJoin [c_current_hdemo_sk#10], [hd_demo_sk#11], Inner, BuildRight + : : :- *(6) Project [c_customer_id#1, c_current_hdemo_sk#10, c_first_name#5, c_last_name#4, cd_demo_sk#6] + : : : +- *(6) BroadcastHashJoin [c_current_cdemo_sk#12], [cd_demo_sk#6], Inner, BuildRight + : : : :- *(6) Project [c_customer_id#1, c_current_cdemo_sk#12, c_current_hdemo_sk#10, c_first_name#5, c_last_name#4] + : : : : +- *(6) BroadcastHashJoin [c_current_addr_sk#13], [ca_address_sk#14], Inner, BuildRight + : : : : :- *(6) Project [c_customer_id#1, c_current_cdemo_sk#12, c_current_hdemo_sk#10, c_current_addr_sk#13, c_first_name#5, c_last_name#4] + : : : : : +- *(6) Filter ((isnotnull(c_current_addr_sk#13) && isnotnull(c_current_cdemo_sk#12)) && isnotnull(c_current_hdemo_sk#10)) + : : : : : +- *(6) FileScan parquet default.customer[c_customer_id#1,c_current_cdemo_sk#12,c_current_hdemo_sk#10,c_current_addr_sk#13,c_first_name#5,c_last_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(2) Project [cd_demo_sk#6] + : : : +- *(2) Filter isnotnull(cd_demo_sk#6) + : : : +- *(2) FileScan parquet default.customer_demographics[cd_demo_sk#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [hd_demo_sk#11, hd_income_band_sk#8] + : : +- *(3) Filter (isnotnull(hd_demo_sk#11) && isnotnull(hd_income_band_sk#8)) + : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#11,hd_income_band_sk#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [ib_income_band_sk#9] + : +- *(4) Filter ((((isnotnull(ib_lower_bound#16) && isnotnull(ib_upper_bound#17)) && (ib_lower_bound#16 >= 38128)) && (ib_upper_bound#17 <= 88128)) && isnotnull(ib_income_band_sk#9)) + : +- *(4) FileScan parquet default.income_band[ib_income_band_sk#9,ib_lower_bound#16,ib_upper_bound#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/income_band], PartitionFilters: [], PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), ..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])) + +- *(5) Project [sr_cdemo_sk#7] + +- *(5) Filter isnotnull(sr_cdemo_sk#7) + +- *(5) FileScan parquet default.store_returns[sr_cdemo_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_cdemo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/simplified.txt index 014d808cf..894b15298 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/simplified.txt @@ -1,53 +1,45 @@ TakeOrderedAndProject [c_customer_id,customer_id,customername] - WholeStageCodegen (6) - Project [c_customer_id,c_last_name,c_first_name] + WholeStageCodegen + Project [c_customer_id,c_first_name,c_last_name] BroadcastHashJoin [cd_demo_sk,sr_cdemo_sk] Project [c_customer_id,c_first_name,c_last_name,cd_demo_sk] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] Project [c_customer_id,c_first_name,c_last_name,cd_demo_sk,hd_income_band_sk] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [c_customer_id,c_current_hdemo_sk,c_first_name,c_last_name,cd_demo_sk] + Project [c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name,cd_demo_sk] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] - Project [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_first_name,c_last_name] + Project [c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_name,c_last_name] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] + Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] InputAdapter BroadcastExchange #1 - WholeStageCodegen (1) + WholeStageCodegen Project [ca_address_sk] - Filter [ca_city,ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_city] + Filter [ca_address_sk,ca_city] + Scan parquet default.customer_address [ca_address_sk,ca_city] [ca_address_sk,ca_city] InputAdapter BroadcastExchange #2 - WholeStageCodegen (2) - Filter [cd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk] + WholeStageCodegen + Project [cd_demo_sk] + Filter [cd_demo_sk] + Scan parquet default.customer_demographics [cd_demo_sk] [cd_demo_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (3) - Filter [hd_demo_sk,hd_income_band_sk] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] + WholeStageCodegen + Project [hd_demo_sk,hd_income_band_sk] + Filter [hd_demo_sk,hd_income_band_sk] + Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (4) + WholeStageCodegen Project [ib_income_band_sk] - Filter [ib_lower_bound,ib_upper_bound,ib_income_band_sk] - ColumnarToRow - InputAdapter - Scan parquet default.income_band [ib_income_band_sk,ib_lower_bound,ib_upper_bound] + Filter [ib_income_band_sk,ib_lower_bound,ib_upper_bound] + Scan parquet default.income_band [ib_income_band_sk,ib_lower_bound,ib_upper_bound] [ib_income_band_sk,ib_lower_bound,ib_upper_bound] InputAdapter BroadcastExchange #5 - WholeStageCodegen (5) - Filter [sr_cdemo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_cdemo_sk] + WholeStageCodegen + Project [sr_cdemo_sk] + Filter [sr_cdemo_sk] + Scan parquet default.store_returns [sr_cdemo_sk] [sr_cdemo_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/explain.txt index 23598c2e7..106b129ea 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/explain.txt @@ -1,287 +1,50 @@ == Physical Plan == -TakeOrderedAndProject (51) -+- * HashAggregate (50) - +- Exchange (49) - +- * HashAggregate (48) - +- * Project (47) - +- * BroadcastHashJoin Inner BuildRight (46) - :- * Project (41) - : +- * BroadcastHashJoin Inner BuildRight (40) - : :- * Project (34) - : : +- * BroadcastHashJoin Inner BuildRight (33) - : : :- * Project (27) - : : : +- * BroadcastHashJoin Inner BuildRight (26) - : : : :- * Project (21) - : : : : +- * BroadcastHashJoin Inner BuildRight (20) - : : : : :- * Project (15) - : : : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : : : :- * Project (9) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : : : : :- * Filter (3) - : : : : : : : +- * ColumnarToRow (2) - : : : : : : : +- Scan parquet default.web_sales (1) - : : : : : : +- BroadcastExchange (7) - : : : : : : +- * Filter (6) - : : : : : : +- * ColumnarToRow (5) - : : : : : : +- Scan parquet default.web_returns (4) - : : : : : +- BroadcastExchange (13) - : : : : : +- * Filter (12) - : : : : : +- * ColumnarToRow (11) - : : : : : +- Scan parquet default.web_page (10) - : : : : +- BroadcastExchange (19) - : : : : +- * Filter (18) - : : : : +- * ColumnarToRow (17) - : : : : +- Scan parquet default.customer_demographics (16) - : : : +- BroadcastExchange (25) - : : : +- * Filter (24) - : : : +- * ColumnarToRow (23) - : : : +- Scan parquet default.customer_demographics (22) - : : +- BroadcastExchange (32) - : : +- * Project (31) - : : +- * Filter (30) - : : +- * ColumnarToRow (29) - : : +- Scan parquet default.customer_address (28) - : +- BroadcastExchange (39) - : +- * Project (38) - : +- * Filter (37) - : +- * ColumnarToRow (36) - : +- Scan parquet default.date_dim (35) - +- BroadcastExchange (45) - +- * Filter (44) - +- * ColumnarToRow (43) - +- Scan parquet default.reason (42) - - -(1) Scan parquet default.web_sales -Output [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), IsNotNull(ws_sold_date_sk), Or(Or(And(GreaterThanOrEqual(ws_sales_price,100.00),LessThanOrEqual(ws_sales_price,150.00)),And(GreaterThanOrEqual(ws_sales_price,50.00),LessThanOrEqual(ws_sales_price,100.00))),And(GreaterThanOrEqual(ws_sales_price,150.00),LessThanOrEqual(ws_sales_price,200.00))), Or(Or(And(GreaterThanOrEqual(ws_net_profit,100.00),LessThanOrEqual(ws_net_profit,200.00)),And(GreaterThanOrEqual(ws_net_profit,150.00),LessThanOrEqual(ws_net_profit,300.00))),And(GreaterThanOrEqual(ws_net_profit,50.00),LessThanOrEqual(ws_net_profit,250.00)))] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 8] -Input [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] - -(3) Filter [codegen id : 8] -Input [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] -Condition : (((((isnotnull(ws_item_sk#2) AND isnotnull(ws_order_number#4)) AND isnotnull(ws_web_page_sk#3)) AND isnotnull(ws_sold_date_sk#1)) AND ((((ws_sales_price#6 >= 100.00) AND (ws_sales_price#6 <= 150.00)) OR ((ws_sales_price#6 >= 50.00) AND (ws_sales_price#6 <= 100.00))) OR ((ws_sales_price#6 >= 150.00) AND (ws_sales_price#6 <= 200.00)))) AND ((((ws_net_profit#7 >= 100.00) AND (ws_net_profit#7 <= 200.00)) OR ((ws_net_profit#7 >= 150.00) AND (ws_net_profit#7 <= 300.00))) OR ((ws_net_profit#7 >= 50.00) AND (ws_net_profit#7 <= 250.00)))) - -(4) Scan parquet default.web_returns -Output [8]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_returns] -PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number), IsNotNull(wr_refunded_cdemo_sk), IsNotNull(wr_returning_cdemo_sk), IsNotNull(wr_refunded_addr_sk), IsNotNull(wr_reason_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [8]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] - -(6) Filter [codegen id : 1] -Input [8]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] -Condition : (((((isnotnull(wr_item_sk#8) AND isnotnull(wr_order_number#13)) AND isnotnull(wr_refunded_cdemo_sk#9)) AND isnotnull(wr_returning_cdemo_sk#11)) AND isnotnull(wr_refunded_addr_sk#10)) AND isnotnull(wr_reason_sk#12)) - -(7) BroadcastExchange -Input [8]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[5, bigint, false]),false), [id=#16] - -(8) BroadcastHashJoin [codegen id : 8] -Left keys [2]: [cast(ws_item_sk#2 as bigint), cast(ws_order_number#4 as bigint)] -Right keys [2]: [wr_item_sk#8, wr_order_number#13] -Join condition: None - -(9) Project [codegen id : 8] -Output [11]: [ws_sold_date_sk#1, ws_web_page_sk#3, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] -Input [15]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] - -(10) Scan parquet default.web_page -Output [1]: [wp_web_page_sk#17] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_page] -PushedFilters: [IsNotNull(wp_web_page_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [1]: [wp_web_page_sk#17] - -(12) Filter [codegen id : 2] -Input [1]: [wp_web_page_sk#17] -Condition : isnotnull(wp_web_page_sk#17) - -(13) BroadcastExchange -Input [1]: [wp_web_page_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] - -(14) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ws_web_page_sk#3] -Right keys [1]: [wp_web_page_sk#17] -Join condition: None - -(15) Project [codegen id : 8] -Output [10]: [ws_sold_date_sk#1, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] -Input [12]: [ws_sold_date_sk#1, ws_web_page_sk#3, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, wp_web_page_sk#17] - -(16) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree)),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree)))] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 3] -Input [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] - -(18) Filter [codegen id : 3] -Input [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] -Condition : (((isnotnull(cd_demo_sk#19) AND isnotnull(cd_marital_status#20)) AND isnotnull(cd_education_status#21)) AND ((((cd_marital_status#20 = M) AND (cd_education_status#21 = Advanced Degree)) OR ((cd_marital_status#20 = S) AND (cd_education_status#21 = College))) OR ((cd_marital_status#20 = W) AND (cd_education_status#21 = 2 yr Degree)))) - -(19) BroadcastExchange -Input [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] - -(20) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [wr_refunded_cdemo_sk#9] -Right keys [1]: [cast(cd_demo_sk#19 as bigint)] -Join condition: ((((((cd_marital_status#20 = M) AND (cd_education_status#21 = Advanced Degree)) AND (ws_sales_price#6 >= 100.00)) AND (ws_sales_price#6 <= 150.00)) OR ((((cd_marital_status#20 = S) AND (cd_education_status#21 = College)) AND (ws_sales_price#6 >= 50.00)) AND (ws_sales_price#6 <= 100.00))) OR ((((cd_marital_status#20 = W) AND (cd_education_status#21 = 2 yr Degree)) AND (ws_sales_price#6 >= 150.00)) AND (ws_sales_price#6 <= 200.00))) - -(21) Project [codegen id : 8] -Output [10]: [ws_sold_date_sk#1, ws_quantity#5, ws_net_profit#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#20, cd_education_status#21] -Input [13]: [ws_sold_date_sk#1, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] - -(22) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#23, cd_marital_status#24, cd_education_status#25] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status)] -ReadSchema: struct - -(23) ColumnarToRow [codegen id : 4] -Input [3]: [cd_demo_sk#23, cd_marital_status#24, cd_education_status#25] - -(24) Filter [codegen id : 4] -Input [3]: [cd_demo_sk#23, cd_marital_status#24, cd_education_status#25] -Condition : ((isnotnull(cd_demo_sk#23) AND isnotnull(cd_marital_status#24)) AND isnotnull(cd_education_status#25)) - -(25) BroadcastExchange -Input [3]: [cd_demo_sk#23, cd_marital_status#24, cd_education_status#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint), input[1, string, false], input[2, string, false]),false), [id=#26] - -(26) BroadcastHashJoin [codegen id : 8] -Left keys [3]: [wr_returning_cdemo_sk#11, cd_marital_status#20, cd_education_status#21] -Right keys [3]: [cast(cd_demo_sk#23 as bigint), cd_marital_status#24, cd_education_status#25] -Join condition: None - -(27) Project [codegen id : 8] -Output [7]: [ws_sold_date_sk#1, ws_quantity#5, ws_net_profit#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] -Input [13]: [ws_sold_date_sk#1, ws_quantity#5, ws_net_profit#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#20, cd_education_status#21, cd_demo_sk#23, cd_marital_status#24, cd_education_status#25] - -(28) Scan parquet default.customer_address -Output [3]: [ca_address_sk#27, ca_state#28, ca_country#29] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [IN,OH,NJ]),In(ca_state, [WI,CT,KY])),In(ca_state, [LA,IA,AR]))] -ReadSchema: struct - -(29) ColumnarToRow [codegen id : 5] -Input [3]: [ca_address_sk#27, ca_state#28, ca_country#29] - -(30) Filter [codegen id : 5] -Input [3]: [ca_address_sk#27, ca_state#28, ca_country#29] -Condition : (((isnotnull(ca_country#29) AND (ca_country#29 = United States)) AND isnotnull(ca_address_sk#27)) AND ((ca_state#28 IN (IN,OH,NJ) OR ca_state#28 IN (WI,CT,KY)) OR ca_state#28 IN (LA,IA,AR))) - -(31) Project [codegen id : 5] -Output [2]: [ca_address_sk#27, ca_state#28] -Input [3]: [ca_address_sk#27, ca_state#28, ca_country#29] - -(32) BroadcastExchange -Input [2]: [ca_address_sk#27, ca_state#28] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] - -(33) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [wr_refunded_addr_sk#10] -Right keys [1]: [cast(ca_address_sk#27 as bigint)] -Join condition: ((((ca_state#28 IN (IN,OH,NJ) AND (ws_net_profit#7 >= 100.00)) AND (ws_net_profit#7 <= 200.00)) OR ((ca_state#28 IN (WI,CT,KY) AND (ws_net_profit#7 >= 150.00)) AND (ws_net_profit#7 <= 300.00))) OR ((ca_state#28 IN (LA,IA,AR) AND (ws_net_profit#7 >= 50.00)) AND (ws_net_profit#7 <= 250.00))) - -(34) Project [codegen id : 8] -Output [5]: [ws_sold_date_sk#1, ws_quantity#5, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] -Input [9]: [ws_sold_date_sk#1, ws_quantity#5, ws_net_profit#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, ca_address_sk#27, ca_state#28] - -(35) Scan parquet default.date_dim -Output [2]: [d_date_sk#31, d_year#32] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct - -(36) ColumnarToRow [codegen id : 6] -Input [2]: [d_date_sk#31, d_year#32] - -(37) Filter [codegen id : 6] -Input [2]: [d_date_sk#31, d_year#32] -Condition : ((isnotnull(d_year#32) AND (d_year#32 = 2000)) AND isnotnull(d_date_sk#31)) - -(38) Project [codegen id : 6] -Output [1]: [d_date_sk#31] -Input [2]: [d_date_sk#31, d_year#32] - -(39) BroadcastExchange -Input [1]: [d_date_sk#31] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] - -(40) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ws_sold_date_sk#1] -Right keys [1]: [d_date_sk#31] -Join condition: None - -(41) Project [codegen id : 8] -Output [4]: [ws_quantity#5, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] -Input [6]: [ws_sold_date_sk#1, ws_quantity#5, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, d_date_sk#31] - -(42) Scan parquet default.reason -Output [2]: [r_reason_sk#34, r_reason_desc#35] -Batched: true -Location [not included in comparison]/{warehouse_dir}/reason] -PushedFilters: [IsNotNull(r_reason_sk)] -ReadSchema: struct - -(43) ColumnarToRow [codegen id : 7] -Input [2]: [r_reason_sk#34, r_reason_desc#35] - -(44) Filter [codegen id : 7] -Input [2]: [r_reason_sk#34, r_reason_desc#35] -Condition : isnotnull(r_reason_sk#34) - -(45) BroadcastExchange -Input [2]: [r_reason_sk#34, r_reason_desc#35] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#36] - -(46) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [wr_reason_sk#12] -Right keys [1]: [cast(r_reason_sk#34 as bigint)] -Join condition: None - -(47) Project [codegen id : 8] -Output [4]: [ws_quantity#5, wr_fee#14, wr_refunded_cash#15, r_reason_desc#35] -Input [6]: [ws_quantity#5, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, r_reason_sk#34, r_reason_desc#35] - -(48) HashAggregate [codegen id : 8] -Input [4]: [ws_quantity#5, wr_fee#14, wr_refunded_cash#15, r_reason_desc#35] -Keys [1]: [r_reason_desc#35] -Functions [3]: [partial_avg(cast(ws_quantity#5 as bigint)), partial_avg(UnscaledValue(wr_refunded_cash#15)), partial_avg(UnscaledValue(wr_fee#14))] -Aggregate Attributes [6]: [sum#37, count#38, sum#39, count#40, sum#41, count#42] -Results [7]: [r_reason_desc#35, sum#43, count#44, sum#45, count#46, sum#47, count#48] - -(49) Exchange -Input [7]: [r_reason_desc#35, sum#43, count#44, sum#45, count#46, sum#47, count#48] -Arguments: hashpartitioning(r_reason_desc#35, 5), ENSURE_REQUIREMENTS, [id=#49] - -(50) HashAggregate [codegen id : 9] -Input [7]: [r_reason_desc#35, sum#43, count#44, sum#45, count#46, sum#47, count#48] -Keys [1]: [r_reason_desc#35] -Functions [3]: [avg(cast(ws_quantity#5 as bigint)), avg(UnscaledValue(wr_refunded_cash#15)), avg(UnscaledValue(wr_fee#14))] -Aggregate Attributes [3]: [avg(cast(ws_quantity#5 as bigint))#50, avg(UnscaledValue(wr_refunded_cash#15))#51, avg(UnscaledValue(wr_fee#14))#52] -Results [4]: [substr(r_reason_desc#35, 1, 20) AS substr(r_reason_desc, 1, 20)#53, avg(cast(ws_quantity#5 as bigint))#50 AS avg(ws_quantity)#54, cast((avg(UnscaledValue(wr_refunded_cash#15))#51 / 100.0) as decimal(11,6)) AS avg(wr_refunded_cash)#55, cast((avg(UnscaledValue(wr_fee#14))#52 / 100.0) as decimal(11,6)) AS avg(wr_fee)#56] - -(51) TakeOrderedAndProject -Input [4]: [substr(r_reason_desc, 1, 20)#53, avg(ws_quantity)#54, avg(wr_refunded_cash)#55, avg(wr_fee)#56] -Arguments: 100, [substr(r_reason_desc, 1, 20)#53 ASC NULLS FIRST, avg(ws_quantity)#54 ASC NULLS FIRST, avg(wr_refunded_cash)#55 ASC NULLS FIRST, avg(wr_fee)#56 ASC NULLS FIRST], [substr(r_reason_desc, 1, 20)#53, avg(ws_quantity)#54, avg(wr_refunded_cash)#55, avg(wr_fee)#56] - +TakeOrderedAndProject(limit=100, orderBy=[substring(r_reason_desc, 1, 20)#1 ASC NULLS FIRST,aggOrder#2 ASC NULLS FIRST,avg(wr_refunded_cash)#3 ASC NULLS FIRST,avg(wr_fee)#4 ASC NULLS FIRST], output=[substring(r_reason_desc, 1, 20)#1,avg(ws_quantity)#5,avg(wr_refunded_cash)#3,avg(wr_fee)#4]) ++- *(9) HashAggregate(keys=[r_reason_desc#6], functions=[avg(cast(ws_quantity#7 as bigint)), avg(UnscaledValue(wr_refunded_cash#8)), avg(UnscaledValue(wr_fee#9))]) + +- Exchange hashpartitioning(r_reason_desc#6, 5) + +- *(8) HashAggregate(keys=[r_reason_desc#6], functions=[partial_avg(cast(ws_quantity#7 as bigint)), partial_avg(UnscaledValue(wr_refunded_cash#8)), partial_avg(UnscaledValue(wr_fee#9))]) + +- *(8) Project [ws_quantity#7, wr_fee#9, wr_refunded_cash#8, r_reason_desc#6] + +- *(8) BroadcastHashJoin [wr_reason_sk#10], [cast(r_reason_sk#11 as bigint)], Inner, BuildRight + :- *(8) Project [ws_quantity#7, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] + : +- *(8) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] + : : +- *(8) BroadcastHashJoin [wr_refunded_addr_sk#14], [cast(ca_address_sk#15 as bigint)], Inner, BuildRight, ((((ca_state#16 IN (IN,OH,NJ) && (ws_net_profit#17 >= 100.00)) && (ws_net_profit#17 <= 200.00)) || ((ca_state#16 IN (WI,CT,KY) && (ws_net_profit#17 >= 150.00)) && (ws_net_profit#17 <= 300.00))) || ((ca_state#16 IN (LA,IA,AR) && (ws_net_profit#17 >= 50.00)) && (ws_net_profit#17 <= 250.00))) + : : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, ws_net_profit#17, wr_refunded_addr_sk#14, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] + : : : +- *(8) BroadcastHashJoin [wr_returning_cdemo_sk#18, cd_marital_status#19, cd_education_status#20], [cast(cd_demo_sk#21 as bigint), cd_marital_status#22, cd_education_status#23], Inner, BuildRight + : : : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, ws_net_profit#17, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#18, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8, cd_marital_status#19, cd_education_status#20] + : : : : +- *(8) BroadcastHashJoin [wr_refunded_cdemo_sk#24], [cast(cd_demo_sk#25 as bigint)], Inner, BuildRight, ((((((cd_marital_status#19 = M) && (cd_education_status#20 = Advanced Degree)) && (ws_sales_price#26 >= 100.00)) && (ws_sales_price#26 <= 150.00)) || ((((cd_marital_status#19 = S) && (cd_education_status#20 = College)) && (ws_sales_price#26 >= 50.00)) && (ws_sales_price#26 <= 100.00))) || ((((cd_marital_status#19 = W) && (cd_education_status#20 = 2 yr Degree)) && (ws_sales_price#26 >= 150.00)) && (ws_sales_price#26 <= 200.00))) + : : : : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, ws_sales_price#26, ws_net_profit#17, wr_refunded_cdemo_sk#24, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#18, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] + : : : : : +- *(8) BroadcastHashJoin [ws_web_page_sk#27], [wp_web_page_sk#28], Inner, BuildRight + : : : : : :- *(8) Project [ws_sold_date_sk#12, ws_web_page_sk#27, ws_quantity#7, ws_sales_price#26, ws_net_profit#17, wr_refunded_cdemo_sk#24, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#18, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] + : : : : : : +- *(8) BroadcastHashJoin [cast(ws_item_sk#29 as bigint), cast(ws_order_number#30 as bigint)], [wr_item_sk#31, wr_order_number#32], Inner, BuildRight + : : : : : : :- *(8) Project [ws_sold_date_sk#12, ws_item_sk#29, ws_web_page_sk#27, ws_order_number#30, ws_quantity#7, ws_sales_price#26, ws_net_profit#17] + : : : : : : : +- *(8) Filter (((isnotnull(ws_item_sk#29) && isnotnull(ws_order_number#30)) && isnotnull(ws_web_page_sk#27)) && isnotnull(ws_sold_date_sk#12)) + : : : : : : : +- *(8) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#29,ws_web_page_sk#27,ws_order_number#30,ws_quantity#7,ws_sales_price#26,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), IsNotNull(ws_sold_..., ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : : +- *(3) Project [cd_demo_sk#25, cd_marital_status#19, cd_education_status#20] + : : : : +- *(3) Filter ((isnotnull(cd_demo_sk#25) && isnotnull(cd_education_status#20)) && isnotnull(cd_marital_status#19)) + : : : : +- *(3) FileScan parquet default.customer_demographics[cd_demo_sk#25,cd_marital_status#19,cd_education_status#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_education_status), IsNotNull(cd_marital_status)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint), input[1, string, true], input[2, string, true])) + : : : +- *(4) Project [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] + : : : +- *(4) Filter ((isnotnull(cd_marital_status#22) && isnotnull(cd_demo_sk#21)) && isnotnull(cd_education_status#23)) + : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#21,cd_marital_status#22,cd_education_status#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_marital_status), IsNotNull(cd_demo_sk), IsNotNull(cd_education_status)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(5) Project [ca_address_sk#15, ca_state#16] + : : +- *(5) Filter ((isnotnull(ca_country#33) && (ca_country#33 = United States)) && isnotnull(ca_address_sk#15)) + : : +- *(5) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#16,ca_country#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(6) Project [d_date_sk#13] + : +- *(6) Filter ((isnotnull(d_year#34) && (d_year#34 = 2000)) && isnotnull(d_date_sk#13)) + : +- *(6) FileScan parquet default.date_dim[d_date_sk#13,d_year#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(7) Project [r_reason_sk#11, r_reason_desc#6] + +- *(7) Filter isnotnull(r_reason_sk#11) + +- *(7) FileScan parquet default.reason[r_reason_sk#11,r_reason_desc#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/simplified.txt index b95873716..86a427106 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/simplified.txt @@ -1,76 +1,66 @@ -TakeOrderedAndProject [substr(r_reason_desc, 1, 20),avg(ws_quantity),avg(wr_refunded_cash),avg(wr_fee)] - WholeStageCodegen (9) - HashAggregate [r_reason_desc,sum,count,sum,count,sum,count] [avg(cast(ws_quantity as bigint)),avg(UnscaledValue(wr_refunded_cash)),avg(UnscaledValue(wr_fee)),substr(r_reason_desc, 1, 20),avg(ws_quantity),avg(wr_refunded_cash),avg(wr_fee),sum,count,sum,count,sum,count] +TakeOrderedAndProject [aggOrder,avg(wr_fee),avg(wr_refunded_cash),avg(ws_quantity),substring(r_reason_desc, 1, 20)] + WholeStageCodegen + HashAggregate [avg(UnscaledValue(wr_fee)),avg(UnscaledValue(wr_refunded_cash)),avg(cast(ws_quantity as bigint)),count,count,count,r_reason_desc,sum,sum,sum] [aggOrder,avg(UnscaledValue(wr_fee)),avg(UnscaledValue(wr_refunded_cash)),avg(cast(ws_quantity as bigint)),avg(wr_fee),avg(wr_refunded_cash),avg(ws_quantity),count,count,count,substring(r_reason_desc, 1, 20),sum,sum,sum] InputAdapter Exchange [r_reason_desc] #1 - WholeStageCodegen (8) - HashAggregate [r_reason_desc,ws_quantity,wr_refunded_cash,wr_fee] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] - Project [ws_quantity,wr_fee,wr_refunded_cash,r_reason_desc] - BroadcastHashJoin [wr_reason_sk,r_reason_sk] - Project [ws_quantity,wr_reason_sk,wr_fee,wr_refunded_cash] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,wr_reason_sk,wr_fee,wr_refunded_cash] - BroadcastHashJoin [wr_refunded_addr_sk,ca_address_sk,ca_state,ws_net_profit] - Project [ws_sold_date_sk,ws_quantity,ws_net_profit,wr_refunded_addr_sk,wr_reason_sk,wr_fee,wr_refunded_cash] - BroadcastHashJoin [wr_returning_cdemo_sk,cd_marital_status,cd_education_status,cd_demo_sk,cd_marital_status,cd_education_status] - Project [ws_sold_date_sk,ws_quantity,ws_net_profit,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash,cd_marital_status,cd_education_status] - BroadcastHashJoin [wr_refunded_cdemo_sk,cd_demo_sk,cd_marital_status,cd_education_status,ws_sales_price] - Project [ws_sold_date_sk,ws_quantity,ws_sales_price,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash] - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] - Project [ws_sold_date_sk,ws_web_page_sk,ws_quantity,ws_sales_price,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash] - BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] - Filter [ws_item_sk,ws_order_number,ws_web_page_sk,ws_sold_date_sk,ws_sales_price,ws_net_profit] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_web_page_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit] + WholeStageCodegen + HashAggregate [count,count,count,count,count,count,r_reason_desc,sum,sum,sum,sum,sum,sum,wr_fee,wr_refunded_cash,ws_quantity] [count,count,count,count,count,count,sum,sum,sum,sum,sum,sum] + Project [r_reason_desc,wr_fee,wr_refunded_cash,ws_quantity] + BroadcastHashJoin [r_reason_sk,wr_reason_sk] + Project [wr_fee,wr_reason_sk,wr_refunded_cash,ws_quantity] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [wr_fee,wr_reason_sk,wr_refunded_cash,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [ca_address_sk,ca_state,wr_refunded_addr_sk,ws_net_profit] + Project [wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,ws_net_profit,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cd_education_status,cd_education_status,cd_marital_status,cd_marital_status,wr_returning_cdemo_sk] + Project [cd_education_status,cd_marital_status,wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_returning_cdemo_sk,ws_net_profit,ws_quantity,ws_sold_date_sk] + BroadcastHashJoin [cd_demo_sk,cd_education_status,cd_marital_status,wr_refunded_cdemo_sk,ws_sales_price] + Project [wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,ws_net_profit,ws_quantity,ws_sales_price,ws_sold_date_sk] + BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] + Project [wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,ws_net_profit,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Project [ws_item_sk,ws_net_profit,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] + Filter [ws_item_sk,ws_order_number,ws_sold_date_sk,ws_web_page_sk] + Scan parquet default.web_sales [ws_item_sk,ws_net_profit,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] [ws_item_sk,ws_net_profit,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [wr_item_sk,wr_order_number,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,wr_refunded_addr_sk,wr_reason_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash] + WholeStageCodegen + Project [wr_fee,wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] + Filter [wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] + Scan parquet default.web_returns [wr_fee,wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] [wr_fee,wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Filter [wp_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_page [wp_web_page_sk] + WholeStageCodegen + Project [wp_web_page_sk] + Filter [wp_web_page_sk] + Scan parquet default.web_page [wp_web_page_sk] [wp_web_page_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) - Filter [cd_demo_sk,cd_marital_status,cd_education_status] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + WholeStageCodegen + Project [cd_demo_sk,cd_education_status,cd_marital_status] + Filter [cd_demo_sk,cd_education_status,cd_marital_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) - Filter [cd_demo_sk,cd_marital_status,cd_education_status] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + WholeStageCodegen + Project [cd_demo_sk,cd_education_status,cd_marital_status] + Filter [cd_demo_sk,cd_education_status,cd_marital_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] InputAdapter BroadcastExchange #6 - WholeStageCodegen (5) + WholeStageCodegen Project [ca_address_sk,ca_state] - Filter [ca_country,ca_address_sk,ca_state] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] + Filter [ca_address_sk,ca_country] + Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] [ca_address_sk,ca_country,ca_state] InputAdapter BroadcastExchange #7 - WholeStageCodegen (6) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] InputAdapter BroadcastExchange #8 - WholeStageCodegen (7) - Filter [r_reason_sk] - ColumnarToRow - InputAdapter - Scan parquet default.reason [r_reason_sk,r_reason_desc] + WholeStageCodegen + Project [r_reason_desc,r_reason_sk] + Filter [r_reason_sk] + Scan parquet default.reason [r_reason_desc,r_reason_sk] [r_reason_desc,r_reason_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/explain.txt index 20ae4d244..61285df23 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/explain.txt @@ -1,142 +1,25 @@ == Physical Plan == -TakeOrderedAndProject (25) -+- * Project (24) - +- Window (23) - +- * Sort (22) - +- Exchange (21) - +- * HashAggregate (20) - +- Exchange (19) - +- * HashAggregate (18) - +- * Expand (17) - +- * Project (16) - +- * BroadcastHashJoin Inner BuildRight (15) - :- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.web_sales (1) - : +- BroadcastExchange (8) - : +- * Project (7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.date_dim (4) - +- BroadcastExchange (14) - +- * Filter (13) - +- * ColumnarToRow (12) - +- Scan parquet default.item (11) - - -(1) Scan parquet default.web_sales -Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3] - -(3) Filter [codegen id : 3] -Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3] -Condition : (isnotnull(ws_sold_date_sk#1) AND isnotnull(ws_item_sk#2)) - -(4) Scan parquet default.date_dim -Output [2]: [d_date_sk#4, d_month_seq#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#4, d_month_seq#5] - -(6) Filter [codegen id : 1] -Input [2]: [d_date_sk#4, d_month_seq#5] -Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#4] -Input [2]: [d_date_sk#4, d_month_seq#5] - -(8) BroadcastExchange -Input [1]: [d_date_sk#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] - -(9) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ws_sold_date_sk#1] -Right keys [1]: [d_date_sk#4] -Join condition: None - -(10) Project [codegen id : 3] -Output [2]: [ws_item_sk#2, ws_net_paid#3] -Input [4]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3, d_date_sk#4] - -(11) Scan parquet default.item -Output [3]: [i_item_sk#7, i_class#8, i_category#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [3]: [i_item_sk#7, i_class#8, i_category#9] - -(13) Filter [codegen id : 2] -Input [3]: [i_item_sk#7, i_class#8, i_category#9] -Condition : isnotnull(i_item_sk#7) - -(14) BroadcastExchange -Input [3]: [i_item_sk#7, i_class#8, i_category#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] - -(15) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ws_item_sk#2] -Right keys [1]: [i_item_sk#7] -Join condition: None - -(16) Project [codegen id : 3] -Output [3]: [ws_net_paid#3, i_category#9, i_class#8] -Input [5]: [ws_item_sk#2, ws_net_paid#3, i_item_sk#7, i_class#8, i_category#9] - -(17) Expand [codegen id : 3] -Input [3]: [ws_net_paid#3, i_category#9, i_class#8] -Arguments: [List(ws_net_paid#3, i_category#9, i_class#8, 0), List(ws_net_paid#3, i_category#9, null, 1), List(ws_net_paid#3, null, null, 3)], [ws_net_paid#3, i_category#11, i_class#12, spark_grouping_id#13] - -(18) HashAggregate [codegen id : 3] -Input [4]: [ws_net_paid#3, i_category#11, i_class#12, spark_grouping_id#13] -Keys [3]: [i_category#11, i_class#12, spark_grouping_id#13] -Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#3))] -Aggregate Attributes [1]: [sum#14] -Results [4]: [i_category#11, i_class#12, spark_grouping_id#13, sum#15] - -(19) Exchange -Input [4]: [i_category#11, i_class#12, spark_grouping_id#13, sum#15] -Arguments: hashpartitioning(i_category#11, i_class#12, spark_grouping_id#13, 5), true, [id=#16] - -(20) HashAggregate [codegen id : 4] -Input [4]: [i_category#11, i_class#12, spark_grouping_id#13, sum#15] -Keys [3]: [i_category#11, i_class#12, spark_grouping_id#13] -Functions [1]: [sum(UnscaledValue(ws_net_paid#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#3))#17] -Results [7]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#17,17,2) AS total_sum#18, i_category#11, i_class#12, (cast((shiftright(spark_grouping_id#13, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#13, 0) & 1) as tinyint)) AS lochierarchy#19, (cast((shiftright(spark_grouping_id#13, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#13, 0) & 1) as tinyint)) AS _w1#20, CASE WHEN (cast(cast((shiftright(spark_grouping_id#13, 0) & 1) as tinyint) as int) = 0) THEN i_category#11 END AS _w2#21, MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#17,17,2) AS _w3#22] - -(21) Exchange -Input [7]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, _w1#20, _w2#21, _w3#22] -Arguments: hashpartitioning(_w1#20, _w2#21, 5), true, [id=#23] - -(22) Sort [codegen id : 5] -Input [7]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, _w1#20, _w2#21, _w3#22] -Arguments: [_w1#20 ASC NULLS FIRST, _w2#21 ASC NULLS FIRST, _w3#22 DESC NULLS LAST], false, 0 - -(23) Window -Input [7]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, _w1#20, _w2#21, _w3#22] -Arguments: [rank(_w3#22) windowspecdefinition(_w1#20, _w2#21, _w3#22 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#24], [_w1#20, _w2#21], [_w3#22 DESC NULLS LAST] - -(24) Project [codegen id : 6] -Output [5]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, rank_within_parent#24] -Input [8]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, _w1#20, _w2#21, _w3#22, rank_within_parent#24] - -(25) TakeOrderedAndProject -Input [5]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, rank_within_parent#24] -Arguments: 100, [lochierarchy#19 DESC NULLS LAST, CASE WHEN (cast(lochierarchy#19 as int) = 0) THEN i_category#11 END ASC NULLS FIRST, rank_within_parent#24 ASC NULLS FIRST], [total_sum#18, i_category#11, i_class#12, lochierarchy#19, rank_within_parent#24] - +TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WHEN (cast(lochierarchy#1 as int) = 0) THEN i_category#2 END ASC NULLS FIRST,rank_within_parent#3 ASC NULLS FIRST], output=[total_sum#4,i_category#2,i_class#5,lochierarchy#1,rank_within_parent#3]) ++- *(6) Project [total_sum#4, i_category#2, i_class#5, lochierarchy#1, rank_within_parent#3] + +- Window [rank(_w3#6) windowspecdefinition(_w1#7, _w2#8, _w3#6 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#3], [_w1#7, _w2#8], [_w3#6 DESC NULLS LAST] + +- *(5) Sort [_w1#7 ASC NULLS FIRST, _w2#8 ASC NULLS FIRST, _w3#6 DESC NULLS LAST], false, 0 + +- Exchange hashpartitioning(_w1#7, _w2#8, 5) + +- *(4) HashAggregate(keys=[i_category#2, i_class#5, spark_grouping_id#9], functions=[sum(UnscaledValue(ws_net_paid#10))]) + +- Exchange hashpartitioning(i_category#2, i_class#5, spark_grouping_id#9, 5) + +- *(3) HashAggregate(keys=[i_category#2, i_class#5, spark_grouping_id#9], functions=[partial_sum(UnscaledValue(ws_net_paid#10))]) + +- *(3) Expand [List(ws_net_paid#10, i_category#11, i_class#12, 0), List(ws_net_paid#10, i_category#11, null, 1), List(ws_net_paid#10, null, null, 3)], [ws_net_paid#10, i_category#2, i_class#5, spark_grouping_id#9] + +- *(3) Project [ws_net_paid#10, i_category#13 AS i_category#11, i_class#14 AS i_class#12] + +- *(3) BroadcastHashJoin [ws_item_sk#15], [i_item_sk#16], Inner, BuildRight + :- *(3) Project [ws_item_sk#15, ws_net_paid#10] + : +- *(3) BroadcastHashJoin [ws_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + : :- *(3) Project [ws_sold_date_sk#17, ws_item_sk#15, ws_net_paid#10] + : : +- *(3) Filter (isnotnull(ws_sold_date_sk#17) && isnotnull(ws_item_sk#15)) + : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#17,ws_item_sk#15,ws_net_paid#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [d_date_sk#18] + : +- *(1) Filter (((isnotnull(d_month_seq#19) && (d_month_seq#19 >= 1200)) && (d_month_seq#19 <= 1211)) && isnotnull(d_date_sk#18)) + : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_month_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [i_item_sk#16, i_class#14, i_category#13] + +- *(2) Filter isnotnull(i_item_sk#16) + +- *(2) FileScan parquet default.item[i_item_sk#16,i_class#14,i_category#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/simplified.txt index cac22cb0f..5ec040bda 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/simplified.txt @@ -1,39 +1,35 @@ -TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,total_sum,i_class] - WholeStageCodegen (6) - Project [total_sum,i_category,i_class,lochierarchy,rank_within_parent] +TakeOrderedAndProject [i_category,i_class,lochierarchy,rank_within_parent,total_sum] + WholeStageCodegen + Project [i_category,i_class,lochierarchy,rank_within_parent,total_sum] InputAdapter - Window [_w3,_w1,_w2] - WholeStageCodegen (5) + Window [_w1,_w2,_w3] + WholeStageCodegen Sort [_w1,_w2,_w3] InputAdapter Exchange [_w1,_w2] #1 - WholeStageCodegen (4) - HashAggregate [i_category,i_class,spark_grouping_id,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,lochierarchy,_w1,_w2,_w3,sum] + WholeStageCodegen + HashAggregate [i_category,i_class,spark_grouping_id,sum,sum(UnscaledValue(ws_net_paid))] [_w1,_w2,_w3,lochierarchy,sum,sum(UnscaledValue(ws_net_paid)),total_sum] InputAdapter Exchange [i_category,i_class,spark_grouping_id] #2 - WholeStageCodegen (3) - HashAggregate [i_category,i_class,spark_grouping_id,ws_net_paid] [sum,sum] - Expand [ws_net_paid,i_category,i_class] - Project [ws_net_paid,i_category,i_class] - BroadcastHashJoin [ws_item_sk,i_item_sk] + WholeStageCodegen + HashAggregate [i_category,i_class,spark_grouping_id,sum,sum,ws_net_paid] [sum,sum] + Expand [i_category,i_class,ws_net_paid] + Project [i_category,i_class,ws_net_paid] + BroadcastHashJoin [i_item_sk,ws_item_sk] Project [ws_item_sk,ws_net_paid] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_net_paid] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_item_sk,ws_net_paid,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_sold_date_sk] [ws_item_sk,ws_net_paid,ws_sold_date_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) + WholeStageCodegen Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + Filter [d_date_sk,d_month_seq] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_class,i_category] + WholeStageCodegen + Project [i_category,i_class,i_item_sk] + Filter [i_item_sk] + Scan parquet default.item [i_category,i_class,i_item_sk] [i_category,i_class,i_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/explain.txt index 3d59a670b..c47ee7616 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/explain.txt @@ -1,323 +1,54 @@ == Physical Plan == -* HashAggregate (54) -+- Exchange (53) - +- * HashAggregate (52) - +- * HashAggregate (51) - +- * HashAggregate (50) - +- * HashAggregate (49) - +- * HashAggregate (48) - +- * HashAggregate (47) - +- Exchange (46) - +- * HashAggregate (45) - +- * BroadcastHashJoin LeftAnti BuildRight (44) - :- * BroadcastHashJoin LeftAnti BuildRight (30) - : :- * Project (16) - : : +- * BroadcastHashJoin Inner BuildRight (15) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.customer (11) - : +- BroadcastExchange (29) - : +- * HashAggregate (28) - : +- Exchange (27) - : +- * HashAggregate (26) - : +- * Project (25) - : +- * BroadcastHashJoin Inner BuildRight (24) - : :- * Project (22) - : : +- * BroadcastHashJoin Inner BuildRight (21) - : : :- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.catalog_sales (17) - : : +- ReusedExchange (20) - : +- ReusedExchange (23) - +- BroadcastExchange (43) - +- * HashAggregate (42) - +- Exchange (41) - +- * HashAggregate (40) - +- * Project (39) - +- * BroadcastHashJoin Inner BuildRight (38) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) - : :- * Filter (33) - : : +- * ColumnarToRow (32) - : : +- Scan parquet default.web_sales (31) - : +- ReusedExchange (34) - +- ReusedExchange (37) - - -(1) Scan parquet default.store_sales -Output [2]: [ss_sold_date_sk#1, ss_customer_sk#2] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 11] -Input [2]: [ss_sold_date_sk#1, ss_customer_sk#2] - -(3) Filter [codegen id : 11] -Input [2]: [ss_sold_date_sk#1, ss_customer_sk#2] -Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_customer_sk#2)) - -(4) Scan parquet default.date_dim -Output [3]: [d_date_sk#3, d_date#4, d_month_seq#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] - -(6) Filter [codegen id : 1] -Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] -Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#3)) - -(7) Project [codegen id : 1] -Output [2]: [d_date_sk#3, d_date#4] -Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] - -(8) BroadcastExchange -Input [2]: [d_date_sk#3, d_date#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] - -(9) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#3] -Join condition: None - -(10) Project [codegen id : 11] -Output [2]: [ss_customer_sk#2, d_date#4] -Input [4]: [ss_sold_date_sk#1, ss_customer_sk#2, d_date_sk#3, d_date#4] - -(11) Scan parquet default.customer -Output [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] - -(13) Filter [codegen id : 2] -Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] -Condition : isnotnull(c_customer_sk#7) - -(14) BroadcastExchange -Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] - -(15) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#7] -Join condition: None - -(16) Project [codegen id : 11] -Output [3]: [d_date#4, c_first_name#8, c_last_name#9] -Input [5]: [ss_customer_sk#2, d_date#4, c_customer_sk#7, c_first_name#8, c_last_name#9] - -(17) Scan parquet default.catalog_sales -Output [2]: [cs_sold_date_sk#11, cs_bill_customer_sk#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)] -ReadSchema: struct - -(18) ColumnarToRow [codegen id : 5] -Input [2]: [cs_sold_date_sk#11, cs_bill_customer_sk#12] - -(19) Filter [codegen id : 5] -Input [2]: [cs_sold_date_sk#11, cs_bill_customer_sk#12] -Condition : (isnotnull(cs_sold_date_sk#11) AND isnotnull(cs_bill_customer_sk#12)) - -(20) ReusedExchange [Reuses operator id: 8] -Output [2]: [d_date_sk#13, d_date#14] - -(21) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_sold_date_sk#11] -Right keys [1]: [d_date_sk#13] -Join condition: None - -(22) Project [codegen id : 5] -Output [2]: [cs_bill_customer_sk#12, d_date#14] -Input [4]: [cs_sold_date_sk#11, cs_bill_customer_sk#12, d_date_sk#13, d_date#14] - -(23) ReusedExchange [Reuses operator id: 14] -Output [3]: [c_customer_sk#15, c_first_name#16, c_last_name#17] - -(24) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_bill_customer_sk#12] -Right keys [1]: [c_customer_sk#15] -Join condition: None - -(25) Project [codegen id : 5] -Output [3]: [c_last_name#17, c_first_name#16, d_date#14] -Input [5]: [cs_bill_customer_sk#12, d_date#14, c_customer_sk#15, c_first_name#16, c_last_name#17] - -(26) HashAggregate [codegen id : 5] -Input [3]: [c_last_name#17, c_first_name#16, d_date#14] -Keys [3]: [c_last_name#17, c_first_name#16, d_date#14] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#17, c_first_name#16, d_date#14] - -(27) Exchange -Input [3]: [c_last_name#17, c_first_name#16, d_date#14] -Arguments: hashpartitioning(c_last_name#17, c_first_name#16, d_date#14, 5), true, [id=#18] - -(28) HashAggregate [codegen id : 6] -Input [3]: [c_last_name#17, c_first_name#16, d_date#14] -Keys [3]: [c_last_name#17, c_first_name#16, d_date#14] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#17, c_first_name#16, d_date#14] - -(29) BroadcastExchange -Input [3]: [c_last_name#17, c_first_name#16, d_date#14] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 0), isnull(input[2, date, true])),false), [id=#19] - -(30) BroadcastHashJoin [codegen id : 11] -Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#4, 0), isnull(d_date#4)] -Right keys [6]: [coalesce(c_last_name#17, ), isnull(c_last_name#17), coalesce(c_first_name#16, ), isnull(c_first_name#16), coalesce(d_date#14, 0), isnull(d_date#14)] -Join condition: None - -(31) Scan parquet default.web_sales -Output [2]: [ws_sold_date_sk#20, ws_bill_customer_sk#21] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)] -ReadSchema: struct - -(32) ColumnarToRow [codegen id : 9] -Input [2]: [ws_sold_date_sk#20, ws_bill_customer_sk#21] - -(33) Filter [codegen id : 9] -Input [2]: [ws_sold_date_sk#20, ws_bill_customer_sk#21] -Condition : (isnotnull(ws_sold_date_sk#20) AND isnotnull(ws_bill_customer_sk#21)) - -(34) ReusedExchange [Reuses operator id: 8] -Output [2]: [d_date_sk#22, d_date#23] - -(35) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_sold_date_sk#20] -Right keys [1]: [d_date_sk#22] -Join condition: None - -(36) Project [codegen id : 9] -Output [2]: [ws_bill_customer_sk#21, d_date#23] -Input [4]: [ws_sold_date_sk#20, ws_bill_customer_sk#21, d_date_sk#22, d_date#23] - -(37) ReusedExchange [Reuses operator id: 14] -Output [3]: [c_customer_sk#24, c_first_name#25, c_last_name#26] - -(38) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_bill_customer_sk#21] -Right keys [1]: [c_customer_sk#24] -Join condition: None - -(39) Project [codegen id : 9] -Output [3]: [c_last_name#26, c_first_name#25, d_date#23] -Input [5]: [ws_bill_customer_sk#21, d_date#23, c_customer_sk#24, c_first_name#25, c_last_name#26] - -(40) HashAggregate [codegen id : 9] -Input [3]: [c_last_name#26, c_first_name#25, d_date#23] -Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#26, c_first_name#25, d_date#23] - -(41) Exchange -Input [3]: [c_last_name#26, c_first_name#25, d_date#23] -Arguments: hashpartitioning(c_last_name#26, c_first_name#25, d_date#23, 5), true, [id=#27] - -(42) HashAggregate [codegen id : 10] -Input [3]: [c_last_name#26, c_first_name#25, d_date#23] -Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#26, c_first_name#25, d_date#23] - -(43) BroadcastExchange -Input [3]: [c_last_name#26, c_first_name#25, d_date#23] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 0), isnull(input[2, date, true])),false), [id=#28] - -(44) BroadcastHashJoin [codegen id : 11] -Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#4, 0), isnull(d_date#4)] -Right keys [6]: [coalesce(c_last_name#26, ), isnull(c_last_name#26), coalesce(c_first_name#25, ), isnull(c_first_name#25), coalesce(d_date#23, 0), isnull(d_date#23)] -Join condition: None - -(45) HashAggregate [codegen id : 11] -Input [3]: [d_date#4, c_first_name#8, c_last_name#9] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#4] - -(46) Exchange -Input [3]: [c_last_name#9, c_first_name#8, d_date#4] -Arguments: hashpartitioning(c_last_name#9, c_first_name#8, d_date#4, 5), true, [id=#29] - -(47) HashAggregate [codegen id : 12] -Input [3]: [c_last_name#9, c_first_name#8, d_date#4] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#4] - -(48) HashAggregate [codegen id : 12] -Input [3]: [c_last_name#9, c_first_name#8, d_date#4] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#4] - -(49) HashAggregate [codegen id : 12] -Input [3]: [c_last_name#9, c_first_name#8, d_date#4] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#4] - -(50) HashAggregate [codegen id : 12] -Input [3]: [c_last_name#9, c_first_name#8, d_date#4] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] -Functions: [] -Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#4] - -(51) HashAggregate [codegen id : 12] -Input [3]: [c_last_name#9, c_first_name#8, d_date#4] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] -Functions: [] -Aggregate Attributes: [] -Results: [] - -(52) HashAggregate [codegen id : 12] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#30] -Results [1]: [count#31] - -(53) Exchange -Input [1]: [count#31] -Arguments: SinglePartition, true, [id=#32] - -(54) HashAggregate [codegen id : 13] -Input [1]: [count#31] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#33] -Results [1]: [count(1)#33 AS count(1)#34] - +*(13) HashAggregate(keys=[], functions=[count(1)]) ++- Exchange SinglePartition + +- *(12) HashAggregate(keys=[], functions=[partial_count(1)]) + +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#4, ), coalesce(c_first_name#5, ), coalesce(d_date#6, 0)], LeftAnti, BuildRight, (((c_last_name#1 <=> c_last_name#4) && (c_first_name#2 <=> c_first_name#5)) && (d_date#3 <=> d_date#6)) + :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#7, ), coalesce(c_first_name#8, ), coalesce(d_date#9, 0)], LeftAnti, BuildRight, (((c_last_name#1 <=> c_last_name#7) && (c_first_name#2 <=> c_first_name#8)) && (d_date#3 <=> d_date#9)) + : :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, d_date#3, 5) + : : +- *(3) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) + : : +- *(3) Project [c_last_name#1, c_first_name#2, d_date#3] + : : +- *(3) BroadcastHashJoin [ss_customer_sk#10], [c_customer_sk#11], Inner, BuildRight + : : :- *(3) Project [ss_customer_sk#10, d_date#3] + : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + : : : :- *(3) Project [ss_sold_date_sk#12, ss_customer_sk#10] + : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#12) && isnotnull(ss_customer_sk#10)) + : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_customer_sk#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [d_date_sk#13, d_date#3] + : : : +- *(1) Filter (((isnotnull(d_month_seq#14) && (d_month_seq#14 >= 1200)) && (d_month_seq#14 <= 1211)) && isnotnull(d_date_sk#13)) + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#13,d_date#3,d_month_seq#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [c_customer_sk#11, c_first_name#2, c_last_name#1] + : : +- *(2) Filter isnotnull(c_customer_sk#11) + : : +- *(2) FileScan parquet default.customer[c_customer_sk#11,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) + : +- *(7) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) + : +- Exchange hashpartitioning(c_last_name#7, c_first_name#8, d_date#9, 5) + : +- *(6) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) + : +- *(6) Project [c_last_name#7, c_first_name#8, d_date#9] + : +- *(6) BroadcastHashJoin [cs_bill_customer_sk#15], [c_customer_sk#16], Inner, BuildRight + : :- *(6) Project [cs_bill_customer_sk#15, d_date#9] + : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight + : : :- *(6) Project [cs_sold_date_sk#17, cs_bill_customer_sk#15] + : : : +- *(6) Filter (isnotnull(cs_sold_date_sk#17) && isnotnull(cs_bill_customer_sk#15)) + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#17,cs_bill_customer_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct + : : +- ReusedExchange [d_date_sk#18, d_date#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- ReusedExchange [c_customer_sk#16, c_first_name#8, c_last_name#7], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) + +- *(11) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) + +- Exchange hashpartitioning(c_last_name#4, c_first_name#5, d_date#6, 5) + +- *(10) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) + +- *(10) Project [c_last_name#4, c_first_name#5, d_date#6] + +- *(10) BroadcastHashJoin [ws_bill_customer_sk#19], [c_customer_sk#20], Inner, BuildRight + :- *(10) Project [ws_bill_customer_sk#19, d_date#6] + : +- *(10) BroadcastHashJoin [ws_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight + : :- *(10) Project [ws_sold_date_sk#21, ws_bill_customer_sk#19] + : : +- *(10) Filter (isnotnull(ws_sold_date_sk#21) && isnotnull(ws_bill_customer_sk#19)) + : : +- *(10) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct + : +- ReusedExchange [d_date_sk#22, d_date#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- ReusedExchange [c_customer_sk#20, c_first_name#5, c_last_name#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/simplified.txt index a5b57a4ac..1a7672916 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/simplified.txt @@ -1,80 +1,74 @@ -WholeStageCodegen (13) - HashAggregate [count] [count(1),count(1),count] +WholeStageCodegen + HashAggregate [count,count(1)] [count,count(1),count(1)] InputAdapter Exchange #1 - WholeStageCodegen (12) - HashAggregate [count,count] - HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #2 - WholeStageCodegen (11) - HashAggregate [c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - Project [d_date,c_first_name,c_last_name] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] + WholeStageCodegen + HashAggregate [count,count] [count,count] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #2 + WholeStageCodegen + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Project [d_date,ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_sold_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] + WholeStageCodegen + Project [d_date,d_date_sk] + Filter [d_date_sk,d_month_seq] + Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] [d_date,d_date_sk,d_month_seq] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) - Filter [c_customer_sk] - ColumnarToRow + WholeStageCodegen + Project [c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_sk] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #6 + WholeStageCodegen + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_bill_customer_sk,cs_sold_date_sk] + Filter [cs_bill_customer_sk,cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] [cs_bill_customer_sk,cs_sold_date_sk] InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (6) - HashAggregate [c_last_name,c_first_name,d_date] + ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen + HashAggregate [c_first_name,c_last_name,d_date] + InputAdapter + Exchange [c_first_name,c_last_name,d_date] #8 + WholeStageCodegen + HashAggregate [c_first_name,c_last_name,d_date] + Project [c_first_name,c_last_name,d_date] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Project [d_date,ws_bill_customer_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_bill_customer_sk,ws_sold_date_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_sold_date_sk] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #6 - WholeStageCodegen (5) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk,cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] - InputAdapter - ReusedExchange [d_date_sk,d_date] #3 - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (10) - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #8 - WholeStageCodegen (9) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_bill_customer_sk,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] - InputAdapter - ReusedExchange [d_date_sk,d_date] #3 - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/explain.txt index 22297e02e..d729f0b3f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/explain.txt @@ -1,960 +1,165 @@ == Physical Plan == -BroadcastNestedLoopJoin Inner BuildRight (174) -:- BroadcastNestedLoopJoin Inner BuildRight (153) -: :- BroadcastNestedLoopJoin Inner BuildRight (132) -: : :- BroadcastNestedLoopJoin Inner BuildRight (111) -: : : :- BroadcastNestedLoopJoin Inner BuildRight (90) -: : : : :- BroadcastNestedLoopJoin Inner BuildRight (69) -: : : : : :- BroadcastNestedLoopJoin Inner BuildRight (48) -: : : : : : :- * HashAggregate (27) -: : : : : : : +- Exchange (26) -: : : : : : : +- * HashAggregate (25) -: : : : : : : +- * Project (24) -: : : : : : : +- * BroadcastHashJoin Inner BuildRight (23) -: : : : : : : :- * Project (17) -: : : : : : : : +- * BroadcastHashJoin Inner BuildRight (16) -: : : : : : : : :- * Project (10) -: : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) -: : : : : : : : : :- * Filter (3) -: : : : : : : : : : +- * ColumnarToRow (2) -: : : : : : : : : : +- Scan parquet default.store_sales (1) -: : : : : : : : : +- BroadcastExchange (8) -: : : : : : : : : +- * Project (7) -: : : : : : : : : +- * Filter (6) -: : : : : : : : : +- * ColumnarToRow (5) -: : : : : : : : : +- Scan parquet default.household_demographics (4) -: : : : : : : : +- BroadcastExchange (15) -: : : : : : : : +- * Project (14) -: : : : : : : : +- * Filter (13) -: : : : : : : : +- * ColumnarToRow (12) -: : : : : : : : +- Scan parquet default.time_dim (11) -: : : : : : : +- BroadcastExchange (22) -: : : : : : : +- * Project (21) -: : : : : : : +- * Filter (20) -: : : : : : : +- * ColumnarToRow (19) -: : : : : : : +- Scan parquet default.store (18) -: : : : : : +- BroadcastExchange (47) -: : : : : : +- * HashAggregate (46) -: : : : : : +- Exchange (45) -: : : : : : +- * HashAggregate (44) -: : : : : : +- * Project (43) -: : : : : : +- * BroadcastHashJoin Inner BuildRight (42) -: : : : : : :- * Project (40) -: : : : : : : +- * BroadcastHashJoin Inner BuildRight (39) -: : : : : : : :- * Project (33) -: : : : : : : : +- * BroadcastHashJoin Inner BuildRight (32) -: : : : : : : : :- * Filter (30) -: : : : : : : : : +- * ColumnarToRow (29) -: : : : : : : : : +- Scan parquet default.store_sales (28) -: : : : : : : : +- ReusedExchange (31) -: : : : : : : +- BroadcastExchange (38) -: : : : : : : +- * Project (37) -: : : : : : : +- * Filter (36) -: : : : : : : +- * ColumnarToRow (35) -: : : : : : : +- Scan parquet default.time_dim (34) -: : : : : : +- ReusedExchange (41) -: : : : : +- BroadcastExchange (68) -: : : : : +- * HashAggregate (67) -: : : : : +- Exchange (66) -: : : : : +- * HashAggregate (65) -: : : : : +- * Project (64) -: : : : : +- * BroadcastHashJoin Inner BuildRight (63) -: : : : : :- * Project (61) -: : : : : : +- * BroadcastHashJoin Inner BuildRight (60) -: : : : : : :- * Project (54) -: : : : : : : +- * BroadcastHashJoin Inner BuildRight (53) -: : : : : : : :- * Filter (51) -: : : : : : : : +- * ColumnarToRow (50) -: : : : : : : : +- Scan parquet default.store_sales (49) -: : : : : : : +- ReusedExchange (52) -: : : : : : +- BroadcastExchange (59) -: : : : : : +- * Project (58) -: : : : : : +- * Filter (57) -: : : : : : +- * ColumnarToRow (56) -: : : : : : +- Scan parquet default.time_dim (55) -: : : : : +- ReusedExchange (62) -: : : : +- BroadcastExchange (89) -: : : : +- * HashAggregate (88) -: : : : +- Exchange (87) -: : : : +- * HashAggregate (86) -: : : : +- * Project (85) -: : : : +- * BroadcastHashJoin Inner BuildRight (84) -: : : : :- * Project (82) -: : : : : +- * BroadcastHashJoin Inner BuildRight (81) -: : : : : :- * Project (75) -: : : : : : +- * BroadcastHashJoin Inner BuildRight (74) -: : : : : : :- * Filter (72) -: : : : : : : +- * ColumnarToRow (71) -: : : : : : : +- Scan parquet default.store_sales (70) -: : : : : : +- ReusedExchange (73) -: : : : : +- BroadcastExchange (80) -: : : : : +- * Project (79) -: : : : : +- * Filter (78) -: : : : : +- * ColumnarToRow (77) -: : : : : +- Scan parquet default.time_dim (76) -: : : : +- ReusedExchange (83) -: : : +- BroadcastExchange (110) -: : : +- * HashAggregate (109) -: : : +- Exchange (108) -: : : +- * HashAggregate (107) -: : : +- * Project (106) -: : : +- * BroadcastHashJoin Inner BuildRight (105) -: : : :- * Project (103) -: : : : +- * BroadcastHashJoin Inner BuildRight (102) -: : : : :- * Project (96) -: : : : : +- * BroadcastHashJoin Inner BuildRight (95) -: : : : : :- * Filter (93) -: : : : : : +- * ColumnarToRow (92) -: : : : : : +- Scan parquet default.store_sales (91) -: : : : : +- ReusedExchange (94) -: : : : +- BroadcastExchange (101) -: : : : +- * Project (100) -: : : : +- * Filter (99) -: : : : +- * ColumnarToRow (98) -: : : : +- Scan parquet default.time_dim (97) -: : : +- ReusedExchange (104) -: : +- BroadcastExchange (131) -: : +- * HashAggregate (130) -: : +- Exchange (129) -: : +- * HashAggregate (128) -: : +- * Project (127) -: : +- * BroadcastHashJoin Inner BuildRight (126) -: : :- * Project (124) -: : : +- * BroadcastHashJoin Inner BuildRight (123) -: : : :- * Project (117) -: : : : +- * BroadcastHashJoin Inner BuildRight (116) -: : : : :- * Filter (114) -: : : : : +- * ColumnarToRow (113) -: : : : : +- Scan parquet default.store_sales (112) -: : : : +- ReusedExchange (115) -: : : +- BroadcastExchange (122) -: : : +- * Project (121) -: : : +- * Filter (120) -: : : +- * ColumnarToRow (119) -: : : +- Scan parquet default.time_dim (118) -: : +- ReusedExchange (125) -: +- BroadcastExchange (152) -: +- * HashAggregate (151) -: +- Exchange (150) -: +- * HashAggregate (149) -: +- * Project (148) -: +- * BroadcastHashJoin Inner BuildRight (147) -: :- * Project (145) -: : +- * BroadcastHashJoin Inner BuildRight (144) -: : :- * Project (138) -: : : +- * BroadcastHashJoin Inner BuildRight (137) -: : : :- * Filter (135) -: : : : +- * ColumnarToRow (134) -: : : : +- Scan parquet default.store_sales (133) -: : : +- ReusedExchange (136) -: : +- BroadcastExchange (143) -: : +- * Project (142) -: : +- * Filter (141) -: : +- * ColumnarToRow (140) -: : +- Scan parquet default.time_dim (139) -: +- ReusedExchange (146) -+- BroadcastExchange (173) - +- * HashAggregate (172) - +- Exchange (171) - +- * HashAggregate (170) - +- * Project (169) - +- * BroadcastHashJoin Inner BuildRight (168) - :- * Project (166) - : +- * BroadcastHashJoin Inner BuildRight (165) - : :- * Project (159) - : : +- * BroadcastHashJoin Inner BuildRight (158) - : : :- * Filter (156) - : : : +- * ColumnarToRow (155) - : : : +- Scan parquet default.store_sales (154) - : : +- ReusedExchange (157) - : +- BroadcastExchange (164) - : +- * Project (163) - : +- * Filter (162) - : +- * ColumnarToRow (161) - : +- Scan parquet default.time_dim (160) - +- ReusedExchange (167) - - -(1) Scan parquet default.store_sales -Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] - -(3) Filter [codegen id : 4] -Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] -Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) - -(4) Scan parquet default.household_demographics -Output [3]: [hd_demo_sk#4, hd_dep_count#5, hd_vehicle_count#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [Or(Or(And(EqualTo(hd_dep_count,4),LessThanOrEqual(hd_vehicle_count,6)),And(EqualTo(hd_dep_count,2),LessThanOrEqual(hd_vehicle_count,4))),And(EqualTo(hd_dep_count,0),LessThanOrEqual(hd_vehicle_count,2))), IsNotNull(hd_demo_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [3]: [hd_demo_sk#4, hd_dep_count#5, hd_vehicle_count#6] - -(6) Filter [codegen id : 1] -Input [3]: [hd_demo_sk#4, hd_dep_count#5, hd_vehicle_count#6] -Condition : (((((hd_dep_count#5 = 4) AND (hd_vehicle_count#6 <= 6)) OR ((hd_dep_count#5 = 2) AND (hd_vehicle_count#6 <= 4))) OR ((hd_dep_count#5 = 0) AND (hd_vehicle_count#6 <= 2))) AND isnotnull(hd_demo_sk#4)) - -(7) Project [codegen id : 1] -Output [1]: [hd_demo_sk#4] -Input [3]: [hd_demo_sk#4, hd_dep_count#5, hd_vehicle_count#6] - -(8) BroadcastExchange -Input [1]: [hd_demo_sk#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] - -(9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#4] -Join condition: None - -(10) Project [codegen id : 4] -Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] -Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] - -(11) Scan parquet default.time_dim -Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/time_dim] -PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,8), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] - -(13) Filter [codegen id : 2] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] -Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 8)) AND (t_minute#10 >= 30)) AND isnotnull(t_time_sk#8)) - -(14) Project [codegen id : 2] -Output [1]: [t_time_sk#8] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] - -(15) BroadcastExchange -Input [1]: [t_time_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] - -(16) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_time_sk#1] -Right keys [1]: [t_time_sk#8] -Join condition: None - -(17) Project [codegen id : 4] -Output [1]: [ss_store_sk#3] -Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] - -(18) Scan parquet default.store -Output [2]: [s_store_sk#12, s_store_name#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#12, s_store_name#13] - -(20) Filter [codegen id : 3] -Input [2]: [s_store_sk#12, s_store_name#13] -Condition : ((isnotnull(s_store_name#13) AND (s_store_name#13 = ese)) AND isnotnull(s_store_sk#12)) - -(21) Project [codegen id : 3] -Output [1]: [s_store_sk#12] -Input [2]: [s_store_sk#12, s_store_name#13] - -(22) BroadcastExchange -Input [1]: [s_store_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] - -(23) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#12] -Join condition: None - -(24) Project [codegen id : 4] -Output: [] -Input [2]: [ss_store_sk#3, s_store_sk#12] - -(25) HashAggregate [codegen id : 4] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#15] -Results [1]: [count#16] - -(26) Exchange -Input [1]: [count#16] -Arguments: SinglePartition, true, [id=#17] - -(27) HashAggregate [codegen id : 5] -Input [1]: [count#16] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#18] -Results [1]: [count(1)#18 AS h8_30_to_9#19] - -(28) Scan parquet default.store_sales -Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(29) ColumnarToRow [codegen id : 9] -Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] - -(30) Filter [codegen id : 9] -Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] -Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) - -(31) ReusedExchange [Reuses operator id: 8] -Output [1]: [hd_demo_sk#4] - -(32) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#4] -Join condition: None - -(33) Project [codegen id : 9] -Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] -Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] - -(34) Scan parquet default.time_dim -Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/time_dim] -PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), LessThan(t_minute,30), IsNotNull(t_time_sk)] -ReadSchema: struct - -(35) ColumnarToRow [codegen id : 7] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] - -(36) Filter [codegen id : 7] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] -Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 9)) AND (t_minute#10 < 30)) AND isnotnull(t_time_sk#8)) - -(37) Project [codegen id : 7] -Output [1]: [t_time_sk#8] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] - -(38) BroadcastExchange -Input [1]: [t_time_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] - -(39) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_sold_time_sk#1] -Right keys [1]: [t_time_sk#8] -Join condition: None - -(40) Project [codegen id : 9] -Output [1]: [ss_store_sk#3] -Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] - -(41) ReusedExchange [Reuses operator id: 22] -Output [1]: [s_store_sk#12] - -(42) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#12] -Join condition: None - -(43) Project [codegen id : 9] -Output: [] -Input [2]: [ss_store_sk#3, s_store_sk#12] - -(44) HashAggregate [codegen id : 9] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#21] -Results [1]: [count#22] - -(45) Exchange -Input [1]: [count#22] -Arguments: SinglePartition, true, [id=#23] - -(46) HashAggregate [codegen id : 10] -Input [1]: [count#22] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#24] -Results [1]: [count(1)#24 AS h9_to_9_30#25] - -(47) BroadcastExchange -Input [1]: [h9_to_9_30#25] -Arguments: IdentityBroadcastMode, [id=#26] - -(48) BroadcastNestedLoopJoin -Join condition: None - -(49) Scan parquet default.store_sales -Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(50) ColumnarToRow [codegen id : 14] -Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] - -(51) Filter [codegen id : 14] -Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] -Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) - -(52) ReusedExchange [Reuses operator id: 8] -Output [1]: [hd_demo_sk#4] - -(53) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#4] -Join condition: None - -(54) Project [codegen id : 14] -Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] -Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] - -(55) Scan parquet default.time_dim -Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/time_dim] -PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] -ReadSchema: struct - -(56) ColumnarToRow [codegen id : 12] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] - -(57) Filter [codegen id : 12] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] -Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 9)) AND (t_minute#10 >= 30)) AND isnotnull(t_time_sk#8)) - -(58) Project [codegen id : 12] -Output [1]: [t_time_sk#8] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] - -(59) BroadcastExchange -Input [1]: [t_time_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] - -(60) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ss_sold_time_sk#1] -Right keys [1]: [t_time_sk#8] -Join condition: None - -(61) Project [codegen id : 14] -Output [1]: [ss_store_sk#3] -Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] - -(62) ReusedExchange [Reuses operator id: 22] -Output [1]: [s_store_sk#12] - -(63) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#12] -Join condition: None - -(64) Project [codegen id : 14] -Output: [] -Input [2]: [ss_store_sk#3, s_store_sk#12] - -(65) HashAggregate [codegen id : 14] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#28] -Results [1]: [count#29] - -(66) Exchange -Input [1]: [count#29] -Arguments: SinglePartition, true, [id=#30] - -(67) HashAggregate [codegen id : 15] -Input [1]: [count#29] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#31] -Results [1]: [count(1)#31 AS h9_30_to_10#32] - -(68) BroadcastExchange -Input [1]: [h9_30_to_10#32] -Arguments: IdentityBroadcastMode, [id=#33] - -(69) BroadcastNestedLoopJoin -Join condition: None - -(70) Scan parquet default.store_sales -Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(71) ColumnarToRow [codegen id : 19] -Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] - -(72) Filter [codegen id : 19] -Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] -Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) - -(73) ReusedExchange [Reuses operator id: 8] -Output [1]: [hd_demo_sk#4] - -(74) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#4] -Join condition: None - -(75) Project [codegen id : 19] -Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] -Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] - -(76) Scan parquet default.time_dim -Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/time_dim] -PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), LessThan(t_minute,30), IsNotNull(t_time_sk)] -ReadSchema: struct - -(77) ColumnarToRow [codegen id : 17] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] - -(78) Filter [codegen id : 17] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] -Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 10)) AND (t_minute#10 < 30)) AND isnotnull(t_time_sk#8)) - -(79) Project [codegen id : 17] -Output [1]: [t_time_sk#8] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] - -(80) BroadcastExchange -Input [1]: [t_time_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#34] - -(81) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [ss_sold_time_sk#1] -Right keys [1]: [t_time_sk#8] -Join condition: None - -(82) Project [codegen id : 19] -Output [1]: [ss_store_sk#3] -Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] - -(83) ReusedExchange [Reuses operator id: 22] -Output [1]: [s_store_sk#12] - -(84) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#12] -Join condition: None - -(85) Project [codegen id : 19] -Output: [] -Input [2]: [ss_store_sk#3, s_store_sk#12] - -(86) HashAggregate [codegen id : 19] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#35] -Results [1]: [count#36] - -(87) Exchange -Input [1]: [count#36] -Arguments: SinglePartition, true, [id=#37] - -(88) HashAggregate [codegen id : 20] -Input [1]: [count#36] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#38] -Results [1]: [count(1)#38 AS h10_to_10_30#39] - -(89) BroadcastExchange -Input [1]: [h10_to_10_30#39] -Arguments: IdentityBroadcastMode, [id=#40] - -(90) BroadcastNestedLoopJoin -Join condition: None - -(91) Scan parquet default.store_sales -Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(92) ColumnarToRow [codegen id : 24] -Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] - -(93) Filter [codegen id : 24] -Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] -Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) - -(94) ReusedExchange [Reuses operator id: 8] -Output [1]: [hd_demo_sk#4] - -(95) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#4] -Join condition: None - -(96) Project [codegen id : 24] -Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] -Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] - -(97) Scan parquet default.time_dim -Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/time_dim] -PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] -ReadSchema: struct - -(98) ColumnarToRow [codegen id : 22] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] - -(99) Filter [codegen id : 22] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] -Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 10)) AND (t_minute#10 >= 30)) AND isnotnull(t_time_sk#8)) - -(100) Project [codegen id : 22] -Output [1]: [t_time_sk#8] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] - -(101) BroadcastExchange -Input [1]: [t_time_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#41] - -(102) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [ss_sold_time_sk#1] -Right keys [1]: [t_time_sk#8] -Join condition: None - -(103) Project [codegen id : 24] -Output [1]: [ss_store_sk#3] -Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] - -(104) ReusedExchange [Reuses operator id: 22] -Output [1]: [s_store_sk#12] - -(105) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#12] -Join condition: None - -(106) Project [codegen id : 24] -Output: [] -Input [2]: [ss_store_sk#3, s_store_sk#12] - -(107) HashAggregate [codegen id : 24] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#42] -Results [1]: [count#43] - -(108) Exchange -Input [1]: [count#43] -Arguments: SinglePartition, true, [id=#44] - -(109) HashAggregate [codegen id : 25] -Input [1]: [count#43] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#45] -Results [1]: [count(1)#45 AS h10_30_to_11#46] - -(110) BroadcastExchange -Input [1]: [h10_30_to_11#46] -Arguments: IdentityBroadcastMode, [id=#47] - -(111) BroadcastNestedLoopJoin -Join condition: None - -(112) Scan parquet default.store_sales -Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(113) ColumnarToRow [codegen id : 29] -Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] - -(114) Filter [codegen id : 29] -Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] -Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) - -(115) ReusedExchange [Reuses operator id: 8] -Output [1]: [hd_demo_sk#4] - -(116) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#4] -Join condition: None - -(117) Project [codegen id : 29] -Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] -Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] - -(118) Scan parquet default.time_dim -Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/time_dim] -PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), LessThan(t_minute,30), IsNotNull(t_time_sk)] -ReadSchema: struct - -(119) ColumnarToRow [codegen id : 27] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] - -(120) Filter [codegen id : 27] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] -Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 11)) AND (t_minute#10 < 30)) AND isnotnull(t_time_sk#8)) - -(121) Project [codegen id : 27] -Output [1]: [t_time_sk#8] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] - -(122) BroadcastExchange -Input [1]: [t_time_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#48] - -(123) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ss_sold_time_sk#1] -Right keys [1]: [t_time_sk#8] -Join condition: None - -(124) Project [codegen id : 29] -Output [1]: [ss_store_sk#3] -Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] - -(125) ReusedExchange [Reuses operator id: 22] -Output [1]: [s_store_sk#12] - -(126) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#12] -Join condition: None - -(127) Project [codegen id : 29] -Output: [] -Input [2]: [ss_store_sk#3, s_store_sk#12] - -(128) HashAggregate [codegen id : 29] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#49] -Results [1]: [count#50] - -(129) Exchange -Input [1]: [count#50] -Arguments: SinglePartition, true, [id=#51] - -(130) HashAggregate [codegen id : 30] -Input [1]: [count#50] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#52] -Results [1]: [count(1)#52 AS h11_to_11_30#53] - -(131) BroadcastExchange -Input [1]: [h11_to_11_30#53] -Arguments: IdentityBroadcastMode, [id=#54] - -(132) BroadcastNestedLoopJoin -Join condition: None - -(133) Scan parquet default.store_sales -Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(134) ColumnarToRow [codegen id : 34] -Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] - -(135) Filter [codegen id : 34] -Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] -Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) - -(136) ReusedExchange [Reuses operator id: 8] -Output [1]: [hd_demo_sk#4] - -(137) BroadcastHashJoin [codegen id : 34] -Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#4] -Join condition: None - -(138) Project [codegen id : 34] -Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] -Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] - -(139) Scan parquet default.time_dim -Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/time_dim] -PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] -ReadSchema: struct - -(140) ColumnarToRow [codegen id : 32] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] - -(141) Filter [codegen id : 32] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] -Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 11)) AND (t_minute#10 >= 30)) AND isnotnull(t_time_sk#8)) - -(142) Project [codegen id : 32] -Output [1]: [t_time_sk#8] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] - -(143) BroadcastExchange -Input [1]: [t_time_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#55] - -(144) BroadcastHashJoin [codegen id : 34] -Left keys [1]: [ss_sold_time_sk#1] -Right keys [1]: [t_time_sk#8] -Join condition: None - -(145) Project [codegen id : 34] -Output [1]: [ss_store_sk#3] -Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] - -(146) ReusedExchange [Reuses operator id: 22] -Output [1]: [s_store_sk#12] - -(147) BroadcastHashJoin [codegen id : 34] -Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#12] -Join condition: None - -(148) Project [codegen id : 34] -Output: [] -Input [2]: [ss_store_sk#3, s_store_sk#12] - -(149) HashAggregate [codegen id : 34] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#56] -Results [1]: [count#57] - -(150) Exchange -Input [1]: [count#57] -Arguments: SinglePartition, true, [id=#58] - -(151) HashAggregate [codegen id : 35] -Input [1]: [count#57] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#59] -Results [1]: [count(1)#59 AS h11_30_to_12#60] - -(152) BroadcastExchange -Input [1]: [h11_30_to_12#60] -Arguments: IdentityBroadcastMode, [id=#61] - -(153) BroadcastNestedLoopJoin -Join condition: None - -(154) Scan parquet default.store_sales -Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(155) ColumnarToRow [codegen id : 39] -Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] - -(156) Filter [codegen id : 39] -Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] -Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) - -(157) ReusedExchange [Reuses operator id: 8] -Output [1]: [hd_demo_sk#4] - -(158) BroadcastHashJoin [codegen id : 39] -Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#4] -Join condition: None - -(159) Project [codegen id : 39] -Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] -Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] - -(160) Scan parquet default.time_dim -Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/time_dim] -PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,12), LessThan(t_minute,30), IsNotNull(t_time_sk)] -ReadSchema: struct - -(161) ColumnarToRow [codegen id : 37] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] - -(162) Filter [codegen id : 37] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] -Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 12)) AND (t_minute#10 < 30)) AND isnotnull(t_time_sk#8)) - -(163) Project [codegen id : 37] -Output [1]: [t_time_sk#8] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] - -(164) BroadcastExchange -Input [1]: [t_time_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#62] - -(165) BroadcastHashJoin [codegen id : 39] -Left keys [1]: [ss_sold_time_sk#1] -Right keys [1]: [t_time_sk#8] -Join condition: None - -(166) Project [codegen id : 39] -Output [1]: [ss_store_sk#3] -Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] - -(167) ReusedExchange [Reuses operator id: 22] -Output [1]: [s_store_sk#12] - -(168) BroadcastHashJoin [codegen id : 39] -Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#12] -Join condition: None - -(169) Project [codegen id : 39] -Output: [] -Input [2]: [ss_store_sk#3, s_store_sk#12] - -(170) HashAggregate [codegen id : 39] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#63] -Results [1]: [count#64] - -(171) Exchange -Input [1]: [count#64] -Arguments: SinglePartition, true, [id=#65] - -(172) HashAggregate [codegen id : 40] -Input [1]: [count#64] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#66] -Results [1]: [count(1)#66 AS h12_to_12_30#67] - -(173) BroadcastExchange -Input [1]: [h12_to_12_30#67] -Arguments: IdentityBroadcastMode, [id=#68] - -(174) BroadcastNestedLoopJoin -Join condition: None - +BroadcastNestedLoopJoin BuildRight, Inner +:- BroadcastNestedLoopJoin BuildRight, Inner +: :- BroadcastNestedLoopJoin BuildRight, Inner +: : :- BroadcastNestedLoopJoin BuildRight, Inner +: : : :- BroadcastNestedLoopJoin BuildRight, Inner +: : : : :- BroadcastNestedLoopJoin BuildRight, Inner +: : : : : :- BroadcastNestedLoopJoin BuildRight, Inner +: : : : : : :- *(5) HashAggregate(keys=[], functions=[count(1)]) +: : : : : : : +- Exchange SinglePartition +: : : : : : : +- *(4) HashAggregate(keys=[], functions=[partial_count(1)]) +: : : : : : : +- *(4) Project +: : : : : : : +- *(4) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: : : : : : : :- *(4) Project [ss_store_sk#1] +: : : : : : : : +- *(4) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : : : : : : : :- *(4) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : : : : : : : +- *(4) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : : : : : : : :- *(4) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : : : : : : : +- *(4) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : : : : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : : : : +- *(1) Project [hd_demo_sk#6] +: : : : : : : : : +- *(1) Filter (((((hd_dep_count#7 = 4) && (hd_vehicle_count#8 <= 6)) || ((hd_dep_count#7 = 2) && (hd_vehicle_count#8 <= 4))) || ((hd_dep_count#7 = 0) && (hd_vehicle_count#8 <= 2))) && isnotnull(hd_demo_sk#6)) +: : : : : : : : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#6,hd_dep_count#7,hd_vehicle_count#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(Or(And(EqualTo(hd_dep_count,4),LessThanOrEqual(hd_vehicle_count,6)),And(EqualTo(hd_dep_count,..., ReadSchema: struct +: : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : : : +- *(2) Project [t_time_sk#4] +: : : : : : : : +- *(2) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 8)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) +: : : : : : : : +- *(2) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,8), GreaterThanOrEqual(t_minute,30), IsNo..., ReadSchema: struct +: : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : : +- *(3) Project [s_store_sk#2] +: : : : : : : +- *(3) Filter ((isnotnull(s_store_name#11) && (s_store_name#11 = ese)) && isnotnull(s_store_sk#2)) +: : : : : : : +- *(3) FileScan parquet default.store[s_store_sk#2,s_store_name#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)], ReadSchema: struct +: : : : : : +- BroadcastExchange IdentityBroadcastMode +: : : : : : +- *(10) HashAggregate(keys=[], functions=[count(1)]) +: : : : : : +- Exchange SinglePartition +: : : : : : +- *(9) HashAggregate(keys=[], functions=[partial_count(1)]) +: : : : : : +- *(9) Project +: : : : : : +- *(9) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: : : : : : :- *(9) Project [ss_store_sk#1] +: : : : : : : +- *(9) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : : : : : : :- *(9) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : : : : : : +- *(9) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : : : : : : :- *(9) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : : : : : : +- *(9) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : : : : : : +- *(9) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : : +- *(7) Project [t_time_sk#4] +: : : : : : : +- *(7) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 9)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) +: : : : : : : +- *(7) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), LessThan(t_minute,30), IsNotNull(t_ti..., ReadSchema: struct +: : : : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : +- BroadcastExchange IdentityBroadcastMode +: : : : : +- *(15) HashAggregate(keys=[], functions=[count(1)]) +: : : : : +- Exchange SinglePartition +: : : : : +- *(14) HashAggregate(keys=[], functions=[partial_count(1)]) +: : : : : +- *(14) Project +: : : : : +- *(14) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: : : : : :- *(14) Project [ss_store_sk#1] +: : : : : : +- *(14) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : : : : : :- *(14) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : : : : : +- *(14) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : : : : : :- *(14) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : : : : : +- *(14) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : : : : : +- *(14) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : : +- *(12) Project [t_time_sk#4] +: : : : : : +- *(12) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 9)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) +: : : : : : +- *(12) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), GreaterThanOrEqual(t_minute,30), IsNo..., ReadSchema: struct +: : : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : +- BroadcastExchange IdentityBroadcastMode +: : : : +- *(20) HashAggregate(keys=[], functions=[count(1)]) +: : : : +- Exchange SinglePartition +: : : : +- *(19) HashAggregate(keys=[], functions=[partial_count(1)]) +: : : : +- *(19) Project +: : : : +- *(19) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: : : : :- *(19) Project [ss_store_sk#1] +: : : : : +- *(19) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : : : : :- *(19) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : : : : +- *(19) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : : : : :- *(19) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : : : : +- *(19) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : : : : +- *(19) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : : +- *(17) Project [t_time_sk#4] +: : : : : +- *(17) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 10)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) +: : : : : +- *(17) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct +: : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : +- BroadcastExchange IdentityBroadcastMode +: : : +- *(25) HashAggregate(keys=[], functions=[count(1)]) +: : : +- Exchange SinglePartition +: : : +- *(24) HashAggregate(keys=[], functions=[partial_count(1)]) +: : : +- *(24) Project +: : : +- *(24) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: : : :- *(24) Project [ss_store_sk#1] +: : : : +- *(24) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : : : :- *(24) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : : : +- *(24) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : : : :- *(24) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : : : +- *(24) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : : : +- *(24) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : : +- *(22) Project [t_time_sk#4] +: : : : +- *(22) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 10)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) +: : : : +- *(22) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct +: : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : +- BroadcastExchange IdentityBroadcastMode +: : +- *(30) HashAggregate(keys=[], functions=[count(1)]) +: : +- Exchange SinglePartition +: : +- *(29) HashAggregate(keys=[], functions=[partial_count(1)]) +: : +- *(29) Project +: : +- *(29) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: : :- *(29) Project [ss_store_sk#1] +: : : +- *(29) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : : :- *(29) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : : +- *(29) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : : :- *(29) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : : +- *(29) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : : +- *(29) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : : +- *(27) Project [t_time_sk#4] +: : : +- *(27) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 11)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) +: : : +- *(27) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct +: : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: +- BroadcastExchange IdentityBroadcastMode +: +- *(35) HashAggregate(keys=[], functions=[count(1)]) +: +- Exchange SinglePartition +: +- *(34) HashAggregate(keys=[], functions=[partial_count(1)]) +: +- *(34) Project +: +- *(34) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight +: :- *(34) Project [ss_store_sk#1] +: : +- *(34) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight +: : :- *(34) Project [ss_sold_time_sk#3, ss_store_sk#1] +: : : +- *(34) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight +: : : :- *(34) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] +: : : : +- *(34) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) +: : : : +- *(34) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +: : +- *(32) Project [t_time_sk#4] +: : +- *(32) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 11)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) +: : +- *(32) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct +: +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) ++- BroadcastExchange IdentityBroadcastMode + +- *(40) HashAggregate(keys=[], functions=[count(1)]) + +- Exchange SinglePartition + +- *(39) HashAggregate(keys=[], functions=[partial_count(1)]) + +- *(39) Project + +- *(39) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight + :- *(39) Project [ss_store_sk#1] + : +- *(39) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight + : :- *(39) Project [ss_sold_time_sk#3, ss_store_sk#1] + : : +- *(39) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight + : : :- *(39) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] + : : : +- *(39) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) + : : : +- *(39) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(37) Project [t_time_sk#4] + : +- *(37) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 12)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) + : +- *(37) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,12), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct + +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/simplified.txt index 8e72594b0..77bbef312 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/simplified.txt @@ -5,246 +5,218 @@ BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin - WholeStageCodegen (5) - HashAggregate [count] [count(1),h8_30_to_9,count] + WholeStageCodegen + HashAggregate [count,count(1)] [count,count(1),h8_30_to_9] InputAdapter Exchange #1 - WholeStageCodegen (4) - HashAggregate [count,count] + WholeStageCodegen + HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) + WholeStageCodegen Project [hd_demo_sk] - Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] + Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) + WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) + WholeStageCodegen Project [s_store_sk] Filter [s_store_name,s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_name] + Scan parquet default.store [s_store_name,s_store_sk] [s_store_name,s_store_sk] BroadcastExchange #5 - WholeStageCodegen (10) - HashAggregate [count] [count(1),h9_to_9_30,count] + WholeStageCodegen + HashAggregate [count,count(1)] [count,count(1),h9_to_9_30] InputAdapter Exchange #6 - WholeStageCodegen (9) - HashAggregate [count,count] + WholeStageCodegen + HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] InputAdapter - ReusedExchange [hd_demo_sk] #2 + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 InputAdapter BroadcastExchange #7 - WholeStageCodegen (7) + WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] InputAdapter - ReusedExchange [s_store_sk] #4 + ReusedExchange [s_store_sk] [s_store_sk] #4 BroadcastExchange #8 - WholeStageCodegen (15) - HashAggregate [count] [count(1),h9_30_to_10,count] + WholeStageCodegen + HashAggregate [count,count(1)] [count,count(1),h9_30_to_10] InputAdapter Exchange #9 - WholeStageCodegen (14) - HashAggregate [count,count] + WholeStageCodegen + HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] InputAdapter - ReusedExchange [hd_demo_sk] #2 + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 InputAdapter BroadcastExchange #10 - WholeStageCodegen (12) + WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] InputAdapter - ReusedExchange [s_store_sk] #4 + ReusedExchange [s_store_sk] [s_store_sk] #4 BroadcastExchange #11 - WholeStageCodegen (20) - HashAggregate [count] [count(1),h10_to_10_30,count] + WholeStageCodegen + HashAggregate [count,count(1)] [count,count(1),h10_to_10_30] InputAdapter Exchange #12 - WholeStageCodegen (19) - HashAggregate [count,count] + WholeStageCodegen + HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] InputAdapter - ReusedExchange [hd_demo_sk] #2 + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 InputAdapter BroadcastExchange #13 - WholeStageCodegen (17) + WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] InputAdapter - ReusedExchange [s_store_sk] #4 + ReusedExchange [s_store_sk] [s_store_sk] #4 BroadcastExchange #14 - WholeStageCodegen (25) - HashAggregate [count] [count(1),h10_30_to_11,count] + WholeStageCodegen + HashAggregate [count,count(1)] [count,count(1),h10_30_to_11] InputAdapter Exchange #15 - WholeStageCodegen (24) - HashAggregate [count,count] + WholeStageCodegen + HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] InputAdapter - ReusedExchange [hd_demo_sk] #2 + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 InputAdapter BroadcastExchange #16 - WholeStageCodegen (22) + WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] InputAdapter - ReusedExchange [s_store_sk] #4 + ReusedExchange [s_store_sk] [s_store_sk] #4 BroadcastExchange #17 - WholeStageCodegen (30) - HashAggregate [count] [count(1),h11_to_11_30,count] + WholeStageCodegen + HashAggregate [count,count(1)] [count,count(1),h11_to_11_30] InputAdapter Exchange #18 - WholeStageCodegen (29) - HashAggregate [count,count] + WholeStageCodegen + HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] InputAdapter - ReusedExchange [hd_demo_sk] #2 + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 InputAdapter BroadcastExchange #19 - WholeStageCodegen (27) + WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] InputAdapter - ReusedExchange [s_store_sk] #4 + ReusedExchange [s_store_sk] [s_store_sk] #4 BroadcastExchange #20 - WholeStageCodegen (35) - HashAggregate [count] [count(1),h11_30_to_12,count] + WholeStageCodegen + HashAggregate [count,count(1)] [count,count(1),h11_30_to_12] InputAdapter Exchange #21 - WholeStageCodegen (34) - HashAggregate [count,count] + WholeStageCodegen + HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] InputAdapter - ReusedExchange [hd_demo_sk] #2 + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 InputAdapter BroadcastExchange #22 - WholeStageCodegen (32) + WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] InputAdapter - ReusedExchange [s_store_sk] #4 + ReusedExchange [s_store_sk] [s_store_sk] #4 BroadcastExchange #23 - WholeStageCodegen (40) - HashAggregate [count] [count(1),h12_to_12_30,count] + WholeStageCodegen + HashAggregate [count,count(1)] [count,count(1),h12_to_12_30] InputAdapter Exchange #24 - WholeStageCodegen (39) - HashAggregate [count,count] + WholeStageCodegen + HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] InputAdapter - ReusedExchange [hd_demo_sk] #2 + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 InputAdapter BroadcastExchange #25 - WholeStageCodegen (37) + WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] InputAdapter - ReusedExchange [s_store_sk] #4 + ReusedExchange [s_store_sk] [s_store_sk] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/explain.txt index 8dca84461..fb0d68dce 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/explain.txt @@ -1,175 +1,31 @@ == Physical Plan == -TakeOrderedAndProject (31) -+- * Project (30) - +- * Filter (29) - +- Window (28) - +- * Sort (27) - +- Exchange (26) - +- * HashAggregate (25) - +- Exchange (24) - +- * HashAggregate (23) - +- * Project (22) - +- * BroadcastHashJoin Inner BuildRight (21) - :- * Project (16) - : +- * BroadcastHashJoin Inner BuildRight (15) - : :- * Project (9) - : : +- * BroadcastHashJoin Inner BuildRight (8) - : : :- * Filter (3) - : : : +- * ColumnarToRow (2) - : : : +- Scan parquet default.item (1) - : : +- BroadcastExchange (7) - : : +- * Filter (6) - : : +- * ColumnarToRow (5) - : : +- Scan parquet default.store_sales (4) - : +- BroadcastExchange (14) - : +- * Project (13) - : +- * Filter (12) - : +- * ColumnarToRow (11) - : +- Scan parquet default.date_dim (10) - +- BroadcastExchange (20) - +- * Filter (19) - +- * ColumnarToRow (18) - +- Scan parquet default.store (17) - - -(1) Scan parquet default.item -Output [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [Or(And(In(i_category, [Books,Electronics,Sports]),In(i_class, [computers,stereo,football])),And(In(i_category, [Men,Jewelry,Women]),In(i_class, [shirts,birdal,dresses]))), IsNotNull(i_item_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] - -(3) Filter [codegen id : 4] -Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] -Condition : (((i_category#4 IN (Books,Electronics,Sports) AND i_class#3 IN (computers,stereo,football)) OR (i_category#4 IN (Men,Jewelry,Women) AND i_class#3 IN (shirts,birdal,dresses))) AND isnotnull(i_item_sk#1)) - -(4) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [4]: [ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] - -(6) Filter [codegen id : 1] -Input [4]: [ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] -Condition : ((isnotnull(ss_item_sk#6) AND isnotnull(ss_sold_date_sk#5)) AND isnotnull(ss_store_sk#7)) - -(7) BroadcastExchange -Input [4]: [ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#9] - -(8) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_item_sk#1] -Right keys [1]: [ss_item_sk#6] -Join condition: None - -(9) Project [codegen id : 4] -Output [6]: [i_brand#2, i_class#3, i_category#4, ss_sold_date_sk#5, ss_store_sk#7, ss_sales_price#8] -Input [8]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] - -(10) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#11, d_moy#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] - -(12) Filter [codegen id : 2] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] -Condition : ((isnotnull(d_year#11) AND (d_year#11 = 1999)) AND isnotnull(d_date_sk#10)) - -(13) Project [codegen id : 2] -Output [2]: [d_date_sk#10, d_moy#12] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] - -(14) BroadcastExchange -Input [2]: [d_date_sk#10, d_moy#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] - -(15) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#5] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(16) Project [codegen id : 4] -Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#7, ss_sales_price#8, d_moy#12] -Input [8]: [i_brand#2, i_class#3, i_category#4, ss_sold_date_sk#5, ss_store_sk#7, ss_sales_price#8, d_date_sk#10, d_moy#12] - -(17) Scan parquet default.store -Output [3]: [s_store_sk#14, s_store_name#15, s_company_name#16] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct - -(18) ColumnarToRow [codegen id : 3] -Input [3]: [s_store_sk#14, s_store_name#15, s_company_name#16] - -(19) Filter [codegen id : 3] -Input [3]: [s_store_sk#14, s_store_name#15, s_company_name#16] -Condition : isnotnull(s_store_sk#14) - -(20) BroadcastExchange -Input [3]: [s_store_sk#14, s_store_name#15, s_company_name#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] - -(21) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#7] -Right keys [1]: [s_store_sk#14] -Join condition: None - -(22) Project [codegen id : 4] -Output [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#8, d_moy#12, s_store_name#15, s_company_name#16] -Input [9]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#7, ss_sales_price#8, d_moy#12, s_store_sk#14, s_store_name#15, s_company_name#16] - -(23) HashAggregate [codegen id : 4] -Input [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#8, d_moy#12, s_store_name#15, s_company_name#16] -Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#8))] -Aggregate Attributes [1]: [sum#18] -Results [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum#19] - -(24) Exchange -Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum#19] -Arguments: hashpartitioning(i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, 5), true, [id=#20] - -(25) HashAggregate [codegen id : 5] -Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum#19] -Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12] -Functions [1]: [sum(UnscaledValue(ss_sales_price#8))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#8))#21] -Results [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, MakeDecimal(sum(UnscaledValue(ss_sales_price#8))#21,17,2) AS sum_sales#22, MakeDecimal(sum(UnscaledValue(ss_sales_price#8))#21,17,2) AS _w0#23] - -(26) Exchange -Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23] -Arguments: hashpartitioning(i_category#4, i_brand#2, s_store_name#15, s_company_name#16, 5), true, [id=#24] - -(27) Sort [codegen id : 6] -Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23] -Arguments: [i_category#4 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#15 ASC NULLS FIRST, s_company_name#16 ASC NULLS FIRST], false, 0 - -(28) Window -Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23] -Arguments: [avg(_w0#23) windowspecdefinition(i_category#4, i_brand#2, s_store_name#15, s_company_name#16, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#25], [i_category#4, i_brand#2, s_store_name#15, s_company_name#16] - -(29) Filter [codegen id : 7] -Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23, avg_monthly_sales#25] -Condition : (CASE WHEN NOT (avg_monthly_sales#25 = 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000) - -(30) Project [codegen id : 7] -Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, avg_monthly_sales#25] -Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23, avg_monthly_sales#25] - -(31) TakeOrderedAndProject -Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, avg_monthly_sales#25] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, s_store_name#15 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, avg_monthly_sales#25] - +TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,s_store_name#3 ASC NULLS FIRST], output=[i_category#4,i_class#5,i_brand#6,s_store_name#3,s_company_name#7,d_moy#8,sum_sales#1,avg_monthly_sales#2]) ++- *(7) Project [i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8, sum_sales#1, avg_monthly_sales#2] + +- *(7) Filter (CASE WHEN NOT (avg_monthly_sales#2 = 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000) + +- Window [avg(_w0#9) windowspecdefinition(i_category#4, i_brand#6, s_store_name#3, s_company_name#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#6, s_store_name#3, s_company_name#7] + +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#6 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#7 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(i_category#4, i_brand#6, s_store_name#3, s_company_name#7, 5) + +- *(5) HashAggregate(keys=[i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#10))]) + +- Exchange hashpartitioning(i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8, 5) + +- *(4) HashAggregate(keys=[i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#10))]) + +- *(4) Project [i_brand#6, i_class#5, i_category#4, ss_sales_price#10, d_moy#8, s_store_name#3, s_company_name#7] + +- *(4) BroadcastHashJoin [ss_store_sk#11], [s_store_sk#12], Inner, BuildRight + :- *(4) Project [i_brand#6, i_class#5, i_category#4, ss_store_sk#11, ss_sales_price#10, d_moy#8] + : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight + : :- *(4) Project [i_brand#6, i_class#5, i_category#4, ss_sold_date_sk#13, ss_store_sk#11, ss_sales_price#10] + : : +- *(4) BroadcastHashJoin [i_item_sk#15], [ss_item_sk#16], Inner, BuildRight + : : :- *(4) Project [i_item_sk#15, i_brand#6, i_class#5, i_category#4] + : : : +- *(4) Filter (((i_category#4 IN (Books,Electronics,Sports) && i_class#5 IN (computers,stereo,football)) || (i_category#4 IN (Men,Jewelry,Women) && i_class#5 IN (shirts,birdal,dresses))) && isnotnull(i_item_sk#15)) + : : : +- *(4) FileScan parquet default.item[i_item_sk#15,i_brand#6,i_class#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(And(In(i_category, [Books,Electronics,Sports]),In(i_class, [computers,stereo,football])),And(..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : +- *(1) Project [ss_sold_date_sk#13, ss_item_sk#16, ss_store_sk#11, ss_sales_price#10] + : : +- *(1) Filter ((isnotnull(ss_item_sk#16) && isnotnull(ss_sold_date_sk#13)) && isnotnull(ss_store_sk#11)) + : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_item_sk#16,ss_store_sk#11,ss_sales_price#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#14, d_moy#8] + : +- *(2) Filter ((isnotnull(d_year#17) && (d_year#17 = 1999)) && isnotnull(d_date_sk#14)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#14,d_year#17,d_moy#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [s_store_sk#12, s_store_name#3, s_company_name#7] + +- *(3) Filter isnotnull(s_store_sk#12) + +- *(3) FileScan parquet default.store[s_store_sk#12,s_store_name#3,s_company_name#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/simplified.txt index efeab7f69..33e1ef0af 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/simplified.txt @@ -1,48 +1,43 @@ -TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_class,i_brand,s_company_name,d_moy] - WholeStageCodegen (7) - Project [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,sum_sales,avg_monthly_sales] +TakeOrderedAndProject [avg_monthly_sales,d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum_sales] + WholeStageCodegen + Project [avg_monthly_sales,d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum_sales] Filter [avg_monthly_sales,sum_sales] InputAdapter - Window [_w0,i_category,i_brand,s_store_name,s_company_name] - WholeStageCodegen (6) - Sort [i_category,i_brand,s_store_name,s_company_name] + Window [_w0,i_brand,i_category,s_company_name,s_store_name] + WholeStageCodegen + Sort [i_brand,i_category,s_company_name,s_store_name] InputAdapter - Exchange [i_category,i_brand,s_store_name,s_company_name] #1 - WholeStageCodegen (5) - HashAggregate [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + Exchange [i_brand,i_category,s_company_name,s_store_name] #1 + WholeStageCodegen + HashAggregate [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] InputAdapter - Exchange [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy] #2 - WholeStageCodegen (4) - HashAggregate [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,ss_sales_price] [sum,sum] - Project [i_brand,i_class,i_category,ss_sales_price,d_moy,s_store_name,s_company_name] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [i_brand,i_class,i_category,ss_store_sk,ss_sales_price,d_moy] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [i_brand,i_class,i_category,ss_sold_date_sk,ss_store_sk,ss_sales_price] + Exchange [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name] #2 + WholeStageCodegen + HashAggregate [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,ss_sales_price,sum,sum] [sum,sum] + Project [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,ss_sales_price] + BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [d_moy,i_brand,i_category,i_class,ss_sales_price,ss_store_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_brand,i_category,i_class,ss_sales_price,ss_sold_date_sk,ss_store_sk] BroadcastHashJoin [i_item_sk,ss_item_sk] - Filter [i_category,i_class,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand,i_class,i_category] + Project [i_brand,i_category,i_class,i_item_sk] + Filter [i_category,i_class,i_item_sk] + Scan parquet default.item [i_brand,i_category,i_class,i_item_sk] [i_brand,i_category,i_class,i_item_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + WholeStageCodegen + Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date_sk,d_moy] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #5 - WholeStageCodegen (3) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_name,s_company_name] + WholeStageCodegen + Project [s_company_name,s_store_name,s_store_sk] + Filter [s_store_sk] + Scan parquet default.store [s_company_name,s_store_name,s_store_sk] [s_company_name,s_store_name,s_store_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/explain.txt index 55f7977b2..0ed6b1727 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/explain.txt @@ -1,718 +1,109 @@ == Physical Plan == -* Project (4) -+- * Filter (3) - +- * ColumnarToRow (2) - +- Scan parquet default.reason (1) - - -(1) Scan parquet default.reason -Output [1]: [r_reason_sk#1] -Batched: true -Location [not included in comparison]/{warehouse_dir}/reason] -PushedFilters: [IsNotNull(r_reason_sk), EqualTo(r_reason_sk,1)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 1] -Input [1]: [r_reason_sk#1] - -(3) Filter [codegen id : 1] -Input [1]: [r_reason_sk#1] -Condition : (isnotnull(r_reason_sk#1) AND (r_reason_sk#1 = 1)) - -(4) Project [codegen id : 1] -Output [5]: [CASE WHEN (Subquery scalar-subquery#2, [id=#3] > 62316685) THEN Subquery scalar-subquery#4, [id=#5] ELSE Subquery scalar-subquery#6, [id=#7] END AS bucket1#8, CASE WHEN (Subquery scalar-subquery#9, [id=#10] > 19045798) THEN Subquery scalar-subquery#11, [id=#12] ELSE Subquery scalar-subquery#13, [id=#14] END AS bucket2#15, CASE WHEN (Subquery scalar-subquery#16, [id=#17] > 365541424) THEN Subquery scalar-subquery#18, [id=#19] ELSE Subquery scalar-subquery#20, [id=#21] END AS bucket3#22, CASE WHEN (Subquery scalar-subquery#23, [id=#24] > 216357808) THEN Subquery scalar-subquery#25, [id=#26] ELSE Subquery scalar-subquery#27, [id=#28] END AS bucket4#29, CASE WHEN (Subquery scalar-subquery#30, [id=#31] > 184483884) THEN Subquery scalar-subquery#32, [id=#33] ELSE Subquery scalar-subquery#34, [id=#35] END AS bucket5#36] -Input [1]: [r_reason_sk#1] - -===== Subqueries ===== - -Subquery:1 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#2, [id=#3] -* HashAggregate (11) -+- Exchange (10) - +- * HashAggregate (9) - +- * Project (8) - +- * Filter (7) - +- * ColumnarToRow (6) - +- Scan parquet default.store_sales (5) - - -(5) Scan parquet default.store_sales -Output [1]: [ss_quantity#37] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] -ReadSchema: struct - -(6) ColumnarToRow [codegen id : 1] -Input [1]: [ss_quantity#37] - -(7) Filter [codegen id : 1] -Input [1]: [ss_quantity#37] -Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 1)) AND (ss_quantity#37 <= 20)) - -(8) Project [codegen id : 1] -Output: [] -Input [1]: [ss_quantity#37] - -(9) HashAggregate [codegen id : 1] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#38] -Results [1]: [count#39] - -(10) Exchange -Input [1]: [count#39] -Arguments: SinglePartition, true, [id=#40] - -(11) HashAggregate [codegen id : 2] -Input [1]: [count#39] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#41] -Results [1]: [count(1)#41 AS count(1)#42] - -Subquery:2 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#4, [id=#5] -* HashAggregate (18) -+- Exchange (17) - +- * HashAggregate (16) - +- * Project (15) - +- * Filter (14) - +- * ColumnarToRow (13) - +- Scan parquet default.store_sales (12) - - -(12) Scan parquet default.store_sales -Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] -ReadSchema: struct - -(13) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] - -(14) Filter [codegen id : 1] -Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] -Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 1)) AND (ss_quantity#37 <= 20)) - -(15) Project [codegen id : 1] -Output [1]: [ss_ext_discount_amt#43] -Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] - -(16) HashAggregate [codegen id : 1] -Input [1]: [ss_ext_discount_amt#43] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] -Aggregate Attributes [2]: [sum#44, count#45] -Results [2]: [sum#46, count#47] - -(17) Exchange -Input [2]: [sum#46, count#47] -Arguments: SinglePartition, true, [id=#48] - -(18) HashAggregate [codegen id : 2] -Input [2]: [sum#46, count#47] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#49] -Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#49 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#50] - -Subquery:3 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#6, [id=#7] -* HashAggregate (25) -+- Exchange (24) - +- * HashAggregate (23) - +- * Project (22) - +- * Filter (21) - +- * ColumnarToRow (20) - +- Scan parquet default.store_sales (19) - - -(19) Scan parquet default.store_sales -Output [2]: [ss_quantity#37, ss_net_paid#51] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] -ReadSchema: struct - -(20) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#37, ss_net_paid#51] - -(21) Filter [codegen id : 1] -Input [2]: [ss_quantity#37, ss_net_paid#51] -Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 1)) AND (ss_quantity#37 <= 20)) - -(22) Project [codegen id : 1] -Output [1]: [ss_net_paid#51] -Input [2]: [ss_quantity#37, ss_net_paid#51] - -(23) HashAggregate [codegen id : 1] -Input [1]: [ss_net_paid#51] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] -Aggregate Attributes [2]: [sum#52, count#53] -Results [2]: [sum#54, count#55] - -(24) Exchange -Input [2]: [sum#54, count#55] -Arguments: SinglePartition, true, [id=#56] - -(25) HashAggregate [codegen id : 2] -Input [2]: [sum#54, count#55] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#57] -Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#57 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#58] - -Subquery:4 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#9, [id=#10] -* HashAggregate (32) -+- Exchange (31) - +- * HashAggregate (30) - +- * Project (29) - +- * Filter (28) - +- * ColumnarToRow (27) - +- Scan parquet default.store_sales (26) - - -(26) Scan parquet default.store_sales -Output [1]: [ss_quantity#37] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] -ReadSchema: struct - -(27) ColumnarToRow [codegen id : 1] -Input [1]: [ss_quantity#37] - -(28) Filter [codegen id : 1] -Input [1]: [ss_quantity#37] -Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 21)) AND (ss_quantity#37 <= 40)) - -(29) Project [codegen id : 1] -Output: [] -Input [1]: [ss_quantity#37] - -(30) HashAggregate [codegen id : 1] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#59] -Results [1]: [count#60] - -(31) Exchange -Input [1]: [count#60] -Arguments: SinglePartition, true, [id=#61] - -(32) HashAggregate [codegen id : 2] -Input [1]: [count#60] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#62] -Results [1]: [count(1)#62 AS count(1)#63] - -Subquery:5 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#11, [id=#12] -* HashAggregate (39) -+- Exchange (38) - +- * HashAggregate (37) - +- * Project (36) - +- * Filter (35) - +- * ColumnarToRow (34) - +- Scan parquet default.store_sales (33) - - -(33) Scan parquet default.store_sales -Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] -ReadSchema: struct - -(34) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] - -(35) Filter [codegen id : 1] -Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] -Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 21)) AND (ss_quantity#37 <= 40)) - -(36) Project [codegen id : 1] -Output [1]: [ss_ext_discount_amt#43] -Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] - -(37) HashAggregate [codegen id : 1] -Input [1]: [ss_ext_discount_amt#43] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] -Aggregate Attributes [2]: [sum#64, count#65] -Results [2]: [sum#66, count#67] - -(38) Exchange -Input [2]: [sum#66, count#67] -Arguments: SinglePartition, true, [id=#68] - -(39) HashAggregate [codegen id : 2] -Input [2]: [sum#66, count#67] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#69] -Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#69 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#70] - -Subquery:6 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#13, [id=#14] -* HashAggregate (46) -+- Exchange (45) - +- * HashAggregate (44) - +- * Project (43) - +- * Filter (42) - +- * ColumnarToRow (41) - +- Scan parquet default.store_sales (40) - - -(40) Scan parquet default.store_sales -Output [2]: [ss_quantity#37, ss_net_paid#51] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] -ReadSchema: struct - -(41) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#37, ss_net_paid#51] - -(42) Filter [codegen id : 1] -Input [2]: [ss_quantity#37, ss_net_paid#51] -Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 21)) AND (ss_quantity#37 <= 40)) - -(43) Project [codegen id : 1] -Output [1]: [ss_net_paid#51] -Input [2]: [ss_quantity#37, ss_net_paid#51] - -(44) HashAggregate [codegen id : 1] -Input [1]: [ss_net_paid#51] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] -Aggregate Attributes [2]: [sum#71, count#72] -Results [2]: [sum#73, count#74] - -(45) Exchange -Input [2]: [sum#73, count#74] -Arguments: SinglePartition, true, [id=#75] - -(46) HashAggregate [codegen id : 2] -Input [2]: [sum#73, count#74] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#76] -Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#76 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#77] - -Subquery:7 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#16, [id=#17] -* HashAggregate (53) -+- Exchange (52) - +- * HashAggregate (51) - +- * Project (50) - +- * Filter (49) - +- * ColumnarToRow (48) - +- Scan parquet default.store_sales (47) - - -(47) Scan parquet default.store_sales -Output [1]: [ss_quantity#37] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] -ReadSchema: struct - -(48) ColumnarToRow [codegen id : 1] -Input [1]: [ss_quantity#37] - -(49) Filter [codegen id : 1] -Input [1]: [ss_quantity#37] -Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 41)) AND (ss_quantity#37 <= 60)) - -(50) Project [codegen id : 1] -Output: [] -Input [1]: [ss_quantity#37] - -(51) HashAggregate [codegen id : 1] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#78] -Results [1]: [count#79] - -(52) Exchange -Input [1]: [count#79] -Arguments: SinglePartition, true, [id=#80] - -(53) HashAggregate [codegen id : 2] -Input [1]: [count#79] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#81] -Results [1]: [count(1)#81 AS count(1)#82] - -Subquery:8 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#18, [id=#19] -* HashAggregate (60) -+- Exchange (59) - +- * HashAggregate (58) - +- * Project (57) - +- * Filter (56) - +- * ColumnarToRow (55) - +- Scan parquet default.store_sales (54) - - -(54) Scan parquet default.store_sales -Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] -ReadSchema: struct - -(55) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] - -(56) Filter [codegen id : 1] -Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] -Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 41)) AND (ss_quantity#37 <= 60)) - -(57) Project [codegen id : 1] -Output [1]: [ss_ext_discount_amt#43] -Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] - -(58) HashAggregate [codegen id : 1] -Input [1]: [ss_ext_discount_amt#43] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] -Aggregate Attributes [2]: [sum#83, count#84] -Results [2]: [sum#85, count#86] - -(59) Exchange -Input [2]: [sum#85, count#86] -Arguments: SinglePartition, true, [id=#87] - -(60) HashAggregate [codegen id : 2] -Input [2]: [sum#85, count#86] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#88] -Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#88 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#89] - -Subquery:9 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#20, [id=#21] -* HashAggregate (67) -+- Exchange (66) - +- * HashAggregate (65) - +- * Project (64) - +- * Filter (63) - +- * ColumnarToRow (62) - +- Scan parquet default.store_sales (61) - - -(61) Scan parquet default.store_sales -Output [2]: [ss_quantity#37, ss_net_paid#51] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] -ReadSchema: struct - -(62) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#37, ss_net_paid#51] - -(63) Filter [codegen id : 1] -Input [2]: [ss_quantity#37, ss_net_paid#51] -Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 41)) AND (ss_quantity#37 <= 60)) - -(64) Project [codegen id : 1] -Output [1]: [ss_net_paid#51] -Input [2]: [ss_quantity#37, ss_net_paid#51] - -(65) HashAggregate [codegen id : 1] -Input [1]: [ss_net_paid#51] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] -Aggregate Attributes [2]: [sum#90, count#91] -Results [2]: [sum#92, count#93] - -(66) Exchange -Input [2]: [sum#92, count#93] -Arguments: SinglePartition, true, [id=#94] - -(67) HashAggregate [codegen id : 2] -Input [2]: [sum#92, count#93] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#95] -Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#95 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#96] - -Subquery:10 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#23, [id=#24] -* HashAggregate (74) -+- Exchange (73) - +- * HashAggregate (72) - +- * Project (71) - +- * Filter (70) - +- * ColumnarToRow (69) - +- Scan parquet default.store_sales (68) - - -(68) Scan parquet default.store_sales -Output [1]: [ss_quantity#37] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] -ReadSchema: struct - -(69) ColumnarToRow [codegen id : 1] -Input [1]: [ss_quantity#37] - -(70) Filter [codegen id : 1] -Input [1]: [ss_quantity#37] -Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 61)) AND (ss_quantity#37 <= 80)) - -(71) Project [codegen id : 1] -Output: [] -Input [1]: [ss_quantity#37] - -(72) HashAggregate [codegen id : 1] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#97] -Results [1]: [count#98] - -(73) Exchange -Input [1]: [count#98] -Arguments: SinglePartition, true, [id=#99] - -(74) HashAggregate [codegen id : 2] -Input [1]: [count#98] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#100] -Results [1]: [count(1)#100 AS count(1)#101] - -Subquery:11 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#25, [id=#26] -* HashAggregate (81) -+- Exchange (80) - +- * HashAggregate (79) - +- * Project (78) - +- * Filter (77) - +- * ColumnarToRow (76) - +- Scan parquet default.store_sales (75) - - -(75) Scan parquet default.store_sales -Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] -ReadSchema: struct - -(76) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] - -(77) Filter [codegen id : 1] -Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] -Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 61)) AND (ss_quantity#37 <= 80)) - -(78) Project [codegen id : 1] -Output [1]: [ss_ext_discount_amt#43] -Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] - -(79) HashAggregate [codegen id : 1] -Input [1]: [ss_ext_discount_amt#43] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] -Aggregate Attributes [2]: [sum#102, count#103] -Results [2]: [sum#104, count#105] - -(80) Exchange -Input [2]: [sum#104, count#105] -Arguments: SinglePartition, true, [id=#106] - -(81) HashAggregate [codegen id : 2] -Input [2]: [sum#104, count#105] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#107] -Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#107 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#108] - -Subquery:12 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#27, [id=#28] -* HashAggregate (88) -+- Exchange (87) - +- * HashAggregate (86) - +- * Project (85) - +- * Filter (84) - +- * ColumnarToRow (83) - +- Scan parquet default.store_sales (82) - - -(82) Scan parquet default.store_sales -Output [2]: [ss_quantity#37, ss_net_paid#51] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] -ReadSchema: struct - -(83) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#37, ss_net_paid#51] - -(84) Filter [codegen id : 1] -Input [2]: [ss_quantity#37, ss_net_paid#51] -Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 61)) AND (ss_quantity#37 <= 80)) - -(85) Project [codegen id : 1] -Output [1]: [ss_net_paid#51] -Input [2]: [ss_quantity#37, ss_net_paid#51] - -(86) HashAggregate [codegen id : 1] -Input [1]: [ss_net_paid#51] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] -Aggregate Attributes [2]: [sum#109, count#110] -Results [2]: [sum#111, count#112] - -(87) Exchange -Input [2]: [sum#111, count#112] -Arguments: SinglePartition, true, [id=#113] - -(88) HashAggregate [codegen id : 2] -Input [2]: [sum#111, count#112] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#114] -Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#114 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#115] - -Subquery:13 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#30, [id=#31] -* HashAggregate (95) -+- Exchange (94) - +- * HashAggregate (93) - +- * Project (92) - +- * Filter (91) - +- * ColumnarToRow (90) - +- Scan parquet default.store_sales (89) - - -(89) Scan parquet default.store_sales -Output [1]: [ss_quantity#37] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] -ReadSchema: struct - -(90) ColumnarToRow [codegen id : 1] -Input [1]: [ss_quantity#37] - -(91) Filter [codegen id : 1] -Input [1]: [ss_quantity#37] -Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 81)) AND (ss_quantity#37 <= 100)) - -(92) Project [codegen id : 1] -Output: [] -Input [1]: [ss_quantity#37] - -(93) HashAggregate [codegen id : 1] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#116] -Results [1]: [count#117] - -(94) Exchange -Input [1]: [count#117] -Arguments: SinglePartition, true, [id=#118] - -(95) HashAggregate [codegen id : 2] -Input [1]: [count#117] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#119] -Results [1]: [count(1)#119 AS count(1)#120] - -Subquery:14 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#32, [id=#33] -* HashAggregate (102) -+- Exchange (101) - +- * HashAggregate (100) - +- * Project (99) - +- * Filter (98) - +- * ColumnarToRow (97) - +- Scan parquet default.store_sales (96) - - -(96) Scan parquet default.store_sales -Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] -ReadSchema: struct - -(97) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] - -(98) Filter [codegen id : 1] -Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] -Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 81)) AND (ss_quantity#37 <= 100)) - -(99) Project [codegen id : 1] -Output [1]: [ss_ext_discount_amt#43] -Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] - -(100) HashAggregate [codegen id : 1] -Input [1]: [ss_ext_discount_amt#43] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] -Aggregate Attributes [2]: [sum#121, count#122] -Results [2]: [sum#123, count#124] - -(101) Exchange -Input [2]: [sum#123, count#124] -Arguments: SinglePartition, true, [id=#125] - -(102) HashAggregate [codegen id : 2] -Input [2]: [sum#123, count#124] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#126] -Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#126 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#127] - -Subquery:15 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#34, [id=#35] -* HashAggregate (109) -+- Exchange (108) - +- * HashAggregate (107) - +- * Project (106) - +- * Filter (105) - +- * ColumnarToRow (104) - +- Scan parquet default.store_sales (103) - - -(103) Scan parquet default.store_sales -Output [2]: [ss_quantity#37, ss_net_paid#51] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] -ReadSchema: struct - -(104) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#37, ss_net_paid#51] - -(105) Filter [codegen id : 1] -Input [2]: [ss_quantity#37, ss_net_paid#51] -Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 81)) AND (ss_quantity#37 <= 100)) - -(106) Project [codegen id : 1] -Output [1]: [ss_net_paid#51] -Input [2]: [ss_quantity#37, ss_net_paid#51] - -(107) HashAggregate [codegen id : 1] -Input [1]: [ss_net_paid#51] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] -Aggregate Attributes [2]: [sum#128, count#129] -Results [2]: [sum#130, count#131] - -(108) Exchange -Input [2]: [sum#130, count#131] -Arguments: SinglePartition, true, [id=#132] - -(109) HashAggregate [codegen id : 2] -Input [2]: [sum#130, count#131] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#133] -Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#133 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#134] - - +*(1) Project [CASE WHEN (Subquery subquery1150 > 62316685) THEN Subquery subquery1151 ELSE Subquery subquery1152 END AS bucket1#1, CASE WHEN (Subquery subquery1154 > 19045798) THEN Subquery subquery1155 ELSE Subquery subquery1156 END AS bucket2#2, CASE WHEN (Subquery subquery1158 > 365541424) THEN Subquery subquery1159 ELSE Subquery subquery1160 END AS bucket3#3, CASE WHEN (Subquery subquery1162 > 216357808) THEN Subquery subquery1163 ELSE Subquery subquery1164 END AS bucket4#4, CASE WHEN (Subquery subquery1166 > 184483884) THEN Subquery subquery1167 ELSE Subquery subquery1168 END AS bucket5#5] +: :- Subquery subquery1150 +: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) +: : +- *(1) Project +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct +: :- Subquery subquery1151 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- *(1) Project [ss_ext_discount_amt#7] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct +: :- Subquery subquery1152 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) +: : +- *(1) Project [ss_net_paid#8] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct +: :- Subquery subquery1154 +: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) +: : +- *(1) Project +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct +: :- Subquery subquery1155 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- *(1) Project [ss_ext_discount_amt#7] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct +: :- Subquery subquery1156 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) +: : +- *(1) Project [ss_net_paid#8] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct +: :- Subquery subquery1158 +: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) +: : +- *(1) Project +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct +: :- Subquery subquery1159 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- *(1) Project [ss_ext_discount_amt#7] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct +: :- Subquery subquery1160 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) +: : +- *(1) Project [ss_net_paid#8] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct +: :- Subquery subquery1162 +: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) +: : +- *(1) Project +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct +: :- Subquery subquery1163 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- *(1) Project [ss_ext_discount_amt#7] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct +: :- Subquery subquery1164 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) +: : +- *(1) Project [ss_net_paid#8] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct +: :- Subquery subquery1166 +: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) +: : +- *(1) Project +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct +: :- Subquery subquery1167 +: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- Exchange SinglePartition +: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) +: : +- *(1) Project [ss_ext_discount_amt#7] +: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct +: +- Subquery subquery1168 +: +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) +: +- Exchange SinglePartition +: +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) +: +- *(1) Project [ss_net_paid#8] +: +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) +: +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct ++- *(1) Filter (isnotnull(r_reason_sk#9) && (r_reason_sk#9 = 1)) + +- *(1) FileScan parquet default.reason[r_reason_sk#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk), EqualTo(r_reason_sk,1)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/simplified.txt index 66502080d..f0bccc7ec 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/simplified.txt @@ -1,186 +1,154 @@ -WholeStageCodegen (1) +WholeStageCodegen Project Subquery #1 - WholeStageCodegen (2) - HashAggregate [count] [count(1),count(1),count] + WholeStageCodegen + HashAggregate [count,count(1)] [count,count(1),count(1)] InputAdapter Exchange #1 - WholeStageCodegen (1) - HashAggregate [count,count] + WholeStageCodegen + HashAggregate [count,count] [count,count] Project Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity] + Scan parquet default.store_sales [ss_quantity] [ss_quantity] Subquery #2 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + WholeStageCodegen + HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] InputAdapter Exchange #2 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] + WholeStageCodegen + HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] Project [ss_ext_discount_amt] Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] Subquery #3 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + WholeStageCodegen + HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] InputAdapter Exchange #3 - WholeStageCodegen (1) - HashAggregate [ss_net_paid] [sum,count,sum,count] + WholeStageCodegen + HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] Project [ss_net_paid] Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_net_paid] + Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] Subquery #4 - WholeStageCodegen (2) - HashAggregate [count] [count(1),count(1),count] + WholeStageCodegen + HashAggregate [count,count(1)] [count,count(1),count(1)] InputAdapter Exchange #4 - WholeStageCodegen (1) - HashAggregate [count,count] + WholeStageCodegen + HashAggregate [count,count] [count,count] Project Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity] + Scan parquet default.store_sales [ss_quantity] [ss_quantity] Subquery #5 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + WholeStageCodegen + HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] InputAdapter Exchange #5 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] + WholeStageCodegen + HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] Project [ss_ext_discount_amt] Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] Subquery #6 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + WholeStageCodegen + HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] InputAdapter Exchange #6 - WholeStageCodegen (1) - HashAggregate [ss_net_paid] [sum,count,sum,count] + WholeStageCodegen + HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] Project [ss_net_paid] Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_net_paid] + Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] Subquery #7 - WholeStageCodegen (2) - HashAggregate [count] [count(1),count(1),count] + WholeStageCodegen + HashAggregate [count,count(1)] [count,count(1),count(1)] InputAdapter Exchange #7 - WholeStageCodegen (1) - HashAggregate [count,count] + WholeStageCodegen + HashAggregate [count,count] [count,count] Project Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity] + Scan parquet default.store_sales [ss_quantity] [ss_quantity] Subquery #8 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + WholeStageCodegen + HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] InputAdapter Exchange #8 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] + WholeStageCodegen + HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] Project [ss_ext_discount_amt] Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] Subquery #9 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + WholeStageCodegen + HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] InputAdapter Exchange #9 - WholeStageCodegen (1) - HashAggregate [ss_net_paid] [sum,count,sum,count] + WholeStageCodegen + HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] Project [ss_net_paid] Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_net_paid] + Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] Subquery #10 - WholeStageCodegen (2) - HashAggregate [count] [count(1),count(1),count] + WholeStageCodegen + HashAggregate [count,count(1)] [count,count(1),count(1)] InputAdapter Exchange #10 - WholeStageCodegen (1) - HashAggregate [count,count] + WholeStageCodegen + HashAggregate [count,count] [count,count] Project Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity] + Scan parquet default.store_sales [ss_quantity] [ss_quantity] Subquery #11 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + WholeStageCodegen + HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] InputAdapter Exchange #11 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] + WholeStageCodegen + HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] Project [ss_ext_discount_amt] Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] Subquery #12 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + WholeStageCodegen + HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] InputAdapter Exchange #12 - WholeStageCodegen (1) - HashAggregate [ss_net_paid] [sum,count,sum,count] + WholeStageCodegen + HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] Project [ss_net_paid] Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_net_paid] + Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] Subquery #13 - WholeStageCodegen (2) - HashAggregate [count] [count(1),count(1),count] + WholeStageCodegen + HashAggregate [count,count(1)] [count,count(1),count(1)] InputAdapter Exchange #13 - WholeStageCodegen (1) - HashAggregate [count,count] + WholeStageCodegen + HashAggregate [count,count] [count,count] Project Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity] + Scan parquet default.store_sales [ss_quantity] [ss_quantity] Subquery #14 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] + WholeStageCodegen + HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] InputAdapter Exchange #14 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] + WholeStageCodegen + HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] Project [ss_ext_discount_amt] Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] + Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] Subquery #15 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] + WholeStageCodegen + HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] InputAdapter Exchange #15 - WholeStageCodegen (1) - HashAggregate [ss_net_paid] [sum,count,sum,count] + WholeStageCodegen + HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] Project [ss_net_paid] Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_net_paid] + Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] Filter [r_reason_sk] - ColumnarToRow - InputAdapter - Scan parquet default.reason [r_reason_sk] + Scan parquet default.reason [r_reason_sk] [r_reason_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/explain.txt index 550bf89ce..0d0c65f33 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/explain.txt @@ -1,280 +1,47 @@ == Physical Plan == -TakeOrderedAndProject (50) -+- * Project (49) - +- BroadcastNestedLoopJoin Inner BuildRight (48) - :- * HashAggregate (27) - : +- Exchange (26) - : +- * HashAggregate (25) - : +- * Project (24) - : +- * BroadcastHashJoin Inner BuildRight (23) - : :- * Project (17) - : : +- * BroadcastHashJoin Inner BuildRight (16) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.web_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.household_demographics (4) - : : +- BroadcastExchange (15) - : : +- * Project (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.time_dim (11) - : +- BroadcastExchange (22) - : +- * Project (21) - : +- * Filter (20) - : +- * ColumnarToRow (19) - : +- Scan parquet default.web_page (18) - +- BroadcastExchange (47) - +- * HashAggregate (46) - +- Exchange (45) - +- * HashAggregate (44) - +- * Project (43) - +- * BroadcastHashJoin Inner BuildRight (42) - :- * Project (40) - : +- * BroadcastHashJoin Inner BuildRight (39) - : :- * Project (33) - : : +- * BroadcastHashJoin Inner BuildRight (32) - : : :- * Filter (30) - : : : +- * ColumnarToRow (29) - : : : +- Scan parquet default.web_sales (28) - : : +- ReusedExchange (31) - : +- BroadcastExchange (38) - : +- * Project (37) - : +- * Filter (36) - : +- * ColumnarToRow (35) - : +- Scan parquet default.time_dim (34) - +- ReusedExchange (41) - - -(1) Scan parquet default.web_sales -Output [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] - -(3) Filter [codegen id : 4] -Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] -Condition : ((isnotnull(ws_ship_hdemo_sk#2) AND isnotnull(ws_sold_time_sk#1)) AND isnotnull(ws_web_page_sk#3)) - -(4) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#4, hd_dep_count#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,6), IsNotNull(hd_demo_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [hd_demo_sk#4, hd_dep_count#5] - -(6) Filter [codegen id : 1] -Input [2]: [hd_demo_sk#4, hd_dep_count#5] -Condition : ((isnotnull(hd_dep_count#5) AND (hd_dep_count#5 = 6)) AND isnotnull(hd_demo_sk#4)) - -(7) Project [codegen id : 1] -Output [1]: [hd_demo_sk#4] -Input [2]: [hd_demo_sk#4, hd_dep_count#5] - -(8) BroadcastExchange -Input [1]: [hd_demo_sk#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] - -(9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ws_ship_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#4] -Join condition: None - -(10) Project [codegen id : 4] -Output [2]: [ws_sold_time_sk#1, ws_web_page_sk#3] -Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, hd_demo_sk#4] - -(11) Scan parquet default.time_dim -Output [2]: [t_time_sk#7, t_hour#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/time_dim] -PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,8), LessThanOrEqual(t_hour,9), IsNotNull(t_time_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [2]: [t_time_sk#7, t_hour#8] - -(13) Filter [codegen id : 2] -Input [2]: [t_time_sk#7, t_hour#8] -Condition : (((isnotnull(t_hour#8) AND (t_hour#8 >= 8)) AND (t_hour#8 <= 9)) AND isnotnull(t_time_sk#7)) - -(14) Project [codegen id : 2] -Output [1]: [t_time_sk#7] -Input [2]: [t_time_sk#7, t_hour#8] - -(15) BroadcastExchange -Input [1]: [t_time_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] - -(16) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ws_sold_time_sk#1] -Right keys [1]: [t_time_sk#7] -Join condition: None - -(17) Project [codegen id : 4] -Output [1]: [ws_web_page_sk#3] -Input [3]: [ws_sold_time_sk#1, ws_web_page_sk#3, t_time_sk#7] - -(18) Scan parquet default.web_page -Output [2]: [wp_web_page_sk#10, wp_char_count#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_page] -PushedFilters: [IsNotNull(wp_char_count), GreaterThanOrEqual(wp_char_count,5000), LessThanOrEqual(wp_char_count,5200), IsNotNull(wp_web_page_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 3] -Input [2]: [wp_web_page_sk#10, wp_char_count#11] - -(20) Filter [codegen id : 3] -Input [2]: [wp_web_page_sk#10, wp_char_count#11] -Condition : (((isnotnull(wp_char_count#11) AND (wp_char_count#11 >= 5000)) AND (wp_char_count#11 <= 5200)) AND isnotnull(wp_web_page_sk#10)) - -(21) Project [codegen id : 3] -Output [1]: [wp_web_page_sk#10] -Input [2]: [wp_web_page_sk#10, wp_char_count#11] - -(22) BroadcastExchange -Input [1]: [wp_web_page_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(23) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ws_web_page_sk#3] -Right keys [1]: [wp_web_page_sk#10] -Join condition: None - -(24) Project [codegen id : 4] -Output: [] -Input [2]: [ws_web_page_sk#3, wp_web_page_sk#10] - -(25) HashAggregate [codegen id : 4] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#13] -Results [1]: [count#14] - -(26) Exchange -Input [1]: [count#14] -Arguments: SinglePartition, true, [id=#15] - -(27) HashAggregate [codegen id : 5] -Input [1]: [count#14] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#16] -Results [1]: [count(1)#16 AS amc#17] - -(28) Scan parquet default.web_sales -Output [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)] -ReadSchema: struct - -(29) ColumnarToRow [codegen id : 9] -Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] - -(30) Filter [codegen id : 9] -Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] -Condition : ((isnotnull(ws_ship_hdemo_sk#2) AND isnotnull(ws_sold_time_sk#1)) AND isnotnull(ws_web_page_sk#3)) - -(31) ReusedExchange [Reuses operator id: 8] -Output [1]: [hd_demo_sk#4] - -(32) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_ship_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#4] -Join condition: None - -(33) Project [codegen id : 9] -Output [2]: [ws_sold_time_sk#1, ws_web_page_sk#3] -Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, hd_demo_sk#4] - -(34) Scan parquet default.time_dim -Output [2]: [t_time_sk#7, t_hour#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/time_dim] -PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,19), LessThanOrEqual(t_hour,20), IsNotNull(t_time_sk)] -ReadSchema: struct - -(35) ColumnarToRow [codegen id : 7] -Input [2]: [t_time_sk#7, t_hour#8] - -(36) Filter [codegen id : 7] -Input [2]: [t_time_sk#7, t_hour#8] -Condition : (((isnotnull(t_hour#8) AND (t_hour#8 >= 19)) AND (t_hour#8 <= 20)) AND isnotnull(t_time_sk#7)) - -(37) Project [codegen id : 7] -Output [1]: [t_time_sk#7] -Input [2]: [t_time_sk#7, t_hour#8] - -(38) BroadcastExchange -Input [1]: [t_time_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] - -(39) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_sold_time_sk#1] -Right keys [1]: [t_time_sk#7] -Join condition: None - -(40) Project [codegen id : 9] -Output [1]: [ws_web_page_sk#3] -Input [3]: [ws_sold_time_sk#1, ws_web_page_sk#3, t_time_sk#7] - -(41) ReusedExchange [Reuses operator id: 22] -Output [1]: [wp_web_page_sk#10] - -(42) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_web_page_sk#3] -Right keys [1]: [wp_web_page_sk#10] -Join condition: None - -(43) Project [codegen id : 9] -Output: [] -Input [2]: [ws_web_page_sk#3, wp_web_page_sk#10] - -(44) HashAggregate [codegen id : 9] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#19] -Results [1]: [count#20] - -(45) Exchange -Input [1]: [count#20] -Arguments: SinglePartition, true, [id=#21] - -(46) HashAggregate [codegen id : 10] -Input [1]: [count#20] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#22] -Results [1]: [count(1)#22 AS pmc#23] - -(47) BroadcastExchange -Input [1]: [pmc#23] -Arguments: IdentityBroadcastMode, [id=#24] - -(48) BroadcastNestedLoopJoin -Join condition: None - -(49) Project [codegen id : 11] -Output [1]: [CheckOverflow((promote_precision(cast(amc#17 as decimal(15,4))) / promote_precision(cast(pmc#23 as decimal(15,4)))), DecimalType(35,20), true) AS am_pm_ratio#25] -Input [2]: [amc#17, pmc#23] - -(50) TakeOrderedAndProject -Input [1]: [am_pm_ratio#25] -Arguments: 100, [am_pm_ratio#25 ASC NULLS FIRST], [am_pm_ratio#25] - +TakeOrderedAndProject(limit=100, orderBy=[am_pm_ratio#1 ASC NULLS FIRST], output=[am_pm_ratio#1]) ++- *(11) Project [CheckOverflow((promote_precision(cast(amc#2 as decimal(15,4))) / promote_precision(cast(pmc#3 as decimal(15,4)))), DecimalType(35,20)) AS am_pm_ratio#1] + +- BroadcastNestedLoopJoin BuildRight, Inner + :- *(5) HashAggregate(keys=[], functions=[count(1)]) + : +- Exchange SinglePartition + : +- *(4) HashAggregate(keys=[], functions=[partial_count(1)]) + : +- *(4) Project + : +- *(4) BroadcastHashJoin [ws_web_page_sk#4], [wp_web_page_sk#5], Inner, BuildRight + : :- *(4) Project [ws_web_page_sk#4] + : : +- *(4) BroadcastHashJoin [ws_sold_time_sk#6], [t_time_sk#7], Inner, BuildRight + : : :- *(4) Project [ws_sold_time_sk#6, ws_web_page_sk#4] + : : : +- *(4) BroadcastHashJoin [ws_ship_hdemo_sk#8], [hd_demo_sk#9], Inner, BuildRight + : : : :- *(4) Project [ws_sold_time_sk#6, ws_ship_hdemo_sk#8, ws_web_page_sk#4] + : : : : +- *(4) Filter ((isnotnull(ws_ship_hdemo_sk#8) && isnotnull(ws_sold_time_sk#6)) && isnotnull(ws_web_page_sk#4)) + : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_time_sk#6,ws_ship_hdemo_sk#8,ws_web_page_sk#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(1) Project [hd_demo_sk#9] + : : : +- *(1) Filter ((isnotnull(hd_dep_count#10) && (hd_dep_count#10 = 6)) && isnotnull(hd_demo_sk#9)) + : : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#9,hd_dep_count#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,6), IsNotNull(hd_demo_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [t_time_sk#7] + : : +- *(2) Filter (((isnotnull(t_hour#11) && (t_hour#11 >= 8)) && (t_hour#11 <= 9)) && isnotnull(t_time_sk#7)) + : : +- *(2) FileScan parquet default.time_dim[t_time_sk#7,t_hour#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,8), LessThanOrEqual(t_hour,9), IsNotNull(t_time_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [wp_web_page_sk#5] + : +- *(3) Filter (((isnotnull(wp_char_count#12) && (wp_char_count#12 >= 5000)) && (wp_char_count#12 <= 5200)) && isnotnull(wp_web_page_sk#5)) + : +- *(3) FileScan parquet default.web_page[wp_web_page_sk#5,wp_char_count#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_page], PartitionFilters: [], PushedFilters: [IsNotNull(wp_char_count), GreaterThanOrEqual(wp_char_count,5000), LessThanOrEqual(wp_char_count,..., ReadSchema: struct + +- BroadcastExchange IdentityBroadcastMode + +- *(10) HashAggregate(keys=[], functions=[count(1)]) + +- Exchange SinglePartition + +- *(9) HashAggregate(keys=[], functions=[partial_count(1)]) + +- *(9) Project + +- *(9) BroadcastHashJoin [ws_web_page_sk#4], [wp_web_page_sk#5], Inner, BuildRight + :- *(9) Project [ws_web_page_sk#4] + : +- *(9) BroadcastHashJoin [ws_sold_time_sk#6], [t_time_sk#7], Inner, BuildRight + : :- *(9) Project [ws_sold_time_sk#6, ws_web_page_sk#4] + : : +- *(9) BroadcastHashJoin [ws_ship_hdemo_sk#8], [hd_demo_sk#9], Inner, BuildRight + : : :- *(9) Project [ws_sold_time_sk#6, ws_ship_hdemo_sk#8, ws_web_page_sk#4] + : : : +- *(9) Filter ((isnotnull(ws_ship_hdemo_sk#8) && isnotnull(ws_sold_time_sk#6)) && isnotnull(ws_web_page_sk#4)) + : : : +- *(9) FileScan parquet default.web_sales[ws_sold_time_sk#6,ws_ship_hdemo_sk#8,ws_web_page_sk#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct + : : +- ReusedExchange [hd_demo_sk#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [t_time_sk#7] + : +- *(7) Filter (((isnotnull(t_hour#11) && (t_hour#11 >= 19)) && (t_hour#11 <= 20)) && isnotnull(t_time_sk#7)) + : +- *(7) FileScan parquet default.time_dim[t_time_sk#7,t_hour#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,19), LessThanOrEqual(t_hour,20), IsNotNull(t_time_sk)], ReadSchema: struct + +- ReusedExchange [wp_web_page_sk#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/simplified.txt index 121d84d9d..1d0dc7ad2 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/simplified.txt @@ -1,74 +1,64 @@ TakeOrderedAndProject [am_pm_ratio] - WholeStageCodegen (11) + WholeStageCodegen Project [amc,pmc] InputAdapter BroadcastNestedLoopJoin - WholeStageCodegen (5) - HashAggregate [count] [count(1),amc,count] + WholeStageCodegen + HashAggregate [count,count(1)] [amc,count,count(1)] InputAdapter Exchange #1 - WholeStageCodegen (4) - HashAggregate [count,count] + WholeStageCodegen + HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] Project [ws_web_page_sk] - BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + BroadcastHashJoin [t_time_sk,ws_sold_time_sk] Project [ws_sold_time_sk,ws_web_page_sk] - BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] - Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + BroadcastHashJoin [hd_demo_sk,ws_ship_hdemo_sk] + Project [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + Scan parquet default.web_sales [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) + WholeStageCodegen Project [hd_demo_sk] - Filter [hd_dep_count,hd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] + Filter [hd_demo_sk,hd_dep_count] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] [hd_demo_sk,hd_dep_count] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) + WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour] + Scan parquet default.time_dim [t_hour,t_time_sk] [t_hour,t_time_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) + WholeStageCodegen Project [wp_web_page_sk] Filter [wp_char_count,wp_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_page [wp_web_page_sk,wp_char_count] + Scan parquet default.web_page [wp_char_count,wp_web_page_sk] [wp_char_count,wp_web_page_sk] BroadcastExchange #5 - WholeStageCodegen (10) - HashAggregate [count] [count(1),pmc,count] + WholeStageCodegen + HashAggregate [count,count(1)] [count,count(1),pmc] InputAdapter Exchange #6 - WholeStageCodegen (9) - HashAggregate [count,count] + WholeStageCodegen + HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] Project [ws_web_page_sk] - BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + BroadcastHashJoin [t_time_sk,ws_sold_time_sk] Project [ws_sold_time_sk,ws_web_page_sk] - BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] - Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + BroadcastHashJoin [hd_demo_sk,ws_ship_hdemo_sk] + Project [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + Scan parquet default.web_sales [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] InputAdapter - ReusedExchange [hd_demo_sk] #2 + ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 InputAdapter BroadcastExchange #7 - WholeStageCodegen (7) + WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour] + Scan parquet default.time_dim [t_hour,t_time_sk] [t_hour,t_time_sk] InputAdapter - ReusedExchange [wp_web_page_sk] #4 + ReusedExchange [wp_web_page_sk] [wp_web_page_sk] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/explain.txt index 1956baf78..fd5605d2e 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/explain.txt @@ -1,264 +1,45 @@ == Physical Plan == -* Sort (47) -+- Exchange (46) - +- * HashAggregate (45) - +- Exchange (44) - +- * HashAggregate (43) - +- * Project (42) - +- * BroadcastHashJoin Inner BuildRight (41) - :- * Project (35) - : +- * BroadcastHashJoin Inner BuildRight (34) - : :- * Project (29) - : : +- * BroadcastHashJoin Inner BuildRight (28) - : : :- * Project (22) - : : : +- * BroadcastHashJoin Inner BuildRight (21) - : : : :- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- * Project (9) - : : : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.call_center (1) - : : : : : +- BroadcastExchange (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.catalog_returns (4) - : : : : +- BroadcastExchange (14) - : : : : +- * Project (13) - : : : : +- * Filter (12) - : : : : +- * ColumnarToRow (11) - : : : : +- Scan parquet default.date_dim (10) - : : : +- BroadcastExchange (20) - : : : +- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.customer (17) - : : +- BroadcastExchange (27) - : : +- * Project (26) - : : +- * Filter (25) - : : +- * ColumnarToRow (24) - : : +- Scan parquet default.customer_address (23) - : +- BroadcastExchange (33) - : +- * Filter (32) - : +- * ColumnarToRow (31) - : +- Scan parquet default.customer_demographics (30) - +- BroadcastExchange (40) - +- * Project (39) - +- * Filter (38) - +- * ColumnarToRow (37) - +- Scan parquet default.household_demographics (36) - - -(1) Scan parquet default.call_center -Output [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/call_center] -PushedFilters: [IsNotNull(cc_call_center_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 7] -Input [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] - -(3) Filter [codegen id : 7] -Input [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] -Condition : isnotnull(cc_call_center_sk#1) - -(4) Scan parquet default.catalog_returns -Output [4]: [cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_call_center_sk#7, cr_net_loss#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_customer_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [4]: [cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_call_center_sk#7, cr_net_loss#8] - -(6) Filter [codegen id : 1] -Input [4]: [cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_call_center_sk#7, cr_net_loss#8] -Condition : ((isnotnull(cr_call_center_sk#7) AND isnotnull(cr_returned_date_sk#5)) AND isnotnull(cr_returning_customer_sk#6)) - -(7) BroadcastExchange -Input [4]: [cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_call_center_sk#7, cr_net_loss#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, false] as bigint)),false), [id=#9] - -(8) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [cc_call_center_sk#1] -Right keys [1]: [cr_call_center_sk#7] -Join condition: None - -(9) Project [codegen id : 7] -Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_net_loss#8] -Input [8]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_call_center_sk#7, cr_net_loss#8] - -(10) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#11, d_moy#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] - -(12) Filter [codegen id : 2] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] -Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#12)) AND (d_year#11 = 1998)) AND (d_moy#12 = 11)) AND isnotnull(d_date_sk#10)) - -(13) Project [codegen id : 2] -Output [1]: [d_date_sk#10] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] - -(14) BroadcastExchange -Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] - -(15) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [cr_returned_date_sk#5] -Right keys [1]: [d_date_sk#10] -Join condition: None - -(16) Project [codegen id : 7] -Output [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#6, cr_net_loss#8] -Input [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_net_loss#8, d_date_sk#10] - -(17) Scan parquet default.customer -Output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] -ReadSchema: struct - -(18) ColumnarToRow [codegen id : 3] -Input [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17] - -(19) Filter [codegen id : 3] -Input [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17] -Condition : (((isnotnull(c_customer_sk#14) AND isnotnull(c_current_addr_sk#17)) AND isnotnull(c_current_cdemo_sk#15)) AND isnotnull(c_current_hdemo_sk#16)) - -(20) BroadcastExchange -Input [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] - -(21) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [cr_returning_customer_sk#6] -Right keys [1]: [c_customer_sk#14] -Join condition: None - -(22) Project [codegen id : 7] -Output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17] -Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#6, cr_net_loss#8, c_customer_sk#14, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17] - -(23) Scan parquet default.customer_address -Output [2]: [ca_address_sk#19, ca_gmt_offset#20] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#19, ca_gmt_offset#20] - -(25) Filter [codegen id : 4] -Input [2]: [ca_address_sk#19, ca_gmt_offset#20] -Condition : ((isnotnull(ca_gmt_offset#20) AND (ca_gmt_offset#20 = -7.00)) AND isnotnull(ca_address_sk#19)) - -(26) Project [codegen id : 4] -Output [1]: [ca_address_sk#19] -Input [2]: [ca_address_sk#19, ca_gmt_offset#20] - -(27) BroadcastExchange -Input [1]: [ca_address_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] - -(28) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_addr_sk#17] -Right keys [1]: [ca_address_sk#19] -Join condition: None - -(29) Project [codegen id : 7] -Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, c_current_cdemo_sk#15, c_current_hdemo_sk#16] -Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17, ca_address_sk#19] - -(30) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,Advanced Degree))), IsNotNull(cd_demo_sk)] -ReadSchema: struct - -(31) ColumnarToRow [codegen id : 5] -Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] - -(32) Filter [codegen id : 5] -Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] -Condition : ((((cd_marital_status#23 = M) AND (cd_education_status#24 = Unknown)) OR ((cd_marital_status#23 = W) AND (cd_education_status#24 = Advanced Degree))) AND isnotnull(cd_demo_sk#22)) - -(33) BroadcastExchange -Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] - -(34) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_cdemo_sk#15] -Right keys [1]: [cd_demo_sk#22] -Join condition: None - -(35) Project [codegen id : 7] -Output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, c_current_hdemo_sk#16, cd_marital_status#23, cd_education_status#24] -Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, c_current_cdemo_sk#15, c_current_hdemo_sk#16, cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] - -(36) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#26, hd_buy_potential#27] -Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)] -ReadSchema: struct - -(37) ColumnarToRow [codegen id : 6] -Input [2]: [hd_demo_sk#26, hd_buy_potential#27] - -(38) Filter [codegen id : 6] -Input [2]: [hd_demo_sk#26, hd_buy_potential#27] -Condition : ((isnotnull(hd_buy_potential#27) AND StartsWith(hd_buy_potential#27, Unknown)) AND isnotnull(hd_demo_sk#26)) - -(39) Project [codegen id : 6] -Output [1]: [hd_demo_sk#26] -Input [2]: [hd_demo_sk#26, hd_buy_potential#27] - -(40) BroadcastExchange -Input [1]: [hd_demo_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] - -(41) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_hdemo_sk#16] -Right keys [1]: [hd_demo_sk#26] -Join condition: None - -(42) Project [codegen id : 7] -Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, cd_marital_status#23, cd_education_status#24] -Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, c_current_hdemo_sk#16, cd_marital_status#23, cd_education_status#24, hd_demo_sk#26] - -(43) HashAggregate [codegen id : 7] -Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, cd_marital_status#23, cd_education_status#24] -Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#23, cd_education_status#24] -Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#8))] -Aggregate Attributes [1]: [sum#29] -Results [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#23, cd_education_status#24, sum#30] - -(44) Exchange -Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#23, cd_education_status#24, sum#30] -Arguments: hashpartitioning(cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#23, cd_education_status#24, 5), true, [id=#31] - -(45) HashAggregate [codegen id : 8] -Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#23, cd_education_status#24, sum#30] -Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#23, cd_education_status#24] -Functions [1]: [sum(UnscaledValue(cr_net_loss#8))] -Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#8))#32] -Results [4]: [cc_call_center_id#2 AS Call_Center#33, cc_name#3 AS Call_Center_Name#34, cc_manager#4 AS Manager#35, MakeDecimal(sum(UnscaledValue(cr_net_loss#8))#32,17,2) AS Returns_Loss#36] - -(46) Exchange -Input [4]: [Call_Center#33, Call_Center_Name#34, Manager#35, Returns_Loss#36] -Arguments: rangepartitioning(Returns_Loss#36 DESC NULLS LAST, 5), true, [id=#37] - -(47) Sort [codegen id : 9] -Input [4]: [Call_Center#33, Call_Center_Name#34, Manager#35, Returns_Loss#36] -Arguments: [Returns_Loss#36 DESC NULLS LAST], true, 0 - +*(9) Sort [Returns_Loss#1 DESC NULLS LAST], true, 0 ++- Exchange rangepartitioning(Returns_Loss#1 DESC NULLS LAST, 5) + +- *(8) HashAggregate(keys=[cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6], functions=[sum(UnscaledValue(cr_net_loss#7))]) + +- Exchange hashpartitioning(cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6, 5) + +- *(7) HashAggregate(keys=[cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6], functions=[partial_sum(UnscaledValue(cr_net_loss#7))]) + +- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#5, cd_education_status#6] + +- *(7) BroadcastHashJoin [c_current_hdemo_sk#8], [hd_demo_sk#9], Inner, BuildRight + :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#8, cd_marital_status#5, cd_education_status#6] + : +- *(7) BroadcastHashJoin [c_current_cdemo_sk#10], [cd_demo_sk#11], Inner, BuildRight + : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#10, c_current_hdemo_sk#8] + : : +- *(7) BroadcastHashJoin [c_current_addr_sk#12], [ca_address_sk#13], Inner, BuildRight + : : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#10, c_current_hdemo_sk#8, c_current_addr_sk#12] + : : : +- *(7) BroadcastHashJoin [cr_returning_customer_sk#14], [c_customer_sk#15], Inner, BuildRight + : : : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#14, cr_net_loss#7] + : : : : +- *(7) BroadcastHashJoin [cr_returned_date_sk#16], [d_date_sk#17], Inner, BuildRight + : : : : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returned_date_sk#16, cr_returning_customer_sk#14, cr_net_loss#7] + : : : : : +- *(7) BroadcastHashJoin [cc_call_center_sk#18], [cr_call_center_sk#19], Inner, BuildRight + : : : : : :- *(7) Project [cc_call_center_sk#18, cc_call_center_id#2, cc_name#3, cc_manager#4] + : : : : : : +- *(7) Filter isnotnull(cc_call_center_sk#18) + : : : : : : +- *(7) FileScan parquet default.call_center[cc_call_center_sk#18,cc_call_center_id#2,cc_name#3,cc_manager#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk)], ReadSchema: struct + : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) + : : : : : +- *(1) Project [cr_returned_date_sk#16, cr_returning_customer_sk#14, cr_call_center_sk#19, cr_net_loss#7] + : : : : : +- *(1) Filter ((isnotnull(cr_call_center_sk#19) && isnotnull(cr_returned_date_sk#16)) && isnotnull(cr_returning_customer_sk#14)) + : : : : : +- *(1) FileScan parquet default.catalog_returns[cr_returned_date_sk#16,cr_returning_customer_sk#14,cr_call_center_sk#19,cr_net_loss#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_customer_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(3) Project [c_customer_sk#15, c_current_cdemo_sk#10, c_current_hdemo_sk#8, c_current_addr_sk#12] + : : : +- *(3) Filter (((isnotnull(c_customer_sk#15) && isnotnull(c_current_addr_sk#12)) && isnotnull(c_current_cdemo_sk#10)) && isnotnull(c_current_hdemo_sk#8)) + : : : +- *(3) FileScan parquet default.customer[c_customer_sk#15,c_current_cdemo_sk#10,c_current_hdemo_sk#8,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(4) Project [ca_address_sk#13] + : : +- *(4) Filter ((isnotnull(ca_gmt_offset#22) && (ca_gmt_offset#22 = -7.00)) && isnotnull(ca_address_sk#13)) + : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#13,ca_gmt_offset#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(5) Project [cd_demo_sk#11, cd_marital_status#5, cd_education_status#6] + : +- *(5) Filter ((((cd_marital_status#5 = M) && (cd_education_status#6 = Unknown)) || ((cd_marital_status#5 = W) && (cd_education_status#6 = Advanced Degree))) && isnotnull(cd_demo_sk#11)) + : +- *(5) FileScan parquet default.customer_demographics[cd_demo_sk#11,cd_marital_status#5,cd_education_status#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(6) Project [hd_demo_sk#9] + +- *(6) Filter ((isnotnull(hd_buy_potential#23) && StartsWith(hd_buy_potential#23, Unknown)) && isnotnull(hd_demo_sk#9)) + +- *(6) FileScan parquet default.household_demographics[hd_demo_sk#9,hd_buy_potential#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/simplified.txt index 58ebe15d1..1030b9095 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/simplified.txt @@ -1,71 +1,61 @@ -WholeStageCodegen (9) +WholeStageCodegen Sort [Returns_Loss] InputAdapter Exchange [Returns_Loss] #1 - WholeStageCodegen (8) - HashAggregate [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status,sum] [sum(UnscaledValue(cr_net_loss)),Call_Center,Call_Center_Name,Manager,Returns_Loss,sum] + WholeStageCodegen + HashAggregate [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,sum,sum(UnscaledValue(cr_net_loss))] [Call_Center,Call_Center_Name,Manager,Returns_Loss,sum,sum(UnscaledValue(cr_net_loss))] InputAdapter - Exchange [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status] #2 - WholeStageCodegen (7) - HashAggregate [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status,cr_net_loss] [sum,sum] - Project [cc_call_center_id,cc_name,cc_manager,cr_net_loss,cd_marital_status,cd_education_status] + Exchange [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status] #2 + WholeStageCodegen + HashAggregate [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,cr_net_loss,sum,sum] [sum,sum] + Project [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,cr_net_loss] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_hdemo_sk,cd_marital_status,cd_education_status] + Project [c_current_hdemo_sk,cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,cr_net_loss] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] - Project [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk] + Project [c_current_cdemo_sk,c_current_hdemo_sk,cc_call_center_id,cc_manager,cc_name,cr_net_loss] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] - BroadcastHashJoin [cr_returning_customer_sk,c_customer_sk] - Project [cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_net_loss] + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cc_call_center_id,cc_manager,cc_name,cr_net_loss] + BroadcastHashJoin [c_customer_sk,cr_returning_customer_sk] + Project [cc_call_center_id,cc_manager,cc_name,cr_net_loss,cr_returning_customer_sk] BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Project [cc_call_center_id,cc_name,cc_manager,cr_returned_date_sk,cr_returning_customer_sk,cr_net_loss] + Project [cc_call_center_id,cc_manager,cc_name,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] BroadcastHashJoin [cc_call_center_sk,cr_call_center_sk] - Filter [cc_call_center_sk] - ColumnarToRow - InputAdapter - Scan parquet default.call_center [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] + Project [cc_call_center_id,cc_call_center_sk,cc_manager,cc_name] + Filter [cc_call_center_sk] + Scan parquet default.call_center [cc_call_center_id,cc_call_center_sk,cc_manager,cc_name] [cc_call_center_id,cc_call_center_sk,cc_manager,cc_name] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Filter [cr_call_center_sk,cr_returned_date_sk,cr_returning_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] + WholeStageCodegen + Project [cr_call_center_sk,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] + Filter [cr_call_center_sk,cr_returned_date_sk,cr_returning_customer_sk] + Scan parquet default.catalog_returns [cr_call_center_sk,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] [cr_call_center_sk,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [d_date_sk,d_moy,d_year] + Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] InputAdapter BroadcastExchange #5 - WholeStageCodegen (3) - Filter [c_customer_sk,c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + WholeStageCodegen + Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] + Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] InputAdapter BroadcastExchange #6 - WholeStageCodegen (4) + WholeStageCodegen Project [ca_address_sk] - Filter [ca_gmt_offset,ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + Filter [ca_address_sk,ca_gmt_offset] + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] InputAdapter BroadcastExchange #7 - WholeStageCodegen (5) - Filter [cd_marital_status,cd_education_status,cd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + WholeStageCodegen + Project [cd_demo_sk,cd_education_status,cd_marital_status] + Filter [cd_demo_sk,cd_education_status,cd_marital_status] + Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] InputAdapter BroadcastExchange #8 - WholeStageCodegen (6) + WholeStageCodegen Project [hd_demo_sk] Filter [hd_buy_potential,hd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] + Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk] [hd_buy_potential,hd_demo_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/explain.txt index 8a441392f..c27934561 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/explain.txt @@ -1,196 +1,33 @@ == Physical Plan == -* Sort (34) -+- * HashAggregate (33) - +- Exchange (32) - +- * HashAggregate (31) - +- * Project (30) - +- * BroadcastHashJoin Inner BuildRight (29) - :- * Project (27) - : +- * BroadcastHashJoin Inner BuildRight (26) - : :- * Project (10) - : : +- * BroadcastHashJoin Inner BuildRight (9) - : : :- * Filter (3) - : : : +- * ColumnarToRow (2) - : : : +- Scan parquet default.web_sales (1) - : : +- BroadcastExchange (8) - : : +- * Project (7) - : : +- * Filter (6) - : : +- * ColumnarToRow (5) - : : +- Scan parquet default.item (4) - : +- BroadcastExchange (25) - : +- * Filter (24) - : +- * HashAggregate (23) - : +- Exchange (22) - : +- * HashAggregate (21) - : +- * Project (20) - : +- * BroadcastHashJoin Inner BuildRight (19) - : :- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.web_sales (11) - : +- BroadcastExchange (18) - : +- * Project (17) - : +- * Filter (16) - : +- * ColumnarToRow (15) - : +- Scan parquet default.date_dim (14) - +- ReusedExchange (28) - - -(1) Scan parquet default.web_sales -Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_ext_discount_amt), IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 6] -Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3] - -(3) Filter [codegen id : 6] -Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3] -Condition : ((isnotnull(ws_item_sk#2) AND isnotnull(ws_ext_discount_amt#3)) AND isnotnull(ws_sold_date_sk#1)) - -(4) Scan parquet default.item -Output [2]: [i_item_sk#4, i_manufact_id#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,350), IsNotNull(i_item_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [i_item_sk#4, i_manufact_id#5] - -(6) Filter [codegen id : 1] -Input [2]: [i_item_sk#4, i_manufact_id#5] -Condition : ((isnotnull(i_manufact_id#5) AND (i_manufact_id#5 = 350)) AND isnotnull(i_item_sk#4)) - -(7) Project [codegen id : 1] -Output [1]: [i_item_sk#4] -Input [2]: [i_item_sk#4, i_manufact_id#5] - -(8) BroadcastExchange -Input [1]: [i_item_sk#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] - -(9) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_item_sk#2] -Right keys [1]: [i_item_sk#4] -Join condition: None - -(10) Project [codegen id : 6] -Output [3]: [ws_sold_date_sk#1, ws_ext_discount_amt#3, i_item_sk#4] -Input [4]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3, i_item_sk#4] - -(11) Scan parquet default.web_sales -Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 3] -Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3] - -(13) Filter [codegen id : 3] -Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3] -Condition : (isnotnull(ws_sold_date_sk#1) AND isnotnull(ws_item_sk#2)) - -(14) Scan parquet default.date_dim -Output [2]: [d_date_sk#7, d_date#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] -ReadSchema: struct - -(15) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#7, d_date#8] - -(16) Filter [codegen id : 2] -Input [2]: [d_date_sk#7, d_date#8] -Condition : (((isnotnull(d_date#8) AND (d_date#8 >= 10983)) AND (d_date#8 <= 11073)) AND isnotnull(d_date_sk#7)) - -(17) Project [codegen id : 2] -Output [1]: [d_date_sk#7] -Input [2]: [d_date_sk#7, d_date#8] - -(18) BroadcastExchange -Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] - -(19) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ws_sold_date_sk#1] -Right keys [1]: [d_date_sk#7] -Join condition: None - -(20) Project [codegen id : 3] -Output [2]: [ws_item_sk#2, ws_ext_discount_amt#3] -Input [4]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3, d_date_sk#7] - -(21) HashAggregate [codegen id : 3] -Input [2]: [ws_item_sk#2, ws_ext_discount_amt#3] -Keys [1]: [ws_item_sk#2] -Functions [1]: [partial_avg(UnscaledValue(ws_ext_discount_amt#3))] -Aggregate Attributes [2]: [sum#10, count#11] -Results [3]: [ws_item_sk#2, sum#12, count#13] - -(22) Exchange -Input [3]: [ws_item_sk#2, sum#12, count#13] -Arguments: hashpartitioning(ws_item_sk#2, 5), true, [id=#14] - -(23) HashAggregate [codegen id : 4] -Input [3]: [ws_item_sk#2, sum#12, count#13] -Keys [1]: [ws_item_sk#2] -Functions [1]: [avg(UnscaledValue(ws_ext_discount_amt#3))] -Aggregate Attributes [1]: [avg(UnscaledValue(ws_ext_discount_amt#3))#15] -Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(ws_ext_discount_amt#3))#15 / 100.0) as decimal(11,6)))), DecimalType(14,7), true) AS (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16, ws_item_sk#2 AS ws_item_sk#2#17] - -(24) Filter [codegen id : 4] -Input [2]: [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16, ws_item_sk#2#17] -Condition : isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16) - -(25) BroadcastExchange -Input [2]: [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16, ws_item_sk#2#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#18] - -(26) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [i_item_sk#4] -Right keys [1]: [ws_item_sk#2#17] -Join condition: (cast(ws_ext_discount_amt#3 as decimal(14,7)) > (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16) - -(27) Project [codegen id : 6] -Output [2]: [ws_sold_date_sk#1, ws_ext_discount_amt#3] -Input [5]: [ws_sold_date_sk#1, ws_ext_discount_amt#3, i_item_sk#4, (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16, ws_item_sk#2#17] - -(28) ReusedExchange [Reuses operator id: 18] -Output [1]: [d_date_sk#7] - -(29) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#1] -Right keys [1]: [d_date_sk#7] -Join condition: None - -(30) Project [codegen id : 6] -Output [1]: [ws_ext_discount_amt#3] -Input [3]: [ws_sold_date_sk#1, ws_ext_discount_amt#3, d_date_sk#7] - -(31) HashAggregate [codegen id : 6] -Input [1]: [ws_ext_discount_amt#3] -Keys: [] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_discount_amt#3))] -Aggregate Attributes [1]: [sum#19] -Results [1]: [sum#20] - -(32) Exchange -Input [1]: [sum#20] -Arguments: SinglePartition, true, [id=#21] - -(33) HashAggregate [codegen id : 7] -Input [1]: [sum#20] -Keys: [] -Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))#22] -Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#3))#22,17,2) AS Excess Discount Amount #23] - -(34) Sort [codegen id : 7] -Input [1]: [Excess Discount Amount #23] -Arguments: [Excess Discount Amount #23 ASC NULLS FIRST], true, 0 - +TakeOrderedAndProject(limit=100, orderBy=[Excess Discount Amount #1 ASC NULLS FIRST], output=[Excess Discount Amount #1]) ++- *(7) HashAggregate(keys=[], functions=[sum(UnscaledValue(ws_ext_discount_amt#2))]) + +- Exchange SinglePartition + +- *(6) HashAggregate(keys=[], functions=[partial_sum(UnscaledValue(ws_ext_discount_amt#2))]) + +- *(6) Project [ws_ext_discount_amt#2] + +- *(6) BroadcastHashJoin [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + :- *(6) Project [ws_sold_date_sk#3, ws_ext_discount_amt#2] + : +- *(6) BroadcastHashJoin [i_item_sk#5], [ws_item_sk#6#7], Inner, BuildRight, (cast(ws_ext_discount_amt#2 as decimal(14,7)) > (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#8) + : :- *(6) Project [ws_sold_date_sk#3, ws_ext_discount_amt#2, i_item_sk#5] + : : +- *(6) BroadcastHashJoin [ws_item_sk#6], [i_item_sk#5], Inner, BuildRight + : : :- *(6) Project [ws_sold_date_sk#3, ws_item_sk#6, ws_ext_discount_amt#2] + : : : +- *(6) Filter ((isnotnull(ws_item_sk#6) && isnotnull(ws_ext_discount_amt#2)) && isnotnull(ws_sold_date_sk#3)) + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#3,ws_item_sk#6,ws_ext_discount_amt#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_ext_discount_amt), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [i_item_sk#5] + : : +- *(1) Filter ((isnotnull(i_manufact_id#9) && (i_manufact_id#9 = 350)) && isnotnull(i_item_sk#5)) + : : +- *(1) FileScan parquet default.item[i_item_sk#5,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,350), IsNotNull(i_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : +- *(4) Filter isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#8) + : +- *(4) HashAggregate(keys=[ws_item_sk#6], functions=[avg(UnscaledValue(ws_ext_discount_amt#2))]) + : +- Exchange hashpartitioning(ws_item_sk#6, 5) + : +- *(3) HashAggregate(keys=[ws_item_sk#6], functions=[partial_avg(UnscaledValue(ws_ext_discount_amt#2))]) + : +- *(3) Project [ws_item_sk#6, ws_ext_discount_amt#2] + : +- *(3) BroadcastHashJoin [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight + : :- *(3) Project [ws_sold_date_sk#3, ws_item_sk#6, ws_ext_discount_amt#2] + : : +- *(3) Filter (isnotnull(ws_sold_date_sk#3) && isnotnull(ws_item_sk#6)) + : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#3,ws_item_sk#6,ws_ext_discount_amt#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [d_date_sk#4] + : +- *(2) Filter (((isnotnull(d_date#10) && (cast(d_date#10 as string) >= 2000-01-27)) && (d_date#10 <= 11073)) && isnotnull(d_date_sk#4)) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#4,d_date#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/simplified.txt index 1f24a7c96..cab3234b8 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/simplified.txt @@ -1,50 +1,44 @@ -WholeStageCodegen (7) - Sort [Excess Discount Amount ] - HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] +TakeOrderedAndProject [Excess Discount Amount ] + WholeStageCodegen + HashAggregate [sum,sum(UnscaledValue(ws_ext_discount_amt))] [Excess Discount Amount ,sum,sum(UnscaledValue(ws_ext_discount_amt))] InputAdapter Exchange #1 - WholeStageCodegen (6) - HashAggregate [ws_ext_discount_amt] [sum,sum] + WholeStageCodegen + HashAggregate [sum,sum,ws_ext_discount_amt] [sum,sum] Project [ws_ext_discount_amt] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_ext_discount_amt] - BroadcastHashJoin [i_item_sk,ws_item_sk,ws_ext_discount_amt,(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))] - Project [ws_sold_date_sk,ws_ext_discount_amt,i_item_sk] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Filter [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_ext_discount_amt,ws_sold_date_sk] + BroadcastHashJoin [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),i_item_sk,ws_ext_discount_amt,ws_item_sk] + Project [i_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + BroadcastHashJoin [i_item_sk,ws_item_sk] + Project [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] + Filter [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) + WholeStageCodegen Project [i_item_sk] - Filter [i_manufact_id,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_manufact_id] + Filter [i_item_sk,i_manufact_id] + Scan parquet default.item [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] InputAdapter BroadcastExchange #3 - WholeStageCodegen (4) + WholeStageCodegen Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))] - HashAggregate [ws_item_sk,sum,count] [avg(UnscaledValue(ws_ext_discount_amt)),(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),ws_item_sk,sum,count] + HashAggregate [avg(UnscaledValue(ws_ext_discount_amt)),count,sum,ws_item_sk] [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),avg(UnscaledValue(ws_ext_discount_amt)),count,sum,ws_item_sk] InputAdapter Exchange [ws_item_sk] #4 - WholeStageCodegen (3) - HashAggregate [ws_item_sk,ws_ext_discount_amt] [sum,count,sum,count] - Project [ws_item_sk,ws_ext_discount_amt] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] + WholeStageCodegen + HashAggregate [count,count,sum,sum,ws_ext_discount_amt,ws_item_sk] [count,count,sum,sum] + Project [ws_ext_discount_amt,ws_item_sk] + BroadcastHashJoin [d_date_sk,ws_sold_date_sk] + Project [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + Scan parquet default.web_sales [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter - ReusedExchange [d_date_sk] #5 + ReusedExchange [d_date_sk] [d_date_sk] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/explain.txt index 620aa6727..5cbc9a6c1 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/explain.txt @@ -1,111 +1,18 @@ == Physical Plan == -TakeOrderedAndProject (19) -+- * HashAggregate (18) - +- Exchange (17) - +- * HashAggregate (16) - +- * Project (15) - +- * BroadcastHashJoin Inner BuildRight (14) - :- * Project (8) - : +- * BroadcastHashJoin Inner BuildRight (7) - : :- * ColumnarToRow (2) - : : +- Scan parquet default.store_sales (1) - : +- BroadcastExchange (6) - : +- * Filter (5) - : +- * ColumnarToRow (4) - : +- Scan parquet default.store_returns (3) - +- BroadcastExchange (13) - +- * Project (12) - +- * Filter (11) - +- * ColumnarToRow (10) - +- Scan parquet default.reason (9) - - -(1) Scan parquet default.store_sales -Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] - -(3) Scan parquet default.store_returns -Output [4]: [sr_item_sk#6, sr_reason_sk#7, sr_ticket_number#8, sr_return_quantity#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_reason_sk)] -ReadSchema: struct - -(4) ColumnarToRow [codegen id : 1] -Input [4]: [sr_item_sk#6, sr_reason_sk#7, sr_ticket_number#8, sr_return_quantity#9] - -(5) Filter [codegen id : 1] -Input [4]: [sr_item_sk#6, sr_reason_sk#7, sr_ticket_number#8, sr_return_quantity#9] -Condition : ((isnotnull(sr_item_sk#6) AND isnotnull(sr_ticket_number#8)) AND isnotnull(sr_reason_sk#7)) - -(6) BroadcastExchange -Input [4]: [sr_item_sk#6, sr_reason_sk#7, sr_ticket_number#8, sr_return_quantity#9] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[2, bigint, false]),false), [id=#10] - -(7) BroadcastHashJoin [codegen id : 3] -Left keys [2]: [cast(ss_item_sk#1 as bigint), cast(ss_ticket_number#3 as bigint)] -Right keys [2]: [sr_item_sk#6, sr_ticket_number#8] -Join condition: None - -(8) Project [codegen id : 3] -Output [5]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#7, sr_return_quantity#9] -Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, sr_item_sk#6, sr_reason_sk#7, sr_ticket_number#8, sr_return_quantity#9] - -(9) Scan parquet default.reason -Output [2]: [r_reason_sk#11, r_reason_desc#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/reason] -PushedFilters: [IsNotNull(r_reason_desc), EqualTo(r_reason_desc,reason 28), IsNotNull(r_reason_sk)] -ReadSchema: struct - -(10) ColumnarToRow [codegen id : 2] -Input [2]: [r_reason_sk#11, r_reason_desc#12] - -(11) Filter [codegen id : 2] -Input [2]: [r_reason_sk#11, r_reason_desc#12] -Condition : ((isnotnull(r_reason_desc#12) AND (r_reason_desc#12 = reason 28)) AND isnotnull(r_reason_sk#11)) - -(12) Project [codegen id : 2] -Output [1]: [r_reason_sk#11] -Input [2]: [r_reason_sk#11, r_reason_desc#12] - -(13) BroadcastExchange -Input [1]: [r_reason_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] - -(14) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [sr_reason_sk#7] -Right keys [1]: [cast(r_reason_sk#11 as bigint)] -Join condition: None - -(15) Project [codegen id : 3] -Output [2]: [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#9) THEN CheckOverflow((promote_precision(cast(cast((ss_quantity#4 - sr_return_quantity#9) as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#5 as decimal(12,2)))), DecimalType(18,2), true) ELSE CheckOverflow((promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#5 as decimal(12,2)))), DecimalType(18,2), true) END AS act_sales#14] -Input [6]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#7, sr_return_quantity#9, r_reason_sk#11] - -(16) HashAggregate [codegen id : 3] -Input [2]: [ss_customer_sk#2, act_sales#14] -Keys [1]: [ss_customer_sk#2] -Functions [1]: [partial_sum(act_sales#14)] -Aggregate Attributes [2]: [sum#15, isEmpty#16] -Results [3]: [ss_customer_sk#2, sum#17, isEmpty#18] - -(17) Exchange -Input [3]: [ss_customer_sk#2, sum#17, isEmpty#18] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#19] - -(18) HashAggregate [codegen id : 4] -Input [3]: [ss_customer_sk#2, sum#17, isEmpty#18] -Keys [1]: [ss_customer_sk#2] -Functions [1]: [sum(act_sales#14)] -Aggregate Attributes [1]: [sum(act_sales#14)#20] -Results [2]: [ss_customer_sk#2, sum(act_sales#14)#20 AS sumsales#21] - -(19) TakeOrderedAndProject -Input [2]: [ss_customer_sk#2, sumsales#21] -Arguments: 100, [sumsales#21 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], [ss_customer_sk#2, sumsales#21] - +TakeOrderedAndProject(limit=100, orderBy=[sumsales#1 ASC NULLS FIRST,ss_customer_sk#2 ASC NULLS FIRST], output=[ss_customer_sk#2,sumsales#1]) ++- *(4) HashAggregate(keys=[ss_customer_sk#2], functions=[sum(act_sales#3)]) + +- Exchange hashpartitioning(ss_customer_sk#2, 5) + +- *(3) HashAggregate(keys=[ss_customer_sk#2], functions=[partial_sum(act_sales#3)]) + +- *(3) Project [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#4) THEN CheckOverflow((promote_precision(cast(cast((ss_quantity#5 - sr_return_quantity#4) as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#6 as decimal(12,2)))), DecimalType(18,2)) ELSE CheckOverflow((promote_precision(cast(cast(ss_quantity#5 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#6 as decimal(12,2)))), DecimalType(18,2)) END AS act_sales#3] + +- *(3) BroadcastHashJoin [sr_reason_sk#7], [cast(r_reason_sk#8 as bigint)], Inner, BuildRight + :- *(3) Project [ss_customer_sk#2, ss_quantity#5, ss_sales_price#6, sr_reason_sk#7, sr_return_quantity#4] + : +- *(3) BroadcastHashJoin [cast(ss_item_sk#9 as bigint), cast(ss_ticket_number#10 as bigint)], [sr_item_sk#11, sr_ticket_number#12], Inner, BuildRight + : :- *(3) FileScan parquet default.store_sales[ss_item_sk#9,ss_customer_sk#2,ss_ticket_number#10,ss_quantity#5,ss_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(2) Project [r_reason_sk#8] + +- *(2) Filter ((isnotnull(r_reason_desc#13) && (r_reason_desc#13 = reason 28)) && isnotnull(r_reason_sk#8)) + +- *(2) FileScan parquet default.reason[r_reason_sk#8,r_reason_desc#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_desc), EqualTo(r_reason_desc,reason 28), IsNotNull(r_reason_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/simplified.txt index 81de31a44..6b2505ff0 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/simplified.txt @@ -1,29 +1,24 @@ -TakeOrderedAndProject [sumsales,ss_customer_sk] - WholeStageCodegen (4) - HashAggregate [ss_customer_sk,sum,isEmpty] [sum(act_sales),sumsales,sum,isEmpty] +TakeOrderedAndProject [ss_customer_sk,sumsales] + WholeStageCodegen + HashAggregate [ss_customer_sk,sum,sum(act_sales)] [sum,sum(act_sales),sumsales] InputAdapter Exchange [ss_customer_sk] #1 - WholeStageCodegen (3) - HashAggregate [ss_customer_sk,act_sales] [sum,isEmpty,sum,isEmpty] - Project [ss_customer_sk,sr_return_quantity,ss_quantity,ss_sales_price] - BroadcastHashJoin [sr_reason_sk,r_reason_sk] - Project [ss_customer_sk,ss_quantity,ss_sales_price,sr_reason_sk,sr_return_quantity] - BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_sales_price] + WholeStageCodegen + HashAggregate [act_sales,ss_customer_sk,sum,sum] [sum,sum] + Project [sr_return_quantity,ss_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [r_reason_sk,sr_reason_sk] + Project [sr_reason_sk,sr_return_quantity,ss_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_ticket_number] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [sr_item_sk,sr_ticket_number,sr_reason_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] + WholeStageCodegen + Project [sr_item_sk,sr_reason_sk,sr_return_quantity,sr_ticket_number] + Filter [sr_item_sk,sr_reason_sk,sr_ticket_number] + Scan parquet default.store_returns [sr_item_sk,sr_reason_sk,sr_return_quantity,sr_ticket_number] [sr_item_sk,sr_reason_sk,sr_return_quantity,sr_ticket_number] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) + WholeStageCodegen Project [r_reason_sk] Filter [r_reason_desc,r_reason_sk] - ColumnarToRow - InputAdapter - Scan parquet default.reason [r_reason_sk,r_reason_desc] + Scan parquet default.reason [r_reason_desc,r_reason_sk] [r_reason_desc,r_reason_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/explain.txt index 2abbe4f9b..2e6006488 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/explain.txt @@ -1,235 +1,37 @@ == Physical Plan == -* Sort (41) -+- * HashAggregate (40) - +- Exchange (39) - +- * HashAggregate (38) - +- * HashAggregate (37) - +- Exchange (36) - +- * HashAggregate (35) - +- * Project (34) - +- * BroadcastHashJoin Inner BuildRight (33) - :- * Project (27) - : +- * BroadcastHashJoin Inner BuildRight (26) - : :- * Project (20) - : : +- * BroadcastHashJoin Inner BuildRight (19) - : : :- * BroadcastHashJoin LeftAnti BuildRight (13) - : : : :- * Project (9) - : : : : +- * BroadcastHashJoin LeftSemi BuildRight (8) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.web_sales (1) - : : : : +- BroadcastExchange (7) - : : : : +- * Project (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.web_sales (4) - : : : +- BroadcastExchange (12) - : : : +- * ColumnarToRow (11) - : : : +- Scan parquet default.web_returns (10) - : : +- BroadcastExchange (18) - : : +- * Project (17) - : : +- * Filter (16) - : : +- * ColumnarToRow (15) - : : +- Scan parquet default.date_dim (14) - : +- BroadcastExchange (25) - : +- * Project (24) - : +- * Filter (23) - : +- * ColumnarToRow (22) - : +- Scan parquet default.customer_address (21) - +- BroadcastExchange (32) - +- * Project (31) - +- * Filter (30) - +- * ColumnarToRow (29) - +- Scan parquet default.web_site (28) - - -(1) Scan parquet default.web_sales -Output [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 6] -Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] - -(3) Filter [codegen id : 6] -Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] -Condition : ((isnotnull(ws_ship_date_sk#1) AND isnotnull(ws_ship_addr_sk#2)) AND isnotnull(ws_web_site_sk#3)) - -(4) Scan parquet default.web_sales -Output [2]: [ws_warehouse_sk#4, ws_order_number#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [ws_warehouse_sk#4, ws_order_number#5] - -(6) Project [codegen id : 1] -Output [2]: [ws_warehouse_sk#4 AS ws_warehouse_sk#4#8, ws_order_number#5 AS ws_order_number#5#9] -Input [2]: [ws_warehouse_sk#4, ws_order_number#5] - -(7) BroadcastExchange -Input [2]: [ws_warehouse_sk#4#8, ws_order_number#5#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#10] - -(8) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_order_number#5] -Right keys [1]: [ws_order_number#5#9] -Join condition: NOT (ws_warehouse_sk#4 = ws_warehouse_sk#4#8) - -(9) Project [codegen id : 6] -Output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] -Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] - -(10) Scan parquet default.web_returns -Output [1]: [wr_order_number#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_returns] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [1]: [wr_order_number#11] - -(12) BroadcastExchange -Input [1]: [wr_order_number#11] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#12] - -(13) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cast(ws_order_number#5 as bigint)] -Right keys [1]: [wr_order_number#11] -Join condition: None - -(14) Scan parquet default.date_dim -Output [2]: [d_date_sk#13, d_date#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] -ReadSchema: struct - -(15) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#13, d_date#14] - -(16) Filter [codegen id : 3] -Input [2]: [d_date_sk#13, d_date#14] -Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 10623)) AND (d_date#14 <= 10683)) AND isnotnull(d_date_sk#13)) - -(17) Project [codegen id : 3] -Output [1]: [d_date_sk#13] -Input [2]: [d_date_sk#13, d_date#14] - -(18) BroadcastExchange -Input [1]: [d_date_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] - -(19) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_ship_date_sk#1] -Right keys [1]: [d_date_sk#13] -Join condition: None - -(20) Project [codegen id : 6] -Output [5]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] -Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, d_date_sk#13] - -(21) Scan parquet default.customer_address -Output [2]: [ca_address_sk#16, ca_state#17] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(22) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#16, ca_state#17] - -(23) Filter [codegen id : 4] -Input [2]: [ca_address_sk#16, ca_state#17] -Condition : ((isnotnull(ca_state#17) AND (ca_state#17 = IL)) AND isnotnull(ca_address_sk#16)) - -(24) Project [codegen id : 4] -Output [1]: [ca_address_sk#16] -Input [2]: [ca_address_sk#16, ca_state#17] - -(25) BroadcastExchange -Input [1]: [ca_address_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] - -(26) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_ship_addr_sk#2] -Right keys [1]: [ca_address_sk#16] -Join condition: None - -(27) Project [codegen id : 6] -Output [4]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] -Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ca_address_sk#16] - -(28) Scan parquet default.web_site -Output [2]: [web_site_sk#19, web_company_name#20] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_site] -PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)] -ReadSchema: struct - -(29) ColumnarToRow [codegen id : 5] -Input [2]: [web_site_sk#19, web_company_name#20] - -(30) Filter [codegen id : 5] -Input [2]: [web_site_sk#19, web_company_name#20] -Condition : ((isnotnull(web_company_name#20) AND (web_company_name#20 = pri)) AND isnotnull(web_site_sk#19)) - -(31) Project [codegen id : 5] -Output [1]: [web_site_sk#19] -Input [2]: [web_site_sk#19, web_company_name#20] - -(32) BroadcastExchange -Input [1]: [web_site_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] - -(33) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_web_site_sk#3] -Right keys [1]: [web_site_sk#19] -Join condition: None - -(34) Project [codegen id : 6] -Output [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] -Input [5]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, web_site_sk#19] - -(35) HashAggregate [codegen id : 6] -Input [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] -Keys [1]: [ws_order_number#5] -Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#6)), partial_sum(UnscaledValue(ws_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23] -Results [3]: [ws_order_number#5, sum#24, sum#25] - -(36) Exchange -Input [3]: [ws_order_number#5, sum#24, sum#25] -Arguments: hashpartitioning(ws_order_number#5, 5), true, [id=#26] - -(37) HashAggregate [codegen id : 7] -Input [3]: [ws_order_number#5, sum#24, sum#25] -Keys [1]: [ws_order_number#5] -Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23] -Results [3]: [ws_order_number#5, sum#24, sum#25] - -(38) HashAggregate [codegen id : 7] -Input [3]: [ws_order_number#5, sum#24, sum#25] -Keys: [] -Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7)), partial_count(distinct ws_order_number#5)] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#27] -Results [3]: [sum#24, sum#25, count#28] - -(39) Exchange -Input [3]: [sum#24, sum#25, count#28] -Arguments: SinglePartition, true, [id=#29] - -(40) HashAggregate [codegen id : 8] -Input [3]: [sum#24, sum#25, count#28] -Keys: [] -Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net_profit#7)), count(distinct ws_order_number#5)] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#27] -Results [3]: [count(ws_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#23,17,2) AS total net profit #32] - -(41) Sort [codegen id : 8] -Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: [order count #30 ASC NULLS FIRST], true, 0 - +TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], output=[order count #1,total shipping cost #2,total net profit #3]) ++- *(8) HashAggregate(keys=[], functions=[sum(UnscaledValue(ws_ext_ship_cost#4)), sum(UnscaledValue(ws_net_profit#5)), count(distinct ws_order_number#6)]) + +- Exchange SinglePartition + +- *(7) HashAggregate(keys=[], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5)), partial_count(distinct ws_order_number#6)]) + +- *(7) HashAggregate(keys=[ws_order_number#6], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5))]) + +- Exchange hashpartitioning(ws_order_number#6, 5) + +- *(6) HashAggregate(keys=[ws_order_number#6], functions=[partial_sum(UnscaledValue(ws_ext_ship_cost#4)), partial_sum(UnscaledValue(ws_net_profit#5))]) + +- *(6) Project [ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + +- *(6) BroadcastHashJoin [ws_web_site_sk#7], [web_site_sk#8], Inner, BuildRight + :- *(6) Project [ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : +- *(6) BroadcastHashJoin [ws_ship_addr_sk#9], [ca_address_sk#10], Inner, BuildRight + : :- *(6) Project [ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : : +- *(6) BroadcastHashJoin [ws_ship_date_sk#11], [d_date_sk#12], Inner, BuildRight + : : :- *(6) BroadcastHashJoin [cast(ws_order_number#6 as bigint)], [wr_order_number#13], LeftAnti, BuildRight + : : : :- *(6) Project [ws_ship_date_sk#11, ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : : : : +- *(6) BroadcastHashJoin [ws_order_number#6], [ws_order_number#6#14], LeftSemi, BuildRight, NOT (ws_warehouse_sk#15 = ws_warehouse_sk#15#16) + : : : : :- *(6) Project [ws_ship_date_sk#11, ws_ship_addr_sk#9, ws_web_site_sk#7, ws_warehouse_sk#15, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : : : : : +- *(6) Filter ((isnotnull(ws_ship_date_sk#11) && isnotnull(ws_ship_addr_sk#9)) && isnotnull(ws_web_site_sk#7)) + : : : : : +- *(6) FileScan parquet default.web_sales[ws_ship_date_sk#11,ws_ship_addr_sk#9,ws_web_site_sk#7,ws_warehouse_sk#15,ws_order_number#6,ws_ext_ship_cost#4,ws_net_profit#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])) + : : : +- *(2) FileScan parquet default.web_returns[wr_order_number#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(3) Project [d_date_sk#12] + : : +- *(3) Filter (((isnotnull(d_date#17) && (cast(d_date#17 as string) >= 1999-02-01)) && (d_date#17 <= 10683)) && isnotnull(d_date_sk#12)) + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(4) Project [ca_address_sk#10] + : +- *(4) Filter ((isnotnull(ca_state#18) && (ca_state#18 = IL)) && isnotnull(ca_address_sk#10)) + : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(5) Project [web_site_sk#8] + +- *(5) Filter ((isnotnull(web_company_name#19) && (web_company_name#19 = pri)) && isnotnull(web_site_sk#8)) + +- *(5) FileScan parquet default.web_site[web_site_sk#8,web_company_name#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/simplified.txt index 5e7d7db5c..c7a5e5d90 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/simplified.txt @@ -1,62 +1,51 @@ -WholeStageCodegen (8) - Sort [order count ] - HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] +TakeOrderedAndProject [order count ,total net profit ,total shipping cost ] + WholeStageCodegen + HashAggregate [count,count(ws_order_number),sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] [count,count(ws_order_number),order count ,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),total net profit ,total shipping cost ] InputAdapter Exchange #1 - WholeStageCodegen (7) - HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] - HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + WholeStageCodegen + HashAggregate [count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_order_number] [count,count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + HashAggregate [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] InputAdapter Exchange [ws_order_number] #2 - WholeStageCodegen (6) - HashAggregate [ws_order_number,ws_ext_ship_cost,ws_net_profit] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] - Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] - BroadcastHashJoin [ws_web_site_sk,web_site_sk] - Project [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] - BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] - Project [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] - BroadcastHashJoin [ws_ship_date_sk,d_date_sk] - BroadcastHashJoin [ws_order_number,wr_order_number] - Project [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + WholeStageCodegen + HashAggregate [sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_ext_ship_cost,ws_net_profit,ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number] + BroadcastHashJoin [web_site_sk,ws_web_site_sk] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_web_site_sk] + BroadcastHashJoin [ca_address_sk,ws_ship_addr_sk] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_web_site_sk] + BroadcastHashJoin [d_date_sk,ws_ship_date_sk] + BroadcastHashJoin [wr_order_number,ws_order_number] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] - Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_warehouse_sk,ws_web_site_sk] + Filter [ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] + Scan parquet default.web_sales [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_warehouse_sk,ws_web_site_sk] [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_warehouse_sk,ws_web_site_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Project [ws_warehouse_sk,ws_order_number] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] + WholeStageCodegen + Project [ws_order_number,ws_warehouse_sk] + Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (2) - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_order_number] + WholeStageCodegen + Scan parquet default.web_returns [wr_order_number] [wr_order_number] InputAdapter BroadcastExchange #5 - WholeStageCodegen (3) + WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #6 - WholeStageCodegen (4) + WholeStageCodegen Project [ca_address_sk] - Filter [ca_state,ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_state] + Filter [ca_address_sk,ca_state] + Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] InputAdapter BroadcastExchange #7 - WholeStageCodegen (5) + WholeStageCodegen Project [web_site_sk] Filter [web_company_name,web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_company_name] + Scan parquet default.web_site [web_company_name,web_site_sk] [web_company_name,web_site_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/explain.txt index 1cc99e296..2d497b75e 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/explain.txt @@ -1,318 +1,54 @@ == Physical Plan == -* Sort (56) -+- * HashAggregate (55) - +- Exchange (54) - +- * HashAggregate (53) - +- * HashAggregate (52) - +- Exchange (51) - +- * HashAggregate (50) - +- * Project (49) - +- * BroadcastHashJoin Inner BuildRight (48) - :- * Project (42) - : +- * BroadcastHashJoin Inner BuildRight (41) - : :- * Project (35) - : : +- * BroadcastHashJoin Inner BuildRight (34) - : : :- * BroadcastHashJoin LeftSemi BuildRight (28) - : : : :- * BroadcastHashJoin LeftSemi BuildRight (14) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.web_sales (1) - : : : : +- BroadcastExchange (13) - : : : : +- * Project (12) - : : : : +- * BroadcastHashJoin Inner BuildRight (11) - : : : : :- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.web_sales (4) - : : : : +- BroadcastExchange (10) - : : : : +- * Filter (9) - : : : : +- * ColumnarToRow (8) - : : : : +- Scan parquet default.web_sales (7) - : : : +- BroadcastExchange (27) - : : : +- * Project (26) - : : : +- * BroadcastHashJoin Inner BuildRight (25) - : : : :- * Filter (17) - : : : : +- * ColumnarToRow (16) - : : : : +- Scan parquet default.web_returns (15) - : : : +- BroadcastExchange (24) - : : : +- * Project (23) - : : : +- * BroadcastHashJoin Inner BuildRight (22) - : : : :- * Filter (20) - : : : : +- * ColumnarToRow (19) - : : : : +- Scan parquet default.web_sales (18) - : : : +- ReusedExchange (21) - : : +- BroadcastExchange (33) - : : +- * Project (32) - : : +- * Filter (31) - : : +- * ColumnarToRow (30) - : : +- Scan parquet default.date_dim (29) - : +- BroadcastExchange (40) - : +- * Project (39) - : +- * Filter (38) - : +- * ColumnarToRow (37) - : +- Scan parquet default.customer_address (36) - +- BroadcastExchange (47) - +- * Project (46) - +- * Filter (45) - +- * ColumnarToRow (44) - +- Scan parquet default.web_site (43) - - -(1) Scan parquet default.web_sales -Output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 9] -Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] - -(3) Filter [codegen id : 9] -Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] -Condition : ((isnotnull(ws_ship_date_sk#1) AND isnotnull(ws_ship_addr_sk#2)) AND isnotnull(ws_web_site_sk#3)) - -(4) Scan parquet default.web_sales -Output [2]: [ws_warehouse_sk#7, ws_order_number#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 2] -Input [2]: [ws_warehouse_sk#7, ws_order_number#4] - -(6) Filter [codegen id : 2] -Input [2]: [ws_warehouse_sk#7, ws_order_number#4] -Condition : (isnotnull(ws_order_number#4) AND isnotnull(ws_warehouse_sk#7)) - -(7) Scan parquet default.web_sales -Output [2]: [ws_warehouse_sk#8, ws_order_number#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)] -ReadSchema: struct - -(8) ColumnarToRow [codegen id : 1] -Input [2]: [ws_warehouse_sk#8, ws_order_number#9] - -(9) Filter [codegen id : 1] -Input [2]: [ws_warehouse_sk#8, ws_order_number#9] -Condition : (isnotnull(ws_order_number#9) AND isnotnull(ws_warehouse_sk#8)) - -(10) BroadcastExchange -Input [2]: [ws_warehouse_sk#8, ws_order_number#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#10] - -(11) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ws_order_number#4] -Right keys [1]: [ws_order_number#9] -Join condition: NOT (ws_warehouse_sk#7 = ws_warehouse_sk#8) - -(12) Project [codegen id : 2] -Output [1]: [ws_order_number#4 AS ws_order_number#4#11] -Input [4]: [ws_warehouse_sk#7, ws_order_number#4, ws_warehouse_sk#8, ws_order_number#9] - -(13) BroadcastExchange -Input [1]: [ws_order_number#4#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] - -(14) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_order_number#4] -Right keys [1]: [ws_order_number#4#11] -Join condition: None - -(15) Scan parquet default.web_returns -Output [1]: [wr_order_number#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_returns] -PushedFilters: [IsNotNull(wr_order_number)] -ReadSchema: struct - -(16) ColumnarToRow [codegen id : 5] -Input [1]: [wr_order_number#13] - -(17) Filter [codegen id : 5] -Input [1]: [wr_order_number#13] -Condition : isnotnull(wr_order_number#13) - -(18) Scan parquet default.web_sales -Output [2]: [ws_warehouse_sk#7, ws_order_number#4] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 4] -Input [2]: [ws_warehouse_sk#7, ws_order_number#4] - -(20) Filter [codegen id : 4] -Input [2]: [ws_warehouse_sk#7, ws_order_number#4] -Condition : (isnotnull(ws_order_number#4) AND isnotnull(ws_warehouse_sk#7)) - -(21) ReusedExchange [Reuses operator id: 10] -Output [2]: [ws_warehouse_sk#14, ws_order_number#15] - -(22) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ws_order_number#4] -Right keys [1]: [ws_order_number#15] -Join condition: NOT (ws_warehouse_sk#7 = ws_warehouse_sk#14) - -(23) Project [codegen id : 4] -Output [1]: [ws_order_number#4] -Input [4]: [ws_warehouse_sk#7, ws_order_number#4, ws_warehouse_sk#14, ws_order_number#15] - -(24) BroadcastExchange -Input [1]: [ws_order_number#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] - -(25) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [wr_order_number#13] -Right keys [1]: [cast(ws_order_number#4 as bigint)] -Join condition: None - -(26) Project [codegen id : 5] -Output [1]: [wr_order_number#13] -Input [2]: [wr_order_number#13, ws_order_number#4] - -(27) BroadcastExchange -Input [1]: [wr_order_number#13] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#17] - -(28) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [cast(ws_order_number#4 as bigint)] -Right keys [1]: [wr_order_number#13] -Join condition: None - -(29) Scan parquet default.date_dim -Output [2]: [d_date_sk#18, d_date#19] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] -ReadSchema: struct - -(30) ColumnarToRow [codegen id : 6] -Input [2]: [d_date_sk#18, d_date#19] - -(31) Filter [codegen id : 6] -Input [2]: [d_date_sk#18, d_date#19] -Condition : (((isnotnull(d_date#19) AND (d_date#19 >= 10623)) AND (d_date#19 <= 10683)) AND isnotnull(d_date_sk#18)) - -(32) Project [codegen id : 6] -Output [1]: [d_date_sk#18] -Input [2]: [d_date_sk#18, d_date#19] - -(33) BroadcastExchange -Input [1]: [d_date_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] - -(34) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_ship_date_sk#1] -Right keys [1]: [d_date_sk#18] -Join condition: None - -(35) Project [codegen id : 9] -Output [5]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] -Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, d_date_sk#18] - -(36) Scan parquet default.customer_address -Output [2]: [ca_address_sk#21, ca_state#22] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(37) ColumnarToRow [codegen id : 7] -Input [2]: [ca_address_sk#21, ca_state#22] - -(38) Filter [codegen id : 7] -Input [2]: [ca_address_sk#21, ca_state#22] -Condition : ((isnotnull(ca_state#22) AND (ca_state#22 = IL)) AND isnotnull(ca_address_sk#21)) - -(39) Project [codegen id : 7] -Output [1]: [ca_address_sk#21] -Input [2]: [ca_address_sk#21, ca_state#22] - -(40) BroadcastExchange -Input [1]: [ca_address_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] - -(41) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_ship_addr_sk#2] -Right keys [1]: [ca_address_sk#21] -Join condition: None - -(42) Project [codegen id : 9] -Output [4]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] -Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ca_address_sk#21] - -(43) Scan parquet default.web_site -Output [2]: [web_site_sk#24, web_company_name#25] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_site] -PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)] -ReadSchema: struct - -(44) ColumnarToRow [codegen id : 8] -Input [2]: [web_site_sk#24, web_company_name#25] - -(45) Filter [codegen id : 8] -Input [2]: [web_site_sk#24, web_company_name#25] -Condition : ((isnotnull(web_company_name#25) AND (web_company_name#25 = pri)) AND isnotnull(web_site_sk#24)) - -(46) Project [codegen id : 8] -Output [1]: [web_site_sk#24] -Input [2]: [web_site_sk#24, web_company_name#25] - -(47) BroadcastExchange -Input [1]: [web_site_sk#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] - -(48) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_web_site_sk#3] -Right keys [1]: [web_site_sk#24] -Join condition: None - -(49) Project [codegen id : 9] -Output [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] -Input [5]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, web_site_sk#24] - -(50) HashAggregate [codegen id : 9] -Input [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] -Keys [1]: [ws_order_number#4] -Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#5)), partial_sum(UnscaledValue(ws_net_profit#6))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28] -Results [3]: [ws_order_number#4, sum#29, sum#30] - -(51) Exchange -Input [3]: [ws_order_number#4, sum#29, sum#30] -Arguments: hashpartitioning(ws_order_number#4, 5), true, [id=#31] - -(52) HashAggregate [codegen id : 10] -Input [3]: [ws_order_number#4, sum#29, sum#30] -Keys [1]: [ws_order_number#4] -Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28] -Results [3]: [ws_order_number#4, sum#29, sum#30] - -(53) HashAggregate [codegen id : 10] -Input [3]: [ws_order_number#4, sum#29, sum#30] -Keys: [] -Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6)), partial_count(distinct ws_order_number#4)] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#32] -Results [3]: [sum#29, sum#30, count#33] - -(54) Exchange -Input [3]: [sum#29, sum#30, count#33] -Arguments: SinglePartition, true, [id=#34] - -(55) HashAggregate [codegen id : 11] -Input [3]: [sum#29, sum#30, count#33] -Keys: [] -Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net_profit#6)), count(distinct ws_order_number#4)] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#32] -Results [3]: [count(ws_order_number#4)#32 AS order count #35, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#27,17,2) AS total shipping cost #36, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#28,17,2) AS total net profit #37] - -(56) Sort [codegen id : 11] -Input [3]: [order count #35, total shipping cost #36, total net profit #37] -Arguments: [order count #35 ASC NULLS FIRST], true, 0 - +TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], output=[order count #1,total shipping cost #2,total net profit #3]) ++- *(11) HashAggregate(keys=[], functions=[sum(UnscaledValue(ws_ext_ship_cost#4)), sum(UnscaledValue(ws_net_profit#5)), count(distinct ws_order_number#6)]) + +- Exchange SinglePartition + +- *(10) HashAggregate(keys=[], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5)), partial_count(distinct ws_order_number#6)]) + +- *(10) HashAggregate(keys=[ws_order_number#6], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5))]) + +- Exchange hashpartitioning(ws_order_number#6, 5) + +- *(9) HashAggregate(keys=[ws_order_number#6], functions=[partial_sum(UnscaledValue(ws_ext_ship_cost#4)), partial_sum(UnscaledValue(ws_net_profit#5))]) + +- *(9) Project [ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + +- *(9) BroadcastHashJoin [ws_web_site_sk#7], [web_site_sk#8], Inner, BuildRight + :- *(9) Project [ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : +- *(9) BroadcastHashJoin [ws_ship_addr_sk#9], [ca_address_sk#10], Inner, BuildRight + : :- *(9) Project [ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : : +- *(9) BroadcastHashJoin [ws_ship_date_sk#11], [d_date_sk#12], Inner, BuildRight + : : :- *(9) BroadcastHashJoin [cast(ws_order_number#6 as bigint)], [wr_order_number#13], LeftSemi, BuildRight + : : : :- *(9) BroadcastHashJoin [ws_order_number#6], [ws_order_number#6#14], LeftSemi, BuildRight + : : : : :- *(9) Project [ws_ship_date_sk#11, ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] + : : : : : +- *(9) Filter ((isnotnull(ws_ship_date_sk#11) && isnotnull(ws_ship_addr_sk#9)) && isnotnull(ws_web_site_sk#7)) + : : : : : +- *(9) FileScan parquet default.web_sales[ws_ship_date_sk#11,ws_ship_addr_sk#9,ws_web_site_sk#7,ws_order_number#6,ws_ext_ship_cost#4,ws_net_profit#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)], ReadSchema: struct + : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : : : +- *(1) Project [ws_warehouse_sk#17, ws_order_number#15] + : : : : +- *(1) Filter (isnotnull(ws_order_number#15) && isnotnull(ws_warehouse_sk#17)) + : : : : +- *(1) FileScan parquet default.web_sales[ws_warehouse_sk#17,ws_order_number#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])) + : : : +- *(5) Project [wr_order_number#13] + : : : +- *(5) BroadcastHashJoin [wr_order_number#13], [cast(ws_order_number#6 as bigint)], Inner, BuildRight + : : : :- *(5) Project [wr_order_number#13] + : : : : +- *(5) Filter isnotnull(wr_order_number#13) + : : : : +- *(5) FileScan parquet default.web_returns[wr_order_number#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_order_number)], ReadSchema: struct + : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : : +- *(4) Project [ws_order_number#6] + : : : +- *(4) BroadcastHashJoin [ws_order_number#6], [ws_order_number#15], Inner, BuildRight, NOT (ws_warehouse_sk#16 = ws_warehouse_sk#17) + : : : :- *(4) Project [ws_warehouse_sk#16, ws_order_number#6] + : : : : +- *(4) Filter (isnotnull(ws_order_number#6) && isnotnull(ws_warehouse_sk#16)) + : : : : +- *(4) FileScan parquet default.web_sales[ws_warehouse_sk#16,ws_order_number#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)], ReadSchema: struct + : : : +- ReusedExchange [ws_warehouse_sk#17, ws_order_number#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(6) Project [d_date_sk#12] + : : +- *(6) Filter (((isnotnull(d_date#18) && (cast(d_date#18 as string) >= 1999-02-01)) && (d_date#18 <= 10683)) && isnotnull(d_date_sk#12)) + : : +- *(6) FileScan parquet default.date_dim[d_date_sk#12,d_date#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(7) Project [ca_address_sk#10] + : +- *(7) Filter ((isnotnull(ca_state#19) && (ca_state#19 = IL)) && isnotnull(ca_address_sk#10)) + : +- *(7) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(8) Project [web_site_sk#8] + +- *(8) Filter ((isnotnull(web_company_name#20) && (web_company_name#20 = pri)) && isnotnull(web_site_sk#8)) + +- *(8) FileScan parquet default.web_site[web_site_sk#8,web_company_name#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/simplified.txt index 191ff22c1..343982d5a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/simplified.txt @@ -1,84 +1,73 @@ -WholeStageCodegen (11) - Sort [order count ] - HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] +TakeOrderedAndProject [order count ,total net profit ,total shipping cost ] + WholeStageCodegen + HashAggregate [count,count(ws_order_number),sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] [count,count(ws_order_number),order count ,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),total net profit ,total shipping cost ] InputAdapter Exchange #1 - WholeStageCodegen (10) - HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] - HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + WholeStageCodegen + HashAggregate [count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_order_number] [count,count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + HashAggregate [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] InputAdapter Exchange [ws_order_number] #2 - WholeStageCodegen (9) - HashAggregate [ws_order_number,ws_ext_ship_cost,ws_net_profit] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] - Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] - BroadcastHashJoin [ws_web_site_sk,web_site_sk] - Project [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] - BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] - Project [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] - BroadcastHashJoin [ws_ship_date_sk,d_date_sk] - BroadcastHashJoin [ws_order_number,wr_order_number] + WholeStageCodegen + HashAggregate [sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_ext_ship_cost,ws_net_profit,ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number] + BroadcastHashJoin [web_site_sk,ws_web_site_sk] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_web_site_sk] + BroadcastHashJoin [ca_address_sk,ws_ship_addr_sk] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_web_site_sk] + BroadcastHashJoin [d_date_sk,ws_ship_date_sk] + BroadcastHashJoin [wr_order_number,ws_order_number] BroadcastHashJoin [ws_order_number,ws_order_number] - Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] + Filter [ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] + Scan parquet default.web_sales [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) + WholeStageCodegen Project [ws_order_number] BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] - Filter [ws_order_number,ws_warehouse_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] + Project [ws_order_number,ws_warehouse_sk] + Filter [ws_order_number,ws_warehouse_sk] + Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (1) - Filter [ws_order_number,ws_warehouse_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] + WholeStageCodegen + Project [ws_order_number,ws_warehouse_sk] + Filter [ws_order_number,ws_warehouse_sk] + Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (5) + WholeStageCodegen Project [wr_order_number] BroadcastHashJoin [wr_order_number,ws_order_number] - Filter [wr_order_number] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_order_number] + Project [wr_order_number] + Filter [wr_order_number] + Scan parquet default.web_returns [wr_order_number] [wr_order_number] InputAdapter BroadcastExchange #6 - WholeStageCodegen (4) + WholeStageCodegen Project [ws_order_number] BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] - Filter [ws_order_number,ws_warehouse_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] + Project [ws_order_number,ws_warehouse_sk] + Filter [ws_order_number,ws_warehouse_sk] + Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] InputAdapter - ReusedExchange [ws_warehouse_sk,ws_order_number] #4 + ReusedExchange [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] #4 InputAdapter BroadcastExchange #7 - WholeStageCodegen (6) + WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] InputAdapter BroadcastExchange #8 - WholeStageCodegen (7) + WholeStageCodegen Project [ca_address_sk] - Filter [ca_state,ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_state] + Filter [ca_address_sk,ca_state] + Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] InputAdapter BroadcastExchange #9 - WholeStageCodegen (8) + WholeStageCodegen Project [web_site_sk] Filter [web_company_name,web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_company_name] + Scan parquet default.web_site [web_company_name,web_site_sk] [web_company_name,web_site_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/explain.txt index 6729910d9..c66e7e331 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/explain.txt @@ -1,160 +1,26 @@ == Physical Plan == -* Sort (28) -+- * HashAggregate (27) - +- Exchange (26) - +- * HashAggregate (25) - +- * Project (24) - +- * BroadcastHashJoin Inner BuildRight (23) - :- * Project (17) - : +- * BroadcastHashJoin Inner BuildRight (16) - : :- * Project (10) - : : +- * BroadcastHashJoin Inner BuildRight (9) - : : :- * Filter (3) - : : : +- * ColumnarToRow (2) - : : : +- Scan parquet default.store_sales (1) - : : +- BroadcastExchange (8) - : : +- * Project (7) - : : +- * Filter (6) - : : +- * ColumnarToRow (5) - : : +- Scan parquet default.household_demographics (4) - : +- BroadcastExchange (15) - : +- * Project (14) - : +- * Filter (13) - : +- * ColumnarToRow (12) - : +- Scan parquet default.time_dim (11) - +- BroadcastExchange (22) - +- * Project (21) - +- * Filter (20) - +- * ColumnarToRow (19) - +- Scan parquet default.store (18) - - -(1) Scan parquet default.store_sales -Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] - -(3) Filter [codegen id : 4] -Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] -Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) - -(4) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#4, hd_dep_count#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,7), IsNotNull(hd_demo_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [hd_demo_sk#4, hd_dep_count#5] - -(6) Filter [codegen id : 1] -Input [2]: [hd_demo_sk#4, hd_dep_count#5] -Condition : ((isnotnull(hd_dep_count#5) AND (hd_dep_count#5 = 7)) AND isnotnull(hd_demo_sk#4)) - -(7) Project [codegen id : 1] -Output [1]: [hd_demo_sk#4] -Input [2]: [hd_demo_sk#4, hd_dep_count#5] - -(8) BroadcastExchange -Input [1]: [hd_demo_sk#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] - -(9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#4] -Join condition: None - -(10) Project [codegen id : 4] -Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] -Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] - -(11) Scan parquet default.time_dim -Output [3]: [t_time_sk#7, t_hour#8, t_minute#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/time_dim] -PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,20), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] - -(13) Filter [codegen id : 2] -Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] -Condition : ((((isnotnull(t_hour#8) AND isnotnull(t_minute#9)) AND (t_hour#8 = 20)) AND (t_minute#9 >= 30)) AND isnotnull(t_time_sk#7)) - -(14) Project [codegen id : 2] -Output [1]: [t_time_sk#7] -Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] - -(15) BroadcastExchange -Input [1]: [t_time_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] - -(16) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_time_sk#1] -Right keys [1]: [t_time_sk#7] -Join condition: None - -(17) Project [codegen id : 4] -Output [1]: [ss_store_sk#3] -Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#7] - -(18) Scan parquet default.store -Output [2]: [s_store_sk#11, s_store_name#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)] -ReadSchema: struct - -(19) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#11, s_store_name#12] - -(20) Filter [codegen id : 3] -Input [2]: [s_store_sk#11, s_store_name#12] -Condition : ((isnotnull(s_store_name#12) AND (s_store_name#12 = ese)) AND isnotnull(s_store_sk#11)) - -(21) Project [codegen id : 3] -Output [1]: [s_store_sk#11] -Input [2]: [s_store_sk#11, s_store_name#12] - -(22) BroadcastExchange -Input [1]: [s_store_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] - -(23) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#11] -Join condition: None - -(24) Project [codegen id : 4] -Output: [] -Input [2]: [ss_store_sk#3, s_store_sk#11] - -(25) HashAggregate [codegen id : 4] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#14] -Results [1]: [count#15] - -(26) Exchange -Input [1]: [count#15] -Arguments: SinglePartition, true, [id=#16] - -(27) HashAggregate [codegen id : 5] -Input [1]: [count#15] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#17] -Results [1]: [count(1)#17 AS count(1)#18] - -(28) Sort [codegen id : 5] -Input [1]: [count(1)#18] -Arguments: [count(1)#18 ASC NULLS FIRST], true, 0 - +TakeOrderedAndProject(limit=100, orderBy=[count(1)#1 ASC NULLS FIRST], output=[count(1)#1]) ++- *(5) HashAggregate(keys=[], functions=[count(1)]) + +- Exchange SinglePartition + +- *(4) HashAggregate(keys=[], functions=[partial_count(1)]) + +- *(4) Project + +- *(4) BroadcastHashJoin [ss_store_sk#2], [s_store_sk#3], Inner, BuildRight + :- *(4) Project [ss_store_sk#2] + : +- *(4) BroadcastHashJoin [ss_sold_time_sk#4], [t_time_sk#5], Inner, BuildRight + : :- *(4) Project [ss_sold_time_sk#4, ss_store_sk#2] + : : +- *(4) BroadcastHashJoin [ss_hdemo_sk#6], [hd_demo_sk#7], Inner, BuildRight + : : :- *(4) Project [ss_sold_time_sk#4, ss_hdemo_sk#6, ss_store_sk#2] + : : : +- *(4) Filter ((isnotnull(ss_hdemo_sk#6) && isnotnull(ss_sold_time_sk#4)) && isnotnull(ss_store_sk#2)) + : : : +- *(4) FileScan parquet default.store_sales[ss_sold_time_sk#4,ss_hdemo_sk#6,ss_store_sk#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(1) Project [hd_demo_sk#7] + : : +- *(1) Filter ((isnotnull(hd_dep_count#8) && (hd_dep_count#8 = 7)) && isnotnull(hd_demo_sk#7)) + : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#7,hd_dep_count#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,7), IsNotNull(hd_demo_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(2) Project [t_time_sk#5] + : +- *(2) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 20)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#5)) + : +- *(2) FileScan parquet default.time_dim[t_time_sk#5,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,20), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(3) Project [s_store_sk#3] + +- *(3) Filter ((isnotnull(s_store_name#11) && (s_store_name#11 = ese)) && isnotnull(s_store_sk#3)) + +- *(3) FileScan parquet default.store[s_store_sk#3,s_store_name#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/simplified.txt index 45400b6c5..7f1a66829 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/simplified.txt @@ -1,41 +1,34 @@ -WholeStageCodegen (5) - Sort [count(1)] - HashAggregate [count] [count(1),count(1),count] +TakeOrderedAndProject [count(1)] + WholeStageCodegen + HashAggregate [count,count(1)] [count,count(1),count(1)] InputAdapter Exchange #1 - WholeStageCodegen (4) - HashAggregate [count,count] + WholeStageCodegen + HashAggregate [count,count] [count,count] Project - BroadcastHashJoin [ss_store_sk,s_store_sk] + BroadcastHashJoin [s_store_sk,ss_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] + BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) + WholeStageCodegen Project [hd_demo_sk] - Filter [hd_dep_count,hd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] + Filter [hd_demo_sk,hd_dep_count] + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] [hd_demo_sk,hd_dep_count] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) + WholeStageCodegen Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] + Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) + WholeStageCodegen Project [s_store_sk] Filter [s_store_name,s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_name] + Scan parquet default.store [s_store_name,s_store_sk] [s_store_name,s_store_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/explain.txt index e904ad94d..0d528d336 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/explain.txt @@ -1,174 +1,32 @@ == Physical Plan == -* HashAggregate (29) -+- Exchange (28) - +- * HashAggregate (27) - +- * Project (26) - +- SortMergeJoin FullOuter (25) - :- * Sort (14) - : +- * HashAggregate (13) - : +- Exchange (12) - : +- * HashAggregate (11) - : +- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.store_sales (1) - : +- BroadcastExchange (8) - : +- * Project (7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.date_dim (4) - +- * Sort (24) - +- * HashAggregate (23) - +- Exchange (22) - +- * HashAggregate (21) - +- * Project (20) - +- * BroadcastHashJoin Inner BuildRight (19) - :- * Filter (17) - : +- * ColumnarToRow (16) - : +- Scan parquet default.catalog_sales (15) - +- ReusedExchange (18) - - -(1) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 2] -Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] - -(3) Filter [codegen id : 2] -Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] -Condition : isnotnull(ss_sold_date_sk#1) - -(4) Scan parquet default.date_dim -Output [2]: [d_date_sk#4, d_month_seq#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#4, d_month_seq#5] - -(6) Filter [codegen id : 1] -Input [2]: [d_date_sk#4, d_month_seq#5] -Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#4] -Input [2]: [d_date_sk#4, d_month_seq#5] - -(8) BroadcastExchange -Input [1]: [d_date_sk#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] - -(9) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#4] -Join condition: None - -(10) Project [codegen id : 2] -Output [2]: [ss_item_sk#2, ss_customer_sk#3] -Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, d_date_sk#4] - -(11) HashAggregate [codegen id : 2] -Input [2]: [ss_item_sk#2, ss_customer_sk#3] -Keys [2]: [ss_customer_sk#3, ss_item_sk#2] -Functions: [] -Aggregate Attributes: [] -Results [2]: [ss_customer_sk#3, ss_item_sk#2] - -(12) Exchange -Input [2]: [ss_customer_sk#3, ss_item_sk#2] -Arguments: hashpartitioning(ss_customer_sk#3, ss_item_sk#2, 5), true, [id=#7] - -(13) HashAggregate [codegen id : 3] -Input [2]: [ss_customer_sk#3, ss_item_sk#2] -Keys [2]: [ss_customer_sk#3, ss_item_sk#2] -Functions: [] -Aggregate Attributes: [] -Results [2]: [ss_customer_sk#3 AS customer_sk#8, ss_item_sk#2 AS item_sk#9] - -(14) Sort [codegen id : 3] -Input [2]: [customer_sk#8, item_sk#9] -Arguments: [customer_sk#8 ASC NULLS FIRST, item_sk#9 ASC NULLS FIRST], false, 0 - -(15) Scan parquet default.catalog_sales -Output [3]: [cs_sold_date_sk#10, cs_bill_customer_sk#11, cs_item_sk#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(16) ColumnarToRow [codegen id : 5] -Input [3]: [cs_sold_date_sk#10, cs_bill_customer_sk#11, cs_item_sk#12] - -(17) Filter [codegen id : 5] -Input [3]: [cs_sold_date_sk#10, cs_bill_customer_sk#11, cs_item_sk#12] -Condition : isnotnull(cs_sold_date_sk#10) - -(18) ReusedExchange [Reuses operator id: 8] -Output [1]: [d_date_sk#4] - -(19) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_sold_date_sk#10] -Right keys [1]: [d_date_sk#4] -Join condition: None - -(20) Project [codegen id : 5] -Output [2]: [cs_bill_customer_sk#11, cs_item_sk#12] -Input [4]: [cs_sold_date_sk#10, cs_bill_customer_sk#11, cs_item_sk#12, d_date_sk#4] - -(21) HashAggregate [codegen id : 5] -Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] -Keys [2]: [cs_bill_customer_sk#11, cs_item_sk#12] -Functions: [] -Aggregate Attributes: [] -Results [2]: [cs_bill_customer_sk#11, cs_item_sk#12] - -(22) Exchange -Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] -Arguments: hashpartitioning(cs_bill_customer_sk#11, cs_item_sk#12, 5), true, [id=#13] - -(23) HashAggregate [codegen id : 6] -Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] -Keys [2]: [cs_bill_customer_sk#11, cs_item_sk#12] -Functions: [] -Aggregate Attributes: [] -Results [2]: [cs_bill_customer_sk#11 AS customer_sk#14, cs_item_sk#12 AS item_sk#15] - -(24) Sort [codegen id : 6] -Input [2]: [customer_sk#14, item_sk#15] -Arguments: [customer_sk#14 ASC NULLS FIRST, item_sk#15 ASC NULLS FIRST], false, 0 - -(25) SortMergeJoin -Left keys [2]: [customer_sk#8, item_sk#9] -Right keys [2]: [customer_sk#14, item_sk#15] -Join condition: None - -(26) Project [codegen id : 7] -Output [2]: [customer_sk#8, customer_sk#14] -Input [4]: [customer_sk#8, item_sk#9, customer_sk#14, item_sk#15] - -(27) HashAggregate [codegen id : 7] -Input [2]: [customer_sk#8, customer_sk#14] -Keys: [] -Functions [3]: [partial_sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [3]: [sum#16, sum#17, sum#18] -Results [3]: [sum#19, sum#20, sum#21] - -(28) Exchange -Input [3]: [sum#19, sum#20, sum#21] -Arguments: SinglePartition, true, [id=#22] - -(29) HashAggregate [codegen id : 8] -Input [3]: [sum#19, sum#20, sum#21] -Keys: [] -Functions [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25] -Results [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23 AS store_only#26, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24 AS catalog_only#27, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25 AS store_and_catalog#28] - +CollectLimit 100 ++- *(10) HashAggregate(keys=[], functions=[sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint))]) + +- Exchange SinglePartition + +- *(9) HashAggregate(keys=[], functions=[partial_sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint))]) + +- *(9) Project [customer_sk#1, customer_sk#2] + +- SortMergeJoin [customer_sk#1, item_sk#3], [customer_sk#2, item_sk#4], FullOuter + :- *(4) Sort [customer_sk#1 ASC NULLS FIRST, item_sk#3 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(customer_sk#1, item_sk#3, 5) + : +- *(3) HashAggregate(keys=[ss_customer_sk#5, ss_item_sk#6], functions=[]) + : +- Exchange hashpartitioning(ss_customer_sk#5, ss_item_sk#6, 5) + : +- *(2) HashAggregate(keys=[ss_customer_sk#5, ss_item_sk#6], functions=[]) + : +- *(2) Project [ss_item_sk#6, ss_customer_sk#5] + : +- *(2) BroadcastHashJoin [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight + : :- *(2) Project [ss_sold_date_sk#7, ss_item_sk#6, ss_customer_sk#5] + : : +- *(2) Filter isnotnull(ss_sold_date_sk#7) + : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#7,ss_item_sk#6,ss_customer_sk#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [d_date_sk#8] + : +- *(1) Filter (((isnotnull(d_month_seq#9) && (d_month_seq#9 >= 1200)) && (d_month_seq#9 <= 1211)) && isnotnull(d_date_sk#8)) + : +- *(1) FileScan parquet default.date_dim[d_date_sk#8,d_month_seq#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + +- *(8) Sort [customer_sk#2 ASC NULLS FIRST, item_sk#4 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(customer_sk#2, item_sk#4, 5) + +- *(7) HashAggregate(keys=[cs_bill_customer_sk#10, cs_item_sk#11], functions=[]) + +- Exchange hashpartitioning(cs_bill_customer_sk#10, cs_item_sk#11, 5) + +- *(6) HashAggregate(keys=[cs_bill_customer_sk#10, cs_item_sk#11], functions=[]) + +- *(6) Project [cs_bill_customer_sk#10, cs_item_sk#11] + +- *(6) BroadcastHashJoin [cs_sold_date_sk#12], [d_date_sk#8], Inner, BuildRight + :- *(6) Project [cs_sold_date_sk#12, cs_bill_customer_sk#10, cs_item_sk#11] + : +- *(6) Filter isnotnull(cs_sold_date_sk#12) + : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_bill_customer_sk#10,cs_item_sk#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + +- ReusedExchange [d_date_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/simplified.txt index c5921a11c..40d75add7 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/simplified.txt @@ -1,45 +1,48 @@ -WholeStageCodegen (8) - HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] - InputAdapter - Exchange #1 - WholeStageCodegen (7) - HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] - Project [customer_sk,customer_sk] - InputAdapter - SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] - WholeStageCodegen (3) - Sort [customer_sk,item_sk] - HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] +CollectLimit + WholeStageCodegen + HashAggregate [sum,sum,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] [catalog_only,store_and_catalog,store_only,sum,sum,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] + InputAdapter + Exchange #1 + WholeStageCodegen + HashAggregate [customer_sk,customer_sk,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] + Project [customer_sk,customer_sk] + InputAdapter + SortMergeJoin [customer_sk,customer_sk,item_sk,item_sk] + WholeStageCodegen + Sort [customer_sk,item_sk] InputAdapter - Exchange [ss_customer_sk,ss_item_sk] #2 - WholeStageCodegen (2) - HashAggregate [ss_customer_sk,ss_item_sk] - Project [ss_item_sk,ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] - WholeStageCodegen (6) - Sort [customer_sk,item_sk] - HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] + Exchange [customer_sk,item_sk] #2 + WholeStageCodegen + HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk] #3 + WholeStageCodegen + HashAggregate [ss_customer_sk,ss_item_sk] + Project [ss_customer_sk,ss_item_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk] [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen + Project [d_date_sk] + Filter [d_date_sk,d_month_seq] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + WholeStageCodegen + Sort [customer_sk,item_sk] InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk] #4 - WholeStageCodegen (5) - HashAggregate [cs_bill_customer_sk,cs_item_sk] - Project [cs_bill_customer_sk,cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - InputAdapter - ReusedExchange [d_date_sk] #3 + Exchange [customer_sk,item_sk] #5 + WholeStageCodegen + HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #6 + WholeStageCodegen + HashAggregate [cs_bill_customer_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + Filter [cs_sold_date_sk] + Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + InputAdapter + ReusedExchange [d_date_sk] [d_date_sk] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/explain.txt index 11519207d..2ffc242b5 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/explain.txt @@ -1,147 +1,26 @@ == Physical Plan == -* Project (26) -+- * Sort (25) - +- Exchange (24) - +- * Project (23) - +- Window (22) - +- * Sort (21) - +- Exchange (20) - +- * HashAggregate (19) - +- Exchange (18) - +- * HashAggregate (17) - +- * Project (16) - +- * BroadcastHashJoin Inner BuildRight (15) - :- * Project (9) - : +- * BroadcastHashJoin Inner BuildRight (8) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.store_sales (1) - : +- BroadcastExchange (7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.item (4) - +- BroadcastExchange (14) - +- * Project (13) - +- * Filter (12) - +- * ColumnarToRow (11) - +- Scan parquet default.date_dim (10) - - -(1) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] - -(3) Filter [codegen id : 3] -Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] -Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) - -(4) Scan parquet default.item -Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] - -(6) Filter [codegen id : 1] -Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] -Condition : (i_category#9 IN (Sports,Books,Home) AND isnotnull(i_item_sk#4)) - -(7) BroadcastExchange -Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] - -(8) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#4] -Join condition: None - -(9) Project [codegen id : 3] -Output [7]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] -Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] - -(10) Scan parquet default.date_dim -Output [2]: [d_date_sk#11, d_date#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#11, d_date#12] - -(12) Filter [codegen id : 2] -Input [2]: [d_date_sk#11, d_date#12] -Condition : (((isnotnull(d_date#12) AND (d_date#12 >= 10644)) AND (d_date#12 <= 10674)) AND isnotnull(d_date_sk#11)) - -(13) Project [codegen id : 2] -Output [1]: [d_date_sk#11] -Input [2]: [d_date_sk#11, d_date#12] - -(14) BroadcastExchange -Input [1]: [d_date_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] - -(15) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#11] -Join condition: None - -(16) Project [codegen id : 3] -Output [6]: [ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] -Input [8]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#11] - -(17) HashAggregate [codegen id : 3] -Input [6]: [ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] -Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum#14] -Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] - -(18) Exchange -Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] -Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), true, [id=#16] - -(19) HashAggregate [codegen id : 4] -Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] -Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#17] -Results [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#17,17,2) AS _w1#20, i_item_id#5] - -(20) Exchange -Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] -Arguments: hashpartitioning(i_class#8, 5), true, [id=#21] - -(21) Sort [codegen id : 5] -Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] -Arguments: [i_class#8 ASC NULLS FIRST], false, 0 - -(22) Window -Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] -Arguments: [sum(_w1#20) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#8] - -(23) Project [codegen id : 6] -Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23, i_item_id#5] -Input [9]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5, _we0#22] - -(24) Exchange -Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23, i_item_id#5] -Arguments: rangepartitioning(i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST, 5), true, [id=#24] - -(25) Sort [codegen id : 7] -Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23, i_item_id#5] -Arguments: [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], true, 0 - -(26) Project [codegen id : 7] -Output [6]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23] -Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23, i_item_id#5] - +*(7) Project [i_item_desc#1, i_category#2, i_class#3, i_current_price#4, itemrevenue#5, revenueratio#6] ++- *(7) Sort [i_category#2 ASC NULLS FIRST, i_class#3 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#1 ASC NULLS FIRST, revenueratio#6 ASC NULLS FIRST], true, 0 + +- Exchange rangepartitioning(i_category#2 ASC NULLS FIRST, i_class#3 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#1 ASC NULLS FIRST, revenueratio#6 ASC NULLS FIRST, 5) + +- *(6) Project [i_item_desc#1, i_category#2, i_class#3, i_current_price#4, itemrevenue#5, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#8) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#9)), DecimalType(38,17)) AS revenueratio#6, i_item_id#7] + +- Window [sum(_w1#10) windowspecdefinition(i_class#3, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#9], [i_class#3] + +- *(5) Sort [i_class#3 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(i_class#3, 5) + +- *(4) HashAggregate(keys=[i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4], functions=[sum(UnscaledValue(ss_ext_sales_price#11))]) + +- Exchange hashpartitioning(i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4, 5) + +- *(3) HashAggregate(keys=[i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#11))]) + +- *(3) Project [ss_ext_sales_price#11, i_item_id#7, i_item_desc#1, i_current_price#4, i_class#3, i_category#2] + +- *(3) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight + :- *(3) Project [ss_sold_date_sk#12, ss_ext_sales_price#11, i_item_id#7, i_item_desc#1, i_current_price#4, i_class#3, i_category#2] + : +- *(3) BroadcastHashJoin [ss_item_sk#14], [i_item_sk#15], Inner, BuildRight + : :- *(3) Project [ss_sold_date_sk#12, ss_item_sk#14, ss_ext_sales_price#11] + : : +- *(3) Filter (isnotnull(ss_item_sk#14) && isnotnull(ss_sold_date_sk#12)) + : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_item_sk#14,ss_ext_sales_price#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(1) Project [i_item_sk#15, i_item_id#7, i_item_desc#1, i_current_price#4, i_class#3, i_category#2] + : +- *(1) Filter (i_category#2 IN (Sports,Books,Home) && isnotnull(i_item_sk#15)) + : +- *(1) FileScan parquet default.item[i_item_sk#15,i_item_id#7,i_item_desc#1,i_current_price#4,i_class#3,i_category#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)], ReadSchema: struct= 10644)) && (d_date#16 <= 10674)) && isnotnull(d_date_sk#13)) + +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/simplified.txt index 2af712236..914d8934b 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/simplified.txt @@ -1,42 +1,38 @@ -WholeStageCodegen (7) - Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,revenueratio] - Sort [i_category,i_class,i_item_id,i_item_desc,revenueratio] +WholeStageCodegen + Project [i_category,i_class,i_current_price,i_item_desc,itemrevenue,revenueratio] + Sort [i_category,i_class,i_item_desc,i_item_id,revenueratio] InputAdapter - Exchange [i_category,i_class,i_item_id,i_item_desc,revenueratio] #1 - WholeStageCodegen (6) - Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] + Exchange [i_category,i_class,i_item_desc,i_item_id,revenueratio] #1 + WholeStageCodegen + Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] InputAdapter Window [_w1,i_class] - WholeStageCodegen (5) + WholeStageCodegen Sort [i_class] InputAdapter Exchange [i_class] #2 - WholeStageCodegen (4) - HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(ss_ext_sales_price)),itemrevenue,_w0,_w1,sum] + WholeStageCodegen + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(ss_ext_sales_price))] InputAdapter - Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #3 - WholeStageCodegen (3) - HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #3 + WholeStageCodegen + HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen (1) - Filter [i_category,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] + WholeStageCodegen + Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] + Filter [i_category,i_item_sk] + Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen (2) + WholeStageCodegen Project [d_date_sk] Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/explain.txt index 595cb2984..2106af4de 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/explain.txt @@ -1,183 +1,32 @@ == Physical Plan == -TakeOrderedAndProject (32) -+- * HashAggregate (31) - +- Exchange (30) - +- * HashAggregate (29) - +- * Project (28) - +- * BroadcastHashJoin Inner BuildRight (27) - :- * Project (21) - : +- * BroadcastHashJoin Inner BuildRight (20) - : :- * Project (15) - : : +- * BroadcastHashJoin Inner BuildRight (14) - : : :- * Project (9) - : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.catalog_sales (1) - : : : +- BroadcastExchange (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.warehouse (4) - : : +- BroadcastExchange (13) - : : +- * Filter (12) - : : +- * ColumnarToRow (11) - : : +- Scan parquet default.ship_mode (10) - : +- BroadcastExchange (19) - : +- * Filter (18) - : +- * ColumnarToRow (17) - : +- Scan parquet default.call_center (16) - +- BroadcastExchange (26) - +- * Project (25) - +- * Filter (24) - +- * ColumnarToRow (23) - +- Scan parquet default.date_dim (22) - - -(1) Scan parquet default.catalog_sales -Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_ship_mode_sk), IsNotNull(cs_call_center_sk), IsNotNull(cs_ship_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 5] -Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5] - -(3) Filter [codegen id : 5] -Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5] -Condition : (((isnotnull(cs_warehouse_sk#5) AND isnotnull(cs_ship_mode_sk#4)) AND isnotnull(cs_call_center_sk#3)) AND isnotnull(cs_ship_date_sk#2)) - -(4) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/warehouse] -PushedFilters: [IsNotNull(w_warehouse_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] - -(6) Filter [codegen id : 1] -Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] -Condition : isnotnull(w_warehouse_sk#6) - -(7) BroadcastExchange -Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] - -(8) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_warehouse_sk#5] -Right keys [1]: [w_warehouse_sk#6] -Join condition: None - -(9) Project [codegen id : 5] -Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, w_warehouse_name#7] -Input [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5, w_warehouse_sk#6, w_warehouse_name#7] - -(10) Scan parquet default.ship_mode -Output [2]: [sm_ship_mode_sk#9, sm_type#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/ship_mode] -PushedFilters: [IsNotNull(sm_ship_mode_sk)] -ReadSchema: struct - -(11) ColumnarToRow [codegen id : 2] -Input [2]: [sm_ship_mode_sk#9, sm_type#10] - -(12) Filter [codegen id : 2] -Input [2]: [sm_ship_mode_sk#9, sm_type#10] -Condition : isnotnull(sm_ship_mode_sk#9) - -(13) BroadcastExchange -Input [2]: [sm_ship_mode_sk#9, sm_type#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] - -(14) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_ship_mode_sk#4] -Right keys [1]: [sm_ship_mode_sk#9] -Join condition: None - -(15) Project [codegen id : 5] -Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, w_warehouse_name#7, sm_type#10] -Input [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, w_warehouse_name#7, sm_ship_mode_sk#9, sm_type#10] - -(16) Scan parquet default.call_center -Output [2]: [cc_call_center_sk#12, cc_name#13] -Batched: true -Location [not included in comparison]/{warehouse_dir}/call_center] -PushedFilters: [IsNotNull(cc_call_center_sk)] -ReadSchema: struct - -(17) ColumnarToRow [codegen id : 3] -Input [2]: [cc_call_center_sk#12, cc_name#13] - -(18) Filter [codegen id : 3] -Input [2]: [cc_call_center_sk#12, cc_name#13] -Condition : isnotnull(cc_call_center_sk#12) - -(19) BroadcastExchange -Input [2]: [cc_call_center_sk#12, cc_name#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] - -(20) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_call_center_sk#3] -Right keys [1]: [cc_call_center_sk#12] -Join condition: None - -(21) Project [codegen id : 5] -Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#7, sm_type#10, cc_name#13] -Input [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, w_warehouse_name#7, sm_type#10, cc_call_center_sk#12, cc_name#13] - -(22) Scan parquet default.date_dim -Output [2]: [d_date_sk#15, d_month_seq#16] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] -ReadSchema: struct - -(23) ColumnarToRow [codegen id : 4] -Input [2]: [d_date_sk#15, d_month_seq#16] - -(24) Filter [codegen id : 4] -Input [2]: [d_date_sk#15, d_month_seq#16] -Condition : (((isnotnull(d_month_seq#16) AND (d_month_seq#16 >= 1200)) AND (d_month_seq#16 <= 1211)) AND isnotnull(d_date_sk#15)) - -(25) Project [codegen id : 4] -Output [1]: [d_date_sk#15] -Input [2]: [d_date_sk#15, d_month_seq#16] - -(26) BroadcastExchange -Input [1]: [d_date_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] - -(27) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_ship_date_sk#2] -Right keys [1]: [d_date_sk#15] -Join condition: None - -(28) Project [codegen id : 5] -Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#7, sm_type#10, cc_name#13] -Input [6]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#7, sm_type#10, cc_name#13, d_date_sk#15] - -(29) HashAggregate [codegen id : 5] -Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#7, sm_type#10, cc_name#13] -Keys [3]: [substr(w_warehouse_name#7, 1, 20) AS substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13] -Functions [5]: [partial_sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] -Results [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] - -(30) Exchange -Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] -Arguments: hashpartitioning(substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, 5), true, [id=#29] - -(31) HashAggregate [codegen id : 6] -Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] -Keys [3]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13] -Functions [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34] -Results [8]: [substr(w_warehouse_name#7, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] - -(32) TakeOrderedAndProject -Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] -Arguments: 100, [substr(w_warehouse_name, 1, 20)#35 ASC NULLS FIRST, sm_type#10 ASC NULLS FIRST, cc_name#13 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] - +TakeOrderedAndProject(limit=100, orderBy=[substring(w_warehouse_name, 1, 20)#1 ASC NULLS FIRST,sm_type#2 ASC NULLS FIRST,cc_name#3 ASC NULLS FIRST], output=[substring(w_warehouse_name, 1, 20)#1,sm_type#2,cc_name#3,30 days #4,31 - 60 days #5,61 - 90 days #6,91 - 120 days #7,>120 days #8]) ++- *(6) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3], functions=[sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 30) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 60) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 90) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) + +- Exchange hashpartitioning(substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3, 5) + +- *(5) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20) AS substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3], functions=[partial_sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 30) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 60) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 90) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) + +- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, w_warehouse_name#9, sm_type#2, cc_name#3] + +- *(5) BroadcastHashJoin [cs_ship_date_sk#11], [d_date_sk#13], Inner, BuildRight + :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, w_warehouse_name#9, sm_type#2, cc_name#3] + : +- *(5) BroadcastHashJoin [cs_call_center_sk#14], [cc_call_center_sk#15], Inner, BuildRight + : :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, cs_call_center_sk#14, w_warehouse_name#9, sm_type#2] + : : +- *(5) BroadcastHashJoin [cs_ship_mode_sk#16], [sm_ship_mode_sk#17], Inner, BuildRight + : : :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, cs_call_center_sk#14, cs_ship_mode_sk#16, w_warehouse_name#9] + : : : +- *(5) BroadcastHashJoin [cs_warehouse_sk#18], [w_warehouse_sk#19], Inner, BuildRight + : : : :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, cs_call_center_sk#14, cs_ship_mode_sk#16, cs_warehouse_sk#18] + : : : : +- *(5) Filter (((isnotnull(cs_warehouse_sk#18) && isnotnull(cs_ship_mode_sk#16)) && isnotnull(cs_call_center_sk#14)) && isnotnull(cs_ship_date_sk#11)) + : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_ship_date_sk#11,cs_call_center_sk#14,cs_ship_mode_sk#16,cs_warehouse_sk#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_ship_mode_sk), IsNotNull(cs_call_center_sk), IsNotNull(..., ReadSchema: struct + : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : : +- *(2) Project [sm_ship_mode_sk#17, sm_type#2] + : : +- *(2) Filter isnotnull(sm_ship_mode_sk#17) + : : +- *(2) FileScan parquet default.ship_mode[sm_ship_mode_sk#17,sm_type#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/ship_mode], PartitionFilters: [], PushedFilters: [IsNotNull(sm_ship_mode_sk)], ReadSchema: struct + : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + : +- *(3) Project [cc_call_center_sk#15, cc_name#3] + : +- *(3) Filter isnotnull(cc_call_center_sk#15) + : +- *(3) FileScan parquet default.call_center[cc_call_center_sk#15,cc_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(4) Project [d_date_sk#13] + +- *(4) Filter (((isnotnull(d_month_seq#20) && (d_month_seq#20 >= 1200)) && (d_month_seq#20 <= 1211)) && isnotnull(d_date_sk#13)) + +- *(4) FileScan parquet default.date_dim[d_date_sk#13,d_month_seq#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/simplified.txt index 9ebaaac52..4e22db2fe 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/simplified.txt @@ -1,48 +1,42 @@ -TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,cc_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] - WholeStageCodegen (6) - HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,cc_name,sum,sum,sum,sum,sum] [sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] +TakeOrderedAndProject [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,cc_name,sm_type,substring(w_warehouse_name, 1, 20)] + WholeStageCodegen + HashAggregate [cc_name,sm_type,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint))] [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint))] InputAdapter - Exchange [substr(w_warehouse_name, 1, 20),sm_type,cc_name] #1 - WholeStageCodegen (5) - HashAggregate [w_warehouse_name,sm_type,cc_name,cs_ship_date_sk,cs_sold_date_sk] [sum,sum,sum,sum,sum,substr(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum] - Project [cs_sold_date_sk,cs_ship_date_sk,w_warehouse_name,sm_type,cc_name] + Exchange [cc_name,sm_type,substring(w_warehouse_name, 1, 20)] #1 + WholeStageCodegen + HashAggregate [cc_name,cs_ship_date_sk,cs_sold_date_sk,sm_type,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_warehouse_name] [substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [cc_name,cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name] BroadcastHashJoin [cs_ship_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_ship_date_sk,w_warehouse_name,sm_type,cc_name] - BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] - Project [cs_sold_date_sk,cs_ship_date_sk,cs_call_center_sk,w_warehouse_name,sm_type] + Project [cc_name,cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name] + BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] + Project [cs_call_center_sk,cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name] BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] - Project [cs_sold_date_sk,cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,w_warehouse_name] + Project [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,w_warehouse_name] BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] - Filter [cs_warehouse_sk,cs_ship_mode_sk,cs_call_center_sk,cs_ship_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk] + Project [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,cs_warehouse_sk] + Filter [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_warehouse_sk] + Scan parquet default.catalog_sales [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,cs_warehouse_sk] [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,cs_warehouse_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen (1) - Filter [w_warehouse_sk] - ColumnarToRow - InputAdapter - Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] + WholeStageCodegen + Project [w_warehouse_name,w_warehouse_sk] + Filter [w_warehouse_sk] + Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (2) - Filter [sm_ship_mode_sk] - ColumnarToRow - InputAdapter - Scan parquet default.ship_mode [sm_ship_mode_sk,sm_type] + WholeStageCodegen + Project [sm_ship_mode_sk,sm_type] + Filter [sm_ship_mode_sk] + Scan parquet default.ship_mode [sm_ship_mode_sk,sm_type] [sm_ship_mode_sk,sm_type] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) - Filter [cc_call_center_sk] - ColumnarToRow - InputAdapter - Scan parquet default.call_center [cc_call_center_sk,cc_name] + WholeStageCodegen + Project [cc_call_center_sk,cc_name] + Filter [cc_call_center_sk] + Scan parquet default.call_center [cc_call_center_sk,cc_name] [cc_call_center_sk,cc_name] InputAdapter BroadcastExchange #5 - WholeStageCodegen (4) + WholeStageCodegen Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + Filter [d_date_sk,d_month_seq] + Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] From 32f5899758f9780fd1c05334cac11a4d00523367 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Mon, 15 Mar 2021 14:28:32 -0700 Subject: [PATCH 22/31] normalize location: fix --- .../approved-plans-v1_4/q1/explain.txt | 10 +- .../approved-plans-v1_4/q10/explain.txt | 14 +- .../approved-plans-v1_4/q11/explain.txt | 16 +- .../approved-plans-v1_4/q12/explain.txt | 6 +- .../approved-plans-v1_4/q13/explain.txt | 12 +- .../approved-plans-v1_4/q14a/explain.txt | 48 +- .../approved-plans-v1_4/q14b/explain.txt | 48 +- .../approved-plans-v1_4/q15/explain.txt | 8 +- .../approved-plans-v1_4/q16/explain.txt | 12 +- .../approved-plans-v1_4/q17/explain.txt | 16 +- .../approved-plans-v1_4/q18/explain.txt | 14 +- .../approved-plans-v1_4/q19/explain.txt | 12 +- .../approved-plans-v1_4/q2/explain.txt | 10 +- .../approved-plans-v1_4/q20/explain.txt | 6 +- .../approved-plans-v1_4/q21/explain.txt | 8 +- .../approved-plans-v1_4/q22/explain.txt | 8 +- .../approved-plans-v1_4/q23a/explain.txt | 22 +- .../approved-plans-v1_4/q23b/explain.txt | 24 +- .../approved-plans-v1_4/q24a/explain.txt | 24 +- .../approved-plans-v1_4/q24b/explain.txt | 24 +- .../approved-plans-v1_4/q25/explain.txt | 16 +- .../approved-plans-v1_4/q26/explain.txt | 10 +- .../approved-plans-v1_4/q27/explain.txt | 10 +- .../approved-plans-v1_4/q28/explain.txt | 12 +- .../approved-plans-v1_4/q29/explain.txt | 16 +- .../approved-plans-v1_4/q3/explain.txt | 6 +- .../approved-plans-v1_4/q30/explain.txt | 12 +- .../approved-plans-v1_4/q31/explain.txt | 20 +- .../approved-plans-v1_4/q32/explain.txt | 8 +- .../approved-plans-v1_4/q33/explain.txt | 14 +- .../approved-plans-v1_4/q34/explain.txt | 10 +- .../approved-plans-v1_4/q35/explain.txt | 14 +- .../approved-plans-v1_4/q36/explain.txt | 8 +- .../approved-plans-v1_4/q37/explain.txt | 8 +- .../approved-plans-v1_4/q38/explain.txt | 10 +- .../approved-plans-v1_4/q39a/explain.txt | 12 +- .../approved-plans-v1_4/q39b/explain.txt | 12 +- .../approved-plans-v1_4/q4/explain.txt | 22 +- .../approved-plans-v1_4/q40/explain.txt | 10 +- .../approved-plans-v1_4/q41/explain.txt | 4 +- .../approved-plans-v1_4/q42/explain.txt | 6 +- .../approved-plans-v1_4/q43/explain.txt | 6 +- .../approved-plans-v1_4/q44/explain.txt | 8 +- .../approved-plans-v1_4/q45/explain.txt | 12 +- .../approved-plans-v1_4/q46/explain.txt | 12 +- .../approved-plans-v1_4/q47/explain.txt | 8 +- .../approved-plans-v1_4/q48/explain.txt | 10 +- .../approved-plans-v1_4/q49/explain.txt | 433 ------------------ .../approved-plans-v1_4/q49/simplified.txt | 126 ----- .../approved-plans-v1_4/q5/explain.txt | 24 +- .../approved-plans-v1_4/q50/explain.txt | 10 +- .../approved-plans-v1_4/q51/explain.txt | 6 +- .../approved-plans-v1_4/q52/explain.txt | 6 +- .../approved-plans-v1_4/q53/explain.txt | 8 +- .../approved-plans-v1_4/q54/explain.txt | 26 +- .../approved-plans-v1_4/q55/explain.txt | 6 +- .../approved-plans-v1_4/q56/explain.txt | 14 +- .../approved-plans-v1_4/q57/explain.txt | 8 +- .../approved-plans-v1_4/q58/explain.txt | 32 +- .../approved-plans-v1_4/q59/explain.txt | 12 +- .../approved-plans-v1_4/q6/explain.txt | 16 +- .../approved-plans-v1_4/q60/explain.txt | 14 +- .../approved-plans-v1_4/q61/explain.txt | 16 +- .../approved-plans-v1_4/q62/explain.txt | 10 +- .../approved-plans-v1_4/q63/explain.txt | 8 +- .../approved-plans-v1_4/q64/explain.txt | 32 +- .../approved-plans-v1_4/q65/explain.txt | 10 +- .../approved-plans-v1_4/q66/explain.txt | 12 +- .../approved-plans-v1_4/q67/explain.txt | 8 +- .../approved-plans-v1_4/q68/explain.txt | 12 +- .../approved-plans-v1_4/q69/explain.txt | 14 +- .../approved-plans-v1_4/q7/explain.txt | 10 +- .../approved-plans-v1_4/q70/explain.txt | 10 +- .../approved-plans-v1_4/q71/explain.txt | 12 +- .../approved-plans-v1_4/q72/explain.txt | 22 +- .../approved-plans-v1_4/q73/explain.txt | 10 +- .../approved-plans-v1_4/q74/explain.txt | 16 +- .../approved-plans-v1_4/q75/explain.txt | 24 +- .../approved-plans-v1_4/q76/explain.txt | 10 +- .../approved-plans-v1_4/q77/explain.txt | 24 +- .../approved-plans-v1_4/q78/explain.txt | 14 +- .../approved-plans-v1_4/q79/explain.txt | 10 +- .../approved-plans-v1_4/q8/explain.txt | 12 +- .../approved-plans-v1_4/q80/explain.txt | 24 +- .../approved-plans-v1_4/q81/explain.txt | 12 +- .../approved-plans-v1_4/q82/explain.txt | 8 +- .../approved-plans-v1_4/q83/explain.txt | 18 +- .../approved-plans-v1_4/q84/explain.txt | 12 +- .../approved-plans-v1_4/q85/explain.txt | 16 +- .../approved-plans-v1_4/q86/explain.txt | 6 +- .../approved-plans-v1_4/q87/explain.txt | 10 +- .../approved-plans-v1_4/q88/explain.txt | 36 +- .../approved-plans-v1_4/q89/explain.txt | 8 +- .../approved-plans-v1_4/q9/explain.txt | 32 +- .../approved-plans-v1_4/q90/explain.txt | 12 +- .../approved-plans-v1_4/q91/explain.txt | 14 +- .../approved-plans-v1_4/q92/explain.txt | 8 +- .../approved-plans-v1_4/q93/explain.txt | 6 +- .../approved-plans-v1_4/q94/explain.txt | 12 +- .../approved-plans-v1_4/q95/explain.txt | 16 +- .../approved-plans-v1_4/q96/explain.txt | 8 +- .../approved-plans-v1_4/q97/explain.txt | 6 +- .../approved-plans-v1_4/q98/explain.txt | 6 +- .../approved-plans-v1_4/q99/explain.txt | 10 +- .../goldstandard/PlanStabilitySuite.scala | 2 +- 105 files changed, 699 insertions(+), 1258 deletions(-) delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/explain.txt delete mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/simplified.txt diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/explain.txt index 1e2caffbe..7899a44d0 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q1/explain.txt @@ -14,11 +14,11 @@ TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST], outp : : : +- *(2) BroadcastHashJoin [sr_returned_date_sk#12], [cast(d_date_sk#13 as bigint)], Inner, BuildRight : : : :- *(2) Project [sr_returned_date_sk#12, sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] : : : : +- *(2) Filter ((isnotnull(sr_returned_date_sk#12) && isnotnull(sr_store_sk#10)) && isnotnull(sr_customer_sk#9)) - : : : : +- *(2) FileScan parquet default.store_returns[sr_returned_date_sk#12,sr_customer_sk#9,sr_store_sk#10,sr_return_amt#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk), IsNotNull(sr_customer_sk)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#13,d_year#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, bigint, true])) : : +- *(6) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#8) : : +- *(6) HashAggregate(keys=[ctr_store_sk#4], functions=[avg(ctr_total_return#7)]) @@ -31,13 +31,13 @@ TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST], outp : : +- *(4) BroadcastHashJoin [sr_returned_date_sk#12], [cast(d_date_sk#13 as bigint)], Inner, BuildRight : : :- *(4) Project [sr_returned_date_sk#12, sr_customer_sk#9, sr_store_sk#10, sr_return_amt#11] : : : +- *(4) Filter (isnotnull(sr_returned_date_sk#12) && isnotnull(sr_store_sk#10)) - : : : +- *(4) FileScan parquet default.store_returns[sr_returned_date_sk#12,sr_customer_sk#9,sr_store_sk#10,sr_return_amt#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)], ReadSchema: struct + : +- *(7) FileScan parquet default.store[s_store_sk#5,s_state#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(8) Project [c_customer_sk#3, c_customer_id#1] +- *(8) Filter isnotnull(c_customer_sk#3) - +- *(8) FileScan parquet default.customer[c_customer_sk#3,c_customer_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct \ No newline at end of file + +- *(8) FileScan parquet default.customer[c_customer_sk#3,c_customer_id#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/explain.txt index 678ed7254..1462b13af 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/explain.txt @@ -14,36 +14,36 @@ TakeOrderedAndProject(limit=100, orderBy=[cd_gender#1 ASC NULLS FIRST,cd_marital : : : :- *(9) BroadcastHashJoin [c_customer_sk#21], [ss_customer_sk#24], LeftSemi, BuildRight : : : : :- *(9) Project [c_customer_sk#21, c_current_cdemo_sk#15, c_current_addr_sk#17] : : : : : +- *(9) Filter (isnotnull(c_current_addr_sk#17) && isnotnull(c_current_cdemo_sk#15)) - : : : : : +- *(9) FileScan parquet default.customer[c_customer_sk#21,c_current_cdemo_sk#15,c_current_addr_sk#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)], ReadSchema: struct + : : : : : +- *(9) FileScan parquet default.customer[c_customer_sk#21,c_current_cdemo_sk#15,c_current_addr_sk#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(2) Project [ss_customer_sk#24] : : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#25], [d_date_sk#26], Inner, BuildRight : : : : :- *(2) Project [ss_sold_date_sk#25, ss_customer_sk#24] : : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#25) - : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#25,ss_customer_sk#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#25,ss_customer_sk#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(1) Project [d_date_sk#26] : : : : +- *(1) Filter (((((isnotnull(d_year#27) && isnotnull(d_moy#28)) && (d_year#27 = 2002)) && (d_moy#28 >= 1)) && (d_moy#28 <= 4)) && isnotnull(d_date_sk#26)) - : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#26,d_year#27,d_moy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,1), LessThan..., ReadSchema: struct + : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#26,d_year#27,d_moy#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,1), LessThan..., ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(4) Project [ws_bill_customer_sk#23] : : : +- *(4) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#26], Inner, BuildRight : : : :- *(4) Project [ws_sold_date_sk#29, ws_bill_customer_sk#23] : : : : +- *(4) Filter isnotnull(ws_sold_date_sk#29) - : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#29,ws_bill_customer_sk#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#29,ws_bill_customer_sk#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct : : : +- ReusedExchange [d_date_sk#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(6) Project [cs_ship_customer_sk#22] : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#30], [d_date_sk#26], Inner, BuildRight : : :- *(6) Project [cs_sold_date_sk#30, cs_ship_customer_sk#22] : : : +- *(6) Filter isnotnull(cs_sold_date_sk#30) - : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#30,cs_ship_customer_sk#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#30,cs_ship_customer_sk#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : +- ReusedExchange [d_date_sk#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(7) Project [ca_address_sk#18] : +- *(7) Filter (ca_county#31 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) && isnotnull(ca_address_sk#18)) - : +- *(7) FileScan parquet default.customer_address[ca_address_sk#18,ca_county#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [In(ca_county, [Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County]), IsNo..., ReadSchema: struct + : +- *(7) FileScan parquet default.customer_address[ca_address_sk#18,ca_county#31] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [In(ca_county, [Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County]), IsNo..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(8) Project [cd_demo_sk#16, cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8] +- *(8) Filter isnotnull(cd_demo_sk#16) - +- *(8) FileScan parquet default.customer_demographics[cd_demo_sk#16,cd_gender#1,cd_marital_status#2,cd_education_status#3,cd_purchase_estimate#4,cd_credit_rating#5,cd_dep_count#6,cd_dep_employed_count#7,cd_dep_college_count#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#21,d_year#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct : : : +- LocalTableScan , [customer_id#24, year_total#25] : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : : +- Union @@ -38,12 +38,12 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_preferred_cust_flag#1 ASC NUL : : : : +- *(7) BroadcastHashJoin [c_customer_sk#22], [ss_customer_sk#23], Inner, BuildRight : : : : :- *(7) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] : : : : : +- *(7) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) - : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#21,d_year#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)], ReadSchema: struct : : +- LocalTableScan , [customer_id#24, customer_preferred_cust_flag#26, year_total#25] : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : +- Union @@ -58,11 +58,11 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_preferred_cust_flag#1 ASC NUL : : +- *(11) BroadcastHashJoin [c_customer_sk#22], [ws_bill_customer_sk#30], Inner, BuildRight : : :- *(11) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] : : : +- *(11) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) - : : : +- *(11) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#14,ws_ext_sales_price#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(1) Project [i_item_sk#15, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] : +- *(1) Filter (i_category#1 IN (Sports,Books,Home) && isnotnull(i_item_sk#15)) - : +- *(1) FileScan parquet default.item[i_item_sk#15,i_item_id#3,i_item_desc#4,i_current_price#6,i_class#2,i_category#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)], ReadSchema: struct= 10644)) && (d_date#16 <= 10674)) && isnotnull(d_date_sk#13)) - +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file + +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/explain.txt index 1e61c9ee5..bf01d0881 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/explain.txt @@ -14,24 +14,24 @@ : : : : +- *(6) BroadcastHashJoin [ss_store_sk#18], [s_store_sk#19], Inner, BuildRight : : : : :- *(6) Project [ss_sold_date_sk#12, ss_cdemo_sk#10, ss_hdemo_sk#4, ss_addr_sk#14, ss_store_sk#18, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3, ss_net_profit#17] : : : : : +- *(6) Filter ((((isnotnull(ss_store_sk#18) && isnotnull(ss_addr_sk#14)) && isnotnull(ss_sold_date_sk#12)) && isnotnull(ss_cdemo_sk#10)) && isnotnull(ss_hdemo_sk#4)) - : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_cdemo_sk#10,ss_hdemo_sk#4,ss_addr_sk#14,ss_store_sk#18,ss_quantity#1,ss_sales_price#8,ss_ext_sales_price#2,ss_ext_wholesale_cost#3,ss_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_cdemo_sk..., ReadSchema: struct + : : : : +- *(1) FileScan parquet default.store[s_store_sk#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(2) Project [ca_address_sk#15, ca_state#16] : : : +- *(2) Filter ((isnotnull(ca_country#20) && (ca_country#20 = United States)) && isnotnull(ca_address_sk#15)) - : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#16,ca_country#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct + : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#16,ca_country#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(3) Project [d_date_sk#13] : : +- *(3) Filter ((isnotnull(d_year#21) && (d_year#21 = 2001)) && isnotnull(d_date_sk#13)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#13,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#13,d_year#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(4) Project [cd_demo_sk#11, cd_marital_status#6, cd_education_status#7] : +- *(4) Filter isnotnull(cd_demo_sk#11) - : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#11,cd_marital_status#6,cd_education_status#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#11,cd_marital_status#6,cd_education_status#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(5) Project [hd_demo_sk#5, hd_dep_count#9] +- *(5) Filter isnotnull(hd_demo_sk#5) - +- *(5) FileScan parquet default.household_demographics[hd_demo_sk#5,hd_dep_count#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk)], ReadSchema: struct \ No newline at end of file + +- *(5) FileScan parquet default.household_demographics[hd_demo_sk#5,hd_dep_count#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/explain.txt index 0acde64df..0e062f45f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/explain.txt @@ -16,22 +16,22 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight : : : :- *(2) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15] : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#19) - : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(1) Project [d_date_sk#20] : : : +- *(1) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct : : :- *(4) Project [cs_quantity#22 AS quantity#23, cs_list_price#24 AS list_price#25] : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight : : : :- *(4) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24] : : : : +- *(4) Filter isnotnull(cs_sold_date_sk#26) - : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(6) Project [ws_quantity#27 AS quantity#28, ws_list_price#29 AS list_price#30] : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight : : :- *(6) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29] : : : +- *(6) Filter isnotnull(ws_sold_date_sk#31) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(26) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2))), count(1)]) : +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 5) @@ -43,13 +43,13 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : :- *(25) BroadcastHashJoin [ss_item_sk#32], [ss_item_sk#34], LeftSemi, BuildRight : : : :- *(25) Project [ss_sold_date_sk#19, ss_item_sk#32, ss_quantity#14, ss_list_price#15] : : : : +- *(25) Filter (isnotnull(ss_item_sk#32) && isnotnull(ss_sold_date_sk#19)) - : : : : +- *(25) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#32,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- *(25) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#32,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(11) Project [i_item_sk#33 AS ss_item_sk#34] : : : +- *(11) BroadcastHashJoin [i_brand_id#11, i_class_id#12, i_category_id#13], [brand_id#35, class_id#36, category_id#37], Inner, BuildRight : : : :- *(11) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] : : : : +- *(11) Filter ((isnotnull(i_class_id#12) && isnotnull(i_brand_id#11)) && isnotnull(i_category_id#13)) - : : : : +- *(11) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : : +- *(11) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) : : : +- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) : : : +- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) @@ -64,15 +64,15 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : : : : +- *(6) BroadcastHashJoin [ss_item_sk#32], [i_item_sk#33], Inner, BuildRight : : : : : : :- *(6) Project [ss_sold_date_sk#19, ss_item_sk#32] : : : : : : : +- *(6) Filter (isnotnull(ss_item_sk#32) && isnotnull(ss_sold_date_sk#19)) - : : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#32] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : +- *(1) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#33) && isnotnull(i_class_id#12)) && isnotnull(i_brand_id#11)) && isnotnull(i_category_id#13)) - : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- *(2) Project [d_date_sk#20] : : : : : +- *(2) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) - : : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) : : : : +- *(5) Project [i_brand_id#11, i_class_id#12, i_category_id#13] : : : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight @@ -80,11 +80,11 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : : : +- *(5) BroadcastHashJoin [cs_item_sk#38], [i_item_sk#33], Inner, BuildRight : : : : : :- *(5) Project [cs_sold_date_sk#26, cs_item_sk#38] : : : : : : +- *(5) Filter (isnotnull(cs_item_sk#38) && isnotnull(cs_sold_date_sk#26)) - : : : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_item_sk#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_item_sk#38] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- *(3) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] : : : : : +- *(3) Filter isnotnull(i_item_sk#33) - : : : : : +- *(3) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : : : +- *(3) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct : : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) : : : +- *(9) Project [i_brand_id#11, i_class_id#12, i_category_id#13] @@ -93,19 +93,19 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : : +- *(9) BroadcastHashJoin [ws_item_sk#39], [i_item_sk#33], Inner, BuildRight : : : : :- *(9) Project [ws_sold_date_sk#31, ws_item_sk#39] : : : : : +- *(9) Filter (isnotnull(ws_item_sk#39) && isnotnull(ws_sold_date_sk#31)) - : : : : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_item_sk#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_item_sk#39] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct : : : : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(23) BroadcastHashJoin [i_item_sk#33], [ss_item_sk#34], LeftSemi, BuildRight : : :- *(23) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] : : : +- *(23) Filter isnotnull(i_item_sk#33) - : : : +- *(23) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : +- *(23) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct : : +- ReusedExchange [ss_item_sk#34], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(24) Project [d_date_sk#20] : +- *(24) Filter ((((isnotnull(d_year#21) && isnotnull(d_moy#40)) && (d_year#21 = 2001)) && (d_moy#40 = 11)) && isnotnull(d_date_sk#20)) - : +- *(24) FileScan parquet default.date_dim[d_date_sk#20,d_year#21,d_moy#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct + : +- *(24) FileScan parquet default.date_dim[d_date_sk#20,d_year#21,d_moy#40] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct :- *(52) Project [sales#41, number_sales#42, channel#43, i_brand_id#11, i_class_id#12, i_category_id#13] : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44) && (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44 as decimal(32,6)) > cast(Subquery subquery1667 as decimal(32,6)))) : : +- Subquery subquery1667 @@ -117,22 +117,22 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight : : : :- *(2) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15] : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#19) - : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(1) Project [d_date_sk#20] : : : +- *(1) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct : : :- *(4) Project [cs_quantity#22 AS quantity#23, cs_list_price#24 AS list_price#25] : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight : : : :- *(4) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24] : : : : +- *(4) Filter isnotnull(cs_sold_date_sk#26) - : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(6) Project [ws_quantity#27 AS quantity#28, ws_list_price#29 AS list_price#30] : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight : : :- *(6) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29] : : : +- *(6) Filter isnotnull(ws_sold_date_sk#31) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(52) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2))), count(1)]) : +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 5) @@ -144,7 +144,7 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : :- *(51) BroadcastHashJoin [cs_item_sk#38], [ss_item_sk#34], LeftSemi, BuildRight : : : :- *(51) Project [cs_sold_date_sk#26, cs_item_sk#38, cs_quantity#22, cs_list_price#24] : : : : +- *(51) Filter (isnotnull(cs_item_sk#38) && isnotnull(cs_sold_date_sk#26)) - : : : : +- *(51) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_item_sk#38,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : : +- *(51) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_item_sk#38,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : : +- ReusedExchange [ss_item_sk#34], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) @@ -159,22 +159,22 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight : : :- *(2) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15] : : : +- *(2) Filter isnotnull(ss_sold_date_sk#19) - : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(1) Project [d_date_sk#20] : : +- *(1) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) - : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct : :- *(4) Project [cs_quantity#22 AS quantity#23, cs_list_price#24 AS list_price#25] : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight : : :- *(4) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24] : : : +- *(4) Filter isnotnull(cs_sold_date_sk#26) - : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(6) Project [ws_quantity#27 AS quantity#28, ws_list_price#29 AS list_price#30] : +- *(6) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight : :- *(6) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29] : : +- *(6) Filter isnotnull(ws_sold_date_sk#31) - : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(78) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2))), count(1)]) +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 5) @@ -186,7 +186,7 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : :- *(77) BroadcastHashJoin [ws_item_sk#39], [ss_item_sk#34], LeftSemi, BuildRight : : :- *(77) Project [ws_sold_date_sk#31, ws_item_sk#39, ws_quantity#27, ws_list_price#29] : : : +- *(77) Filter (isnotnull(ws_item_sk#39) && isnotnull(ws_sold_date_sk#31)) - : : : +- *(77) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_item_sk#39,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : +- *(77) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_item_sk#39,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct : : +- ReusedExchange [ss_item_sk#34], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/explain.txt index 858eda8d6..4d49d4d7a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/explain.txt @@ -12,22 +12,22 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight : : : :- *(2) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14] : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#18) - : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(1) Project [d_date_sk#19] : : : +- *(1) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct : : :- *(4) Project [cs_quantity#21 AS quantity#22, cs_list_price#23 AS list_price#24] : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#19], Inner, BuildRight : : : :- *(4) Project [cs_sold_date_sk#25, cs_quantity#21, cs_list_price#23] : : : : +- *(4) Filter isnotnull(cs_sold_date_sk#25) - : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_quantity#21,cs_list_price#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_quantity#21,cs_list_price#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(6) Project [ws_quantity#26 AS quantity#27, ws_list_price#28 AS list_price#29] : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#30], [d_date_sk#19], Inner, BuildRight : : :- *(6) Project [ws_sold_date_sk#30, ws_quantity#26, ws_list_price#28] : : : +- *(6) Filter isnotnull(ws_sold_date_sk#30) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_quantity#26,ws_list_price#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_quantity#26,ws_list_price#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(52) HashAggregate(keys=[i_brand_id#2, i_class_id#3, i_category_id#4], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), count(1)]) : +- Exchange hashpartitioning(i_brand_id#2, i_class_id#3, i_category_id#4, 5) @@ -39,13 +39,13 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : :- *(25) BroadcastHashJoin [ss_item_sk#31], [ss_item_sk#33], LeftSemi, BuildRight : : : :- *(25) Project [ss_sold_date_sk#18, ss_item_sk#31, ss_quantity#13, ss_list_price#14] : : : : +- *(25) Filter (isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#18)) - : : : : +- *(25) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- *(25) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(11) Project [i_item_sk#32 AS ss_item_sk#33] : : : +- *(11) BroadcastHashJoin [i_brand_id#2, i_class_id#3, i_category_id#4], [brand_id#34, class_id#35, category_id#36], Inner, BuildRight : : : :- *(11) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] : : : : +- *(11) Filter ((isnotnull(i_class_id#3) && isnotnull(i_brand_id#2)) && isnotnull(i_category_id#4)) - : : : : +- *(11) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : : +- *(11) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) : : : +- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) : : : +- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) @@ -60,15 +60,15 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : : : : +- *(6) BroadcastHashJoin [ss_item_sk#31], [i_item_sk#32], Inner, BuildRight : : : : : : :- *(6) Project [ss_sold_date_sk#18, ss_item_sk#31] : : : : : : : +- *(6) Filter (isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#18)) - : : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : +- *(1) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_class_id#3)) && isnotnull(i_brand_id#2)) && isnotnull(i_category_id#4)) - : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct + : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- *(2) Project [d_date_sk#19] : : : : : +- *(2) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) - : : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) : : : : +- *(5) Project [i_brand_id#2, i_class_id#3, i_category_id#4] : : : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#19], Inner, BuildRight @@ -76,11 +76,11 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : : : +- *(5) BroadcastHashJoin [cs_item_sk#37], [i_item_sk#32], Inner, BuildRight : : : : : :- *(5) Project [cs_sold_date_sk#25, cs_item_sk#37] : : : : : : +- *(5) Filter (isnotnull(cs_item_sk#37) && isnotnull(cs_sold_date_sk#25)) - : : : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_item_sk#37] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_item_sk#37] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- *(3) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] : : : : : +- *(3) Filter isnotnull(i_item_sk#32) - : : : : : +- *(3) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : : : +- *(3) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct : : : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) : : : +- *(9) Project [i_brand_id#2, i_class_id#3, i_category_id#4] @@ -89,14 +89,14 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : : : +- *(9) BroadcastHashJoin [ws_item_sk#38], [i_item_sk#32], Inner, BuildRight : : : : :- *(9) Project [ws_sold_date_sk#30, ws_item_sk#38] : : : : : +- *(9) Filter (isnotnull(ws_item_sk#38) && isnotnull(ws_sold_date_sk#30)) - : : : : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_item_sk#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_item_sk#38] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct : : : : +- ReusedExchange [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(23) BroadcastHashJoin [i_item_sk#32], [ss_item_sk#33], LeftSemi, BuildRight : : :- *(23) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] : : : +- *(23) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_class_id#3)) && isnotnull(i_category_id#4)) && isnotnull(i_brand_id#2)) - : : : +- *(23) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_brand_id)], ReadSchema: struct + : : : +- *(23) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_brand_id)], ReadSchema: struct : : +- ReusedExchange [ss_item_sk#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(24) Project [d_date_sk#19] @@ -104,12 +104,12 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : +- Subquery subquery1883 : : +- *(1) Project [d_week_seq#39] : : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - : : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct - : +- *(24) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct + : +- *(24) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#39] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct : +- Subquery subquery1883 : +- *(1) Project [d_week_seq#39] : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct + : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true])) +- *(51) Project [channel#7, i_brand_id#8, i_class_id#9, i_category_id#10, sales#11, number_sales#12] +- *(51) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42 as decimal(32,6)) > cast(Subquery subquery1890 as decimal(32,6)))) @@ -122,22 +122,22 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight : : :- *(2) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14] : : : +- *(2) Filter isnotnull(ss_sold_date_sk#18) - : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(1) Project [d_date_sk#19] : : +- *(1) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) - : : +- *(1) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct : :- *(4) Project [cs_quantity#21 AS quantity#22, cs_list_price#23 AS list_price#24] : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#19], Inner, BuildRight : : :- *(4) Project [cs_sold_date_sk#25, cs_quantity#21, cs_list_price#23] : : : +- *(4) Filter isnotnull(cs_sold_date_sk#25) - : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_quantity#21,cs_list_price#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_quantity#21,cs_list_price#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(6) Project [ws_quantity#26 AS quantity#27, ws_list_price#28 AS list_price#29] : +- *(6) BroadcastHashJoin [ws_sold_date_sk#30], [d_date_sk#19], Inner, BuildRight : :- *(6) Project [ws_sold_date_sk#30, ws_quantity#26, ws_list_price#28] : : +- *(6) Filter isnotnull(ws_sold_date_sk#30) - : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_quantity#26,ws_list_price#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_quantity#26,ws_list_price#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(51) HashAggregate(keys=[i_brand_id#8, i_class_id#9, i_category_id#10], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), count(1)]) +- Exchange hashpartitioning(i_brand_id#8, i_class_id#9, i_category_id#10, 5) @@ -149,7 +149,7 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : :- *(50) BroadcastHashJoin [ss_item_sk#31], [ss_item_sk#33], LeftSemi, BuildRight : : :- *(50) Project [ss_sold_date_sk#18, ss_item_sk#31, ss_quantity#13, ss_list_price#14] : : : +- *(50) Filter (isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#18)) - : : : +- *(50) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- *(50) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : +- ReusedExchange [ss_item_sk#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [i_item_sk#43, i_brand_id#8, i_class_id#9, i_category_id#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) @@ -158,9 +158,9 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 : +- Subquery subquery1889 : +- *(1) Project [d_week_seq#39] : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct - +- *(49) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#39] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct + : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct + +- *(49) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#39] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct +- Subquery subquery1889 +- *(1) Project [d_week_seq#39] +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct \ No newline at end of file + +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/explain.txt index d70d7905c..f77fe16bb 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/explain.txt @@ -11,16 +11,16 @@ TakeOrderedAndProject(limit=100, orderBy=[ca_zip#1 ASC NULLS FIRST], output=[ca_ : : +- *(4) BroadcastHashJoin [cs_bill_customer_sk#9], [c_customer_sk#10], Inner, BuildRight : : :- *(4) Project [cs_sold_date_sk#4, cs_bill_customer_sk#9, cs_sales_price#3] : : : +- *(4) Filter (isnotnull(cs_bill_customer_sk#9) && isnotnull(cs_sold_date_sk#4)) - : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#4,cs_bill_customer_sk#9,cs_sales_price#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#4,cs_bill_customer_sk#9,cs_sales_price#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(1) Project [c_customer_sk#10, c_current_addr_sk#6] : : +- *(1) Filter (isnotnull(c_customer_sk#10) && isnotnull(c_current_addr_sk#6)) - : : +- *(1) FileScan parquet default.customer[c_customer_sk#10,c_current_addr_sk#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + : : +- *(1) FileScan parquet default.customer[c_customer_sk#10,c_current_addr_sk#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(2) Project [ca_address_sk#7, ca_state#8, ca_zip#1] : +- *(2) Filter isnotnull(ca_address_sk#7) - : +- *(2) FileScan parquet default.customer_address[ca_address_sk#7,ca_state#8,ca_zip#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + : +- *(2) FileScan parquet default.customer_address[ca_address_sk#7,ca_state#8,ca_zip#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(3) Project [d_date_sk#5] +- *(3) Filter ((((isnotnull(d_qoy#11) && isnotnull(d_year#12)) && (d_qoy#11 = 2)) && (d_year#12 = 2001)) && isnotnull(d_date_sk#5)) - +- *(3) FileScan parquet default.date_dim[d_date_sk#5,d_year#12,d_qoy#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file + +- *(3) FileScan parquet default.date_dim[d_date_sk#5,d_year#12,d_qoy#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/explain.txt index 589464c44..ff12892ff 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/explain.txt @@ -17,21 +17,21 @@ TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], outpu : : : : +- *(6) BroadcastHashJoin [cs_order_number#6], [cs_order_number#6#14], LeftSemi, BuildRight, NOT (cs_warehouse_sk#15 = cs_warehouse_sk#15#16) : : : : :- *(6) Project [cs_ship_date_sk#11, cs_ship_addr_sk#9, cs_call_center_sk#7, cs_warehouse_sk#15, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] : : : : : +- *(6) Filter ((isnotnull(cs_ship_date_sk#11) && isnotnull(cs_ship_addr_sk#9)) && isnotnull(cs_call_center_sk#7)) - : : : : : +- *(6) FileScan parquet default.catalog_sales[cs_ship_date_sk#11,cs_ship_addr_sk#9,cs_call_center_sk#7,cs_warehouse_sk#15,cs_order_number#6,cs_ext_ship_cost#4,cs_net_profit#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_ship_date_sk), IsNotNull(cs_ship_addr_sk), IsNotNull(cs_call_center_sk)], ReadSchema: struct + : : : : +- *(1) FileScan parquet default.catalog_sales[cs_warehouse_sk#15,cs_order_number#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) FileScan parquet default.catalog_returns[cr_order_number#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + : : : +- *(2) FileScan parquet default.catalog_returns[cr_order_number#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_returns], PartitionFilters: [], PushedFilters: [], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(3) Project [d_date_sk#12] : : +- *(3) Filter (((isnotnull(d_date#17) && (cast(d_date#17 as string) >= 2002-02-01)) && (d_date#17 <= 11779)) && isnotnull(d_date_sk#12)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2002-04-02), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_date#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2002-04-02), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(4) Project [ca_address_sk#10] : +- *(4) Filter ((isnotnull(ca_state#18) && (ca_state#18 = GA)) && isnotnull(ca_address_sk#10)) - : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(5) Project [cc_call_center_sk#8] +- *(5) Filter ((isnotnull(cc_county#19) && (cc_county#19 = Williamson County)) && isnotnull(cc_call_center_sk#8)) - +- *(5) FileScan parquet default.call_center[cc_call_center_sk#8,cc_county#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_county), EqualTo(cc_county,Williamson County), IsNotNull(cc_call_center_sk)], ReadSchema: struct \ No newline at end of file + +- *(5) FileScan parquet default.call_center[cc_call_center_sk#8,cc_county#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_county), EqualTo(cc_county,Williamson County), IsNotNull(cc_call_center_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/explain.txt index 5d0ff39f9..8c8502f7f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/explain.txt @@ -19,32 +19,32 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_des : : : : : : +- *(8) BroadcastHashJoin [cast(ss_customer_sk#33 as bigint), cast(ss_item_sk#19 as bigint), cast(ss_ticket_number#34 as bigint)], [sr_customer_sk#29, sr_item_sk#30, sr_ticket_number#35], Inner, BuildRight : : : : : : :- *(8) Project [ss_sold_date_sk#27, ss_item_sk#19, ss_customer_sk#33, ss_store_sk#21, ss_ticket_number#34, ss_quantity#16] : : : : : : : +- *(8) Filter ((((isnotnull(ss_ticket_number#34) && isnotnull(ss_item_sk#19)) && isnotnull(ss_customer_sk#33)) && isnotnull(ss_sold_date_sk#27)) && isnotnull(ss_store_sk#21)) - : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#27,ss_item_sk#19,ss_customer_sk#33,ss_store_sk#21,ss_ticket_number#34,ss_quantity#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct + : : : : : +- *(2) FileScan parquet default.catalog_sales[cs_sold_date_sk#23,cs_bill_customer_sk#31,cs_item_sk#32,cs_quantity#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(3) Project [d_date_sk#28] : : : : +- *(3) Filter ((isnotnull(d_quarter_name#36) && (d_quarter_name#36 = 2001Q1)) && isnotnull(d_date_sk#28)) - : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#28,d_quarter_name#36] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_quarter_name), EqualTo(d_quarter_name,2001Q1), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#28,d_quarter_name#36] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_quarter_name), EqualTo(d_quarter_name,2001Q1), IsNotNull(d_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(4) Project [d_date_sk#26] : : : +- *(4) Filter (d_quarter_name#37 IN (2001Q1,2001Q2,2001Q3) && isnotnull(d_date_sk#26)) - : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#26,d_quarter_name#37] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#26,d_quarter_name#37] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(5) Project [d_date_sk#24] : : +- *(5) Filter (d_quarter_name#38 IN (2001Q1,2001Q2,2001Q3) && isnotnull(d_date_sk#24)) - : : +- *(5) FileScan parquet default.date_dim[d_date_sk#24,d_quarter_name#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(5) FileScan parquet default.date_dim[d_date_sk#24,d_quarter_name#38] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(6) Project [s_store_sk#22, s_state#3] : +- *(6) Filter isnotnull(s_store_sk#22) - : +- *(6) FileScan parquet default.store[s_store_sk#22,s_state#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : +- *(6) FileScan parquet default.store[s_store_sk#22,s_state#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(7) Project [i_item_sk#20, i_item_id#1, i_item_desc#2] +- *(7) Filter isnotnull(i_item_sk#20) - +- *(7) FileScan parquet default.item[i_item_sk#20,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file + +- *(7) FileScan parquet default.item[i_item_sk#20,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/explain.txt index fd9786c06..c13386440 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/explain.txt @@ -18,28 +18,28 @@ TakeOrderedAndProject(limit=100, orderBy=[ca_country#1 ASC NULLS FIRST,ca_state# : : : : : +- *(7) BroadcastHashJoin [cs_bill_cdemo_sk#38], [cd_demo_sk#39], Inner, BuildRight : : : : : :- *(7) Project [cs_sold_date_sk#30, cs_bill_customer_sk#36, cs_bill_cdemo_sk#38, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17] : : : : : : +- *(7) Filter (((isnotnull(cs_bill_cdemo_sk#38) && isnotnull(cs_bill_customer_sk#36)) && isnotnull(cs_sold_date_sk#30)) && isnotnull(cs_item_sk#28)) - : : : : : : +- *(7) FileScan parquet default.catalog_sales[cs_sold_date_sk#30,cs_bill_customer_sk#36,cs_bill_cdemo_sk#38,cs_item_sk#28,cs_quantity#13,cs_list_price#14,cs_sales_price#16,cs_coupon_amt#15,cs_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk), IsNotNu..., ReadSchema: struct + : : : : : +- *(1) FileScan parquet default.customer_demographics[cd_demo_sk#39,cd_gender#40,cd_education_status#41,cd_dep_count#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_education_status), EqualTo(cd_gender,F), EqualTo(cd_education..., ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(2) Project [c_customer_sk#37, c_current_cdemo_sk#34, c_current_addr_sk#32, c_birth_year#18] : : : : +- *(2) Filter (((c_birth_month#42 IN (1,6,8,9,12,2) && isnotnull(c_customer_sk#37)) && isnotnull(c_current_cdemo_sk#34)) && isnotnull(c_current_addr_sk#32)) - : : : : +- *(2) FileScan parquet default.customer[c_customer_sk#37,c_current_cdemo_sk#34,c_current_addr_sk#32,c_birth_month#42,c_birth_year#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [In(c_birth_month, [1,6,8,9,12,2]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNo..., ReadSchema: struct + : : : +- *(3) FileScan parquet default.customer_demographics[cd_demo_sk#35] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(4) Project [ca_address_sk#33, ca_county#27, ca_state#26, ca_country#25] : : +- *(4) Filter (ca_state#26 IN (MS,IN,ND,OK,NM,VA) && isnotnull(ca_address_sk#33)) - : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#33,ca_county#27,ca_state#26,ca_country#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [In(ca_state, [MS,IN,ND,OK,NM,VA]), IsNotNull(ca_address_sk)], ReadSchema: struct + : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#33,ca_county#27,ca_state#26,ca_country#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [In(ca_state, [MS,IN,ND,OK,NM,VA]), IsNotNull(ca_address_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(5) Project [d_date_sk#31] : +- *(5) Filter ((isnotnull(d_year#43) && (d_year#43 = 1998)) && isnotnull(d_date_sk#31)) - : +- *(5) FileScan parquet default.date_dim[d_date_sk#31,d_year#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct + : +- *(5) FileScan parquet default.date_dim[d_date_sk#31,d_year#43] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(6) Project [i_item_sk#29, i_item_id#24] +- *(6) Filter isnotnull(i_item_sk#29) - +- *(6) FileScan parquet default.item[i_item_sk#29,i_item_id#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file + +- *(6) FileScan parquet default.item[i_item_sk#29,i_item_id#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/explain.txt index a3cb20be7..00e737c79 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/explain.txt @@ -15,24 +15,24 @@ TakeOrderedAndProject(limit=100, orderBy=[ext_price#1 DESC NULLS LAST,brand#2 AS : : : : +- *(6) BroadcastHashJoin [d_date_sk#19], [ss_sold_date_sk#20], Inner, BuildRight : : : : :- *(6) Project [d_date_sk#19] : : : : : +- *(6) Filter ((((isnotnull(d_moy#21) && isnotnull(d_year#22)) && (d_moy#21 = 11)) && (d_year#22 = 1998)) && isnotnull(d_date_sk#19)) - : : : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#19,d_year#22,d_moy#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#19,d_year#22,d_moy#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(1) Project [ss_sold_date_sk#20, ss_item_sk#17, ss_customer_sk#15, ss_store_sk#9, ss_ext_sales_price#8] : : : : +- *(1) Filter (((isnotnull(ss_sold_date_sk#20) && isnotnull(ss_item_sk#17)) && isnotnull(ss_customer_sk#15)) && isnotnull(ss_store_sk#9)) - : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#20,ss_item_sk#17,ss_customer_sk#15,ss_store_sk#9,ss_ext_sales_price#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store..., ReadSchema: struct + : : +- *(3) FileScan parquet default.customer[c_customer_sk#16,c_current_addr_sk#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(4) Project [ca_address_sk#14, ca_zip#11] : +- *(4) Filter (isnotnull(ca_address_sk#14) && isnotnull(ca_zip#11)) - : +- *(4) FileScan parquet default.customer_address[ca_address_sk#14,ca_zip#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)], ReadSchema: struct + : +- *(4) FileScan parquet default.customer_address[ca_address_sk#14,ca_zip#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(5) Project [s_store_sk#10, s_zip#12] +- *(5) Filter (isnotnull(s_zip#12) && isnotnull(s_store_sk#10)) - +- *(5) FileScan parquet default.store[s_store_sk#10,s_zip#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file + +- *(5) FileScan parquet default.store[s_store_sk#10,s_zip#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/explain.txt index 27134fbb9..6e455f962 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/explain.txt @@ -13,18 +13,18 @@ : : :- Union : : : :- *(1) Project [ws_sold_date_sk#37 AS sold_date_sk#35, ws_ext_sales_price#38 AS sales_price#34] : : : : +- *(1) Filter isnotnull(ws_sold_date_sk#37) - : : : : +- *(1) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_ext_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : : +- *(1) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_ext_sales_price#38] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct : : : +- *(2) Project [cs_sold_date_sk#39 AS sold_date_sk#40, cs_ext_sales_price#41 AS sales_price#42] : : : +- *(2) Filter isnotnull(cs_sold_date_sk#39) - : : : +- *(2) FileScan parquet default.catalog_sales[cs_sold_date_sk#39,cs_ext_sales_price#41] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- *(2) FileScan parquet default.catalog_sales[cs_sold_date_sk#39,cs_ext_sales_price#41] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(3) Project [d_date_sk#36, d_week_seq#24, d_day_name#33] : : +- *(3) Filter (isnotnull(d_date_sk#36) && isnotnull(d_week_seq#24)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#36,d_week_seq#24,d_day_name#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#36,d_week_seq#24,d_day_name#33] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(5) Project [d_week_seq#32] : +- *(5) Filter ((isnotnull(d_year#43) && (d_year#43 = 2001)) && isnotnull(d_week_seq#32)) - : +- *(5) FileScan parquet default.date_dim[d_week_seq#32,d_year#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_week_seq)], ReadSchema: struct + : +- *(5) FileScan parquet default.date_dim[d_week_seq#32,d_year#43] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_week_seq)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint))) +- *(11) Project [d_week_seq#24 AS d_week_seq2#23, sun_sales#25 AS sun_sales2#3, mon_sales#26 AS mon_sales2#6, tue_sales#27 AS tue_sales2#9, wed_sales#28 AS wed_sales2#12, thu_sales#29 AS thu_sales2#15, fri_sales#30 AS fri_sales2#18, sat_sales#31 AS sat_sales2#21] +- *(11) BroadcastHashJoin [d_week_seq#24], [d_week_seq#44], Inner, BuildRight @@ -33,4 +33,4 @@ +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(10) Project [d_week_seq#44] +- *(10) Filter ((isnotnull(d_year#52) && (d_year#52 = 2002)) && isnotnull(d_week_seq#44)) - +- *(10) FileScan parquet default.date_dim[d_week_seq#44,d_year#52] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)], ReadSchema: struct \ No newline at end of file + +- *(10) FileScan parquet default.date_dim[d_week_seq#44,d_year#52] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/explain.txt index f0c1bc89c..c1f528c67 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/explain.txt @@ -13,12 +13,12 @@ TakeOrderedAndProject(limit=100, orderBy=[i_category#1 ASC NULLS FIRST,i_class#2 : +- *(3) BroadcastHashJoin [cs_item_sk#14], [i_item_sk#15], Inner, BuildRight : :- *(3) Project [cs_sold_date_sk#12, cs_item_sk#14, cs_ext_sales_price#11] : : +- *(3) Filter (isnotnull(cs_item_sk#14) && isnotnull(cs_sold_date_sk#12)) - : : +- *(3) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_item_sk#14,cs_ext_sales_price#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- *(3) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_item_sk#14,cs_ext_sales_price#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(1) Project [i_item_sk#15, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] : +- *(1) Filter (i_category#1 IN (Sports,Books,Home) && isnotnull(i_item_sk#15)) - : +- *(1) FileScan parquet default.item[i_item_sk#15,i_item_id#3,i_item_desc#4,i_current_price#6,i_class#2,i_category#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)], ReadSchema: struct= 10644)) && (d_date#16 <= 10674)) && isnotnull(d_date_sk#13)) - +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file + +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/explain.txt index 7c1b02f29..86adb689b 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/explain.txt @@ -12,16 +12,16 @@ TakeOrderedAndProject(limit=100, orderBy=[w_warehouse_name#1 ASC NULLS FIRST,i_i : : +- *(4) BroadcastHashJoin [inv_warehouse_sk#11], [w_warehouse_sk#12], Inner, BuildRight : : :- *(4) Project [inv_date_sk#7, inv_item_sk#9, inv_warehouse_sk#11, inv_quantity_on_hand#6] : : : +- *(4) Filter ((isnotnull(inv_warehouse_sk#11) && isnotnull(inv_item_sk#9)) && isnotnull(inv_date_sk#7)) - : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#7,inv_item_sk#9,inv_warehouse_sk#11,inv_quantity_on_hand#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_warehouse_sk), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#7,inv_item_sk#9,inv_warehouse_sk#11,inv_quantity_on_hand#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_warehouse_sk), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(1) Project [w_warehouse_sk#12, w_warehouse_name#1] : : +- *(1) Filter isnotnull(w_warehouse_sk#12) - : : +- *(1) FileScan parquet default.warehouse[w_warehouse_sk#12,w_warehouse_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : : +- *(1) FileScan parquet default.warehouse[w_warehouse_sk#12,w_warehouse_name#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(2) Project [i_item_sk#10, i_item_id#2] : +- *(2) Filter (((isnotnull(i_current_price#13) && (i_current_price#13 >= 0.99)) && (i_current_price#13 <= 1.49)) && isnotnull(i_item_sk#10)) - : +- *(2) FileScan parquet default.item[i_item_sk#10,i_item_id#2,i_current_price#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_..., ReadSchema: struct + : +- *(2) FileScan parquet default.item[i_item_sk#10,i_item_id#2,i_current_price#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(3) Project [d_date_sk#8, d_date#5] +- *(3) Filter (((isnotnull(d_date#5) && (d_date#5 >= 10997)) && (d_date#5 <= 11057)) && isnotnull(d_date_sk#8)) - +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_date#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), Is..., ReadSchema: struct \ No newline at end of file + +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_date#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/explain.txt index 171688e3a..1ec1ce194 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/explain.txt @@ -12,16 +12,16 @@ TakeOrderedAndProject(limit=100, orderBy=[qoh#1 ASC NULLS FIRST,i_product_name#2 : : +- *(4) BroadcastHashJoin [inv_date_sk#20], [d_date_sk#21], Inner, BuildRight : : :- *(4) Project [inv_date_sk#20, inv_item_sk#18, inv_warehouse_sk#16, inv_quantity_on_hand#7] : : : +- *(4) Filter ((isnotnull(inv_date_sk#20) && isnotnull(inv_item_sk#18)) && isnotnull(inv_warehouse_sk#16)) - : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#20,inv_item_sk#18,inv_warehouse_sk#16,inv_quantity_on_hand#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_date_sk), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)], ReadSchema: struct + : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#20,inv_item_sk#18,inv_warehouse_sk#16,inv_quantity_on_hand#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_date_sk), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(1) Project [d_date_sk#21] : : +- *(1) Filter (((isnotnull(d_month_seq#22) && (d_month_seq#22 >= 1200)) && (d_month_seq#22 <= 1211)) && isnotnull(d_date_sk#21)) - : : +- *(1) FileScan parquet default.date_dim[d_date_sk#21,d_month_seq#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#21,d_month_seq#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(2) Project [i_item_sk#19, i_brand#13, i_class#14, i_category#15, i_product_name#12] : +- *(2) Filter isnotnull(i_item_sk#19) - : +- *(2) FileScan parquet default.item[i_item_sk#19,i_brand#13,i_class#14,i_category#15,i_product_name#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- *(2) FileScan parquet default.item[i_item_sk#19,i_brand#13,i_class#14,i_category#15,i_product_name#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(3) Project [w_warehouse_sk#17] +- *(3) Filter isnotnull(w_warehouse_sk#17) - +- *(3) FileScan parquet default.warehouse[w_warehouse_sk#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct \ No newline at end of file + +- *(3) FileScan parquet default.warehouse[w_warehouse_sk#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/explain.txt index cd1cae198..b415920d5 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/explain.txt @@ -12,7 +12,7 @@ CollectLimit 100 : : : +- *(9) BroadcastHashJoin [cs_item_sk#8], [item_sk#9], LeftSemi, BuildRight : : : :- *(9) Project [cs_sold_date_sk#4, cs_bill_customer_sk#6, cs_item_sk#8, cs_quantity#2, cs_list_price#3] : : : : +- *(9) Filter isnotnull(cs_sold_date_sk#4) - : : : : +- *(9) FileScan parquet default.catalog_sales[cs_sold_date_sk#4,cs_bill_customer_sk#6,cs_item_sk#8,cs_quantity#2,cs_list_price#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct 4) @@ -25,15 +25,15 @@ CollectLimit 100 : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#5], Inner, BuildRight : : : : :- *(3) Project [ss_sold_date_sk#16, ss_item_sk#15] : : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#16) && isnotnull(ss_item_sk#15)) - : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_item_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_item_sk#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(1) Project [d_date_sk#5, d_date#14] : : : : +- *(1) Filter (d_year#17 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#5)) - : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#5,d_date#14,d_year#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#5,d_date#14,d_year#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(2) Project [i_item_sk#13, i_item_desc#11] : : : +- *(2) Filter isnotnull(i_item_sk#13) - : : : +- *(2) FileScan parquet default.item[i_item_sk#13,i_item_desc#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : +- *(2) FileScan parquet default.item[i_item_sk#13,i_item_desc#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(7) Project [c_customer_sk#7] : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery3052 as decimal(32,6)))), DecimalType(38,8)))) @@ -50,15 +50,15 @@ CollectLimit 100 : : : : +- *(3) BroadcastHashJoin [ss_customer_sk#22], [c_customer_sk#7], Inner, BuildRight : : : : :- *(3) Project [ss_sold_date_sk#16, ss_customer_sk#22, ss_quantity#18, ss_sales_price#19] : : : : : +- *(3) Filter (isnotnull(ss_customer_sk#22) && isnotnull(ss_sold_date_sk#16)) - : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_customer_sk#22,ss_quantity#18,ss_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_customer_sk#22,ss_quantity#18,ss_sales_price#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(1) Project [c_customer_sk#7] : : : : +- *(1) Filter isnotnull(c_customer_sk#7) - : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(2) Project [d_date_sk#5] : : : +- *(2) Filter (d_year#17 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#5)) - : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#5,d_year#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#5,d_year#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct : : +- *(7) HashAggregate(keys=[c_customer_sk#7], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) : : +- Exchange hashpartitioning(c_customer_sk#7, 5) : : +- *(6) HashAggregate(keys=[c_customer_sk#7], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) @@ -66,15 +66,15 @@ CollectLimit 100 : : +- *(6) BroadcastHashJoin [ss_customer_sk#22], [c_customer_sk#7], Inner, BuildRight : : :- *(6) Project [ss_customer_sk#22, ss_quantity#18, ss_sales_price#19] : : : +- *(6) Filter isnotnull(ss_customer_sk#22) - : : : +- *(6) FileScan parquet default.store_sales[ss_customer_sk#22,ss_quantity#18,ss_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk)], ReadSchema: struct + : : : +- *(6) FileScan parquet default.store_sales[ss_customer_sk#22,ss_quantity#18,ss_sales_price#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(5) Project [c_customer_sk#7] : : +- *(5) Filter isnotnull(c_customer_sk#7) - : : +- *(5) FileScan parquet default.customer[c_customer_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : +- *(5) FileScan parquet default.customer[c_customer_sk#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(8) Project [d_date_sk#5] : +- *(8) Filter ((((isnotnull(d_year#17) && isnotnull(d_moy#23)) && (d_year#17 = 2000)) && (d_moy#23 = 2)) && isnotnull(d_date_sk#5)) - : +- *(8) FileScan parquet default.date_dim[d_date_sk#5,d_year#17,d_moy#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct + : +- *(8) FileScan parquet default.date_dim[d_date_sk#5,d_year#17,d_moy#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct +- *(18) Project [CheckOverflow((promote_precision(cast(cast(ws_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#25 as decimal(12,2)))), DecimalType(18,2)) AS sales#26] +- *(18) BroadcastHashJoin [ws_sold_date_sk#27], [d_date_sk#5], Inner, BuildRight :- *(18) Project [ws_sold_date_sk#27, ws_quantity#24, ws_list_price#25] @@ -83,7 +83,7 @@ CollectLimit 100 : : +- *(18) BroadcastHashJoin [ws_item_sk#29], [item_sk#9], LeftSemi, BuildRight : : :- *(18) Project [ws_sold_date_sk#27, ws_item_sk#29, ws_bill_customer_sk#28, ws_quantity#24, ws_list_price#25] : : : +- *(18) Filter isnotnull(ws_sold_date_sk#27) - : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#27,ws_item_sk#29,ws_bill_customer_sk#28,ws_quantity#24,ws_list_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct 4) @@ -26,15 +26,15 @@ TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_ : : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#7], Inner, BuildRight : : : : : :- *(3) Project [ss_sold_date_sk#19, ss_item_sk#18] : : : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#19) && isnotnull(ss_item_sk#18)) - : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- *(1) Project [d_date_sk#7, d_date#17] : : : : : +- *(1) Filter (d_year#20 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#7)) - : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#7,d_date#17,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#7,d_date#17,d_year#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(2) Project [i_item_sk#16, i_item_desc#14] : : : : +- *(2) Filter isnotnull(i_item_sk#16) - : : : : +- *(2) FileScan parquet default.item[i_item_sk#16,i_item_desc#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : : +- *(2) FileScan parquet default.item[i_item_sk#16,i_item_desc#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(7) Project [c_customer_sk#9 AS c_customer_sk#9#10] : : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery3144 as decimal(32,6)))), DecimalType(38,8)))) @@ -51,15 +51,15 @@ TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_ : : : : : +- *(3) BroadcastHashJoin [ss_customer_sk#25], [c_customer_sk#9], Inner, BuildRight : : : : : :- *(3) Project [ss_sold_date_sk#19, ss_customer_sk#25, ss_quantity#21, ss_sales_price#22] : : : : : : +- *(3) Filter (isnotnull(ss_customer_sk#25) && isnotnull(ss_sold_date_sk#19)) - : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_customer_sk#25,ss_quantity#21,ss_sales_price#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_customer_sk#25,ss_quantity#21,ss_sales_price#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- *(1) Project [c_customer_sk#9] : : : : : +- *(1) Filter isnotnull(c_customer_sk#9) - : : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(2) Project [d_date_sk#7] : : : : +- *(2) Filter (d_year#20 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#7)) - : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_year#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_year#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct : : : +- *(7) HashAggregate(keys=[c_customer_sk#9], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) : : : +- Exchange hashpartitioning(c_customer_sk#9, 5) : : : +- *(6) HashAggregate(keys=[c_customer_sk#9], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) @@ -67,21 +67,21 @@ TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_ : : : +- *(6) BroadcastHashJoin [ss_customer_sk#25], [c_customer_sk#9], Inner, BuildRight : : : :- *(6) Project [ss_customer_sk#25, ss_quantity#21, ss_sales_price#22] : : : : +- *(6) Filter isnotnull(ss_customer_sk#25) - : : : : +- *(6) FileScan parquet default.store_sales[ss_customer_sk#25,ss_quantity#21,ss_sales_price#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk)], ReadSchema: struct + : : : : +- *(6) FileScan parquet default.store_sales[ss_customer_sk#25,ss_quantity#21,ss_sales_price#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(5) Project [c_customer_sk#9] : : : +- *(5) Filter isnotnull(c_customer_sk#9) - : : : +- *(5) FileScan parquet default.customer[c_customer_sk#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : : +- *(5) FileScan parquet default.customer[c_customer_sk#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(11) BroadcastHashJoin [c_customer_sk#9], [c_customer_sk#9#10], LeftSemi, BuildRight : : :- *(11) Project [c_customer_sk#9, c_first_name#2, c_last_name#1] : : : +- *(11) Filter isnotnull(c_customer_sk#9) - : : : +- *(11) FileScan parquet default.customer[c_customer_sk#9,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : : +- *(11) FileScan parquet default.customer[c_customer_sk#9,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct : : +- ReusedExchange [c_customer_sk#9#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(12) Project [d_date_sk#7] : +- *(12) Filter ((((isnotnull(d_year#20) && isnotnull(d_moy#26)) && (d_year#20 = 2000)) && (d_moy#26 = 2)) && isnotnull(d_date_sk#7)) - : +- *(12) FileScan parquet default.date_dim[d_date_sk#7,d_year#20,d_moy#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct + : +- *(12) FileScan parquet default.date_dim[d_date_sk#7,d_year#20,d_moy#26] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct +- *(28) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#28 as decimal(12,2)))), DecimalType(18,2)))]) +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, 5) +- *(27) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#28 as decimal(12,2)))), DecimalType(18,2)))]) @@ -94,7 +94,7 @@ TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_ : : : +- *(27) BroadcastHashJoin [ws_item_sk#32], [item_sk#12], LeftSemi, BuildRight : : : :- *(27) Project [ws_sold_date_sk#29, ws_item_sk#32, ws_bill_customer_sk#30, ws_quantity#27, ws_list_price#28] : : : : +- *(27) Filter (isnotnull(ws_bill_customer_sk#30) && isnotnull(ws_sold_date_sk#29)) - : : : : +- *(27) FileScan parquet default.web_sales[ws_sold_date_sk#29,ws_item_sk#32,ws_bill_customer_sk#30,ws_quantity#27,ws_list_price#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : : : +- *(1) FileScan parquet default.store_returns[sr_item_sk#27,sr_ticket_number#26] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] : : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) - : : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + : : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] : : : +- *(3) Filter isnotnull(i_item_sk#22) - : : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : +- *(4) FileScan parquet default.customer[c_customer_sk#20,c_first_name#2,c_last_name#1,c_birth_country#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) : +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] : +- *(5) Filter isnotnull(ca_zip#18) - : +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct + : +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct +- *(8) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[sum(netpaid#5)]) +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, 5) +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[partial_sum(netpaid#5)]) @@ -59,24 +59,24 @@ : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) - : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : : +- *(1) FileScan parquet default.store_returns[sr_item_sk#27,sr_ticket_number#26] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) - : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] : : +- *(3) Filter ((isnotnull(i_color#9) && (i_color#9 = pale)) && isnotnull(i_item_sk#22)) - : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale), IsNotNull(i_item_sk)], ReadSchema: struct + : +- *(4) FileScan parquet default.customer[c_customer_sk#20,c_first_name#2,c_last_name#1,c_birth_country#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] +- *(5) Filter isnotnull(ca_zip#18) - +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct \ No newline at end of file + +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/explain.txt index 35e443a34..dbaafd47b 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/explain.txt @@ -20,27 +20,27 @@ : : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight : : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] : : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) - : : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : : : +- *(1) FileScan parquet default.store_returns[sr_item_sk#27,sr_ticket_number#26] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] : : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) - : : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + : : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] : : : +- *(3) Filter isnotnull(i_item_sk#22) - : : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : +- *(4) FileScan parquet default.customer[c_customer_sk#20,c_first_name#2,c_last_name#1,c_birth_country#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) : +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] : +- *(5) Filter isnotnull(ca_zip#18) - : +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct + : +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct +- *(8) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[sum(netpaid#5)]) +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, 5) +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[partial_sum(netpaid#5)]) @@ -59,24 +59,24 @@ : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) - : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : : +- *(1) FileScan parquet default.store_returns[sr_item_sk#27,sr_ticket_number#26] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) - : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] : : +- *(3) Filter ((isnotnull(i_color#9) && (i_color#9 = chiffon)) && isnotnull(i_item_sk#22)) - : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_color), EqualTo(i_color,chiffon), IsNotNull(i_item_sk)], ReadSchema: struct + : +- *(4) FileScan parquet default.customer[c_customer_sk#20,c_first_name#2,c_last_name#1,c_birth_country#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] +- *(5) Filter isnotnull(ca_zip#18) - +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct \ No newline at end of file + +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/explain.txt index c1f44f753..1a7d675da 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/explain.txt @@ -19,32 +19,32 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_des : : : : : : +- *(8) BroadcastHashJoin [cast(ss_customer_sk#25 as bigint), cast(ss_item_sk#11 as bigint), cast(ss_ticket_number#26 as bigint)], [sr_customer_sk#21, sr_item_sk#22, sr_ticket_number#27], Inner, BuildRight : : : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_customer_sk#25, ss_store_sk#13, ss_ticket_number#26, ss_net_profit#8] : : : : : : : +- *(8) Filter ((((isnotnull(ss_ticket_number#26) && isnotnull(ss_item_sk#11)) && isnotnull(ss_customer_sk#25)) && isnotnull(ss_sold_date_sk#19)) && isnotnull(ss_store_sk#13)) - : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#11,ss_customer_sk#25,ss_store_sk#13,ss_ticket_number#26,ss_net_profit#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct + : : : : : +- *(2) FileScan parquet default.catalog_sales[cs_sold_date_sk#15,cs_bill_customer_sk#23,cs_item_sk#24,cs_net_profit#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(3) Project [d_date_sk#20] : : : : +- *(3) Filter ((((isnotnull(d_moy#28) && isnotnull(d_year#29)) && (d_moy#28 = 4)) && (d_year#29 = 2001)) && isnotnull(d_date_sk#20)) - : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_year#29,d_moy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,4), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_year#29,d_moy#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,4), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(4) Project [d_date_sk#18] : : : +- *(4) Filter (((((isnotnull(d_year#30) && isnotnull(d_moy#31)) && (d_moy#31 >= 4)) && (d_moy#31 <= 10)) && (d_year#30 = 2001)) && isnotnull(d_date_sk#18)) - : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#30,d_moy#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct + : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#30,d_moy#31] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(5) Project [d_date_sk#16] : : +- *(5) Filter (((((isnotnull(d_year#32) && isnotnull(d_moy#33)) && (d_moy#33 >= 4)) && (d_moy#33 <= 10)) && (d_year#32 = 2001)) && isnotnull(d_date_sk#16)) - : : +- *(5) FileScan parquet default.date_dim[d_date_sk#16,d_year#32,d_moy#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct + : : +- *(5) FileScan parquet default.date_dim[d_date_sk#16,d_year#32,d_moy#33] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(6) Project [s_store_sk#14, s_store_id#3, s_store_name#4] : +- *(6) Filter isnotnull(s_store_sk#14) - : +- *(6) FileScan parquet default.store[s_store_sk#14,s_store_id#3,s_store_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : +- *(6) FileScan parquet default.store[s_store_sk#14,s_store_id#3,s_store_name#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(7) Project [i_item_sk#12, i_item_id#1, i_item_desc#2] +- *(7) Filter isnotnull(i_item_sk#12) - +- *(7) FileScan parquet default.item[i_item_sk#12,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file + +- *(7) FileScan parquet default.item[i_item_sk#12,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/explain.txt index c5c7da628..447971a14 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/explain.txt @@ -13,20 +13,20 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[ : : : +- *(5) BroadcastHashJoin [cs_bill_cdemo_sk#16], [cd_demo_sk#17], Inner, BuildRight : : : :- *(5) Project [cs_sold_date_sk#14, cs_bill_cdemo_sk#16, cs_item_sk#12, cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8] : : : : +- *(5) Filter (((isnotnull(cs_bill_cdemo_sk#16) && isnotnull(cs_sold_date_sk#14)) && isnotnull(cs_item_sk#12)) && isnotnull(cs_promo_sk#10)) - : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#14,cs_bill_cdemo_sk#16,cs_item_sk#12,cs_promo_sk#10,cs_quantity#6,cs_list_price#7,cs_sales_price#9,cs_coupon_amt#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_pro..., ReadSchema: struct + : : : +- *(1) FileScan parquet default.customer_demographics[cd_demo_sk#17,cd_gender#18,cd_marital_status#19,cd_education_status#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_g..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [d_date_sk#15] : : +- *(2) Filter ((isnotnull(d_year#21) && (d_year#21 = 2000)) && isnotnull(d_date_sk#15)) - : : +- *(2) FileScan parquet default.date_dim[d_date_sk#15,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#15,d_year#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(3) Project [i_item_sk#13, i_item_id#1] : +- *(3) Filter isnotnull(i_item_sk#13) - : +- *(3) FileScan parquet default.item[i_item_sk#13,i_item_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- *(3) FileScan parquet default.item[i_item_sk#13,i_item_id#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(4) Project [p_promo_sk#11] +- *(4) Filter (((p_channel_email#22 = N) || (p_channel_event#23 = N)) && isnotnull(p_promo_sk#11)) - +- *(4) FileScan parquet default.promotion[p_promo_sk#11,p_channel_email#22,p_channel_event#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)], ReadSchema: struct \ No newline at end of file + +- *(4) FileScan parquet default.promotion[p_promo_sk#11,p_channel_email#22,p_channel_event#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/promotion], PartitionFilters: [], PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/explain.txt index c166032cd..62dd35f34 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/explain.txt @@ -14,20 +14,20 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,s_state#2 : : : +- *(5) BroadcastHashJoin [ss_cdemo_sk#23], [cd_demo_sk#24], Inner, BuildRight : : : :- *(5) Project [ss_sold_date_sk#21, ss_item_sk#17, ss_cdemo_sk#23, ss_store_sk#19, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11] : : : : +- *(5) Filter (((isnotnull(ss_cdemo_sk#23) && isnotnull(ss_sold_date_sk#21)) && isnotnull(ss_store_sk#19)) && isnotnull(ss_item_sk#17)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#21,ss_item_sk#17,ss_cdemo_sk#23,ss_store_sk#19,ss_quantity#9,ss_list_price#10,ss_sales_price#12,ss_coupon_amt#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.customer_demographics[cd_demo_sk#24,cd_gender#25,cd_marital_status#26,cd_education_status#27] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_g..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [d_date_sk#22] : : +- *(2) Filter ((isnotnull(d_year#28) && (d_year#28 = 2002)) && isnotnull(d_date_sk#22)) - : : +- *(2) FileScan parquet default.date_dim[d_date_sk#22,d_year#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#22,d_year#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(3) Project [s_store_sk#20, s_state#16] : +- *(3) Filter ((isnotnull(s_state#16) && (s_state#16 = TN)) && isnotnull(s_store_sk#20)) - : +- *(3) FileScan parquet default.store[s_store_sk#20,s_state#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)], ReadSchema: struct + : +- *(3) FileScan parquet default.store[s_store_sk#20,s_state#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(4) Project [i_item_sk#18, i_item_id#15] +- *(4) Filter isnotnull(i_item_sk#18) - +- *(4) FileScan parquet default.item[i_item_sk#18,i_item_id#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file + +- *(4) FileScan parquet default.item[i_item_sk#18,i_item_id#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/explain.txt index b76609a84..698feb11f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/explain.txt @@ -13,7 +13,7 @@ CollectLimit 100 : : : : : +- *(1) HashAggregate(keys=[ss_list_price#1], functions=[partial_avg(UnscaledValue(ss_list_price#1)), partial_count(ss_list_price#1)]) : : : : : +- *(1) Project [ss_list_price#1] : : : : : +- *(1) Filter (((isnotnull(ss_quantity#2) && (ss_quantity#2 >= 0)) && (ss_quantity#2 <= 5)) && ((((ss_list_price#1 >= 8.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 18.00)) || ((ss_coupon_amt#3 >= 459.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 1459.00))) || ((ss_wholesale_cost#4 >= 57.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 77.00)))) - : : : : : +- *(1) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5)], ReadSchema: struct= 6)) && (ss_quantity#2 <= 10)) && ((((ss_list_price#1 >= 90.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 100.00)) || ((ss_coupon_amt#3 >= 2323.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 3323.00))) || ((ss_wholesale_cost#4 >= 31.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 51.00)))) - : : : : +- *(4) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10)], ReadSchema: struct= 11)) && (ss_quantity#2 <= 15)) && ((((ss_list_price#1 >= 142.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 152.00)) || ((ss_coupon_amt#3 >= 12214.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 13214.00))) || ((ss_wholesale_cost#4 >= 79.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 99.00)))) - : : : +- *(7) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15)], ReadSchema: struct= 16)) && (ss_quantity#2 <= 20)) && ((((ss_list_price#1 >= 135.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 145.00)) || ((ss_coupon_amt#3 >= 6071.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 7071.00))) || ((ss_wholesale_cost#4 >= 38.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 58.00)))) - : : +- *(10) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct= 21)) && (ss_quantity#2 <= 25)) && ((((ss_list_price#1 >= 122.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 132.00)) || ((ss_coupon_amt#3 >= 836.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 1836.00))) || ((ss_wholesale_cost#4 >= 17.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 37.00)))) - : +- *(13) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25)], ReadSchema: struct= 26)) && (ss_quantity#2 <= 30)) && ((((ss_list_price#1 >= 154.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 164.00)) || ((ss_coupon_amt#3 >= 7326.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 8326.00))) || ((ss_wholesale_cost#4 >= 7.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 27.00)))) - +- *(16) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30)], ReadSchema: struct + : : : : : +- *(2) FileScan parquet default.catalog_sales[cs_sold_date_sk#15,cs_bill_customer_sk#23,cs_item_sk#24,cs_quantity#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(3) Project [d_date_sk#20] : : : : +- *(3) Filter ((((isnotnull(d_moy#28) && isnotnull(d_year#29)) && (d_moy#28 = 9)) && (d_year#29 = 1999)) && isnotnull(d_date_sk#20)) - : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_year#29,d_moy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,9), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_year#29,d_moy#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,9), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(4) Project [d_date_sk#18] : : : +- *(4) Filter (((((isnotnull(d_year#30) && isnotnull(d_moy#31)) && (d_moy#31 >= 9)) && (d_moy#31 <= 12)) && (d_year#30 = 1999)) && isnotnull(d_date_sk#18)) - : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#30,d_moy#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), Equ..., ReadSchema: struct + : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#30,d_moy#31] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), Equ..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(5) Project [d_date_sk#16] : : +- *(5) Filter (d_year#32 IN (1999,2000,2001) && isnotnull(d_date_sk#16)) - : : +- *(5) FileScan parquet default.date_dim[d_date_sk#16,d_year#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(5) FileScan parquet default.date_dim[d_date_sk#16,d_year#32] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(6) Project [s_store_sk#14, s_store_id#3, s_store_name#4] : +- *(6) Filter isnotnull(s_store_sk#14) - : +- *(6) FileScan parquet default.store[s_store_sk#14,s_store_id#3,s_store_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : +- *(6) FileScan parquet default.store[s_store_sk#14,s_store_id#3,s_store_name#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(7) Project [i_item_sk#12, i_item_id#1, i_item_desc#2] +- *(7) Filter isnotnull(i_item_sk#12) - +- *(7) FileScan parquet default.item[i_item_sk#12,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file + +- *(7) FileScan parquet default.item[i_item_sk#12,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/explain.txt index 55173cb25..7aaaa5fe6 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/explain.txt @@ -9,12 +9,12 @@ TakeOrderedAndProject(limit=100, orderBy=[d_year#1 ASC NULLS FIRST,sum_agg#2 DES : +- *(3) BroadcastHashJoin [d_date_sk#10], [ss_sold_date_sk#11], Inner, BuildRight : :- *(3) Project [d_date_sk#10, d_year#1] : : +- *(3) Filter ((isnotnull(d_moy#12) && (d_moy#12 = 11)) && isnotnull(d_date_sk#10)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#10,d_year#1,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#10,d_year#1,d_moy#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(1) Project [ss_sold_date_sk#11, ss_item_sk#8, ss_ext_sales_price#7] : +- *(1) Filter (isnotnull(ss_sold_date_sk#11) && isnotnull(ss_item_sk#8)) - : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#8,ss_ext_sales_price#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#8,ss_ext_sales_price#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(2) Project [i_item_sk#9, i_brand_id#6, i_brand#5] +- *(2) Filter ((isnotnull(i_manufact_id#13) && (i_manufact_id#13 = 128)) && isnotnull(i_item_sk#9)) - +- *(2) FileScan parquet default.item[i_item_sk#9,i_brand_id#6,i_brand#5,i_manufact_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,128), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file + +- *(2) FileScan parquet default.item[i_item_sk#9,i_brand_id#6,i_brand#5,i_manufact_id#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,128), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/explain.txt index d96b8f5b0..039bb9953 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/explain.txt @@ -16,15 +16,15 @@ TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST,c_salu : : : : +- *(3) BroadcastHashJoin [wr_returned_date_sk#25], [cast(d_date_sk#26 as bigint)], Inner, BuildRight : : : : :- *(3) Project [wr_returned_date_sk#25, wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] : : : : : +- *(3) Filter ((isnotnull(wr_returned_date_sk#25) && isnotnull(wr_returning_addr_sk#24)) && isnotnull(wr_returning_customer_sk#21)) - : : : : : +- *(3) FileScan parquet default.web_returns[wr_returned_date_sk#25,wr_returning_customer_sk#21,wr_returning_addr_sk#24,wr_return_amt#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk), IsNotNull(wr_returning_customer..., ReadSchema: struct + : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#26,d_year#27] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(2) Project [ca_address_sk#15, ca_state#22] : : : +- *(2) Filter (isnotnull(ca_address_sk#15) && isnotnull(ca_state#22)) - : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)], ReadSchema: struct + : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) : : +- *(8) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#20) : : +- *(8) HashAggregate(keys=[ctr_state#18], functions=[avg(ctr_total_return#13)]) @@ -39,14 +39,14 @@ TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST,c_salu : : : +- *(6) BroadcastHashJoin [wr_returned_date_sk#25], [cast(d_date_sk#26 as bigint)], Inner, BuildRight : : : :- *(6) Project [wr_returned_date_sk#25, wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] : : : : +- *(6) Filter (isnotnull(wr_returned_date_sk#25) && isnotnull(wr_returning_addr_sk#24)) - : : : : +- *(6) FileScan parquet default.web_returns[wr_returned_date_sk#25,wr_returning_customer_sk#21,wr_returning_addr_sk#24,wr_return_amt#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk)], ReadSchema: struct \ No newline at end of file + +- *(10) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/explain.txt index 991349307..7c12adfde 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/explain.txt @@ -18,15 +18,15 @@ : : : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#23], Inner, BuildRight : : : : : : :- *(3) Project [ss_sold_date_sk#22, ss_addr_sk#20, ss_ext_sales_price#19] : : : : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#22) && isnotnull(ss_addr_sk#20)) - : : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct + : : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : +- *(1) Project [d_date_sk#23, d_year#2, d_qoy#18] : : : : : : +- *(1) Filter ((((isnotnull(d_qoy#18) && isnotnull(d_year#2)) && (d_qoy#18 = 1)) && (d_year#2 = 2000)) && isnotnull(d_date_sk#23)) - : : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#23,d_year#2,d_qoy#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,1), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#23,d_year#2,d_qoy#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,1), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- *(2) Project [ca_address_sk#21, ca_county#1] : : : : : +- *(2) Filter (isnotnull(ca_address_sk#21) && isnotnull(ca_county#1)) - : : : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#21,ca_county#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county)], ReadSchema: struct + : : : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#21,ca_county#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : : : : +- *(7) HashAggregate(keys=[ca_county#16, d_qoy#24, d_year#25], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) : : : : +- Exchange hashpartitioning(ca_county#16, d_qoy#24, d_year#25, 5) @@ -37,11 +37,11 @@ : : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#27], Inner, BuildRight : : : : : :- *(6) Project [ss_sold_date_sk#22, ss_addr_sk#20, ss_ext_sales_price#19] : : : : : : +- *(6) Filter (isnotnull(ss_sold_date_sk#22) && isnotnull(ss_addr_sk#20)) - : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct + : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- *(4) Project [d_date_sk#27, d_year#25, d_qoy#24] : : : : : +- *(4) Filter ((((isnotnull(d_qoy#24) && isnotnull(d_year#25)) && (d_qoy#24 = 2)) && (d_year#25 = 2000)) && isnotnull(d_date_sk#27)) - : : : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#27,d_year#25,d_qoy#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#27,d_year#25,d_qoy#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct : : : : +- ReusedExchange [ca_address_sk#26, ca_county#16], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : : : +- *(11) HashAggregate(keys=[ca_county#17, d_qoy#28, d_year#29], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) @@ -53,11 +53,11 @@ : : : : +- *(10) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#31], Inner, BuildRight : : : : :- *(10) Project [ss_sold_date_sk#22, ss_addr_sk#20, ss_ext_sales_price#19] : : : : : +- *(10) Filter (isnotnull(ss_sold_date_sk#22) && isnotnull(ss_addr_sk#20)) - : : : : : +- *(10) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct + : : : : : +- *(10) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(8) Project [d_date_sk#31, d_year#29, d_qoy#28] : : : : +- *(8) Filter ((((isnotnull(d_qoy#28) && isnotnull(d_year#29)) && (d_qoy#28 = 3)) && (d_year#29 = 2000)) && isnotnull(d_date_sk#31)) - : : : : +- *(8) FileScan parquet default.date_dim[d_date_sk#31,d_year#29,d_qoy#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- *(8) FileScan parquet default.date_dim[d_date_sk#31,d_year#29,d_qoy#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct : : : +- ReusedExchange [ca_address_sk#30, ca_county#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : : +- *(15) HashAggregate(keys=[ca_county#13, d_qoy#32, d_year#33], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) @@ -69,7 +69,7 @@ : : : +- *(14) BroadcastHashJoin [ws_sold_date_sk#37], [d_date_sk#38], Inner, BuildRight : : : :- *(14) Project [ws_sold_date_sk#37, ws_bill_addr_sk#35, ws_ext_sales_price#34] : : : : +- *(14) Filter (isnotnull(ws_sold_date_sk#37) && isnotnull(ws_bill_addr_sk#35)) - : : : : +- *(14) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct + : : : : +- *(14) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct : : : +- ReusedExchange [d_date_sk#38, d_year#33, d_qoy#32], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- ReusedExchange [ca_address_sk#36, ca_county#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) @@ -82,7 +82,7 @@ : : +- *(18) BroadcastHashJoin [ws_sold_date_sk#37], [d_date_sk#42], Inner, BuildRight : : :- *(18) Project [ws_sold_date_sk#37, ws_bill_addr_sk#35, ws_ext_sales_price#34] : : : +- *(18) Filter (isnotnull(ws_sold_date_sk#37) && isnotnull(ws_bill_addr_sk#35)) - : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct + : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct : : +- ReusedExchange [d_date_sk#42, d_year#40, d_qoy#39], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [ca_address_sk#41, ca_county#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) @@ -95,6 +95,6 @@ : +- *(22) BroadcastHashJoin [ws_sold_date_sk#37], [d_date_sk#46], Inner, BuildRight : :- *(22) Project [ws_sold_date_sk#37, ws_bill_addr_sk#35, ws_ext_sales_price#34] : : +- *(22) Filter (isnotnull(ws_sold_date_sk#37) && isnotnull(ws_bill_addr_sk#35)) - : : +- *(22) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct + : : +- *(22) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct : +- ReusedExchange [d_date_sk#46, d_year#44, d_qoy#43], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- ReusedExchange [ca_address_sk#45, ca_county#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/explain.txt index f677286fe..99470fde0 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/explain.txt @@ -8,11 +8,11 @@ CollectLimit 100 : : +- *(6) BroadcastHashJoin [cs_item_sk#5], [i_item_sk#4], Inner, BuildRight : : :- *(6) Project [cs_sold_date_sk#2, cs_item_sk#5, cs_ext_discount_amt#7] : : : +- *(6) Filter ((isnotnull(cs_item_sk#5) && isnotnull(cs_ext_discount_amt#7)) && isnotnull(cs_sold_date_sk#2)) - : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#2,cs_item_sk#5,cs_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_ext_discount_amt), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#2,cs_item_sk#5,cs_ext_discount_amt#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_ext_discount_amt), IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(1) Project [i_item_sk#4] : : +- *(1) Filter ((isnotnull(i_manufact_id#9) && (i_manufact_id#9 = 977)) && isnotnull(i_item_sk#4)) - : : +- *(1) FileScan parquet default.item[i_item_sk#4,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,977), IsNotNull(i_item_sk)], ReadSchema: struct + : : +- *(1) FileScan parquet default.item[i_item_sk#4,i_manufact_id#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,977), IsNotNull(i_item_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) : +- *(4) Filter isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#8) : +- *(4) HashAggregate(keys=[cs_item_sk#5], functions=[avg(UnscaledValue(cs_ext_discount_amt#7))]) @@ -22,9 +22,9 @@ CollectLimit 100 : +- *(3) BroadcastHashJoin [cs_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight : :- *(3) Project [cs_sold_date_sk#2, cs_item_sk#5, cs_ext_discount_amt#7] : : +- *(3) Filter (isnotnull(cs_sold_date_sk#2) && isnotnull(cs_item_sk#5)) - : : +- *(3) FileScan parquet default.catalog_sales[cs_sold_date_sk#2,cs_item_sk#5,cs_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)], ReadSchema: struct + : : +- *(3) FileScan parquet default.catalog_sales[cs_sold_date_sk#2,cs_item_sk#5,cs_ext_discount_amt#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(2) Project [d_date_sk#3] : +- *(2) Filter (((isnotnull(d_date#10) && (cast(d_date#10 as string) >= 2000-01-27)) && (d_date#10 <= 11073)) && isnotnull(d_date_sk#3)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#3,d_date#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)], ReadSchema: struct + : +- *(2) FileScan parquet default.date_dim[d_date_sk#3,d_date#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)], ReadSchema: struct +- ReusedExchange [d_date_sk#3], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/explain.txt index 2989a8b23..63e591a7c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/explain.txt @@ -15,24 +15,24 @@ TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight : : : :- *(5) Project [ss_sold_date_sk#9, ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] : : : : +- *(5) Filter ((isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#7)) && isnotnull(ss_item_sk#5)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(1) Project [d_date_sk#10] : : : +- *(1) Filter ((((isnotnull(d_year#11) && isnotnull(d_moy#12)) && (d_year#11 = 1998)) && (d_moy#12 = 5)) && isnotnull(d_date_sk#10)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,5), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,5), IsNotNull(d_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [ca_address_sk#8] : : +- *(2) Filter ((isnotnull(ca_gmt_offset#13) && (ca_gmt_offset#13 = -5.00)) && isnotnull(ca_address_sk#8)) - : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(4) BroadcastHashJoin [i_manufact_id#2], [i_manufact_id#2#14], LeftSemi, BuildRight : :- *(4) Project [i_item_sk#6, i_manufact_id#2] : : +- *(4) Filter isnotnull(i_item_sk#6) - : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_manufact_id#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_manufact_id#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(3) Project [i_manufact_id#2 AS i_manufact_id#2#14] : +- *(3) Filter (isnotnull(i_category#15) && (i_category#15 = Electronics)) - : +- *(3) FileScan parquet default.item[i_category#15,i_manufact_id#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Electronics)], ReadSchema: struct + : +- *(3) FileScan parquet default.item[i_category#15,i_manufact_id#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Electronics)], ReadSchema: struct :- *(12) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) : +- Exchange hashpartitioning(i_manufact_id#2, 5) : +- *(11) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) @@ -44,7 +44,7 @@ TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#19], [d_date_sk#10], Inner, BuildRight : : : :- *(11) Project [cs_sold_date_sk#19, cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] : : : : +- *(11) Filter ((isnotnull(cs_sold_date_sk#19) && isnotnull(cs_bill_addr_sk#18)) && isnotnull(cs_item_sk#17)) - : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct + : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct : : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [i_item_sk#6, i_manufact_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) @@ -59,7 +59,7 @@ TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output : : +- *(17) BroadcastHashJoin [ws_sold_date_sk#23], [d_date_sk#10], Inner, BuildRight : : :- *(17) Project [ws_sold_date_sk#23, ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] : : : +- *(17) Filter ((isnotnull(ws_sold_date_sk#23) && isnotnull(ws_bill_addr_sk#22)) && isnotnull(ws_item_sk#21)) - : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- ReusedExchange [i_item_sk#6, i_manufact_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/explain.txt index 30008ef45..3dccb55ac 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/explain.txt @@ -15,20 +15,20 @@ : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight : : : :- *(4) Project [ss_sold_date_sk#13, ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#5] : : : : +- *(4) Filter (((isnotnull(ss_sold_date_sk#13) && isnotnull(ss_store_sk#11)) && isnotnull(ss_hdemo_sk#9)) && isnotnull(ss_customer_sk#7)) - : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_customer_sk#7,ss_hdemo_sk#9,ss_store_sk#11,ss_ticket_number#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_customer_sk#7,ss_hdemo_sk#9,ss_store_sk#11,ss_ticket_number#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(1) Project [d_date_sk#14] : : : +- *(1) Filter (((((d_dom#15 >= 1) && (d_dom#15 <= 3)) || ((d_dom#15 >= 25) && (d_dom#15 <= 28))) && d_year#16 IN (1999,2000,2001)) && isnotnull(d_date_sk#14)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_dom#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),Le..., ReadSchema: struct + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_dom#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),Le..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [s_store_sk#12] : : +- *(2) Filter ((isnotnull(s_county#17) && (s_county#17 = Williamson County)) && isnotnull(s_store_sk#12)) - : : +- *(2) FileScan parquet default.store[s_store_sk#12,s_county#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.store[s_store_sk#12,s_county#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(3) Project [hd_demo_sk#10] : +- *(3) Filter ((((isnotnull(hd_vehicle_count#18) && ((hd_buy_potential#19 = >10000) || (hd_buy_potential#19 = unknown))) && (hd_vehicle_count#18 > 0)) && (CASE WHEN (hd_vehicle_count#18 > 0) THEN (cast(hd_dep_count#20 as double) / cast(hd_vehicle_count#18 as double)) ELSE null END > 1.2)) && isnotnull(hd_demo_sk#10)) - : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#10,hd_buy_potential#19,hd_dep_count#20,hd_vehicle_count#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknow..., ReadSchema: struct + : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#10,hd_buy_potential#19,hd_dep_count#20,hd_vehicle_count#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknow..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(5) Project [c_customer_sk#8, c_salutation#3, c_first_name#2, c_last_name#1, c_preferred_cust_flag#4] +- *(5) Filter isnotnull(c_customer_sk#8) - +- *(5) FileScan parquet default.customer[c_customer_sk#8,c_salutation#3,c_first_name#2,c_last_name#1,c_preferred_cust_flag#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : : : : +- *(9) FileScan parquet default.customer[c_customer_sk#26,c_current_cdemo_sk#20,c_current_addr_sk#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(2) Project [ss_customer_sk#29] : : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#30], [d_date_sk#31], Inner, BuildRight : : : : :- *(2) Project [ss_sold_date_sk#30, ss_customer_sk#29] : : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#30) - : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#30,ss_customer_sk#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#30,ss_customer_sk#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(1) Project [d_date_sk#31] : : : : +- *(1) Filter ((((isnotnull(d_year#32) && isnotnull(d_qoy#33)) && (d_year#32 = 2002)) && (d_qoy#33 < 4)) && isnotnull(d_date_sk#31)) - : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#31,d_year#32,d_qoy#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#31,d_year#32,d_qoy#33] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(4) Project [ws_bill_customer_sk#28] : : : +- *(4) BroadcastHashJoin [ws_sold_date_sk#34], [d_date_sk#31], Inner, BuildRight : : : :- *(4) Project [ws_sold_date_sk#34, ws_bill_customer_sk#28] : : : : +- *(4) Filter isnotnull(ws_sold_date_sk#34) - : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#34,ws_bill_customer_sk#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#34,ws_bill_customer_sk#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct : : : +- ReusedExchange [d_date_sk#31], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(6) Project [cs_ship_customer_sk#27] : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#35], [d_date_sk#31], Inner, BuildRight : : :- *(6) Project [cs_sold_date_sk#35, cs_ship_customer_sk#27] : : : +- *(6) Filter isnotnull(cs_sold_date_sk#35) - : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#35,cs_ship_customer_sk#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#35,cs_ship_customer_sk#27] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : +- ReusedExchange [d_date_sk#31], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(7) Project [ca_address_sk#23, ca_state#1] : +- *(7) Filter isnotnull(ca_address_sk#23) - : +- *(7) FileScan parquet default.customer_address[ca_address_sk#23,ca_state#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + : +- *(7) FileScan parquet default.customer_address[ca_address_sk#23,ca_state#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(8) Project [cd_demo_sk#21, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6] +- *(8) Filter isnotnull(cd_demo_sk#21) - +- *(8) FileScan parquet default.customer_demographics[cd_demo_sk#21,cd_gender#2,cd_marital_status#3,cd_dep_count#19,cd_dep_employed_count#5,cd_dep_college_count#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#21,d_year#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(2) Project [i_item_sk#19, i_class#15, i_category#14] : +- *(2) Filter isnotnull(i_item_sk#19) - : +- *(2) FileScan parquet default.item[i_item_sk#19,i_class#15,i_category#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- *(2) FileScan parquet default.item[i_item_sk#19,i_class#15,i_category#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(3) Project [s_store_sk#17] +- *(3) Filter ((isnotnull(s_state#23) && (s_state#23 = TN)) && isnotnull(s_store_sk#17)) - +- *(3) FileScan parquet default.store[s_store_sk#17,s_state#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file + +- *(3) FileScan parquet default.store[s_store_sk#17,s_state#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/explain.txt index 9ffa2c5ee..f0f1b6aeb 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/explain.txt @@ -11,16 +11,16 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[ : : +- *(4) BroadcastHashJoin [i_item_sk#4], [inv_item_sk#8], Inner, BuildRight : : :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3] : : : +- *(4) Filter ((((isnotnull(i_current_price#3) && (i_current_price#3 >= 68.00)) && (cast(i_current_price#3 as decimal(12,2)) <= 98.00)) && i_manufact_id#9 IN (677,940,694,808)) && isnotnull(i_item_sk#4)) - : : : +- *(4) FileScan parquet default.item[i_item_sk#4,i_item_id#1,i_item_desc#2,i_current_price#3,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,68.00), In(i_manufact_id, [677,94..., ReadSchema: struct= 100)) && (inv_quantity_on_hand#10 <= 500)) && isnotnull(inv_item_sk#8)) && isnotnull(inv_date_sk#6)) - : : +- *(1) FileScan parquet default.inventory[inv_date_sk#6,inv_item_sk#8,inv_quantity_on_hand#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(i..., ReadSchema: struct + : : +- *(1) FileScan parquet default.inventory[inv_date_sk#6,inv_item_sk#8,inv_quantity_on_hand#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(i..., ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(2) Project [d_date_sk#7] : +- *(2) Filter (((isnotnull(d_date#11) && (d_date#11 >= 10988)) && (d_date#11 <= 11048)) && isnotnull(d_date_sk#7)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_date#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-01), LessThanOrEqual(d_date,2000-04-01), Is..., ReadSchema: struct + : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_date#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-01), LessThanOrEqual(d_date,2000-04-01), Is..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(3) Project [cs_item_sk#5] +- *(3) Filter isnotnull(cs_item_sk#5) - +- *(3) FileScan parquet default.catalog_sales[cs_item_sk#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk)], ReadSchema: struct \ No newline at end of file + +- *(3) FileScan parquet default.catalog_sales[cs_item_sk#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/explain.txt index 8072eda0e..a49754fdf 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/explain.txt @@ -18,15 +18,15 @@ CollectLimit 100 : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight : : : :- *(3) Project [ss_sold_date_sk#12, ss_customer_sk#10] : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#12) && isnotnull(ss_customer_sk#10)) - : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_customer_sk#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct + : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_customer_sk#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(1) Project [d_date_sk#13, d_date#3] : : : +- *(1) Filter (((isnotnull(d_month_seq#14) && (d_month_seq#14 >= 1200)) && (d_month_seq#14 <= 1211)) && isnotnull(d_date_sk#13)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#13,d_date#3,d_month_seq#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#13,d_date#3,d_month_seq#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [c_customer_sk#11, c_first_name#2, c_last_name#1] : : +- *(2) Filter isnotnull(c_customer_sk#11) - : : +- *(2) FileScan parquet default.customer[c_customer_sk#11,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.customer[c_customer_sk#11,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) : +- *(7) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) : +- Exchange hashpartitioning(c_last_name#7, c_first_name#8, d_date#9, 5) @@ -37,7 +37,7 @@ CollectLimit 100 : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight : : :- *(6) Project [cs_sold_date_sk#17, cs_bill_customer_sk#15] : : : +- *(6) Filter (isnotnull(cs_sold_date_sk#17) && isnotnull(cs_bill_customer_sk#15)) - : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#17,cs_bill_customer_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#17,cs_bill_customer_sk#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct : : +- ReusedExchange [d_date_sk#18, d_date#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [c_customer_sk#16, c_first_name#8, c_last_name#7], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) @@ -50,6 +50,6 @@ CollectLimit 100 : +- *(10) BroadcastHashJoin [ws_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight : :- *(10) Project [ws_sold_date_sk#21, ws_bill_customer_sk#19] : : +- *(10) Filter (isnotnull(ws_sold_date_sk#21) && isnotnull(ws_bill_customer_sk#19)) - : : +- *(10) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct + : : +- *(10) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct : +- ReusedExchange [d_date_sk#22, d_date#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- ReusedExchange [c_customer_sk#20, c_first_name#5, c_last_name#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/explain.txt index 5455e1849..496999a76 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/explain.txt @@ -15,19 +15,19 @@ : : : +- *(4) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#2], Inner, BuildRight : : : :- *(4) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] : : : : +- *(4) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) - : : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(1) Project [i_item_sk#2] : : : +- *(1) Filter isnotnull(i_item_sk#2) - : : : +- *(1) FileScan parquet default.item[i_item_sk#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.item[i_item_sk#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [w_warehouse_sk#1, w_warehouse_name#12] : : +- *(2) Filter isnotnull(w_warehouse_sk#1) - : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#1,w_warehouse_name#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#1,w_warehouse_name#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(3) Project [d_date_sk#15, d_moy#3] : +- *(3) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#3)) && (d_year#18 = 2001)) && (d_moy#3 = 1)) && isnotnull(d_date_sk#15)) - : +- *(3) FileScan parquet default.date_dim[d_date_sk#15,d_year#18,d_moy#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)], ReadSchema: struct + : +- *(3) FileScan parquet default.date_dim[d_date_sk#15,d_year#18,d_moy#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) +- *(9) Project [w_warehouse_sk#10, i_item_sk#9, d_moy#6, mean#7, CASE WHEN (mean#7 = 0.0) THEN null ELSE (stdev#19 / mean#7) END AS cov#8] +- *(9) Filter (CASE WHEN (mean#7 = 0.0) THEN 0.0 ELSE (stdev#19 / mean#7) END > 1.0) @@ -42,10 +42,10 @@ : : +- *(8) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#9], Inner, BuildRight : : :- *(8) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] : : : +- *(8) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) - : : : +- *(8) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : : +- *(8) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct : : +- ReusedExchange [i_item_sk#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [w_warehouse_sk#10, w_warehouse_name#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(7) Project [d_date_sk#21, d_moy#6] +- *(7) Filter ((((isnotnull(d_year#22) && isnotnull(d_moy#6)) && (d_year#22 = 2001)) && (d_moy#6 = 2)) && isnotnull(d_date_sk#21)) - +- *(7) FileScan parquet default.date_dim[d_date_sk#21,d_year#22,d_moy#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file + +- *(7) FileScan parquet default.date_dim[d_date_sk#21,d_year#22,d_moy#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/explain.txt index 3c845e77b..89e93bc62 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/explain.txt @@ -15,19 +15,19 @@ : : : +- *(4) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#2], Inner, BuildRight : : : :- *(4) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] : : : : +- *(4) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) - : : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(1) Project [i_item_sk#2] : : : +- *(1) Filter isnotnull(i_item_sk#2) - : : : +- *(1) FileScan parquet default.item[i_item_sk#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.item[i_item_sk#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [w_warehouse_sk#1, w_warehouse_name#12] : : +- *(2) Filter isnotnull(w_warehouse_sk#1) - : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#1,w_warehouse_name#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#1,w_warehouse_name#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(3) Project [d_date_sk#15, d_moy#3] : +- *(3) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#3)) && (d_year#18 = 2001)) && (d_moy#3 = 1)) && isnotnull(d_date_sk#15)) - : +- *(3) FileScan parquet default.date_dim[d_date_sk#15,d_year#18,d_moy#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)], ReadSchema: struct + : +- *(3) FileScan parquet default.date_dim[d_date_sk#15,d_year#18,d_moy#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) +- *(9) Project [w_warehouse_sk#10, i_item_sk#9, d_moy#6, mean#7, CASE WHEN (mean#7 = 0.0) THEN null ELSE (stdev#19 / mean#7) END AS cov#8] +- *(9) Filter (CASE WHEN (mean#7 = 0.0) THEN 0.0 ELSE (stdev#19 / mean#7) END > 1.0) @@ -42,10 +42,10 @@ : : +- *(8) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#9], Inner, BuildRight : : :- *(8) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] : : : +- *(8) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) - : : : +- *(8) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct + : : : +- *(8) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct : : +- ReusedExchange [i_item_sk#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [w_warehouse_sk#10, w_warehouse_name#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(7) Project [d_date_sk#21, d_moy#6] +- *(7) Filter ((((isnotnull(d_year#22) && isnotnull(d_moy#6)) && (d_year#22 = 2001)) && (d_moy#6 = 2)) && isnotnull(d_date_sk#21)) - +- *(7) FileScan parquet default.date_dim[d_date_sk#21,d_year#22,d_moy#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file + +- *(7) FileScan parquet default.date_dim[d_date_sk#21,d_year#22,d_moy#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/explain.txt index 16fa8a95c..098474466 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/explain.txt @@ -20,15 +20,15 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer : : : : : : : +- *(3) BroadcastHashJoin [c_customer_sk#33], [ss_customer_sk#34], Inner, BuildRight : : : : : : : :- *(3) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] : : : : : : : : +- *(3) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) - : : : : : : : : +- *(3) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#32,d_year#26] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct : : : : : :- LocalTableScan , [customer_id#35, year_total#36] : : : : : +- LocalTableScan , [customer_id#37, year_total#38] : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) @@ -42,12 +42,12 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer : : : : : : +- *(7) BroadcastHashJoin [c_customer_sk#33], [ss_customer_sk#34], Inner, BuildRight : : : : : : :- *(7) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] : : : : : : : +- *(7) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) - : : : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#32,d_year#26] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)], ReadSchema: struct : : : : :- LocalTableScan , [customer_id#35, customer_first_name#39, customer_last_name#40, customer_preferred_cust_flag#41, customer_birth_country#42, customer_login#43, customer_email_address#44, year_total#36] : : : : +- LocalTableScan , [customer_id#37, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#38] : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) @@ -63,11 +63,11 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer : : : : : +- *(11) BroadcastHashJoin [c_customer_sk#33], [cs_bill_customer_sk#56], Inner, BuildRight : : : : : :- *(11) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] : : : : : : +- *(11) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) - : : : : : : +- *(11) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#37, year_total#38] : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) @@ -82,7 +82,7 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer : : : : +- *(15) BroadcastHashJoin [c_customer_sk#33], [cs_bill_customer_sk#56], Inner, BuildRight : : : : :- *(15) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] : : : : : +- *(15) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) - : : : : : +- *(15) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#37, year_total#38] @@ -100,11 +100,11 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer : : +- *(19) BroadcastHashJoin [c_customer_sk#33], [ws_bill_customer_sk#62], Inner, BuildRight : : :- *(19) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] : : : +- *(19) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) - : : : +- *(19) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.catalog_returns[cr_item_sk#16,cr_order_number#15,cr_refunded_cash#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [w_warehouse_sk#13, w_state#1] : : +- *(2) Filter isnotnull(w_warehouse_sk#13) - : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#13,w_state#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#13,w_state#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(3) Project [i_item_sk#11, i_item_id#2] : +- *(3) Filter (((isnotnull(i_current_price#17) && (i_current_price#17 >= 0.99)) && (i_current_price#17 <= 1.49)) && isnotnull(i_item_sk#11)) - : +- *(3) FileScan parquet default.item[i_item_sk#11,i_item_id#2,i_current_price#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_..., ReadSchema: struct + : +- *(3) FileScan parquet default.item[i_item_sk#11,i_item_id#2,i_current_price#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(4) Project [d_date_sk#9, d_date#5] +- *(4) Filter (((isnotnull(d_date#5) && (d_date#5 >= 10997)) && (d_date#5 <= 11057)) && isnotnull(d_date_sk#9)) - +- *(4) FileScan parquet default.date_dim[d_date_sk#9,d_date#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), Is..., ReadSchema: struct \ No newline at end of file + +- *(4) FileScan parquet default.date_dim[d_date_sk#9,d_date#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/explain.txt index 31bc2e514..146160760 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/explain.txt @@ -7,7 +7,7 @@ TakeOrderedAndProject(limit=100, orderBy=[i_product_name#1 ASC NULLS FIRST], out +- *(3) BroadcastHashJoin [i_manufact#2], [i_manufact#2#3], Inner, BuildRight :- *(3) Project [i_manufact#2, i_product_name#1] : +- *(3) Filter (((isnotnull(i_manufact_id#4) && (i_manufact_id#4 >= 738)) && (i_manufact_id#4 <= 778)) && isnotnull(i_manufact#2)) - : +- *(3) FileScan parquet default.item[i_manufact_id#4,i_manufact#2,i_product_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), GreaterThanOrEqual(i_manufact_id,738), LessThanOrEqual(i_manufact_id,7..., ReadSchema: struct + : +- *(3) FileScan parquet default.item[i_manufact_id#4,i_manufact#2,i_product_name#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), GreaterThanOrEqual(i_manufact_id,738), LessThanOrEqual(i_manufact_id,7..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) +- *(2) Project [i_manufact#2#3] +- *(2) Filter (if (isnull(alwaysTrue#5)) 0 else item_cnt#6 > 0) @@ -16,4 +16,4 @@ TakeOrderedAndProject(limit=100, orderBy=[i_product_name#1 ASC NULLS FIRST], out +- *(1) HashAggregate(keys=[i_manufact#2], functions=[partial_count(1)]) +- *(1) Project [i_manufact#2] +- *(1) Filter (((((i_category#7 = Women) && (((((i_color#8 = powder) || (i_color#8 = khaki)) && ((i_units#9 = Ounce) || (i_units#9 = Oz))) && ((i_size#10 = medium) || (i_size#10 = extra large))) || ((((i_color#8 = brown) || (i_color#8 = honeydew)) && ((i_units#9 = Bunch) || (i_units#9 = Ton))) && ((i_size#10 = N/A) || (i_size#10 = small))))) || ((i_category#7 = Men) && (((((i_color#8 = floral) || (i_color#8 = deep)) && ((i_units#9 = N/A) || (i_units#9 = Dozen))) && ((i_size#10 = petite) || (i_size#10 = large))) || ((((i_color#8 = light) || (i_color#8 = cornflower)) && ((i_units#9 = Box) || (i_units#9 = Pound))) && ((i_size#10 = medium) || (i_size#10 = extra large)))))) || (((i_category#7 = Women) && (((((i_color#8 = midnight) || (i_color#8 = snow)) && ((i_units#9 = Pallet) || (i_units#9 = Gross))) && ((i_size#10 = medium) || (i_size#10 = extra large))) || ((((i_color#8 = cyan) || (i_color#8 = papaya)) && ((i_units#9 = Cup) || (i_units#9 = Dram))) && ((i_size#10 = N/A) || (i_size#10 = small))))) || ((i_category#7 = Men) && (((((i_color#8 = orange) || (i_color#8 = frosted)) && ((i_units#9 = Each) || (i_units#9 = Tbl))) && ((i_size#10 = petite) || (i_size#10 = large))) || ((((i_color#8 = forest) || (i_color#8 = ghost)) && ((i_units#9 = Lb) || (i_units#9 = Bundle))) && ((i_size#10 = medium) || (i_size#10 = extra large))))))) && isnotnull(i_manufact#2)) - +- *(1) FileScan parquet default.item[i_category#7,i_manufact#2,i_size#10,i_color#8,i_units#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(Or(And(EqualTo(i_category,Women),Or(And(And(Or(EqualTo(i_color,powder),EqualTo(i_color,khaki)..., ReadSchema: struct \ No newline at end of file + +- *(1) FileScan parquet default.item[i_category#7,i_manufact#2,i_size#10,i_color#8,i_units#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [Or(Or(And(EqualTo(i_category,Women),Or(And(And(Or(EqualTo(i_color,powder),EqualTo(i_color,khaki)..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/explain.txt index 1e560817c..932860551 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/explain.txt @@ -9,12 +9,12 @@ TakeOrderedAndProject(limit=100, orderBy=[sum(ss_ext_sales_price)#1 DESC NULLS L : +- *(3) BroadcastHashJoin [d_date_sk#8], [ss_sold_date_sk#9], Inner, BuildRight : :- *(3) Project [d_date_sk#8, d_year#2] : : +- *(3) Filter ((((isnotnull(d_moy#10) && isnotnull(d_year#2)) && (d_moy#10 = 11)) && (d_year#2 = 2000)) && isnotnull(d_date_sk#8)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_year#2,d_moy#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_year#2,d_moy#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(1) Project [ss_sold_date_sk#9, ss_item_sk#6, ss_ext_sales_price#5] : +- *(1) Filter (isnotnull(ss_sold_date_sk#9) && isnotnull(ss_item_sk#6)) - : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#6,ss_ext_sales_price#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#6,ss_ext_sales_price#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(2) Project [i_item_sk#7, i_category_id#3, i_category#4] +- *(2) Filter ((isnotnull(i_manager_id#11) && (i_manager_id#11 = 1)) && isnotnull(i_item_sk#7)) - +- *(2) FileScan parquet default.item[i_item_sk#7,i_category_id#3,i_category#4,i_manager_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file + +- *(2) FileScan parquet default.item[i_item_sk#7,i_category_id#3,i_category#4,i_manager_id#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/explain.txt index b68348e44..c5ec4b687 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/explain.txt @@ -9,12 +9,12 @@ TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,s_store : +- *(3) BroadcastHashJoin [d_date_sk#14], [ss_sold_date_sk#15], Inner, BuildRight : :- *(3) Project [d_date_sk#14, d_day_name#10] : : +- *(3) Filter ((isnotnull(d_year#16) && (d_year#16 = 2000)) && isnotnull(d_date_sk#14)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_day_name#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_day_name#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(1) Project [ss_sold_date_sk#15, ss_store_sk#12, ss_sales_price#11] : +- *(1) Filter (isnotnull(ss_sold_date_sk#15) && isnotnull(ss_store_sk#12)) - : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#15,ss_store_sk#12,ss_sales_price#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#15,ss_store_sk#12,ss_sales_price#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(2) Project [s_store_sk#13, s_store_id#2, s_store_name#1] +- *(2) Filter ((isnotnull(s_gmt_offset#17) && (s_gmt_offset#17 = -5.00)) && isnotnull(s_store_sk#13)) - +- *(2) FileScan parquet default.store[s_store_sk#13,s_store_id#2,s_store_name#1,s_gmt_offset#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file + +- *(2) FileScan parquet default.store[s_store_sk#13,s_store_id#2,s_store_name#1,s_gmt_offset#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt index 99c27e16a..1abd6c8f3 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt @@ -19,13 +19,13 @@ TakeOrderedAndProject(limit=100, orderBy=[rnk#1 ASC NULLS FIRST], output=[rnk#1, : : : : +- *(1) HashAggregate(keys=[ss_store_sk#14], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) : : : : +- *(1) Project [ss_store_sk#14, ss_net_profit#12] : : : : +- *(1) Filter ((isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) && isnull(ss_addr_sk#15)) - : : : : +- *(1) FileScan parquet default.store_sales[ss_addr_sk#15,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)], ReadSchema: struct + : : : : +- *(1) FileScan parquet default.store_sales[ss_addr_sk#15,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)], ReadSchema: struct : : : +- *(2) HashAggregate(keys=[ss_item_sk#16], functions=[avg(UnscaledValue(ss_net_profit#12))]) : : : +- Exchange hashpartitioning(ss_item_sk#16, 5) : : : +- *(1) HashAggregate(keys=[ss_item_sk#16], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) : : : +- *(1) Project [ss_item_sk#16, ss_net_profit#12] : : : +- *(1) Filter (isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) - : : : +- *(1) FileScan parquet default.store_sales[ss_item_sk#16,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.store_sales[ss_item_sk#16,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) : : +- *(7) Project [item_sk#6, rnk#10] : : +- *(7) Filter ((isnotnull(rnk#10) && (rnk#10 < 11)) && isnotnull(item_sk#6)) @@ -40,11 +40,11 @@ TakeOrderedAndProject(limit=100, orderBy=[rnk#1 ASC NULLS FIRST], output=[rnk#1, : : : +- *(1) HashAggregate(keys=[ss_store_sk#14], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) : : : +- *(1) Project [ss_store_sk#14, ss_net_profit#12] : : : +- *(1) Filter ((isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) && isnull(ss_addr_sk#15)) - : : : +- *(1) FileScan parquet default.store_sales[ss_addr_sk#15,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.store_sales[ss_addr_sk#15,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)], ReadSchema: struct : : +- *(5) HashAggregate(keys=[ss_item_sk#16], functions=[avg(UnscaledValue(ss_net_profit#12))]) : : +- ReusedExchange [ss_item_sk#16, sum#19, count#20], Exchange hashpartitioning(ss_item_sk#16, 5) : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(8) Project [i_item_sk#9, i_product_name#4] : +- *(8) Filter isnotnull(i_item_sk#9) - : +- *(8) FileScan parquet default.item[i_item_sk#9,i_product_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- *(8) FileScan parquet default.item[i_item_sk#9,i_product_name#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct +- ReusedExchange [i_item_sk#7, i_product_name#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/explain.txt index ad97e7d1a..87aa5f5b6 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/explain.txt @@ -16,24 +16,24 @@ TakeOrderedAndProject(limit=100, orderBy=[ca_zip#1 ASC NULLS FIRST,ca_city#2 ASC : : : : +- *(6) BroadcastHashJoin [ws_bill_customer_sk#14], [c_customer_sk#15], Inner, BuildRight : : : : :- *(6) Project [ws_sold_date_sk#10, ws_item_sk#8, ws_bill_customer_sk#14, ws_sales_price#4] : : : : : +- *(6) Filter ((isnotnull(ws_bill_customer_sk#14) && isnotnull(ws_sold_date_sk#10)) && isnotnull(ws_item_sk#8)) - : : : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#10,ws_item_sk#8,ws_bill_customer_sk#14,ws_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : : : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#10,ws_item_sk#8,ws_bill_customer_sk#14,ws_sales_price#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(1) Project [c_customer_sk#15, c_current_addr_sk#12] : : : : +- *(1) Filter (isnotnull(c_customer_sk#15) && isnotnull(c_current_addr_sk#12)) - : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#15,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#15,c_current_addr_sk#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(2) Project [ca_address_sk#13, ca_city#2, ca_zip#1] : : : +- *(2) Filter isnotnull(ca_address_sk#13) - : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#13,ca_city#2,ca_zip#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#13,ca_city#2,ca_zip#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(3) Project [d_date_sk#11] : : +- *(3) Filter ((((isnotnull(d_qoy#16) && isnotnull(d_year#17)) && (d_qoy#16 = 2)) && (d_year#17 = 2001)) && isnotnull(d_date_sk#11)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#11,d_year#17,d_qoy#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#11,d_year#17,d_qoy#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(4) Project [i_item_sk#9, i_item_id#6] : +- *(4) Filter isnotnull(i_item_sk#9) - : +- *(4) FileScan parquet default.item[i_item_sk#9,i_item_id#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- *(4) FileScan parquet default.item[i_item_sk#9,i_item_id#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) +- *(5) Project [i_item_id#6 AS i_item_id#6#7] +- *(5) Filter i_item_sk#9 IN (2,3,5,7,11,13,17,19,23,29) - +- *(5) FileScan parquet default.item[i_item_sk#9,i_item_id#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_item_sk, [2,3,5,7,11,13,17,19,23,29])], ReadSchema: struct \ No newline at end of file + +- *(5) FileScan parquet default.item[i_item_sk#9,i_item_id#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [In(i_item_sk, [2,3,5,7,11,13,17,19,23,29])], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/explain.txt index a5d4817de..05a0c9bb1 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/explain.txt @@ -17,25 +17,25 @@ TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_ : : : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight : : : : : :- *(5) Project [ss_sold_date_sk#19, ss_customer_sk#10, ss_hdemo_sk#15, ss_addr_sk#12, ss_store_sk#17, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] : : : : : : +- *(5) Filter ((((isnotnull(ss_sold_date_sk#19) && isnotnull(ss_store_sk#17)) && isnotnull(ss_hdemo_sk#15)) && isnotnull(ss_addr_sk#12)) && isnotnull(ss_customer_sk#10)) - : : : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_customer_sk#10,ss_hdemo_sk#15,ss_addr_sk#12,ss_store_sk#17,ss_ticket_number#5,ss_coupon_amt#13,ss_net_profit#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk..., ReadSchema: struct + : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#22,d_dow#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_dow, [6,0]), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(2) Project [s_store_sk#18] : : : : +- *(2) Filter (s_city#23 IN (Fairview,Midway) && isnotnull(s_store_sk#18)) - : : : : +- *(2) FileScan parquet default.store[s_store_sk#18,s_city#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [In(s_city, [Fairview,Midway]), IsNotNull(s_store_sk)], ReadSchema: struct + : : : : +- *(2) FileScan parquet default.store[s_store_sk#18,s_city#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [In(s_city, [Fairview,Midway]), IsNotNull(s_store_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(3) Project [hd_demo_sk#16] : : : +- *(3) Filter (((hd_dep_count#24 = 4) || (hd_vehicle_count#25 = 3)) && isnotnull(hd_demo_sk#16)) - : : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#16,hd_dep_count#24,hd_vehicle_count#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)], ReadSchema: struct + : : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#16,hd_dep_count#24,hd_vehicle_count#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(4) Project [ca_address_sk#9, ca_city#3] : : +- *(4) Filter (isnotnull(ca_address_sk#9) && isnotnull(ca_city#3)) - : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#9,ca_city#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)], ReadSchema: struct + : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#9,ca_city#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(6) Project [c_customer_sk#11, c_current_addr_sk#8, c_first_name#2, c_last_name#1] : +- *(6) Filter (isnotnull(c_customer_sk#11) && isnotnull(c_current_addr_sk#8)) - : +- *(6) FileScan parquet default.customer[c_customer_sk#11,c_current_addr_sk#8,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + : +- *(6) FileScan parquet default.customer[c_customer_sk#11,c_current_addr_sk#8,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct +- ReusedExchange [ca_address_sk#9, ca_city#3], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/explain.txt index e7dcf37e4..134ab3c96 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/explain.txt @@ -22,19 +22,19 @@ TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast( : : : : +- *(4) BroadcastHashJoin [i_item_sk#30], [ss_item_sk#31], Inner, BuildRight : : : : :- *(4) Project [i_item_sk#30, i_brand#5, i_category#4] : : : : : +- *(4) Filter ((isnotnull(i_item_sk#30) && isnotnull(i_brand#5)) && isnotnull(i_category#4)) - : : : : : +- *(4) FileScan parquet default.item[i_item_sk#30,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct + : : : : : +- *(4) FileScan parquet default.item[i_item_sk#30,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) : : : : +- *(1) Project [ss_sold_date_sk#28, ss_item_sk#31, ss_store_sk#26, ss_sales_price#25] : : : : +- *(1) Filter ((isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#28)) && isnotnull(ss_store_sk#26)) - : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#28,ss_item_sk#31,ss_store_sk#26,ss_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#28,ss_item_sk#31,ss_store_sk#26,ss_sales_price#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(2) Project [d_date_sk#29, d_year#7, d_moy#8] : : : +- *(2) Filter ((((d_year#7 = 1999) || ((d_year#7 = 1998) && (d_moy#8 = 12))) || ((d_year#7 = 2000) && (d_moy#8 = 1))) && isnotnull(d_date_sk#29)) - : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#29,d_year#7,d_moy#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct + : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#29,d_year#7,d_moy#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(3) Project [s_store_sk#27, s_store_name#3, s_company_name#6] : : +- *(3) Filter ((isnotnull(s_store_sk#27) && isnotnull(s_store_name#3)) && isnotnull(s_company_name#6)) - : : +- *(3) FileScan parquet default.store[s_store_sk#27,s_store_name#3,s_company_name#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)], ReadSchema: struct + : : +- *(3) FileScan parquet default.store[s_store_sk#27,s_store_name#3,s_company_name#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] + 1))) : +- *(14) Project [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#11, rn#23] : +- *(14) Filter isnotnull(rn#23) diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/explain.txt index 735a739a8..a7bcb203c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/explain.txt @@ -12,20 +12,20 @@ : : : +- *(5) BroadcastHashJoin [ss_store_sk#13], [s_store_sk#14], Inner, BuildRight : : : :- *(5) Project [ss_sold_date_sk#2, ss_cdemo_sk#8, ss_addr_sk#4, ss_store_sk#13, ss_quantity#1, ss_sales_price#12, ss_net_profit#7] : : : : +- *(5) Filter (((isnotnull(ss_store_sk#13) && isnotnull(ss_cdemo_sk#8)) && isnotnull(ss_addr_sk#4)) && isnotnull(ss_sold_date_sk#2)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#2,ss_cdemo_sk#8,ss_addr_sk#4,ss_store_sk#13,ss_quantity#1,ss_sales_price#12,ss_net_profit#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.store[s_store_sk#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] : : +- *(2) Filter isnotnull(cd_demo_sk#9) - : : +- *(2) FileScan parquet default.customer_demographics[cd_demo_sk#9,cd_marital_status#10,cd_education_status#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.customer_demographics[cd_demo_sk#9,cd_marital_status#10,cd_education_status#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(3) Project [ca_address_sk#5, ca_state#6] : +- *(3) Filter ((isnotnull(ca_country#15) && (ca_country#15 = United States)) && isnotnull(ca_address_sk#5)) - : +- *(3) FileScan parquet default.customer_address[ca_address_sk#5,ca_state#6,ca_country#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- *(3) FileScan parquet default.customer_address[ca_address_sk#5,ca_state#6,ca_country#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(4) Project [d_date_sk#3] +- *(4) Filter ((isnotnull(d_year#16) && (d_year#16 = 2001)) && isnotnull(d_date_sk#3)) - +- *(4) FileScan parquet default.date_dim[d_date_sk#3,d_year#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file + +- *(4) FileScan parquet default.date_dim[d_date_sk#3,d_year#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/explain.txt deleted file mode 100644 index 8d10c1641..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/explain.txt +++ /dev/null @@ -1,433 +0,0 @@ -== Physical Plan == -TakeOrderedAndProject (78) -+- * HashAggregate (77) - +- Exchange (76) - +- * HashAggregate (75) - +- Union (74) - :- * Project (27) - : +- * Filter (26) - : +- Window (25) - : +- * Sort (24) - : +- Window (23) - : +- * Sort (22) - : +- Exchange (21) - : +- * HashAggregate (20) - : +- Exchange (19) - : +- * HashAggregate (18) - : +- * Project (17) - : +- * BroadcastHashJoin Inner BuildRight (16) - : :- * Project (10) - : : +- * BroadcastHashJoin Inner BuildRight (9) - : : :- * Project (4) - : : : +- * Filter (3) - : : : +- * ColumnarToRow (2) - : : : +- Scan parquet default.web_sales (1) - : : +- BroadcastExchange (8) - : : +- * Filter (7) - : : +- * ColumnarToRow (6) - : : +- Scan parquet default.web_returns (5) - : +- BroadcastExchange (15) - : +- * Project (14) - : +- * Filter (13) - : +- * ColumnarToRow (12) - : +- Scan parquet default.date_dim (11) - :- * Project (50) - : +- * Filter (49) - : +- Window (48) - : +- * Sort (47) - : +- Window (46) - : +- * Sort (45) - : +- Exchange (44) - : +- * HashAggregate (43) - : +- Exchange (42) - : +- * HashAggregate (41) - : +- * Project (40) - : +- * BroadcastHashJoin Inner BuildRight (39) - : :- * Project (37) - : : +- * BroadcastHashJoin Inner BuildRight (36) - : : :- * Project (31) - : : : +- * Filter (30) - : : : +- * ColumnarToRow (29) - : : : +- Scan parquet default.catalog_sales (28) - : : +- BroadcastExchange (35) - : : +- * Filter (34) - : : +- * ColumnarToRow (33) - : : +- Scan parquet default.catalog_returns (32) - : +- ReusedExchange (38) - +- * Project (73) - +- * Filter (72) - +- Window (71) - +- * Sort (70) - +- Window (69) - +- * Sort (68) - +- Exchange (67) - +- * HashAggregate (66) - +- Exchange (65) - +- * HashAggregate (64) - +- * Project (63) - +- * BroadcastHashJoin Inner BuildRight (62) - :- * Project (60) - : +- * BroadcastHashJoin Inner BuildRight (59) - : :- * Project (54) - : : +- * Filter (53) - : : +- * ColumnarToRow (52) - : : +- Scan parquet default.store_sales (51) - : +- BroadcastExchange (58) - : +- * Filter (57) - : +- * ColumnarToRow (56) - : +- Scan parquet default.store_returns (55) - +- ReusedExchange (61) - - -(1) Scan parquet default.web_sales -Output [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_net_paid), IsNotNull(ws_quantity), GreaterThan(ws_net_profit,1.00), GreaterThan(ws_net_paid,0.00), GreaterThan(ws_quantity,0), IsNotNull(ws_order_number), IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 3] -Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] - -(3) Filter [codegen id : 3] -Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] -Condition : ((((((((isnotnull(ws_net_profit#6) AND isnotnull(ws_net_paid#5)) AND isnotnull(ws_quantity#4)) AND (ws_net_profit#6 > 1.00)) AND (ws_net_paid#5 > 0.00)) AND (ws_quantity#4 > 0)) AND isnotnull(ws_order_number#3)) AND isnotnull(ws_item_sk#2)) AND isnotnull(ws_sold_date_sk#1)) - -(4) Project [codegen id : 3] -Output [5]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5] -Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] - -(5) Scan parquet default.web_returns -Output [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_returns] -PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] -ReadSchema: struct - -(6) ColumnarToRow [codegen id : 1] -Input [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] - -(7) Filter [codegen id : 1] -Input [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] -Condition : (((isnotnull(wr_return_amt#10) AND (wr_return_amt#10 > 10000.00)) AND isnotnull(wr_order_number#8)) AND isnotnull(wr_item_sk#7)) - -(8) BroadcastExchange -Input [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#11] - -(9) BroadcastHashJoin [codegen id : 3] -Left keys [2]: [cast(ws_order_number#3 as bigint), cast(ws_item_sk#2 as bigint)] -Right keys [2]: [wr_order_number#8, wr_item_sk#7] -Join condition: None - -(10) Project [codegen id : 3] -Output [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#9, wr_return_amt#10] -Input [9]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] - -(11) Scan parquet default.date_dim -Output [3]: [d_date_sk#12, d_year#13, d_moy#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [3]: [d_date_sk#12, d_year#13, d_moy#14] - -(13) Filter [codegen id : 2] -Input [3]: [d_date_sk#12, d_year#13, d_moy#14] -Condition : ((((isnotnull(d_year#13) AND isnotnull(d_moy#14)) AND (d_year#13 = 2001)) AND (d_moy#14 = 12)) AND isnotnull(d_date_sk#12)) - -(14) Project [codegen id : 2] -Output [1]: [d_date_sk#12] -Input [3]: [d_date_sk#12, d_year#13, d_moy#14] - -(15) BroadcastExchange -Input [1]: [d_date_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] - -(16) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ws_sold_date_sk#1] -Right keys [1]: [d_date_sk#12] -Join condition: None - -(17) Project [codegen id : 3] -Output [5]: [ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#9, wr_return_amt#10] -Input [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#9, wr_return_amt#10, d_date_sk#12] - -(18) HashAggregate [codegen id : 3] -Input [5]: [ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#9, wr_return_amt#10] -Keys [1]: [ws_item_sk#2] -Functions [4]: [partial_sum(cast(coalesce(wr_return_quantity#9, 0) as bigint)), partial_sum(cast(coalesce(ws_quantity#4, 0) as bigint)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))] -Aggregate Attributes [6]: [sum#16, sum#17, sum#18, isEmpty#19, sum#20, isEmpty#21] -Results [7]: [ws_item_sk#2, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27] - -(19) Exchange -Input [7]: [ws_item_sk#2, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27] -Arguments: hashpartitioning(ws_item_sk#2, 5), true, [id=#28] - -(20) HashAggregate [codegen id : 4] -Input [7]: [ws_item_sk#2, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27] -Keys [1]: [ws_item_sk#2] -Functions [4]: [sum(cast(coalesce(wr_return_quantity#9, 0) as bigint)), sum(cast(coalesce(ws_quantity#4, 0) as bigint)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))] -Aggregate Attributes [4]: [sum(cast(coalesce(wr_return_quantity#9, 0) as bigint))#29, sum(cast(coalesce(ws_quantity#4, 0) as bigint))#30, sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00))#31, sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))#32] -Results [3]: [ws_item_sk#2 AS item#33, CheckOverflow((promote_precision(cast(sum(cast(coalesce(wr_return_quantity#9, 0) as bigint))#29 as decimal(15,4))) / promote_precision(cast(sum(cast(coalesce(ws_quantity#4, 0) as bigint))#30 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#34, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00))#31 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))#32 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#35] - -(21) Exchange -Input [3]: [item#33, return_ratio#34, currency_ratio#35] -Arguments: SinglePartition, true, [id=#36] - -(22) Sort [codegen id : 5] -Input [3]: [item#33, return_ratio#34, currency_ratio#35] -Arguments: [return_ratio#34 ASC NULLS FIRST], false, 0 - -(23) Window -Input [3]: [item#33, return_ratio#34, currency_ratio#35] -Arguments: [rank(return_ratio#34) windowspecdefinition(return_ratio#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#37], [return_ratio#34 ASC NULLS FIRST] - -(24) Sort [codegen id : 6] -Input [4]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37] -Arguments: [currency_ratio#35 ASC NULLS FIRST], false, 0 - -(25) Window -Input [4]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37] -Arguments: [rank(currency_ratio#35) windowspecdefinition(currency_ratio#35 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#38], [currency_ratio#35 ASC NULLS FIRST] - -(26) Filter [codegen id : 7] -Input [5]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37, currency_rank#38] -Condition : ((return_rank#37 <= 10) OR (currency_rank#38 <= 10)) - -(27) Project [codegen id : 7] -Output [5]: [web AS channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] -Input [5]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37, currency_rank#38] - -(28) Scan parquet default.catalog_sales -Output [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cs_net_profit#45] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_net_profit), IsNotNull(cs_net_paid), IsNotNull(cs_quantity), GreaterThan(cs_net_profit,1.00), GreaterThan(cs_net_paid,0.00), GreaterThan(cs_quantity,0), IsNotNull(cs_order_number), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] -ReadSchema: struct - -(29) ColumnarToRow [codegen id : 10] -Input [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cs_net_profit#45] - -(30) Filter [codegen id : 10] -Input [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cs_net_profit#45] -Condition : ((((((((isnotnull(cs_net_profit#45) AND isnotnull(cs_net_paid#44)) AND isnotnull(cs_quantity#43)) AND (cs_net_profit#45 > 1.00)) AND (cs_net_paid#44 > 0.00)) AND (cs_quantity#43 > 0)) AND isnotnull(cs_order_number#42)) AND isnotnull(cs_item_sk#41)) AND isnotnull(cs_sold_date_sk#40)) - -(31) Project [codegen id : 10] -Output [5]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44] -Input [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cs_net_profit#45] - -(32) Scan parquet default.catalog_returns -Output [4]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] -ReadSchema: struct - -(33) ColumnarToRow [codegen id : 8] -Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] - -(34) Filter [codegen id : 8] -Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] -Condition : (((isnotnull(cr_return_amount#49) AND (cr_return_amount#49 > 10000.00)) AND isnotnull(cr_order_number#47)) AND isnotnull(cr_item_sk#46)) - -(35) BroadcastExchange -Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#50] - -(36) BroadcastHashJoin [codegen id : 10] -Left keys [2]: [cs_order_number#42, cs_item_sk#41] -Right keys [2]: [cr_order_number#47, cr_item_sk#46] -Join condition: None - -(37) Project [codegen id : 10] -Output [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_quantity#43, cs_net_paid#44, cr_return_quantity#48, cr_return_amount#49] -Input [9]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] - -(38) ReusedExchange [Reuses operator id: 15] -Output [1]: [d_date_sk#12] - -(39) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#40] -Right keys [1]: [d_date_sk#12] -Join condition: None - -(40) Project [codegen id : 10] -Output [5]: [cs_item_sk#41, cs_quantity#43, cs_net_paid#44, cr_return_quantity#48, cr_return_amount#49] -Input [7]: [cs_sold_date_sk#40, cs_item_sk#41, cs_quantity#43, cs_net_paid#44, cr_return_quantity#48, cr_return_amount#49, d_date_sk#12] - -(41) HashAggregate [codegen id : 10] -Input [5]: [cs_item_sk#41, cs_quantity#43, cs_net_paid#44, cr_return_quantity#48, cr_return_amount#49] -Keys [1]: [cs_item_sk#41] -Functions [4]: [partial_sum(cast(coalesce(cr_return_quantity#48, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#43, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#44 as decimal(12,2)), 0.00))] -Aggregate Attributes [6]: [sum#51, sum#52, sum#53, isEmpty#54, sum#55, isEmpty#56] -Results [7]: [cs_item_sk#41, sum#57, sum#58, sum#59, isEmpty#60, sum#61, isEmpty#62] - -(42) Exchange -Input [7]: [cs_item_sk#41, sum#57, sum#58, sum#59, isEmpty#60, sum#61, isEmpty#62] -Arguments: hashpartitioning(cs_item_sk#41, 5), true, [id=#63] - -(43) HashAggregate [codegen id : 11] -Input [7]: [cs_item_sk#41, sum#57, sum#58, sum#59, isEmpty#60, sum#61, isEmpty#62] -Keys [1]: [cs_item_sk#41] -Functions [4]: [sum(cast(coalesce(cr_return_quantity#48, 0) as bigint)), sum(cast(coalesce(cs_quantity#43, 0) as bigint)), sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#44 as decimal(12,2)), 0.00))] -Aggregate Attributes [4]: [sum(cast(coalesce(cr_return_quantity#48, 0) as bigint))#64, sum(cast(coalesce(cs_quantity#43, 0) as bigint))#65, sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00))#66, sum(coalesce(cast(cs_net_paid#44 as decimal(12,2)), 0.00))#67] -Results [3]: [cs_item_sk#41 AS item#68, CheckOverflow((promote_precision(cast(sum(cast(coalesce(cr_return_quantity#48, 0) as bigint))#64 as decimal(15,4))) / promote_precision(cast(sum(cast(coalesce(cs_quantity#43, 0) as bigint))#65 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#69, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00))#66 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#44 as decimal(12,2)), 0.00))#67 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#70] - -(44) Exchange -Input [3]: [item#68, return_ratio#69, currency_ratio#70] -Arguments: SinglePartition, true, [id=#71] - -(45) Sort [codegen id : 12] -Input [3]: [item#68, return_ratio#69, currency_ratio#70] -Arguments: [return_ratio#69 ASC NULLS FIRST], false, 0 - -(46) Window -Input [3]: [item#68, return_ratio#69, currency_ratio#70] -Arguments: [rank(return_ratio#69) windowspecdefinition(return_ratio#69 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#72], [return_ratio#69 ASC NULLS FIRST] - -(47) Sort [codegen id : 13] -Input [4]: [item#68, return_ratio#69, currency_ratio#70, return_rank#72] -Arguments: [currency_ratio#70 ASC NULLS FIRST], false, 0 - -(48) Window -Input [4]: [item#68, return_ratio#69, currency_ratio#70, return_rank#72] -Arguments: [rank(currency_ratio#70) windowspecdefinition(currency_ratio#70 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#73], [currency_ratio#70 ASC NULLS FIRST] - -(49) Filter [codegen id : 14] -Input [5]: [item#68, return_ratio#69, currency_ratio#70, return_rank#72, currency_rank#73] -Condition : ((return_rank#72 <= 10) OR (currency_rank#73 <= 10)) - -(50) Project [codegen id : 14] -Output [5]: [catalog AS channel#74, item#68, return_ratio#69, return_rank#72, currency_rank#73] -Input [5]: [item#68, return_ratio#69, currency_ratio#70, return_rank#72, currency_rank#73] - -(51) Scan parquet default.store_sales -Output [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_net_paid), IsNotNull(ss_quantity), GreaterThan(ss_net_profit,1.00), GreaterThan(ss_net_paid,0.00), GreaterThan(ss_quantity,0), IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] -ReadSchema: struct - -(52) ColumnarToRow [codegen id : 17] -Input [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80] - -(53) Filter [codegen id : 17] -Input [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80] -Condition : ((((((((isnotnull(ss_net_profit#80) AND isnotnull(ss_net_paid#79)) AND isnotnull(ss_quantity#78)) AND (ss_net_profit#80 > 1.00)) AND (ss_net_paid#79 > 0.00)) AND (ss_quantity#78 > 0)) AND isnotnull(ss_ticket_number#77)) AND isnotnull(ss_item_sk#76)) AND isnotnull(ss_sold_date_sk#75)) - -(54) Project [codegen id : 17] -Output [5]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79] -Input [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80] - -(55) Scan parquet default.store_returns -Output [4]: [sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] -ReadSchema: struct - -(56) ColumnarToRow [codegen id : 15] -Input [4]: [sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] - -(57) Filter [codegen id : 15] -Input [4]: [sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] -Condition : (((isnotnull(sr_return_amt#84) AND (sr_return_amt#84 > 10000.00)) AND isnotnull(sr_ticket_number#82)) AND isnotnull(sr_item_sk#81)) - -(58) BroadcastExchange -Input [4]: [sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#85] - -(59) BroadcastHashJoin [codegen id : 17] -Left keys [2]: [cast(ss_ticket_number#77 as bigint), cast(ss_item_sk#76 as bigint)] -Right keys [2]: [sr_ticket_number#82, sr_item_sk#81] -Join condition: None - -(60) Project [codegen id : 17] -Output [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#83, sr_return_amt#84] -Input [9]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] - -(61) ReusedExchange [Reuses operator id: 15] -Output [1]: [d_date_sk#12] - -(62) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ss_sold_date_sk#75] -Right keys [1]: [d_date_sk#12] -Join condition: None - -(63) Project [codegen id : 17] -Output [5]: [ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#83, sr_return_amt#84] -Input [7]: [ss_sold_date_sk#75, ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#83, sr_return_amt#84, d_date_sk#12] - -(64) HashAggregate [codegen id : 17] -Input [5]: [ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#83, sr_return_amt#84] -Keys [1]: [ss_item_sk#76] -Functions [4]: [partial_sum(cast(coalesce(sr_return_quantity#83, 0) as bigint)), partial_sum(cast(coalesce(ss_quantity#78, 0) as bigint)), partial_sum(coalesce(cast(sr_return_amt#84 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))] -Aggregate Attributes [6]: [sum#86, sum#87, sum#88, isEmpty#89, sum#90, isEmpty#91] -Results [7]: [ss_item_sk#76, sum#92, sum#93, sum#94, isEmpty#95, sum#96, isEmpty#97] - -(65) Exchange -Input [7]: [ss_item_sk#76, sum#92, sum#93, sum#94, isEmpty#95, sum#96, isEmpty#97] -Arguments: hashpartitioning(ss_item_sk#76, 5), true, [id=#98] - -(66) HashAggregate [codegen id : 18] -Input [7]: [ss_item_sk#76, sum#92, sum#93, sum#94, isEmpty#95, sum#96, isEmpty#97] -Keys [1]: [ss_item_sk#76] -Functions [4]: [sum(cast(coalesce(sr_return_quantity#83, 0) as bigint)), sum(cast(coalesce(ss_quantity#78, 0) as bigint)), sum(coalesce(cast(sr_return_amt#84 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))] -Aggregate Attributes [4]: [sum(cast(coalesce(sr_return_quantity#83, 0) as bigint))#99, sum(cast(coalesce(ss_quantity#78, 0) as bigint))#100, sum(coalesce(cast(sr_return_amt#84 as decimal(12,2)), 0.00))#101, sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))#102] -Results [3]: [ss_item_sk#76 AS item#103, CheckOverflow((promote_precision(cast(sum(cast(coalesce(sr_return_quantity#83, 0) as bigint))#99 as decimal(15,4))) / promote_precision(cast(sum(cast(coalesce(ss_quantity#78, 0) as bigint))#100 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#104, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#84 as decimal(12,2)), 0.00))#101 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))#102 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#105] - -(67) Exchange -Input [3]: [item#103, return_ratio#104, currency_ratio#105] -Arguments: SinglePartition, true, [id=#106] - -(68) Sort [codegen id : 19] -Input [3]: [item#103, return_ratio#104, currency_ratio#105] -Arguments: [return_ratio#104 ASC NULLS FIRST], false, 0 - -(69) Window -Input [3]: [item#103, return_ratio#104, currency_ratio#105] -Arguments: [rank(return_ratio#104) windowspecdefinition(return_ratio#104 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#107], [return_ratio#104 ASC NULLS FIRST] - -(70) Sort [codegen id : 20] -Input [4]: [item#103, return_ratio#104, currency_ratio#105, return_rank#107] -Arguments: [currency_ratio#105 ASC NULLS FIRST], false, 0 - -(71) Window -Input [4]: [item#103, return_ratio#104, currency_ratio#105, return_rank#107] -Arguments: [rank(currency_ratio#105) windowspecdefinition(currency_ratio#105 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#108], [currency_ratio#105 ASC NULLS FIRST] - -(72) Filter [codegen id : 21] -Input [5]: [item#103, return_ratio#104, currency_ratio#105, return_rank#107, currency_rank#108] -Condition : ((return_rank#107 <= 10) OR (currency_rank#108 <= 10)) - -(73) Project [codegen id : 21] -Output [5]: [store AS channel#109, item#103, return_ratio#104, return_rank#107, currency_rank#108] -Input [5]: [item#103, return_ratio#104, currency_ratio#105, return_rank#107, currency_rank#108] - -(74) Union - -(75) HashAggregate [codegen id : 22] -Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] -Keys [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] - -(76) Exchange -Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] -Arguments: hashpartitioning(channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38, 5), true, [id=#110] - -(77) HashAggregate [codegen id : 23] -Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] -Keys [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] - -(78) TakeOrderedAndProject -Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] -Arguments: 100, [channel#39 ASC NULLS FIRST, return_rank#37 ASC NULLS FIRST, currency_rank#38 ASC NULLS FIRST], [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] - diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/simplified.txt deleted file mode 100644 index c15f2394e..000000000 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/simplified.txt +++ /dev/null @@ -1,126 +0,0 @@ -TakeOrderedAndProject [channel,return_rank,currency_rank,item,return_ratio] - WholeStageCodegen (23) - HashAggregate [channel,item,return_ratio,return_rank,currency_rank] - InputAdapter - Exchange [channel,item,return_ratio,return_rank,currency_rank] #1 - WholeStageCodegen (22) - HashAggregate [channel,item,return_ratio,return_rank,currency_rank] - InputAdapter - Union - WholeStageCodegen (7) - Project [item,return_ratio,return_rank,currency_rank] - Filter [return_rank,currency_rank] - InputAdapter - Window [currency_ratio] - WholeStageCodegen (6) - Sort [currency_ratio] - InputAdapter - Window [return_ratio] - WholeStageCodegen (5) - Sort [return_ratio] - InputAdapter - Exchange #2 - WholeStageCodegen (4) - HashAggregate [ws_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [ws_item_sk] #3 - WholeStageCodegen (3) - HashAggregate [ws_item_sk,wr_return_quantity,ws_quantity,wr_return_amt,ws_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] - Project [ws_item_sk,ws_quantity,ws_net_paid,wr_return_quantity,wr_return_amt] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_net_paid,wr_return_quantity,wr_return_amt] - BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_net_paid] - Filter [ws_net_profit,ws_net_paid,ws_quantity,ws_order_number,ws_item_sk,ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_net_profit] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (1) - Filter [wr_return_amt,wr_order_number,wr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - WholeStageCodegen (14) - Project [item,return_ratio,return_rank,currency_rank] - Filter [return_rank,currency_rank] - InputAdapter - Window [currency_ratio] - WholeStageCodegen (13) - Sort [currency_ratio] - InputAdapter - Window [return_ratio] - WholeStageCodegen (12) - Sort [return_ratio] - InputAdapter - Exchange #6 - WholeStageCodegen (11) - HashAggregate [cs_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [cs_item_sk] #7 - WholeStageCodegen (10) - HashAggregate [cs_item_sk,cr_return_quantity,cs_quantity,cr_return_amount,cs_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] - Project [cs_item_sk,cs_quantity,cs_net_paid,cr_return_quantity,cr_return_amount] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_net_paid,cr_return_quantity,cr_return_amount] - BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_net_paid] - Filter [cs_net_profit,cs_net_paid,cs_quantity,cs_order_number,cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_net_profit] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (8) - Filter [cr_return_amount,cr_order_number,cr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] - InputAdapter - ReusedExchange [d_date_sk] #5 - WholeStageCodegen (21) - Project [item,return_ratio,return_rank,currency_rank] - Filter [return_rank,currency_rank] - InputAdapter - Window [currency_ratio] - WholeStageCodegen (20) - Sort [currency_ratio] - InputAdapter - Window [return_ratio] - WholeStageCodegen (19) - Sort [return_ratio] - InputAdapter - Exchange #9 - WholeStageCodegen (18) - HashAggregate [ss_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [ss_item_sk] #10 - WholeStageCodegen (17) - HashAggregate [ss_item_sk,sr_return_quantity,ss_quantity,sr_return_amt,ss_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] - Project [ss_item_sk,ss_quantity,ss_net_paid,sr_return_quantity,sr_return_amt] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_net_paid,sr_return_quantity,sr_return_amt] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid] - Filter [ss_net_profit,ss_net_paid,ss_quantity,ss_ticket_number,ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_net_profit] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (15) - Filter [sr_return_amt,sr_ticket_number,sr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] - InputAdapter - ReusedExchange [d_date_sk] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/explain.txt index 0bad61dd2..73563859c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/explain.txt @@ -15,18 +15,18 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL : : :- Union : : : :- *(1) Project [cast(ss_store_sk#21 as bigint) AS store_sk#17, cast(ss_sold_date_sk#22 as bigint) AS date_sk#19, ss_ext_sales_price#23 AS sales_price#13, ss_net_profit#24 AS profit#15, 0.00 AS return_amt#14, 0.00 AS net_loss#16] : : : : +- *(1) Filter (isnotnull(cast(ss_sold_date_sk#22 as bigint)) && isnotnull(cast(ss_store_sk#21 as bigint))) - : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_store_sk#21,ss_ext_sales_price#23,ss_net_profit#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [], ReadSchema: struct= 11192)) && (d_date#35 <= 11206)) && isnotnull(d_date_sk#20)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_date#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), Is..., ReadSchema: struct + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_date#35] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), Is..., ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(4) Project [s_store_sk#18, s_store_id#12] : +- *(4) Filter isnotnull(s_store_sk#18) - : +- *(4) FileScan parquet default.store[s_store_sk#18,s_store_id#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : +- *(4) FileScan parquet default.store[s_store_sk#18,s_store_id#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct :- *(12) HashAggregate(keys=[cp_catalog_page_id#36], functions=[sum(UnscaledValue(sales_price#37)), sum(UnscaledValue(return_amt#38)), sum(UnscaledValue(profit#39)), sum(UnscaledValue(net_loss#40))]) : +- Exchange hashpartitioning(cp_catalog_page_id#36, 5) : +- *(11) HashAggregate(keys=[cp_catalog_page_id#36], functions=[partial_sum(UnscaledValue(sales_price#37)), partial_sum(UnscaledValue(return_amt#38)), partial_sum(UnscaledValue(profit#39)), partial_sum(UnscaledValue(net_loss#40))]) @@ -37,18 +37,18 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL : : :- Union : : : :- *(7) Project [cs_catalog_page_sk#44 AS page_sk#41, cs_sold_date_sk#45 AS date_sk#43, cs_ext_sales_price#46 AS sales_price#37, cs_net_profit#47 AS profit#39, 0.00 AS return_amt#38, 0.00 AS net_loss#40] : : : : +- *(7) Filter (isnotnull(cs_sold_date_sk#45) && isnotnull(cs_catalog_page_sk#44)) - : : : : +- *(7) FileScan parquet default.catalog_sales[cs_sold_date_sk#45,cs_catalog_page_sk#44,cs_ext_sales_price#46,cs_net_profit#47] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk)], ReadSchema: struct= 11192)) && (d_date#35 <= 11206)) && isnotnull(d_date_sk#20)) - : : +- *(9) FileScan parquet default.date_dim[d_date_sk#20,d_date#35] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), Is..., ReadSchema: struct + : : +- *(9) FileScan parquet default.date_dim[d_date_sk#20,d_date#35] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), Is..., ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(10) Project [cp_catalog_page_sk#42, cp_catalog_page_id#36] : +- *(10) Filter isnotnull(cp_catalog_page_sk#42) - : +- *(10) FileScan parquet default.catalog_page[cp_catalog_page_sk#42,cp_catalog_page_id#36] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_page], PartitionFilters: [], PushedFilters: [IsNotNull(cp_catalog_page_sk)], ReadSchema: struct + : +- *(10) FileScan parquet default.catalog_page[cp_catalog_page_sk#42,cp_catalog_page_id#36] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_page], PartitionFilters: [], PushedFilters: [IsNotNull(cp_catalog_page_sk)], ReadSchema: struct +- *(19) HashAggregate(keys=[web_site_id#58], functions=[sum(UnscaledValue(sales_price#59)), sum(UnscaledValue(return_amt#60)), sum(UnscaledValue(profit#61)), sum(UnscaledValue(net_loss#62))]) +- Exchange hashpartitioning(web_site_id#58, 5) +- *(18) HashAggregate(keys=[web_site_id#58], functions=[partial_sum(UnscaledValue(sales_price#59)), partial_sum(UnscaledValue(return_amt#60)), partial_sum(UnscaledValue(profit#61)), partial_sum(UnscaledValue(net_loss#62))]) @@ -59,18 +59,18 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL : :- Union : : :- *(13) Project [ws_web_site_sk#66 AS wsr_web_site_sk#63, cast(ws_sold_date_sk#67 as bigint) AS date_sk#65, ws_ext_sales_price#68 AS sales_price#59, ws_net_profit#69 AS profit#61, 0.00 AS return_amt#60, 0.00 AS net_loss#62] : : : +- *(13) Filter (isnotnull(cast(ws_sold_date_sk#67 as bigint)) && isnotnull(ws_web_site_sk#66)) - : : : +- *(13) FileScan parquet default.web_sales[ws_sold_date_sk#67,ws_web_site_sk#66,ws_ext_sales_price#68,ws_net_profit#69] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_web_site_sk)], ReadSchema: struct + : : +- *(14) FileScan parquet default.web_sales[ws_item_sk#81,ws_web_site_sk#66,ws_order_number#82] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)], ReadSchema: struct : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(17) Project [web_site_sk#64, web_site_id#58] +- *(17) Filter isnotnull(web_site_sk#64) - +- *(17) FileScan parquet default.web_site[web_site_sk#64,web_site_id#58] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file + +- *(17) FileScan parquet default.web_site[web_site_sk#64,web_site_id#58] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/explain.txt index 6716b98d6..5b8bf2898 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/explain.txt @@ -13,20 +13,20 @@ TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,s_compa : : : +- *(5) BroadcastHashJoin [cast(ss_ticket_number#22 as bigint), cast(ss_item_sk#23 as bigint), cast(ss_customer_sk#24 as bigint)], [sr_ticket_number#25, sr_item_sk#26, sr_customer_sk#27], Inner, BuildRight : : : :- *(5) Project [ss_sold_date_sk#17, ss_item_sk#23, ss_customer_sk#24, ss_store_sk#20, ss_ticket_number#22] : : : : +- *(5) Filter ((((isnotnull(ss_ticket_number#22) && isnotnull(ss_item_sk#23)) && isnotnull(ss_customer_sk#24)) && isnotnull(ss_store_sk#20)) && isnotnull(ss_sold_date_sk#17)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_item_sk#23,ss_customer_sk#24,ss_store_sk#20,ss_ticket_number#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_stor..., ReadSchema: struct + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_item_sk#23,ss_customer_sk#24,ss_store_sk#20,ss_ticket_number#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_stor..., ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[3, bigint, true], input[1, bigint, true], input[2, bigint, true])) : : : +- *(1) Project [sr_returned_date_sk#16, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#25] : : : +- *(1) Filter (((isnotnull(sr_ticket_number#25) && isnotnull(sr_customer_sk#27)) && isnotnull(sr_item_sk#26)) && isnotnull(sr_returned_date_sk#16)) - : : : +- *(1) FileScan parquet default.store_returns[sr_returned_date_sk#16,sr_item_sk#26,sr_customer_sk#27,sr_ticket_number#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_retu..., ReadSchema: struct + : : : +- *(1) FileScan parquet default.store_returns[sr_returned_date_sk#16,sr_item_sk#26,sr_customer_sk#27,sr_ticket_number#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_retu..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [s_store_sk#21, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] : : +- *(2) Filter isnotnull(s_store_sk#21) - : : +- *(2) FileScan parquet default.store[s_store_sk#21,s_store_name#1,s_company_id#2,s_street_number#3,s_street_name#4,s_street_type#5,s_suite_number#6,s_city#7,s_county#8,s_state#9,s_zip#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : +- *(3) FileScan parquet default.date_dim[d_date_sk#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(4) Project [d_date_sk#18] +- *(4) Filter ((((isnotnull(d_year#28) && isnotnull(d_moy#29)) && (d_year#28 = 2001)) && (d_moy#29 = 8)) && isnotnull(d_date_sk#18)) - +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#28,d_moy#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file + +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#28,d_moy#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/explain.txt index 548263929..38a4ec309 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/explain.txt @@ -19,11 +19,11 @@ TakeOrderedAndProject(limit=100, orderBy=[item_sk#1 ASC NULLS FIRST,d_date#2 ASC : +- *(2) BroadcastHashJoin [ws_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight : :- *(2) Project [ws_sold_date_sk#16, ws_item_sk#14, ws_sales_price#15] : : +- *(2) Filter (isnotnull(ws_item_sk#14) && isnotnull(ws_sold_date_sk#16)) - : : +- *(2) FileScan parquet default.web_sales[ws_sold_date_sk#16,ws_item_sk#14,ws_sales_price#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.web_sales[ws_sold_date_sk#16,ws_item_sk#14,ws_sales_price#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(1) Project [d_date_sk#17, d_date#9] : +- *(1) Filter (((isnotnull(d_month_seq#18) && (d_month_seq#18 >= 1200)) && (d_month_seq#18 <= 1211)) && isnotnull(d_date_sk#17)) - : +- *(1) FileScan parquet default.date_dim[d_date_sk#17,d_date#9,d_month_seq#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : +- *(1) FileScan parquet default.date_dim[d_date_sk#17,d_date#9,d_month_seq#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct +- *(12) Sort [item_sk#8 ASC NULLS FIRST, d_date#10 ASC NULLS FIRST], false, 0 +- Exchange hashpartitioning(item_sk#8, d_date#10, 5) +- *(11) Project [item_sk#8, d_date#10, cume_sales#12] @@ -37,5 +37,5 @@ TakeOrderedAndProject(limit=100, orderBy=[item_sk#1 ASC NULLS FIRST,d_date#2 ASC +- *(8) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#23], Inner, BuildRight :- *(8) Project [ss_sold_date_sk#22, ss_item_sk#20, ss_sales_price#21] : +- *(8) Filter (isnotnull(ss_item_sk#20) && isnotnull(ss_sold_date_sk#22)) - : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_item_sk#20,ss_sales_price#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_item_sk#20,ss_sales_price#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct +- ReusedExchange [d_date_sk#23, d_date#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/explain.txt index 7a9e19e6c..59da37fb3 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/explain.txt @@ -9,12 +9,12 @@ TakeOrderedAndProject(limit=100, orderBy=[d_year#1 ASC NULLS FIRST,ext_price#2 D : +- *(3) BroadcastHashJoin [d_date_sk#10], [ss_sold_date_sk#11], Inner, BuildRight : :- *(3) Project [d_date_sk#10, d_year#1] : : +- *(3) Filter ((((isnotnull(d_moy#12) && isnotnull(d_year#1)) && (d_moy#12 = 11)) && (d_year#1 = 2000)) && isnotnull(d_date_sk#10)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#10,d_year#1,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#10,d_year#1,d_moy#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(1) Project [ss_sold_date_sk#11, ss_item_sk#8, ss_ext_sales_price#7] : +- *(1) Filter (isnotnull(ss_sold_date_sk#11) && isnotnull(ss_item_sk#8)) - : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#8,ss_ext_sales_price#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#8,ss_ext_sales_price#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(2) Project [i_item_sk#9, i_brand_id#6, i_brand#5] +- *(2) Filter ((isnotnull(i_manager_id#13) && (i_manager_id#13 = 1)) && isnotnull(i_item_sk#9)) - +- *(2) FileScan parquet default.item[i_item_sk#9,i_brand_id#6,i_brand#5,i_manager_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file + +- *(2) FileScan parquet default.item[i_item_sk#9,i_brand_id#6,i_brand#5,i_manager_id#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/explain.txt index f31d6ec98..eaf2bf3ee 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/explain.txt @@ -16,16 +16,16 @@ TakeOrderedAndProject(limit=100, orderBy=[avg_quarterly_sales#1 ASC NULLS FIRST, : : +- *(4) BroadcastHashJoin [i_item_sk#11], [ss_item_sk#12], Inner, BuildRight : : :- *(4) Project [i_item_sk#11, i_manufact_id#3] : : : +- *(4) Filter ((((i_category#13 IN (Books,Children,Electronics) && i_class#14 IN (personal,portable,reference,self-help)) && i_brand#15 IN (scholaramalgamalg #16,scholaramalgamalg #17,exportiunivamalg #18,scholaramalgamalg #18)) || ((i_category#13 IN (Women,Music,Men) && i_class#14 IN (accessories,classical,fragrances,pants)) && i_brand#15 IN (amalgimporto #19,edu packscholar #19,exportiimporto #19,importoamalg #19))) && isnotnull(i_item_sk#11)) - : : : +- *(4) FileScan parquet default.item[i_item_sk#11,i_brand#15,i_class#14,i_category#13,i_manufact_id#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,reference..., ReadSchema: struct + : : : +- *(4) FileScan parquet default.item[i_item_sk#11,i_brand#15,i_class#14,i_category#13,i_manufact_id#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,reference..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) : : +- *(1) Project [ss_sold_date_sk#9, ss_item_sk#12, ss_store_sk#7, ss_sales_price#6] : : +- *(1) Filter ((isnotnull(ss_item_sk#12) && isnotnull(ss_sold_date_sk#9)) && isnotnull(ss_store_sk#7)) - : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#12,ss_store_sk#7,ss_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#12,ss_store_sk#7,ss_sales_price#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(2) Project [d_date_sk#10, d_qoy#5] : +- *(2) Filter (d_month_seq#20 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) && isnotnull(d_date_sk#10)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#10,d_month_seq#20,d_qoy#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date..., ReadSchema: struct + : +- *(2) FileScan parquet default.date_dim[d_date_sk#10,d_month_seq#20,d_qoy#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(3) Project [s_store_sk#8] +- *(3) Filter isnotnull(s_store_sk#8) - +- *(3) FileScan parquet default.store[s_store_sk#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file + +- *(3) FileScan parquet default.store[s_store_sk#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt index b7f76879c..91c20f804 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt @@ -26,34 +26,34 @@ TakeOrderedAndProject(limit=100, orderBy=[segment#1 ASC NULLS FIRST,num_customer : : : : : : :- Union : : : : : : : :- *(1) Project [cs_sold_date_sk#19 AS sold_date_sk#16, cs_bill_customer_sk#20 AS customer_sk#15, cs_item_sk#21 AS item_sk#17] : : : : : : : : +- *(1) Filter ((isnotnull(cs_item_sk#21) && isnotnull(cs_sold_date_sk#19)) && isnotnull(cs_bill_customer_sk#20)) - : : : : : : : : +- *(1) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_customer_sk#20,cs_item_sk#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct + : : : : : : : : +- *(1) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_customer_sk#20,cs_item_sk#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct : : : : : : : +- *(2) Project [ws_sold_date_sk#22 AS sold_date_sk#23, ws_bill_customer_sk#24 AS customer_sk#25, ws_item_sk#26 AS item_sk#27] : : : : : : : +- *(2) Filter ((isnotnull(ws_item_sk#26) && isnotnull(ws_sold_date_sk#22)) && isnotnull(ws_bill_customer_sk#24)) - : : : : : : : +- *(2) FileScan parquet default.web_sales[ws_sold_date_sk#22,ws_item_sk#26,ws_bill_customer_sk#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct + : : : : : : : +- *(2) FileScan parquet default.web_sales[ws_sold_date_sk#22,ws_item_sk#26,ws_bill_customer_sk#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : +- *(3) Project [i_item_sk#18] : : : : : : +- *(3) Filter ((((isnotnull(i_category#28) && isnotnull(i_class#29)) && (i_category#28 = Women)) && (i_class#29 = maternity)) && isnotnull(i_item_sk#18)) - : : : : : : +- *(3) FileScan parquet default.item[i_item_sk#18,i_class#29,i_category#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women), EqualTo(i_class,maternity)..., ReadSchema: struct + : : : : : : +- *(3) FileScan parquet default.item[i_item_sk#18,i_class#29,i_category#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women), EqualTo(i_class,maternity)..., ReadSchema: struct : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- *(4) Project [d_date_sk#7] : : : : : +- *(4) Filter ((((isnotnull(d_moy#30) && isnotnull(d_year#31)) && (d_moy#30 = 12)) && (d_year#31 = 1998)) && isnotnull(d_date_sk#7)) - : : : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#7,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#7,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(5) Project [c_customer_sk#4, c_current_addr_sk#12] : : : : +- *(5) Filter (isnotnull(c_customer_sk#4) && isnotnull(c_current_addr_sk#12)) - : : : : +- *(5) FileScan parquet default.customer[c_customer_sk#4,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + : : : : +- *(5) FileScan parquet default.customer[c_customer_sk#4,c_current_addr_sk#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) : : : +- *(7) Project [ss_sold_date_sk#6, ss_customer_sk#14, ss_ext_sales_price#5] : : : +- *(7) Filter (isnotnull(ss_customer_sk#14) && isnotnull(ss_sold_date_sk#6)) - : : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#6,ss_customer_sk#14,ss_ext_sales_price#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#6,ss_customer_sk#14,ss_ext_sales_price#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(8) Project [ca_address_sk#13, ca_county#8, ca_state#9] : : +- *(8) Filter ((isnotnull(ca_address_sk#13) && isnotnull(ca_state#9)) && isnotnull(ca_county#8)) - : : +- *(8) FileScan parquet default.customer_address[ca_address_sk#13,ca_county#8,ca_state#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state), IsNotNull(ca_county)], ReadSchema: struct + : : +- *(8) FileScan parquet default.customer_address[ca_address_sk#13,ca_county#8,ca_state#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state), IsNotNull(ca_county)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true])) : +- *(9) Project [s_county#10, s_state#11] : +- *(9) Filter (isnotnull(s_state#11) && isnotnull(s_county#10)) - : +- *(9) FileScan parquet default.store[s_county#10,s_state#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), IsNotNull(s_county)], ReadSchema: struct + : +- *(9) FileScan parquet default.store[s_county#10,s_state#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), IsNotNull(s_county)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(10) Project [d_date_sk#7] +- *(10) Filter (((isnotnull(d_month_seq#32) && (d_month_seq#32 >= Subquery subquery9801)) && (d_month_seq#32 <= Subquery subquery9802)) && isnotnull(d_date_sk#7)) @@ -63,26 +63,26 @@ TakeOrderedAndProject(limit=100, orderBy=[segment#1 ASC NULLS FIRST,num_customer : : +- *(1) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) : : +- *(1) Project [(d_month_seq#32 + 1) AS (d_month_seq + 1)#33] : : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) - : : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct + : : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct : +- Subquery subquery9802 : +- *(2) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) : +- Exchange hashpartitioning((d_month_seq + 3)#34, 5) : +- *(1) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) : +- *(1) Project [(d_month_seq#32 + 3) AS (d_month_seq + 3)#34] : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) - : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct - +- *(10) FileScan parquet default.date_dim[d_date_sk#7,d_month_seq#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct + : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct + +- *(10) FileScan parquet default.date_dim[d_date_sk#7,d_month_seq#32] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct :- Subquery subquery9801 : +- *(2) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) : +- Exchange hashpartitioning((d_month_seq + 1)#33, 5) : +- *(1) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) : +- *(1) Project [(d_month_seq#32 + 1) AS (d_month_seq + 1)#33] : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) - : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct + : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct +- Subquery subquery9802 +- *(2) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) +- Exchange hashpartitioning((d_month_seq + 3)#34, 5) +- *(1) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) +- *(1) Project [(d_month_seq#32 + 3) AS (d_month_seq + 3)#34] +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) - +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct \ No newline at end of file + +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/explain.txt index 32402fb5f..c950727b3 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/explain.txt @@ -9,12 +9,12 @@ TakeOrderedAndProject(limit=100, orderBy=[ext_price#1 DESC NULLS LAST,brand_id#2 : +- *(3) BroadcastHashJoin [d_date_sk#9], [ss_sold_date_sk#10], Inner, BuildRight : :- *(3) Project [d_date_sk#9] : : +- *(3) Filter ((((isnotnull(d_moy#11) && isnotnull(d_year#12)) && (d_moy#11 = 11)) && (d_year#12 = 1999)) && isnotnull(d_date_sk#9)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#9,d_year#12,d_moy#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#9,d_year#12,d_moy#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(1) Project [ss_sold_date_sk#10, ss_item_sk#7, ss_ext_sales_price#6] : +- *(1) Filter (isnotnull(ss_sold_date_sk#10) && isnotnull(ss_item_sk#7)) - : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#10,ss_item_sk#7,ss_ext_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#10,ss_item_sk#7,ss_ext_sales_price#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(2) Project [i_item_sk#8, i_brand_id#5, i_brand#4] +- *(2) Filter ((isnotnull(i_manager_id#13) && (i_manager_id#13 = 28)) && isnotnull(i_item_sk#8)) - +- *(2) FileScan parquet default.item[i_item_sk#8,i_brand_id#5,i_brand#4,i_manager_id#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file + +- *(2) FileScan parquet default.item[i_item_sk#8,i_brand_id#5,i_brand#4,i_manager_id#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/explain.txt index 8d1ef0a64..dd20ce6d4 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/explain.txt @@ -15,24 +15,24 @@ TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight : : : :- *(5) Project [ss_sold_date_sk#9, ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] : : : : +- *(5) Filter ((isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#7)) && isnotnull(ss_item_sk#5)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(1) Project [d_date_sk#10] : : : +- *(1) Filter ((((isnotnull(d_year#11) && isnotnull(d_moy#12)) && (d_year#11 = 2001)) && (d_moy#12 = 2)) && isnotnull(d_date_sk#10)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [ca_address_sk#8] : : +- *(2) Filter ((isnotnull(ca_gmt_offset#13) && (ca_gmt_offset#13 = -5.00)) && isnotnull(ca_address_sk#8)) - : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(4) BroadcastHashJoin [i_item_id#2], [i_item_id#2#14], LeftSemi, BuildRight : :- *(4) Project [i_item_sk#6, i_item_id#2] : : +- *(4) Filter isnotnull(i_item_sk#6) - : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_item_id#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_item_id#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : +- *(3) Project [i_item_id#2 AS i_item_id#2#14] : +- *(3) Filter i_color#15 IN (slate,blanched,burnished) - : +- *(3) FileScan parquet default.item[i_item_id#2,i_color#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_color, [slate,blanched,burnished])], ReadSchema: struct + : +- *(3) FileScan parquet default.item[i_item_id#2,i_color#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [In(i_color, [slate,blanched,burnished])], ReadSchema: struct :- *(12) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) : +- Exchange hashpartitioning(i_item_id#2, 5) : +- *(11) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) @@ -44,7 +44,7 @@ TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#19], [d_date_sk#10], Inner, BuildRight : : : :- *(11) Project [cs_sold_date_sk#19, cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] : : : : +- *(11) Filter ((isnotnull(cs_sold_date_sk#19) && isnotnull(cs_bill_addr_sk#18)) && isnotnull(cs_item_sk#17)) - : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct + : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct : : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [i_item_sk#6, i_item_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) @@ -59,7 +59,7 @@ TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output : : +- *(17) BroadcastHashJoin [ws_sold_date_sk#23], [d_date_sk#10], Inner, BuildRight : : :- *(17) Project [ws_sold_date_sk#23, ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] : : : +- *(17) Filter ((isnotnull(ws_sold_date_sk#23) && isnotnull(ws_bill_addr_sk#22)) && isnotnull(ws_item_sk#21)) - : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- ReusedExchange [i_item_sk#6, i_item_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/explain.txt index 275c06c2d..8092d1687 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/explain.txt @@ -22,19 +22,19 @@ TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast( : : : : +- *(4) BroadcastHashJoin [i_item_sk#27], [cs_item_sk#28], Inner, BuildRight : : : : :- *(4) Project [i_item_sk#27, i_brand#5, i_category#4] : : : : : +- *(4) Filter ((isnotnull(i_item_sk#27) && isnotnull(i_brand#5)) && isnotnull(i_category#4)) - : : : : : +- *(4) FileScan parquet default.item[i_item_sk#27,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct + : : : : : +- *(4) FileScan parquet default.item[i_item_sk#27,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) : : : : +- *(1) Project [cs_sold_date_sk#25, cs_call_center_sk#23, cs_item_sk#28, cs_sales_price#22] : : : : +- *(1) Filter ((isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#25)) && isnotnull(cs_call_center_sk#23)) - : : : : +- *(1) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_call_center_sk#23,cs_item_sk#28,cs_sales_price#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_call_center_sk)], ReadSchema: struct + : : : : +- *(1) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_call_center_sk#23,cs_item_sk#28,cs_sales_price#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_call_center_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(2) Project [d_date_sk#26, d_year#6, d_moy#7] : : : +- *(2) Filter ((((d_year#6 = 1999) || ((d_year#6 = 1998) && (d_moy#7 = 12))) || ((d_year#6 = 2000) && (d_moy#7 = 1))) && isnotnull(d_date_sk#26)) - : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#26,d_year#6,d_moy#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct + : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#26,d_year#6,d_moy#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(3) Project [cc_call_center_sk#24, cc_name#3] : : +- *(3) Filter (isnotnull(cc_call_center_sk#24) && isnotnull(cc_name#3)) - : : +- *(3) FileScan parquet default.call_center[cc_call_center_sk#24,cc_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)], ReadSchema: struct + : : +- *(3) FileScan parquet default.call_center[cc_call_center_sk#24,cc_name#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] + 1))) : +- *(14) Project [i_category#17, i_brand#18, cc_name#19, sum_sales#10, rn#20] : +- *(14) Filter isnotnull(rn#20) diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt index 81c7f9d64..9b8184978 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt @@ -14,29 +14,29 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,ss_item_rev# : : : +- *(4) BroadcastHashJoin [ss_item_sk#15], [i_item_sk#16], Inner, BuildRight : : : :- *(4) Project [ss_sold_date_sk#13, ss_item_sk#15, ss_ext_sales_price#12] : : : : +- *(4) Filter (isnotnull(ss_item_sk#15) && isnotnull(ss_sold_date_sk#13)) - : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_item_sk#15,ss_ext_sales_price#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_item_sk#15,ss_ext_sales_price#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(1) Project [i_item_sk#16, i_item_id#11] : : : +- *(1) Filter (isnotnull(i_item_sk#16) && isnotnull(i_item_id#11)) - : : : +- *(1) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(3) Project [d_date_sk#14] : : +- *(3) BroadcastHashJoin [d_date#17], [d_date#17#18], LeftSemi, BuildRight : : :- *(3) Project [d_date_sk#14, d_date#17] : : : +- *(3) Filter isnotnull(d_date_sk#14) - : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) : : +- *(2) Project [d_date#17 AS d_date#17#18] : : +- *(2) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12128)) : : : +- Subquery subquery12128 : : : +- *(1) Project [d_week_seq#19] : : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) - : : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct - : : +- *(2) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + : : +- *(2) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct : : +- Subquery subquery12128 : : +- *(1) Project [d_week_seq#19] : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) - : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : +- *(9) Filter isnotnull(cs_item_rev#4) : +- *(9) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(cs_ext_sales_price#20))]) @@ -48,26 +48,26 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,ss_item_rev# : : +- *(8) BroadcastHashJoin [cs_item_sk#22], [i_item_sk#16], Inner, BuildRight : : :- *(8) Project [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#20] : : : +- *(8) Filter (isnotnull(cs_item_sk#22) && isnotnull(cs_sold_date_sk#21)) - : : : +- *(8) FileScan parquet default.catalog_sales[cs_sold_date_sk#21,cs_item_sk#22,cs_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- *(8) FileScan parquet default.catalog_sales[cs_sold_date_sk#21,cs_item_sk#22,cs_ext_sales_price#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : +- ReusedExchange [i_item_sk#16, i_item_id#11], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(7) Project [d_date_sk#14] : +- *(7) BroadcastHashJoin [d_date#17], [d_date#17#23], LeftSemi, BuildRight : :- *(7) Project [d_date_sk#14, d_date#17] : : +- *(7) Filter isnotnull(d_date_sk#14) - : : +- *(7) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(7) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) : +- *(6) Project [d_date#17 AS d_date#17#23] : +- *(6) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12132)) : : +- Subquery subquery12132 : : +- *(1) Project [d_week_seq#19] : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) - : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct - : +- *(6) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct + : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + : +- *(6) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct : +- Subquery subquery12132 : +- *(1) Project [d_week_seq#19] : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) - : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) +- *(14) Filter isnotnull(ws_item_rev#6) +- *(14) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(ws_ext_sales_price#24))]) @@ -79,23 +79,23 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,ss_item_rev# : +- *(13) BroadcastHashJoin [ws_item_sk#26], [i_item_sk#16], Inner, BuildRight : :- *(13) Project [ws_sold_date_sk#25, ws_item_sk#26, ws_ext_sales_price#24] : : +- *(13) Filter (isnotnull(ws_item_sk#26) && isnotnull(ws_sold_date_sk#25)) - : : +- *(13) FileScan parquet default.web_sales[ws_sold_date_sk#25,ws_item_sk#26,ws_ext_sales_price#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- *(13) FileScan parquet default.web_sales[ws_sold_date_sk#25,ws_item_sk#26,ws_ext_sales_price#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct : +- ReusedExchange [i_item_sk#16, i_item_id#11], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(12) Project [d_date_sk#14] +- *(12) BroadcastHashJoin [d_date#17], [d_date#17#27], LeftSemi, BuildRight :- *(12) Project [d_date_sk#14, d_date#17] : +- *(12) Filter isnotnull(d_date_sk#14) - : +- *(12) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + : +- *(12) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) +- *(11) Project [d_date#17 AS d_date#17#27] +- *(11) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12136)) : +- Subquery subquery12136 : +- *(1) Project [d_week_seq#19] : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) - : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct - +- *(11) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct + : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct + +- *(11) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct +- Subquery subquery12136 +- *(1) Project [d_week_seq#19] +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) - +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct \ No newline at end of file + +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/explain.txt index 9236b71dd..1f9cdf6b6 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/explain.txt @@ -13,19 +13,19 @@ TakeOrderedAndProject(limit=100, orderBy=[s_store_name1#1 ASC NULLS FIRST,s_stor : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#42], [d_date_sk#43], Inner, BuildRight : : : :- *(2) Project [ss_sold_date_sk#42, ss_store_sk#38, ss_sales_price#41] : : : : +- *(2) Filter (isnotnull(ss_sold_date_sk#42) && isnotnull(ss_store_sk#38)) - : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#42,ss_store_sk#38,ss_sales_price#41] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#42,ss_store_sk#38,ss_sales_price#41] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(1) Project [d_date_sk#43, d_week_seq#28, d_day_name#40] : : : +- *(1) Filter (isnotnull(d_date_sk#43) && isnotnull(d_week_seq#28)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#43,d_week_seq#28,d_day_name#40] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#43,d_week_seq#28,d_day_name#40] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(3) Project [s_store_sk#39, s_store_id#29, s_store_name#27] : : +- *(3) Filter (isnotnull(s_store_sk#39) && isnotnull(s_store_id#29)) - : : +- *(3) FileScan parquet default.store[s_store_sk#39,s_store_id#29,s_store_name#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)], ReadSchema: struct + : : +- *(3) FileScan parquet default.store[s_store_sk#39,s_store_id#29,s_store_name#27] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(4) Project [d_week_seq#37] : +- *(4) Filter (((isnotnull(d_month_seq#44) && (d_month_seq#44 >= 1212)) && (d_month_seq#44 <= 1223)) && isnotnull(d_week_seq#37)) - : +- *(4) FileScan parquet default.date_dim[d_month_seq#44,d_week_seq#37] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223),..., ReadSchema: struct + : +- *(4) FileScan parquet default.date_dim[d_month_seq#44,d_week_seq#37] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223),..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52))) +- *(9) Project [d_week_seq#28 AS d_week_seq2#26, s_store_id#29 AS s_store_id2#25, sun_sales#30 AS sun_sales2#12, mon_sales#31 AS mon_sales2#14, tue_sales#32 AS tue_sales2#16, wed_sales#33 AS wed_sales2#18, thu_sales#34 AS thu_sales2#20, fri_sales#35 AS fri_sales2#22, sat_sales#36 AS sat_sales2#24] +- *(9) BroadcastHashJoin [d_week_seq#28], [d_week_seq#45], Inner, BuildRight @@ -36,8 +36,8 @@ TakeOrderedAndProject(limit=100, orderBy=[s_store_name1#1 ASC NULLS FIRST,s_stor : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(7) Project [s_store_sk#39, s_store_id#29] : +- *(7) Filter (isnotnull(s_store_sk#39) && isnotnull(s_store_id#29)) - : +- *(7) FileScan parquet default.store[s_store_sk#39,s_store_id#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)], ReadSchema: struct + : +- *(7) FileScan parquet default.store[s_store_sk#39,s_store_id#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(8) Project [d_week_seq#45] +- *(8) Filter (((isnotnull(d_month_seq#53) && (d_month_seq#53 >= 1224)) && (d_month_seq#53 <= 1235)) && isnotnull(d_week_seq#45)) - +- *(8) FileScan parquet default.date_dim[d_month_seq#53,d_week_seq#45] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1224), LessThanOrEqual(d_month_seq,1235),..., ReadSchema: struct \ No newline at end of file + +- *(8) FileScan parquet default.date_dim[d_month_seq#53,d_week_seq#45] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1224), LessThanOrEqual(d_month_seq,1235),..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/explain.txt index eb06bf9b7..68c396079 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/explain.txt @@ -15,15 +15,15 @@ TakeOrderedAndProject(limit=100, orderBy=[cnt#1 ASC NULLS FIRST], output=[state# : : : +- *(7) BroadcastHashJoin [ca_address_sk#11], [c_current_addr_sk#12], Inner, BuildRight : : : :- *(7) Project [ca_address_sk#11, ca_state#4] : : : : +- *(7) Filter isnotnull(ca_address_sk#11) - : : : : +- *(7) FileScan parquet default.customer_address[ca_address_sk#11,ca_state#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + : : : : +- *(7) FileScan parquet default.customer_address[ca_address_sk#11,ca_state#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) : : : +- *(1) Project [c_customer_sk#9, c_current_addr_sk#12] : : : +- *(1) Filter (isnotnull(c_current_addr_sk#12) && isnotnull(c_customer_sk#9)) - : : : +- *(1) FileScan parquet default.customer[c_customer_sk#9,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.customer[c_customer_sk#9,c_current_addr_sk#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) : : +- *(2) Project [ss_sold_date_sk#7, ss_item_sk#5, ss_customer_sk#10] : : +- *(2) Filter ((isnotnull(ss_customer_sk#10) && isnotnull(ss_sold_date_sk#7)) && isnotnull(ss_item_sk#5)) - : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#7,ss_item_sk#5,ss_customer_sk#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#7,ss_item_sk#5,ss_customer_sk#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(3) Project [d_date_sk#8] : +- *(3) Filter ((isnotnull(d_month_seq#13) && (d_month_seq#13 = Subquery subquery982)) && isnotnull(d_date_sk#8)) @@ -33,26 +33,26 @@ TakeOrderedAndProject(limit=100, orderBy=[cnt#1 ASC NULLS FIRST], output=[state# : : +- *(1) HashAggregate(keys=[d_month_seq#13], functions=[]) : : +- *(1) Project [d_month_seq#13] : : +- *(1) Filter (((isnotnull(d_year#14) && isnotnull(d_moy#15)) && (d_year#14 = 2000)) && (d_moy#15 = 1)) - : : +- *(1) FileScan parquet default.date_dim[d_month_seq#13,d_year#14,d_moy#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)], ReadSchema: struct - : +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_month_seq#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(1) FileScan parquet default.date_dim[d_month_seq#13,d_year#14,d_moy#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)], ReadSchema: struct + : +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_month_seq#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct : +- Subquery subquery982 : +- *(2) HashAggregate(keys=[d_month_seq#13], functions=[]) : +- Exchange hashpartitioning(d_month_seq#13, 5) : +- *(1) HashAggregate(keys=[d_month_seq#13], functions=[]) : +- *(1) Project [d_month_seq#13] : +- *(1) Filter (((isnotnull(d_year#14) && isnotnull(d_moy#15)) && (d_year#14 = 2000)) && (d_moy#15 = 1)) - : +- *(1) FileScan parquet default.date_dim[d_month_seq#13,d_year#14,d_moy#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)], ReadSchema: struct + : +- *(1) FileScan parquet default.date_dim[d_month_seq#13,d_year#14,d_moy#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(6) Project [i_item_sk#6] +- *(6) Filter (cast(i_current_price#16 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#17)), DecimalType(14,7))) +- *(6) BroadcastHashJoin [i_category#18], [i_category#18#19], LeftOuter, BuildRight :- *(6) Project [i_item_sk#6, i_current_price#16, i_category#18] : +- *(6) Filter (isnotnull(i_current_price#16) && isnotnull(i_item_sk#6)) - : +- *(6) FileScan parquet default.item[i_item_sk#6,i_current_price#16,i_category#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)], ReadSchema: struct + : +- *(6) FileScan parquet default.item[i_item_sk#6,i_current_price#16,i_category#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) +- *(5) HashAggregate(keys=[i_category#18], functions=[avg(UnscaledValue(i_current_price#16))]) +- Exchange hashpartitioning(i_category#18, 5) +- *(4) HashAggregate(keys=[i_category#18], functions=[partial_avg(UnscaledValue(i_current_price#16))]) +- *(4) Project [i_current_price#16, i_category#18] +- *(4) Filter isnotnull(i_category#18) - +- *(4) FileScan parquet default.item[i_current_price#16,i_category#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category)], ReadSchema: struct \ No newline at end of file + +- *(4) FileScan parquet default.item[i_current_price#16,i_category#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/explain.txt index e4e212165..cdee71697 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/explain.txt @@ -15,24 +15,24 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,total_sale : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight : : : :- *(5) Project [ss_sold_date_sk#9, ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] : : : : +- *(5) Filter ((isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#7)) && isnotnull(ss_item_sk#5)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(1) Project [d_date_sk#10] : : : +- *(1) Filter ((((isnotnull(d_year#11) && isnotnull(d_moy#12)) && (d_year#11 = 1998)) && (d_moy#12 = 9)) && isnotnull(d_date_sk#10)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,9), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,9), IsNotNull(d_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [ca_address_sk#8] : : +- *(2) Filter ((isnotnull(ca_gmt_offset#13) && (ca_gmt_offset#13 = -5.00)) && isnotnull(ca_address_sk#8)) - : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(4) BroadcastHashJoin [i_item_id#1], [i_item_id#1#14], LeftSemi, BuildRight : :- *(4) Project [i_item_sk#6, i_item_id#1] : : +- *(4) Filter isnotnull(i_item_sk#6) - : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_item_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_item_id#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : +- *(3) Project [i_item_id#1 AS i_item_id#1#14] : +- *(3) Filter (isnotnull(i_category#15) && (i_category#15 = Music)) - : +- *(3) FileScan parquet default.item[i_item_id#1,i_category#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Music)], ReadSchema: struct + : +- *(3) FileScan parquet default.item[i_item_id#1,i_category#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Music)], ReadSchema: struct :- *(12) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) : +- Exchange hashpartitioning(i_item_id#1, 5) : +- *(11) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) @@ -44,7 +44,7 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,total_sale : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#19], [d_date_sk#10], Inner, BuildRight : : : :- *(11) Project [cs_sold_date_sk#19, cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] : : : : +- *(11) Filter ((isnotnull(cs_sold_date_sk#19) && isnotnull(cs_bill_addr_sk#18)) && isnotnull(cs_item_sk#17)) - : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct + : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct : : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [i_item_sk#6, i_item_id#1], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) @@ -59,7 +59,7 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,total_sale : : +- *(17) BroadcastHashJoin [ws_sold_date_sk#23], [d_date_sk#10], Inner, BuildRight : : :- *(17) Project [ws_sold_date_sk#23, ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] : : : +- *(17) Filter ((isnotnull(ws_sold_date_sk#23) && isnotnull(ws_bill_addr_sk#22)) && isnotnull(ws_item_sk#21)) - : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- ReusedExchange [i_item_sk#6, i_item_id#1], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/explain.txt index 49bd37330..7197bef72 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/explain.txt @@ -19,31 +19,31 @@ TakeOrderedAndProject(limit=100, orderBy=[promotions#1 ASC NULLS FIRST,total#2 A : : : : : : +- *(7) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight : : : : : : :- *(7) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_store_sk#15, ss_promo_sk#13, ss_ext_sales_price#4] : : : : : : : +- *(7) Filter ((((isnotnull(ss_store_sk#15) && isnotnull(ss_promo_sk#13)) && isnotnull(ss_sold_date_sk#11)) && isnotnull(ss_customer_sk#9)) && isnotnull(ss_item_sk#5)) - : : : : : : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#5,ss_customer_sk#9,ss_store_sk#15,ss_promo_sk#13,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : : : : +- *(1) FileScan parquet default.store[s_store_sk#16,s_gmt_offset#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)], ReadSchema: struct : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- *(2) Project [p_promo_sk#14] : : : : : +- *(2) Filter ((((p_channel_dmail#18 = Y) || (p_channel_email#19 = Y)) || (p_channel_tv#20 = Y)) && isnotnull(p_promo_sk#14)) - : : : : : +- *(2) FileScan parquet default.promotion[p_promo_sk#14,p_channel_dmail#18,p_channel_email#19,p_channel_tv#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(p_channel_dmail,Y),EqualTo(p_channel_email,Y)),EqualTo(p_channel_tv,Y)), IsNotNull..., ReadSchema: struct + : : : : : +- *(2) FileScan parquet default.promotion[p_promo_sk#14,p_channel_dmail#18,p_channel_email#19,p_channel_tv#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/promotion], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(p_channel_dmail,Y),EqualTo(p_channel_email,Y)),EqualTo(p_channel_tv,Y)), IsNotNull..., ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(3) Project [d_date_sk#12] : : : : +- *(3) Filter ((((isnotnull(d_year#21) && isnotnull(d_moy#22)) && (d_year#21 = 1998)) && (d_moy#22 = 11)) && isnotnull(d_date_sk#12)) - : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_year#21,d_moy#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_year#21,d_moy#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(4) Project [c_customer_sk#10, c_current_addr_sk#7] : : : +- *(4) Filter (isnotnull(c_customer_sk#10) && isnotnull(c_current_addr_sk#7)) - : : : +- *(4) FileScan parquet default.customer[c_customer_sk#10,c_current_addr_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + : : : +- *(4) FileScan parquet default.customer[c_customer_sk#10,c_current_addr_sk#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(5) Project [ca_address_sk#8] : : +- *(5) Filter ((isnotnull(ca_gmt_offset#23) && (ca_gmt_offset#23 = -5.00)) && isnotnull(ca_address_sk#8)) - : : +- *(5) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : : +- *(5) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(6) Project [i_item_sk#6] : +- *(6) Filter ((isnotnull(i_category#24) && (i_category#24 = Jewelry)) && isnotnull(i_item_sk#6)) - : +- *(6) FileScan parquet default.item[i_item_sk#6,i_category#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry), IsNotNull(i_item_sk)], ReadSchema: struct + : +- *(6) FileScan parquet default.item[i_item_sk#6,i_category#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry), IsNotNull(i_item_sk)], ReadSchema: struct +- BroadcastExchange IdentityBroadcastMode +- *(15) HashAggregate(keys=[], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) +- Exchange SinglePartition @@ -60,7 +60,7 @@ TakeOrderedAndProject(limit=100, orderBy=[promotions#1 ASC NULLS FIRST,total#2 A : : : : +- *(14) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight : : : : :- *(14) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_store_sk#15, ss_ext_sales_price#4] : : : : : +- *(14) Filter (((isnotnull(ss_store_sk#15) && isnotnull(ss_sold_date_sk#11)) && isnotnull(ss_customer_sk#9)) && isnotnull(ss_item_sk#5)) - : : : : : +- *(14) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#5,ss_customer_sk#9,ss_store_sk#15,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item..., ReadSchema: struct + : : : +- *(1) FileScan parquet default.warehouse[w_warehouse_sk#19,w_warehouse_name#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [sm_ship_mode_sk#17, sm_type#2] : : +- *(2) Filter isnotnull(sm_ship_mode_sk#17) - : : +- *(2) FileScan parquet default.ship_mode[sm_ship_mode_sk#17,sm_type#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/ship_mode], PartitionFilters: [], PushedFilters: [IsNotNull(sm_ship_mode_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.ship_mode[sm_ship_mode_sk#17,sm_type#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/ship_mode], PartitionFilters: [], PushedFilters: [IsNotNull(sm_ship_mode_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(3) Project [web_site_sk#15, web_name#3] : +- *(3) Filter isnotnull(web_site_sk#15) - : +- *(3) FileScan parquet default.web_site[web_site_sk#15,web_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct + : +- *(3) FileScan parquet default.web_site[web_site_sk#15,web_name#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(4) Project [d_date_sk#13] +- *(4) Filter (((isnotnull(d_month_seq#20) && (d_month_seq#20 >= 1200)) && (d_month_seq#20 <= 1211)) && isnotnull(d_date_sk#13)) - +- *(4) FileScan parquet default.date_dim[d_date_sk#13,d_month_seq#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct \ No newline at end of file + +- *(4) FileScan parquet default.date_dim[d_date_sk#13,d_month_seq#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/explain.txt index 98e27704a..7025e4c24 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/explain.txt @@ -16,16 +16,16 @@ TakeOrderedAndProject(limit=100, orderBy=[i_manager_id#1 ASC NULLS FIRST,avg_mon : : +- *(4) BroadcastHashJoin [i_item_sk#11], [ss_item_sk#12], Inner, BuildRight : : :- *(4) Project [i_item_sk#11, i_manager_id#1] : : : +- *(4) Filter ((((i_category#13 IN (Books,Children,Electronics) && i_class#14 IN (personal,portable,refernece,self-help)) && i_brand#15 IN (scholaramalgamalg #16,scholaramalgamalg #17,exportiunivamalg #18,scholaramalgamalg #18)) || ((i_category#13 IN (Women,Music,Men) && i_class#14 IN (accessories,classical,fragrances,pants)) && i_brand#15 IN (amalgimporto #19,edu packscholar #19,exportiimporto #19,importoamalg #19))) && isnotnull(i_item_sk#11)) - : : : +- *(4) FileScan parquet default.item[i_item_sk#11,i_brand#15,i_class#14,i_category#13,i_manager_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,refernece..., ReadSchema: struct + : : : +- *(4) FileScan parquet default.item[i_item_sk#11,i_brand#15,i_class#14,i_category#13,i_manager_id#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,refernece..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) : : +- *(1) Project [ss_sold_date_sk#9, ss_item_sk#12, ss_store_sk#7, ss_sales_price#6] : : +- *(1) Filter ((isnotnull(ss_item_sk#12) && isnotnull(ss_sold_date_sk#9)) && isnotnull(ss_store_sk#7)) - : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#12,ss_store_sk#7,ss_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#12,ss_store_sk#7,ss_sales_price#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(2) Project [d_date_sk#10, d_moy#5] : +- *(2) Filter (d_month_seq#20 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) && isnotnull(d_date_sk#10)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#10,d_month_seq#20,d_moy#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date..., ReadSchema: struct + : +- *(2) FileScan parquet default.date_dim[d_date_sk#10,d_month_seq#20,d_moy#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(3) Project [s_store_sk#8] +- *(3) Filter isnotnull(s_store_sk#8) - +- *(3) FileScan parquet default.store[s_store_sk#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file + +- *(3) FileScan parquet default.store[s_store_sk#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt index 9319775c4..c91d21f8b 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt @@ -42,11 +42,11 @@ : : : : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [cast(ss_item_sk#44 as bigint), cast(ss_ticket_number#76 as bigint)], [sr_item_sk#77, sr_ticket_number#78], Inner, BuildRight : : : : : : : : : : : : : : : : : :- *(20) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_ticket_number#76, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] : : : : : : : : : : : : : : : : : : +- *(20) Filter ((((((((isnotnull(ss_item_sk#44) && isnotnull(ss_ticket_number#76)) && isnotnull(ss_sold_date_sk#73)) && isnotnull(ss_store_sk#71)) && isnotnull(ss_customer_sk#69)) && isnotnull(ss_cdemo_sk#63)) && isnotnull(ss_promo_sk#57)) && isnotnull(ss_hdemo_sk#55)) && isnotnull(ss_addr_sk#51)) - : : : : : : : : : : : : : : : : : : +- *(20) FileScan parquet default.store_sales[ss_sold_date_sk#73,ss_item_sk#44,ss_customer_sk#69,ss_cdemo_sk#63,ss_hdemo_sk#55,ss_addr_sk#51,ss_store_sk#71,ss_promo_sk#57,ss_ticket_number#76,ss_wholesale_cost#41,ss_list_price#42,ss_coupon_amt#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_sto..., ReadSchema: struct + : : : : : : : : : : : : : : : : : +- *(1) FileScan parquet default.store_returns[sr_item_sk#77,sr_ticket_number#78] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)], ReadSchema: struct : : : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : : : : : : : : : : +- *(4) Project [cs_item_sk#75] : : : : : : : : : : : : : : : : +- *(4) Filter (isnotnull(sum(cs_ext_list_price#79)#80) && (cast(sum(cs_ext_list_price#79)#80 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))#84)), DecimalType(21,2)))) @@ -57,56 +57,56 @@ : : : : : : : : : : : : : : : : +- *(3) BroadcastHashJoin [cs_item_sk#75, cs_order_number#85], [cr_item_sk#86, cr_order_number#87], Inner, BuildRight : : : : : : : : : : : : : : : : :- *(3) Project [cs_item_sk#75, cs_order_number#85, cs_ext_list_price#79] : : : : : : : : : : : : : : : : : +- *(3) Filter (isnotnull(cs_item_sk#75) && isnotnull(cs_order_number#85)) - : : : : : : : : : : : : : : : : : +- *(3) FileScan parquet default.catalog_sales[cs_item_sk#75,cs_order_number#85,cs_ext_list_price#79] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)], ReadSchema: struct + : : : : : : : : : : : : : : : : : +- *(3) FileScan parquet default.catalog_sales[cs_item_sk#75,cs_order_number#85,cs_ext_list_price#79] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)], ReadSchema: struct : : : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295)))) : : : : : : : : : : : : : : : : +- *(2) Project [cr_item_sk#86, cr_order_number#87, cr_refunded_cash#81, cr_reversed_charge#82, cr_store_credit#83] : : : : : : : : : : : : : : : : +- *(2) Filter (isnotnull(cr_item_sk#86) && isnotnull(cr_order_number#87)) - : : : : : : : : : : : : : : : : +- *(2) FileScan parquet default.catalog_returns[cr_item_sk#86,cr_order_number#87,cr_refunded_cash#81,cr_reversed_charge#82,cr_store_credit#83] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct + : : : : : : : : : : : : : : : +- *(5) FileScan parquet default.date_dim[d_date_sk#74,d_year#38] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : : : : : : : : +- *(6) Project [s_store_sk#72, s_store_name#28, s_zip#29] : : : : : : : : : : : : : : +- *(6) Filter ((isnotnull(s_store_sk#72) && isnotnull(s_zip#29)) && isnotnull(s_store_name#28)) - : : : : : : : : : : : : : : +- *(6) FileScan parquet default.store[s_store_sk#72,s_store_name#28,s_zip#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip), IsNotNull(s_store_name)], ReadSchema: struct + : : : : : : : : : : : : : : +- *(6) FileScan parquet default.store[s_store_sk#72,s_store_name#28,s_zip#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip), IsNotNull(s_store_name)], ReadSchema: struct : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : : : : : : : +- *(7) Project [c_customer_sk#70, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67] : : : : : : : : : : : : : +- *(7) Filter (((((isnotnull(c_customer_sk#70) && isnotnull(c_first_sales_date_sk#67)) && isnotnull(c_first_shipto_date_sk#65)) && isnotnull(c_current_cdemo_sk#59)) && isnotnull(c_current_hdemo_sk#53)) && isnotnull(c_current_addr_sk#49)) - : : : : : : : : : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#70,c_current_cdemo_sk#59,c_current_hdemo_sk#53,c_current_addr_sk#49,c_first_shipto_date_sk#65,c_first_sales_date_sk#67] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), I..., ReadSchema: struct + : : : : : : : : : : : : +- *(8) FileScan parquet default.date_dim[d_date_sk#68,d_year#39] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct : : : : : : : : : : : +- ReusedExchange [d_date_sk#66, d_year#40], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : : : : +- *(10) Project [cd_demo_sk#64, cd_marital_status#61] : : : : : : : : : : +- *(10) Filter (isnotnull(cd_demo_sk#64) && isnotnull(cd_marital_status#61)) - : : : : : : : : : : +- *(10) FileScan parquet default.customer_demographics[cd_demo_sk#64,cd_marital_status#61] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)], ReadSchema: struct + : : : : : : : : : : +- *(10) FileScan parquet default.customer_demographics[cd_demo_sk#64,cd_marital_status#61] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)], ReadSchema: struct : : : : : : : : : +- ReusedExchange [cd_demo_sk#60, cd_marital_status#62], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : : +- *(12) Project [p_promo_sk#58] : : : : : : : : +- *(12) Filter isnotnull(p_promo_sk#58) - : : : : : : : : +- *(12) FileScan parquet default.promotion[p_promo_sk#58] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_promo_sk)], ReadSchema: struct + : : : : : : : : +- *(12) FileScan parquet default.promotion[p_promo_sk#58] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_promo_sk)], ReadSchema: struct : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : +- *(13) Project [hd_demo_sk#56, hd_income_band_sk#47] : : : : : : : +- *(13) Filter (isnotnull(hd_demo_sk#56) && isnotnull(hd_income_band_sk#47)) - : : : : : : : +- *(13) FileScan parquet default.household_demographics[hd_demo_sk#56,hd_income_band_sk#47] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)], ReadSchema: struct + : : : : : : : +- *(13) FileScan parquet default.household_demographics[hd_demo_sk#56,hd_income_band_sk#47] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)], ReadSchema: struct : : : : : : +- ReusedExchange [hd_demo_sk#54, hd_income_band_sk#45], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- *(15) Project [ca_address_sk#52, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33] : : : : : +- *(15) Filter isnotnull(ca_address_sk#52) - : : : : : +- *(15) FileScan parquet default.customer_address[ca_address_sk#52,ca_street_number#30,ca_street_name#31,ca_city#32,ca_zip#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + : : : : : +- *(15) FileScan parquet default.customer_address[ca_address_sk#52,ca_street_number#30,ca_street_name#31,ca_city#32,ca_zip#33] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct : : : : +- ReusedExchange [ca_address_sk#50, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(17) Project [ib_income_band_sk#48] : : : +- *(17) Filter isnotnull(ib_income_band_sk#48) - : : : +- *(17) FileScan parquet default.income_band[ib_income_band_sk#48] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/income_band], PartitionFilters: [], PushedFilters: [IsNotNull(ib_income_band_sk)], ReadSchema: struct + : : : +- *(17) FileScan parquet default.income_band[ib_income_band_sk#48] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/income_band], PartitionFilters: [], PushedFilters: [IsNotNull(ib_income_band_sk)], ReadSchema: struct : : +- ReusedExchange [ib_income_band_sk#46], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(19) Project [i_item_sk#27, i_product_name#26] : +- *(19) Filter ((((((isnotnull(i_current_price#88) && i_color#89 IN (purple,burlywood,indian,spring,floral,medium)) && (i_current_price#88 >= 64.00)) && (cast(i_current_price#88 as decimal(12,2)) <= 74.00)) && (cast(i_current_price#88 as decimal(12,2)) >= 65.00)) && (cast(i_current_price#88 as decimal(12,2)) <= 79.00)) && isnotnull(i_item_sk#27)) - : +- *(19) FileScan parquet default.item[i_item_sk#27,i_current_price#88,i_color#89,i_product_name#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), In(i_color, [purple,burlywood,indian,spring,floral,medium]), Greater..., ReadSchema: struct + : +- *(19) FileScan parquet default.item[i_item_sk#27,i_current_price#88,i_color#89,i_product_name#26] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), In(i_color, [purple,burlywood,indian,spring,floral,medium]), Greater..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, string, true], input[2, string, true])) +- *(41) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[count(1), sum(UnscaledValue(ss_wholesale_cost#41)), sum(UnscaledValue(ss_list_price#42)), sum(UnscaledValue(ss_coupon_amt#43))]) +- Exchange hashpartitioning(i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40, 5) @@ -147,13 +147,13 @@ : : : : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [cast(ss_item_sk#44 as bigint), cast(ss_ticket_number#76 as bigint)], [sr_item_sk#77, sr_ticket_number#78], Inner, BuildRight : : : : : : : : : : : : : : : : :- *(40) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_ticket_number#76, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] : : : : : : : : : : : : : : : : : +- *(40) Filter ((((((((isnotnull(ss_item_sk#44) && isnotnull(ss_ticket_number#76)) && isnotnull(ss_sold_date_sk#73)) && isnotnull(ss_store_sk#71)) && isnotnull(ss_customer_sk#69)) && isnotnull(ss_cdemo_sk#63)) && isnotnull(ss_promo_sk#57)) && isnotnull(ss_hdemo_sk#55)) && isnotnull(ss_addr_sk#51)) - : : : : : : : : : : : : : : : : : +- *(40) FileScan parquet default.store_sales[ss_sold_date_sk#73,ss_item_sk#44,ss_customer_sk#69,ss_cdemo_sk#63,ss_hdemo_sk#55,ss_addr_sk#51,ss_store_sk#71,ss_promo_sk#57,ss_ticket_number#76,ss_wholesale_cost#41,ss_list_price#42,ss_coupon_amt#43] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_sto..., ReadSchema: struct + : : : : : : : : : : : : : : +- *(25) FileScan parquet default.date_dim[d_date_sk#74,d_year#38] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct : : : : : : : : : : : : : +- ReusedExchange [s_store_sk#72, s_store_name#28, s_zip#29], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : : : : : : +- ReusedExchange [c_customer_sk#70, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : : : : : +- ReusedExchange [d_date_sk#68, d_year#39], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/explain.txt index 90e6d0221..f29686b9f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/explain.txt @@ -8,7 +8,7 @@ TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,i_item_ : : +- *(9) BroadcastHashJoin [s_store_sk#12], [ss_store_sk#7], Inner, BuildRight : : :- *(9) Project [s_store_sk#12, s_store_name#1] : : : +- *(9) Filter isnotnull(s_store_sk#12) - : : : +- *(9) FileScan parquet default.store[s_store_sk#12,s_store_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : : : +- *(9) FileScan parquet default.store[s_store_sk#12,s_store_name#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(3) Filter isnotnull(revenue#3) : : +- *(3) HashAggregate(keys=[ss_store_sk#7, ss_item_sk#10], functions=[sum(UnscaledValue(ss_sales_price#13))]) @@ -18,15 +18,15 @@ TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,i_item_ : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight : : :- *(2) Project [ss_sold_date_sk#14, ss_item_sk#10, ss_store_sk#7, ss_sales_price#13] : : : +- *(2) Filter ((isnotnull(ss_sold_date_sk#14) && isnotnull(ss_store_sk#7)) && isnotnull(ss_item_sk#10)) - : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#14,ss_item_sk#10,ss_store_sk#7,ss_sales_price#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)], ReadSchema: struct + : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#14,ss_item_sk#10,ss_store_sk#7,ss_sales_price#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(1) Project [d_date_sk#15] : : +- *(1) Filter (((isnotnull(d_month_seq#16) && (d_month_seq#16 >= 1176)) && (d_month_seq#16 <= 1187)) && isnotnull(d_date_sk#15)) - : : +- *(1) FileScan parquet default.date_dim[d_date_sk#15,d_month_seq#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1176), LessThanOrEqual(d_month_seq,1187),..., ReadSchema: struct + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#15,d_month_seq#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1176), LessThanOrEqual(d_month_seq,1187),..., ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(4) Project [i_item_sk#11, i_item_desc#2, i_current_price#4, i_wholesale_cost#5, i_brand#6] : +- *(4) Filter isnotnull(i_item_sk#11) - : +- *(4) FileScan parquet default.item[i_item_sk#11,i_item_desc#2,i_current_price#4,i_wholesale_cost#5,i_brand#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#20,ss_item_sk#18,ss_store_sk#8,ss_sales_price#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +- ReusedExchange [d_date_sk#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/explain.txt index fb220ece8..497af0999 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/explain.txt @@ -17,23 +17,23 @@ TakeOrderedAndProject(limit=100, orderBy=[w_warehouse_name#1 ASC NULLS FIRST], o : : : : +- *(5) BroadcastHashJoin [ws_warehouse_sk#48], [w_warehouse_sk#49], Inner, BuildRight : : : : :- *(5) Project [ws_sold_date_sk#46, ws_sold_time_sk#44, ws_ship_mode_sk#42, ws_warehouse_sk#48, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41] : : : : : +- *(5) Filter (((isnotnull(ws_warehouse_sk#48) && isnotnull(ws_sold_date_sk#46)) && isnotnull(ws_sold_time_sk#44)) && isnotnull(ws_ship_mode_sk#42)) - : : : : : +- *(5) FileScan parquet default.web_sales[ws_sold_date_sk#46,ws_sold_time_sk#44,ws_ship_mode_sk#42,ws_warehouse_sk#48,ws_quantity#40,ws_ext_sales_price#39,ws_net_paid#41] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws..., ReadSchema: struct + : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#47,d_year#37,d_moy#38] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(3) Project [t_time_sk#45] : : +- *(3) Filter (((isnotnull(t_time#50) && (t_time#50 >= 30838)) && (t_time#50 <= 59638)) && isnotnull(t_time_sk#45)) - : : +- *(3) FileScan parquet default.time_dim[t_time_sk#45,t_time#50] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_time), GreaterThanOrEqual(t_time,30838), LessThanOrEqual(t_time,59638), IsNotNull(t_..., ReadSchema: struct + : : +- *(3) FileScan parquet default.time_dim[t_time_sk#45,t_time#50] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_time), GreaterThanOrEqual(t_time,30838), LessThanOrEqual(t_time,59638), IsNotNull(t_..., ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(4) Project [sm_ship_mode_sk#43] : +- *(4) Filter (sm_carrier#51 IN (DHL,BARIAN) && isnotnull(sm_ship_mode_sk#43)) - : +- *(4) FileScan parquet default.ship_mode[sm_ship_mode_sk#43,sm_carrier#51] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/ship_mode], PartitionFilters: [], PushedFilters: [In(sm_carrier, [DHL,BARIAN]), IsNotNull(sm_ship_mode_sk)], ReadSchema: struct + : +- *(4) FileScan parquet default.ship_mode[sm_ship_mode_sk#43,sm_carrier#51] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/ship_mode], PartitionFilters: [], PushedFilters: [In(sm_carrier, [DHL,BARIAN]), IsNotNull(sm_ship_mode_sk)], ReadSchema: struct +- *(12) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) +- Exchange hashpartitioning(w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, 5) +- *(11) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) @@ -47,7 +47,7 @@ TakeOrderedAndProject(limit=100, orderBy=[w_warehouse_name#1 ASC NULLS FIRST], o : : : +- *(11) BroadcastHashJoin [cs_warehouse_sk#58], [w_warehouse_sk#49], Inner, BuildRight : : : :- *(11) Project [cs_sold_date_sk#57, cs_sold_time_sk#56, cs_ship_mode_sk#55, cs_warehouse_sk#58, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54] : : : : +- *(11) Filter (((isnotnull(cs_warehouse_sk#58) && isnotnull(cs_sold_date_sk#57)) && isnotnull(cs_sold_time_sk#56)) && isnotnull(cs_ship_mode_sk#55)) - : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#57,cs_sold_time_sk#56,cs_ship_mode_sk#55,cs_warehouse_sk#58,cs_quantity#53,cs_sales_price#52,cs_net_paid_inc_tax#54] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_sold_time_sk), IsNotNull(cs..., ReadSchema: struct= 1200)) && (d_month_seq#33 <= 1211)) && isnotnull(d_date_sk#32)) - : : +- *(1) FileScan parquet default.date_dim[d_date_sk#32,d_month_seq#33,d_year#18,d_moy#20,d_qoy#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#32,d_month_seq#33,d_year#18,d_moy#20,d_qoy#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(2) Project [s_store_sk#30, s_store_id#26] : +- *(2) Filter isnotnull(s_store_sk#30) - : +- *(2) FileScan parquet default.store[s_store_sk#30,s_store_id#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : +- *(2) FileScan parquet default.store[s_store_sk#30,s_store_id#26] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(3) Project [i_item_sk#28, i_brand#24, i_class#23, i_category#22, i_product_name#25] +- *(3) Filter isnotnull(i_item_sk#28) - +- *(3) FileScan parquet default.item[i_item_sk#28,i_brand#24,i_class#23,i_category#22,i_product_name#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file + +- *(3) FileScan parquet default.item[i_item_sk#28,i_brand#24,i_class#23,i_category#22,i_product_name#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/explain.txt index 3d2fcd498..43c255c55 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/explain.txt @@ -17,25 +17,25 @@ TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,ss_ticke : : : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight : : : : : :- *(5) Project [ss_sold_date_sk#21, ss_customer_sk#11, ss_hdemo_sk#17, ss_addr_sk#13, ss_store_sk#19, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] : : : : : : +- *(5) Filter ((((isnotnull(ss_sold_date_sk#21) && isnotnull(ss_store_sk#19)) && isnotnull(ss_hdemo_sk#17)) && isnotnull(ss_addr_sk#13)) && isnotnull(ss_customer_sk#11)) - : : : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#21,ss_customer_sk#11,ss_hdemo_sk#17,ss_addr_sk#13,ss_store_sk#19,ss_ticket_number#2,ss_ext_sales_price#14,ss_ext_list_price#15,ss_ext_tax#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk..., ReadSchema: struct= 1)) && (d_dom#23 <= 2)) && d_year#24 IN (1999,2000,2001)) && isnotnull(d_date_sk#22)) - : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#22,d_year#24,d_dom#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2..., ReadSchema: struct + : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#22,d_year#24,d_dom#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2..., ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(2) Project [s_store_sk#20] : : : : +- *(2) Filter (s_city#25 IN (Midway,Fairview) && isnotnull(s_store_sk#20)) - : : : : +- *(2) FileScan parquet default.store[s_store_sk#20,s_city#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [In(s_city, [Midway,Fairview]), IsNotNull(s_store_sk)], ReadSchema: struct + : : : : +- *(2) FileScan parquet default.store[s_store_sk#20,s_city#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [In(s_city, [Midway,Fairview]), IsNotNull(s_store_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(3) Project [hd_demo_sk#18] : : : +- *(3) Filter (((hd_dep_count#26 = 4) || (hd_vehicle_count#27 = 3)) && isnotnull(hd_demo_sk#18)) - : : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#18,hd_dep_count#26,hd_vehicle_count#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)], ReadSchema: struct + : : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#18,hd_dep_count#26,hd_vehicle_count#27] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(4) Project [ca_address_sk#10, ca_city#4] : : +- *(4) Filter (isnotnull(ca_address_sk#10) && isnotnull(ca_city#4)) - : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_city#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)], ReadSchema: struct + : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_city#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(6) Project [c_customer_sk#12, c_current_addr_sk#9, c_first_name#3, c_last_name#1] : +- *(6) Filter (isnotnull(c_customer_sk#12) && isnotnull(c_current_addr_sk#9)) - : +- *(6) FileScan parquet default.customer[c_customer_sk#12,c_current_addr_sk#9,c_first_name#3,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct + : +- *(6) FileScan parquet default.customer[c_customer_sk#12,c_current_addr_sk#9,c_first_name#3,c_last_name#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct +- ReusedExchange [ca_address_sk#10, ca_city#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/explain.txt index ad4a155f0..9a777a055 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/explain.txt @@ -13,36 +13,36 @@ TakeOrderedAndProject(limit=100, orderBy=[cd_gender#1 ASC NULLS FIRST,cd_marital : : : :- *(9) BroadcastHashJoin [c_customer_sk#13], [ss_customer_sk#16], LeftSemi, BuildRight : : : : :- *(9) Project [c_customer_sk#13, c_current_cdemo_sk#9, c_current_addr_sk#11] : : : : : +- *(9) Filter (isnotnull(c_current_addr_sk#11) && isnotnull(c_current_cdemo_sk#9)) - : : : : : +- *(9) FileScan parquet default.customer[c_customer_sk#13,c_current_cdemo_sk#9,c_current_addr_sk#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)], ReadSchema: struct + : : : : : +- *(9) FileScan parquet default.customer[c_customer_sk#13,c_current_cdemo_sk#9,c_current_addr_sk#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(2) Project [ss_customer_sk#16] : : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight : : : : :- *(2) Project [ss_sold_date_sk#17, ss_customer_sk#16] : : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#17) - : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_customer_sk#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_customer_sk#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(1) Project [d_date_sk#18] : : : : +- *(1) Filter (((((isnotnull(d_year#19) && isnotnull(d_moy#20)) && (d_year#19 = 2001)) && (d_moy#20 >= 4)) && (d_moy#20 <= 6)) && isnotnull(d_date_sk#18)) - : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_year#19,d_moy#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), GreaterThanOrEqual(d_moy,4), LessThan..., ReadSchema: struct + : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_year#19,d_moy#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), GreaterThanOrEqual(d_moy,4), LessThan..., ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(4) Project [ws_bill_customer_sk#15] : : : +- *(4) BroadcastHashJoin [ws_sold_date_sk#21], [d_date_sk#18], Inner, BuildRight : : : :- *(4) Project [ws_sold_date_sk#21, ws_bill_customer_sk#15] : : : : +- *(4) Filter isnotnull(ws_sold_date_sk#21) - : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct : : : +- ReusedExchange [d_date_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(6) Project [cs_ship_customer_sk#14] : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#22], [d_date_sk#18], Inner, BuildRight : : :- *(6) Project [cs_sold_date_sk#22, cs_ship_customer_sk#14] : : : +- *(6) Filter isnotnull(cs_sold_date_sk#22) - : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#22,cs_ship_customer_sk#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#22,cs_ship_customer_sk#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct : : +- ReusedExchange [d_date_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(7) Project [ca_address_sk#12] : +- *(7) Filter (ca_state#23 IN (KY,GA,NM) && isnotnull(ca_address_sk#12)) - : +- *(7) FileScan parquet default.customer_address[ca_address_sk#12,ca_state#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [In(ca_state, [KY,GA,NM]), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- *(7) FileScan parquet default.customer_address[ca_address_sk#12,ca_state#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [In(ca_state, [KY,GA,NM]), IsNotNull(ca_address_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(8) Project [cd_demo_sk#10, cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5] +- *(8) Filter isnotnull(cd_demo_sk#10) - +- *(8) FileScan parquet default.customer_demographics[cd_demo_sk#10,cd_gender#1,cd_marital_status#2,cd_education_status#3,cd_purchase_estimate#4,cd_credit_rating#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.customer_demographics[cd_demo_sk#17,cd_gender#18,cd_marital_status#19,cd_education_status#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_g..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [d_date_sk#15] : : +- *(2) Filter ((isnotnull(d_year#21) && (d_year#21 = 2000)) && isnotnull(d_date_sk#15)) - : : +- *(2) FileScan parquet default.date_dim[d_date_sk#15,d_year#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#15,d_year#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(3) Project [i_item_sk#13, i_item_id#1] : +- *(3) Filter isnotnull(i_item_sk#13) - : +- *(3) FileScan parquet default.item[i_item_sk#13,i_item_id#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : +- *(3) FileScan parquet default.item[i_item_sk#13,i_item_id#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(4) Project [p_promo_sk#11] +- *(4) Filter (((p_channel_email#22 = N) || (p_channel_event#23 = N)) && isnotnull(p_promo_sk#11)) - +- *(4) FileScan parquet default.promotion[p_promo_sk#11,p_channel_email#22,p_channel_event#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)], ReadSchema: struct \ No newline at end of file + +- *(4) FileScan parquet default.promotion[p_promo_sk#11,p_channel_email#22,p_channel_event#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/promotion], PartitionFilters: [], PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/explain.txt index e0113e54d..f1b11f830 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/explain.txt @@ -14,16 +14,16 @@ TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WH : +- *(8) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight : :- *(8) Project [ss_sold_date_sk#17, ss_store_sk#15, ss_net_profit#10] : : +- *(8) Filter (isnotnull(ss_sold_date_sk#17) && isnotnull(ss_store_sk#15)) - : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_store_sk#15,ss_net_profit#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_store_sk#15,ss_net_profit#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(1) Project [d_date_sk#18] : +- *(1) Filter (((isnotnull(d_month_seq#19) && (d_month_seq#19 >= 1200)) && (d_month_seq#19 <= 1211)) && isnotnull(d_date_sk#18)) - : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_month_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_month_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(7) BroadcastHashJoin [s_state#13], [s_state#20], LeftSemi, BuildRight :- *(7) Project [s_store_sk#16, s_county#14, s_state#13] : +- *(7) Filter isnotnull(s_store_sk#16) - : +- *(7) FileScan parquet default.store[s_store_sk#16,s_county#14,s_state#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : +- *(7) FileScan parquet default.store[s_store_sk#16,s_county#14,s_state#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) +- *(6) Project [s_state#20] +- *(6) Filter (isnotnull(ranking#21) && (ranking#21 <= 5)) @@ -38,9 +38,9 @@ TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WH : +- *(4) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight : :- *(4) Project [ss_sold_date_sk#17, ss_store_sk#15, ss_net_profit#10] : : +- *(4) Filter (isnotnull(ss_store_sk#15) && isnotnull(ss_sold_date_sk#17)) - : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_store_sk#15,ss_net_profit#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_store_sk#15,ss_net_profit#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(2) Project [s_store_sk#16, s_state#13] : +- *(2) Filter isnotnull(s_store_sk#16) - : +- *(2) FileScan parquet default.store[s_store_sk#16,s_state#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : +- *(2) FileScan parquet default.store[s_store_sk#16,s_state#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct +- ReusedExchange [d_date_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/explain.txt index f11328994..ecda81c88 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/explain.txt @@ -11,30 +11,30 @@ : :- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(1) Project [i_item_sk#10, i_brand_id#4, i_brand#3] : : +- *(1) Filter ((isnotnull(i_manager_id#12) && (i_manager_id#12 = 1)) && isnotnull(i_item_sk#10)) - : : +- *(1) FileScan parquet default.item[i_item_sk#10,i_brand_id#4,i_brand#3,i_manager_id#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct + : : +- *(1) FileScan parquet default.item[i_item_sk#10,i_brand_id#4,i_brand#3,i_manager_id#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct : +- Union : :- *(3) Project [ws_ext_sales_price#13 AS ext_price#7, ws_item_sk#14 AS sold_item_sk#11, ws_sold_time_sk#15 AS time_sk#8] : : +- *(3) BroadcastHashJoin [ws_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight : : :- *(3) Project [ws_sold_date_sk#16, ws_sold_time_sk#15, ws_item_sk#14, ws_ext_sales_price#13] : : : +- *(3) Filter ((isnotnull(ws_sold_date_sk#16) && isnotnull(ws_item_sk#14)) && isnotnull(ws_sold_time_sk#15)) - : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#16,ws_sold_time_sk#15,ws_item_sk#14,ws_ext_sales_price#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_time_sk)], ReadSchema: struct + : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#16,ws_sold_time_sk#15,ws_item_sk#14,ws_ext_sales_price#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_time_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [d_date_sk#17] : : +- *(2) Filter ((((isnotnull(d_moy#18) && isnotnull(d_year#19)) && (d_moy#18 = 11)) && (d_year#19 = 1999)) && isnotnull(d_date_sk#17)) - : : +- *(2) FileScan parquet default.date_dim[d_date_sk#17,d_year#19,d_moy#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#17,d_year#19,d_moy#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct : :- *(5) Project [cs_ext_sales_price#20 AS ext_price#21, cs_item_sk#22 AS sold_item_sk#23, cs_sold_time_sk#24 AS time_sk#25] : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#17], Inner, BuildRight : : :- *(5) Project [cs_sold_date_sk#26, cs_sold_time_sk#24, cs_item_sk#22, cs_ext_sales_price#20] : : : +- *(5) Filter ((isnotnull(cs_sold_date_sk#26) && isnotnull(cs_item_sk#22)) && isnotnull(cs_sold_time_sk#24)) - : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_sold_time_sk#24,cs_item_sk#22,cs_ext_sales_price#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_time_sk)], ReadSchema: struct + : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_sold_time_sk#24,cs_item_sk#22,cs_ext_sales_price#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_time_sk)], ReadSchema: struct : : +- ReusedExchange [d_date_sk#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(7) Project [ss_ext_sales_price#27 AS ext_price#28, ss_item_sk#29 AS sold_item_sk#30, ss_sold_time_sk#31 AS time_sk#32] : +- *(7) BroadcastHashJoin [ss_sold_date_sk#33], [d_date_sk#17], Inner, BuildRight : :- *(7) Project [ss_sold_date_sk#33, ss_sold_time_sk#31, ss_item_sk#29, ss_ext_sales_price#27] : : +- *(7) Filter ((isnotnull(ss_sold_date_sk#33) && isnotnull(ss_item_sk#29)) && isnotnull(ss_sold_time_sk#31)) - : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#33,ss_sold_time_sk#31,ss_item_sk#29,ss_ext_sales_price#27] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_sold_time_sk)], ReadSchema: struct + : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#33,ss_sold_time_sk#31,ss_item_sk#29,ss_ext_sales_price#27] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_sold_time_sk)], ReadSchema: struct : +- ReusedExchange [d_date_sk#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(8) Project [t_time_sk#9, t_hour#5, t_minute#6] +- *(8) Filter (((t_meal_time#34 = breakfast) || (t_meal_time#34 = dinner)) && isnotnull(t_time_sk#9)) - +- *(8) FileScan parquet default.time_dim[t_time_sk#9,t_hour#5,t_minute#6,t_meal_time#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [Or(EqualTo(t_meal_time,breakfast),EqualTo(t_meal_time,dinner)), IsNotNull(t_time_sk)], ReadSchema: struct \ No newline at end of file + +- *(8) FileScan parquet default.time_dim[t_time_sk#9,t_hour#5,t_minute#6,t_meal_time#34] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [Or(EqualTo(t_meal_time,breakfast),EqualTo(t_meal_time,dinner)), IsNotNull(t_time_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt index 40feaedbd..09f911c81 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt @@ -25,44 +25,44 @@ TakeOrderedAndProject(limit=100, orderBy=[total_cnt#1 DESC NULLS LAST,i_item_des : : : : : : : : : +- *(11) BroadcastHashJoin [cs_item_sk#7], [inv_item_sk#29], Inner, BuildRight, (inv_quantity_on_hand#30 < cs_quantity#31) : : : : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, cs_quantity#31] : : : : : : : : : : +- *(11) Filter (((((isnotnull(cs_quantity#31) && isnotnull(cs_item_sk#7)) && isnotnull(cs_bill_cdemo_sk#24)) && isnotnull(cs_bill_hdemo_sk#22)) && isnotnull(cs_sold_date_sk#20)) && isnotnull(cs_ship_date_sk#13)) - : : : : : : : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#20,cs_ship_date_sk#13,cs_bill_cdemo_sk#24,cs_bill_hdemo_sk#22,cs_item_sk#7,cs_promo_sk#11,cs_order_number#8,cs_quantity#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hd..., ReadSchema: struct + : : : : : : : : : +- *(1) FileScan parquet default.inventory[inv_date_sk#17,inv_item_sk#29,inv_warehouse_sk#27,inv_quantity_on_hand#30] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(..., ReadSchema: struct : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : : +- *(2) Project [w_warehouse_sk#28, w_warehouse_name#3] : : : : : : : : +- *(2) Filter isnotnull(w_warehouse_sk#28) - : : : : : : : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#28,w_warehouse_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct + : : : : : : : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#28,w_warehouse_name#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : +- *(3) Project [i_item_sk#26, i_item_desc#2] : : : : : : : +- *(3) Filter isnotnull(i_item_sk#26) - : : : : : : : +- *(3) FileScan parquet default.item[i_item_sk#26,i_item_desc#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : : : : : : +- *(3) FileScan parquet default.item[i_item_sk#26,i_item_desc#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : +- *(4) Project [cd_demo_sk#25] : : : : : : +- *(4) Filter ((isnotnull(cd_marital_status#32) && (cd_marital_status#32 = D)) && isnotnull(cd_demo_sk#25)) - : : : : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#25,cd_marital_status#32] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,D), IsNotNull(cd_demo_sk)], ReadSchema: struct + : : : : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#25,cd_marital_status#32] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,D), IsNotNull(cd_demo_sk)], ReadSchema: struct : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- *(5) Project [hd_demo_sk#23] : : : : : +- *(5) Filter ((isnotnull(hd_buy_potential#33) && (hd_buy_potential#33 = >10000)) && isnotnull(hd_demo_sk#23)) - : : : : : +- *(5) FileScan parquet default.household_demographics[hd_demo_sk#23,hd_buy_potential#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000), IsNotNull(hd_demo_sk)], ReadSchema: struct + : : : : : +- *(5) FileScan parquet default.household_demographics[hd_demo_sk#23,hd_buy_potential#33] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000), IsNotNull(hd_demo_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(6) Project [d_date_sk#21, d_date#16, d_week_seq#4] : : : : +- *(6) Filter (((isnotnull(d_year#34) && (d_year#34 = 1999)) && isnotnull(d_date_sk#21)) && isnotnull(d_week_seq#4)) - : : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#21,d_date#16,d_week_seq#4,d_year#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct + : : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#21,d_date#16,d_week_seq#4,d_year#34] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) : : : +- *(7) Project [d_date_sk#19, d_week_seq#18] : : : +- *(7) Filter (isnotnull(d_week_seq#18) && isnotnull(d_date_sk#19)) - : : : +- *(7) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- *(7) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(8) Project [d_date_sk#14, d_date#15] : : +- *(8) Filter (isnotnull(d_date_sk#14) && isnotnull(d_date#15)) - : : +- *(8) FileScan parquet default.date_dim[d_date_sk#14,d_date#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_date)], ReadSchema: struct + : : +- *(8) FileScan parquet default.date_dim[d_date_sk#14,d_date#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_date)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(9) Project [p_promo_sk#12] : +- *(9) Filter isnotnull(p_promo_sk#12) - : +- *(9) FileScan parquet default.promotion[p_promo_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_promo_sk)], ReadSchema: struct + : +- *(9) FileScan parquet default.promotion[p_promo_sk#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_promo_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295)))) +- *(10) Project [cr_item_sk#9, cr_order_number#10] +- *(10) Filter (isnotnull(cr_item_sk#9) && isnotnull(cr_order_number#10)) - +- *(10) FileScan parquet default.catalog_returns[cr_item_sk#9,cr_order_number#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct \ No newline at end of file + +- *(10) FileScan parquet default.catalog_returns[cr_item_sk#9,cr_order_number#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/explain.txt index 898ff3075..a444e7bf3 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/explain.txt @@ -15,20 +15,20 @@ : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight : : : :- *(4) Project [ss_sold_date_sk#13, ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#6] : : : : +- *(4) Filter (((isnotnull(ss_sold_date_sk#13) && isnotnull(ss_store_sk#11)) && isnotnull(ss_hdemo_sk#9)) && isnotnull(ss_customer_sk#7)) - : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_customer_sk#7,ss_hdemo_sk#9,ss_store_sk#11,ss_ticket_number#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_customer_sk#7,ss_hdemo_sk#9,ss_store_sk#11,ss_ticket_number#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(1) Project [d_date_sk#14] : : : +- *(1) Filter ((((isnotnull(d_dom#15) && (d_dom#15 >= 1)) && (d_dom#15 <= 2)) && d_year#16 IN (1999,2000,2001)) && isnotnull(d_date_sk#14)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_dom#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2..., ReadSchema: struct + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_dom#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [s_store_sk#12] : : +- *(2) Filter (s_county#17 IN (Williamson County,Franklin Parish,Bronx County,Orange County) && isnotnull(s_store_sk#12)) - : : +- *(2) FileScan parquet default.store[s_store_sk#12,s_county#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [In(s_county, [Williamson County,Franklin Parish,Bronx County,Orange County]), IsNotNull(s_store_..., ReadSchema: struct + : : +- *(2) FileScan parquet default.store[s_store_sk#12,s_county#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [In(s_county, [Williamson County,Franklin Parish,Bronx County,Orange County]), IsNotNull(s_store_..., ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(3) Project [hd_demo_sk#10] : +- *(3) Filter ((((isnotnull(hd_vehicle_count#18) && ((hd_buy_potential#19 = >10000) || (hd_buy_potential#19 = unknown))) && (hd_vehicle_count#18 > 0)) && (CASE WHEN (hd_vehicle_count#18 > 0) THEN (cast(hd_dep_count#20 as double) / cast(hd_vehicle_count#18 as double)) ELSE null END > 1.0)) && isnotnull(hd_demo_sk#10)) - : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#10,hd_buy_potential#19,hd_dep_count#20,hd_vehicle_count#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknow..., ReadSchema: struct + : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#10,hd_buy_potential#19,hd_dep_count#20,hd_vehicle_count#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknow..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(5) Project [c_customer_sk#8, c_salutation#4, c_first_name#3, c_last_name#2, c_preferred_cust_flag#5] +- *(5) Filter isnotnull(c_customer_sk#8) - +- *(5) FileScan parquet default.customer[c_customer_sk#8,c_salutation#4,c_first_name#3,c_last_name#2,c_preferred_cust_flag#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : : : : : +- *(3) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) : : : : : +- *(1) Project [ss_sold_date_sk#16, ss_customer_sk#19, ss_net_paid#15] : : : : : +- *(1) Filter (isnotnull(ss_customer_sk#19) && isnotnull(ss_sold_date_sk#16)) - : : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_customer_sk#19,ss_net_paid#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_customer_sk#19,ss_net_paid#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(2) Project [d_date_sk#17, d_year#14] : : : : +- *(2) Filter (((isnotnull(d_year#14) && d_year#14 IN (2001,2002)) && (d_year#14 = 2001)) && isnotnull(d_date_sk#17)) - : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#17,d_year#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), In(d_year, [2001,2002]), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct + : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#17,d_year#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), In(d_year, [2001,2002]), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct : : : +- LocalTableScan , [customer_id#20, year_total#21] : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : : +- Union @@ -37,12 +37,12 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer : : : : +- *(7) BroadcastHashJoin [c_customer_sk#18], [ss_customer_sk#19], Inner, BuildRight : : : : :- *(7) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] : : : : : +- *(7) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) - : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct : : : : +- ReusedExchange [ss_sold_date_sk#16, ss_customer_sk#19, ss_net_paid#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(6) Project [d_date_sk#17, d_year#14] : : : +- *(6) Filter (((isnotnull(d_year#14) && d_year#14 IN (2001,2002)) && (d_year#14 = 2002)) && isnotnull(d_date_sk#17)) - : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#17,d_year#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), In(d_year, [2001,2002]), EqualTo(d_year,2002), IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#17,d_year#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), In(d_year, [2001,2002]), EqualTo(d_year,2002), IsNotNull(d_date_sk)], ReadSchema: struct : : +- LocalTableScan , [customer_id#20, customer_first_name#22, customer_last_name#23, year_total#21] : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : +- Union @@ -57,11 +57,11 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer : : +- *(11) BroadcastHashJoin [c_customer_sk#18], [ws_bill_customer_sk#26], Inner, BuildRight : : :- *(11) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] : : : +- *(11) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) - : : : +- *(11) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : : +- *(11) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) : : +- *(9) Project [ws_sold_date_sk#25, ws_bill_customer_sk#26, ws_net_paid#24] : : +- *(9) Filter (isnotnull(ws_bill_customer_sk#26) && isnotnull(ws_sold_date_sk#25)) - : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#25,ws_bill_customer_sk#26,ws_net_paid#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#25,ws_bill_customer_sk#26,ws_net_paid#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct : +- ReusedExchange [d_date_sk#17, d_year#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) +- Union @@ -75,6 +75,6 @@ TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer : +- *(15) BroadcastHashJoin [c_customer_sk#18], [ws_bill_customer_sk#26], Inner, BuildRight : :- *(15) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] : : +- *(15) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) - : : +- *(15) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + : : +- *(15) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct : +- ReusedExchange [ws_sold_date_sk#25, ws_bill_customer_sk#26, ws_net_paid#24], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) +- ReusedExchange [d_date_sk#17, d_year#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/explain.txt index 5f9929f2d..e230db432 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/explain.txt @@ -21,19 +21,19 @@ TakeOrderedAndProject(limit=100, orderBy=[sales_cnt_diff#1 ASC NULLS FIRST], out : : : : : +- *(4) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#33], Inner, BuildRight : : : : : :- *(4) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25] : : : : : : +- *(4) Filter (isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#31)) - : : : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#28,cs_order_number#27,cs_quantity#23,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#32,d_year#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) : : : +- *(3) Project [cr_item_sk#30, cr_order_number#29, cr_return_quantity#24, cr_return_amount#26] : : : +- *(3) Filter (isnotnull(cr_item_sk#30) && isnotnull(cr_order_number#29)) - : : : +- *(3) FileScan parquet default.catalog_returns[cr_item_sk#30,cr_order_number#29,cr_return_quantity#24,cr_return_amount#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct + : : : +- *(3) FileScan parquet default.catalog_returns[cr_item_sk#30,cr_order_number#29,cr_return_quantity#24,cr_return_amount#26] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct : : +- *(8) Project [d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, (ss_quantity#35 - coalesce(sr_return_quantity#36, 0)) AS sales_cnt#37, CheckOverflow((promote_precision(cast(ss_ext_sales_price#38 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#39, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#40] : : +- *(8) BroadcastHashJoin [cast(ss_ticket_number#41 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#43, sr_item_sk#44], LeftOuter, BuildRight : : :- *(8) Project [ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, d_year#12] @@ -42,13 +42,13 @@ TakeOrderedAndProject(limit=100, orderBy=[sales_cnt_diff#1 ASC NULLS FIRST], out : : : : +- *(8) BroadcastHashJoin [ss_item_sk#42], [i_item_sk#33], Inner, BuildRight : : : : :- *(8) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38] : : : : : +- *(8) Filter (isnotnull(ss_item_sk#42) && isnotnull(ss_sold_date_sk#45)) - : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#45,ss_item_sk#42,ss_ticket_number#41,ss_quantity#35,ss_ext_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- *(7) FileScan parquet default.store_returns[sr_item_sk#44,sr_ticket_number#43,sr_return_quantity#36,sr_return_amt#39] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)], ReadSchema: struct : +- *(14) Project [d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, (ws_quantity#46 - coalesce(wr_return_quantity#47, 0)) AS sales_cnt#48, CheckOverflow((promote_precision(cast(ws_ext_sales_price#49 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#50, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#51] : +- *(14) BroadcastHashJoin [cast(ws_order_number#52 as bigint), cast(ws_item_sk#53 as bigint)], [wr_order_number#54, wr_item_sk#55], LeftOuter, BuildRight : :- *(14) Project [ws_item_sk#53, ws_order_number#52, ws_quantity#46, ws_ext_sales_price#49, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, d_year#12] @@ -57,13 +57,13 @@ TakeOrderedAndProject(limit=100, orderBy=[sales_cnt_diff#1 ASC NULLS FIRST], out : : : +- *(14) BroadcastHashJoin [ws_item_sk#53], [i_item_sk#33], Inner, BuildRight : : : :- *(14) Project [ws_sold_date_sk#56, ws_item_sk#53, ws_order_number#52, ws_quantity#46, ws_ext_sales_price#49] : : : : +- *(14) Filter (isnotnull(ws_item_sk#53) && isnotnull(ws_sold_date_sk#56)) - : : : : +- *(14) FileScan parquet default.web_sales[ws_sold_date_sk#56,ws_item_sk#53,ws_order_number#52,ws_quantity#46,ws_ext_sales_price#49] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : +- *(13) FileScan parquet default.web_returns[wr_item_sk#55,wr_order_number#54,wr_return_quantity#47,wr_return_amt#50] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true], input[4, int, true])) +- *(33) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], functions=[sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))]) +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, 5) @@ -84,12 +84,12 @@ TakeOrderedAndProject(limit=100, orderBy=[sales_cnt_diff#1 ASC NULLS FIRST], out : : : : +- *(20) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#58], Inner, BuildRight : : : : :- *(20) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25] : : : : : +- *(20) Filter (isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#31)) - : : : : : +- *(20) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#28,cs_order_number#27,cs_quantity#23,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : : +- *(18) FileScan parquet default.date_dim[d_date_sk#57,d_year#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct : : +- ReusedExchange [cr_item_sk#30, cr_order_number#29, cr_return_quantity#24, cr_return_amount#26], BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) : +- *(24) Project [d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, (ss_quantity#35 - coalesce(sr_return_quantity#36, 0)) AS sales_cnt#37, CheckOverflow((promote_precision(cast(ss_ext_sales_price#38 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#39, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#40] : +- *(24) BroadcastHashJoin [cast(ss_ticket_number#41 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#43, sr_item_sk#44], LeftOuter, BuildRight @@ -99,7 +99,7 @@ TakeOrderedAndProject(limit=100, orderBy=[sales_cnt_diff#1 ASC NULLS FIRST], out : : : +- *(24) BroadcastHashJoin [ss_item_sk#42], [i_item_sk#58], Inner, BuildRight : : : :- *(24) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38] : : : : +- *(24) Filter (isnotnull(ss_item_sk#42) && isnotnull(ss_sold_date_sk#45)) - : : : : +- *(24) FileScan parquet default.store_sales[ss_sold_date_sk#45,ss_item_sk#42,ss_ticket_number#41,ss_quantity#35,ss_ext_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#13,ss_store_sk#9,ss_ext_sales_price#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(1) Project [i_item_sk#14, i_category#5] : : +- *(1) Filter isnotnull(i_item_sk#14) - : : +- *(1) FileScan parquet default.item[i_item_sk#14,i_category#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct + : : +- *(1) FileScan parquet default.item[i_item_sk#14,i_category#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(2) Project [d_date_sk#12, d_year#3, d_qoy#4] : +- *(2) Filter isnotnull(d_date_sk#12) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#12,d_year#3,d_qoy#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + : +- *(2) FileScan parquet default.date_dim[d_date_sk#12,d_year#3,d_qoy#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct :- *(6) Project [web AS channel#15, ws_ship_customer_sk#16 AS col_name#17, d_year#3, d_qoy#4, i_category#5, ws_ext_sales_price#18 AS ext_sales_price#19] : +- *(6) BroadcastHashJoin [ws_sold_date_sk#20], [d_date_sk#12], Inner, BuildRight : :- *(6) Project [ws_sold_date_sk#20, ws_ship_customer_sk#16, ws_ext_sales_price#18, i_category#5] : : +- *(6) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#14], Inner, BuildRight : : :- *(6) Project [ws_sold_date_sk#20, ws_item_sk#21, ws_ship_customer_sk#16, ws_ext_sales_price#18] : : : +- *(6) Filter ((isnull(ws_ship_customer_sk#16) && isnotnull(ws_item_sk#21)) && isnotnull(ws_sold_date_sk#20)) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#20,ws_item_sk#21,ws_ship_customer_sk#16,ws_ext_sales_price#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNull(ws_ship_customer_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#20,ws_item_sk#21,ws_ship_customer_sk#16,ws_ext_sales_price#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNull(ws_ship_customer_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct : : +- ReusedExchange [i_item_sk#14, i_category#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [d_date_sk#12, d_year#3, d_qoy#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(9) Project [catalog AS channel#22, cs_ship_addr_sk#23 AS col_name#24, d_year#3, d_qoy#4, i_category#5, cs_ext_sales_price#25 AS ext_sales_price#26] @@ -34,6 +34,6 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,col_name#2 A : +- *(9) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#14], Inner, BuildRight : :- *(9) Project [cs_sold_date_sk#27, cs_ship_addr_sk#23, cs_item_sk#28, cs_ext_sales_price#25] : : +- *(9) Filter ((isnull(cs_ship_addr_sk#23) && isnotnull(cs_item_sk#28)) && isnotnull(cs_sold_date_sk#27)) - : : +- *(9) FileScan parquet default.catalog_sales[cs_sold_date_sk#27,cs_ship_addr_sk#23,cs_item_sk#28,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNull(cs_ship_addr_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- *(9) FileScan parquet default.catalog_sales[cs_sold_date_sk#27,cs_ship_addr_sk#23,cs_item_sk#28,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNull(cs_ship_addr_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct : +- ReusedExchange [i_item_sk#14, i_category#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- ReusedExchange [d_date_sk#12, d_year#3, d_qoy#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/explain.txt index 0b9dd6982..1fa673f3a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/explain.txt @@ -16,15 +16,15 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight : : : :- *(3) Project [ss_sold_date_sk#20, ss_store_sk#19, ss_ext_sales_price#17, ss_net_profit#18] : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#20) && isnotnull(ss_store_sk#19)) - : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#20,ss_store_sk#19,ss_ext_sales_price#17,ss_net_profit#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct= 11172)) && (d_date#22 <= 11202)) && isnotnull(d_date_sk#21)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#21,d_date#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), Is..., ReadSchema: struct + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#21,d_date#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), Is..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [s_store_sk#15] : : +- *(2) Filter isnotnull(s_store_sk#15) - : : +- *(2) FileScan parquet default.store[s_store_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.store[s_store_sk#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(7) HashAggregate(keys=[s_store_sk#16], functions=[sum(UnscaledValue(sr_return_amt#23)), sum(UnscaledValue(sr_net_loss#24))]) : +- Exchange hashpartitioning(s_store_sk#16, 5) @@ -35,15 +35,15 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL : : +- *(6) BroadcastHashJoin [sr_returned_date_sk#26], [cast(d_date_sk#21 as bigint)], Inner, BuildRight : : :- *(6) Project [sr_returned_date_sk#26, sr_store_sk#25, sr_return_amt#23, sr_net_loss#24] : : : +- *(6) Filter (isnotnull(sr_returned_date_sk#26) && isnotnull(sr_store_sk#25)) - : : : +- *(6) FileScan parquet default.store_returns[sr_returned_date_sk#26,sr_store_sk#25,sr_return_amt#23,sr_net_loss#24] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)], ReadSchema: struct= 11172)) && (d_date#22 <= 11202)) && isnotnull(d_date_sk#21)) - : : +- *(4) FileScan parquet default.date_dim[d_date_sk#21,d_date#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), Is..., ReadSchema: struct + : : +- *(4) FileScan parquet default.date_dim[d_date_sk#21,d_date#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), Is..., ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(5) Project [s_store_sk#16] : +- *(5) Filter isnotnull(s_store_sk#16) - : +- *(5) FileScan parquet default.store[s_store_sk#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : +- *(5) FileScan parquet default.store[s_store_sk#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct :- *(15) Project [sales#27, returns#28, CheckOverflow((promote_precision(cast(profit#29 as decimal(18,2))) - promote_precision(cast(profit_loss#30 as decimal(18,2)))), DecimalType(18,2)) AS profit#31, catalog channel AS channel#32, cs_call_center_sk#33 AS id#34] : +- BroadcastNestedLoopJoin BuildLeft, Inner : :- BroadcastExchange IdentityBroadcastMode @@ -54,7 +54,7 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL : : +- *(10) BroadcastHashJoin [cs_sold_date_sk#37], [d_date_sk#21], Inner, BuildRight : : :- *(10) Project [cs_sold_date_sk#37, cs_call_center_sk#33, cs_ext_sales_price#35, cs_net_profit#36] : : : +- *(10) Filter isnotnull(cs_sold_date_sk#37) - : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#37,cs_call_center_sk#33,cs_ext_sales_price#35,cs_net_profit#36] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : : +- *(13) FileScan parquet default.catalog_returns[cr_returned_date_sk#40,cr_return_amount#38,cr_net_loss#39] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_returned_date_sk)], ReadSchema: struct : +- ReusedExchange [d_date_sk#21], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(23) Project [sales#41, coalesce(returns#42, 0.00) AS returns#43, CheckOverflow((promote_precision(cast(profit#44 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#45, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#46, web channel AS channel#47, wp_web_page_sk#48 AS id#49] +- *(23) BroadcastHashJoin [wp_web_page_sk#48], [wp_web_page_sk#50], LeftOuter, BuildRight @@ -76,12 +76,12 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL : : +- *(18) BroadcastHashJoin [ws_sold_date_sk#54], [d_date_sk#21], Inner, BuildRight : : :- *(18) Project [ws_sold_date_sk#54, ws_web_page_sk#53, ws_ext_sales_price#51, ws_net_profit#52] : : : +- *(18) Filter (isnotnull(ws_sold_date_sk#54) && isnotnull(ws_web_page_sk#53)) - : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#54,ws_web_page_sk#53,ws_ext_sales_price#51,ws_net_profit#52] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct + : +- *(17) FileScan parquet default.web_page[wp_web_page_sk#48] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_page], PartitionFilters: [], PushedFilters: [IsNotNull(wp_web_page_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(22) HashAggregate(keys=[wp_web_page_sk#50], functions=[sum(UnscaledValue(wr_return_amt#55)), sum(UnscaledValue(wr_net_loss#56))]) +- Exchange hashpartitioning(wp_web_page_sk#50, 5) @@ -92,9 +92,9 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL : +- *(21) BroadcastHashJoin [wr_returned_date_sk#58], [cast(d_date_sk#21 as bigint)], Inner, BuildRight : :- *(21) Project [wr_returned_date_sk#58, wr_web_page_sk#57, wr_return_amt#55, wr_net_loss#56] : : +- *(21) Filter (isnotnull(wr_returned_date_sk#58) && isnotnull(wr_web_page_sk#57)) - : : +- *(21) FileScan parquet default.web_returns[wr_returned_date_sk#58,wr_web_page_sk#57,wr_return_amt#55,wr_net_loss#56] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_web_page_sk)], ReadSchema: struct \ No newline at end of file + +- *(20) FileScan parquet default.web_page[wp_web_page_sk#50] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_page], PartitionFilters: [], PushedFilters: [IsNotNull(wp_web_page_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt index 348b4f499..a10754cf1 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt @@ -14,15 +14,15 @@ TakeOrderedAndProject(limit=100, orderBy=[ratio#1 ASC NULLS FIRST,ss_qty#2 DESC : : : +- *(3) BroadcastHashJoin [cast(ss_ticket_number#33 as bigint), cast(ss_item_sk#18 as bigint)], [sr_ticket_number#32, sr_item_sk#34], LeftOuter, BuildRight : : : :- *(3) Project [ss_sold_date_sk#30, ss_item_sk#18, ss_customer_sk#19, ss_ticket_number#33, ss_quantity#27, ss_wholesale_cost#28, ss_sales_price#29] : : : : +- *(3) Filter ((isnotnull(ss_sold_date_sk#30) && isnotnull(ss_item_sk#18)) && isnotnull(ss_customer_sk#19)) - : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#30,ss_item_sk#18,ss_customer_sk#19,ss_ticket_number#33,ss_quantity#27,ss_wholesale_cost#28,ss_sales_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.store_returns[sr_item_sk#34,sr_ticket_number#32] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [d_date_sk#31, d_year#26] : : +- *(2) Filter ((isnotnull(d_year#26) && (d_year#26 = 2000)) && isnotnull(d_date_sk#31)) - : : +- *(2) FileScan parquet default.date_dim[d_date_sk#31,d_year#26] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.date_dim[d_date_sk#31,d_year#26] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) : +- *(7) Filter (coalesce(ws_qty#8, 0) > 0) : +- *(7) HashAggregate(keys=[d_year#26, ws_item_sk#24, ws_bill_customer_sk#35], functions=[sum(cast(ws_quantity#36 as bigint)), sum(UnscaledValue(ws_wholesale_cost#37)), sum(UnscaledValue(ws_sales_price#38))]) @@ -35,11 +35,11 @@ TakeOrderedAndProject(limit=100, orderBy=[ratio#1 ASC NULLS FIRST,ss_qty#2 DESC : : +- *(6) BroadcastHashJoin [cast(ws_order_number#41 as bigint), cast(ws_item_sk#24 as bigint)], [wr_order_number#40, wr_item_sk#42], LeftOuter, BuildRight : : :- *(6) Project [ws_sold_date_sk#39, ws_item_sk#24, ws_bill_customer_sk#35, ws_order_number#41, ws_quantity#36, ws_wholesale_cost#37, ws_sales_price#38] : : : +- *(6) Filter ((isnotnull(ws_sold_date_sk#39) && isnotnull(ws_item_sk#24)) && isnotnull(ws_bill_customer_sk#35)) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#39,ws_item_sk#24,ws_bill_customer_sk#35,ws_order_number#41,ws_quantity#36,ws_wholesale_cost#37,ws_sales_price#38] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct + : : +- *(4) FileScan parquet default.web_returns[wr_item_sk#42,wr_order_number#40] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)], ReadSchema: struct : +- ReusedExchange [d_date_sk#31, d_year#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) +- *(11) Filter (coalesce(cs_qty#9, 0) > 0) @@ -53,9 +53,9 @@ TakeOrderedAndProject(limit=100, orderBy=[ratio#1 ASC NULLS FIRST,ss_qty#2 DESC : +- *(10) BroadcastHashJoin [cs_order_number#49, cs_item_sk#21], [cr_order_number#48, cr_item_sk#50], LeftOuter, BuildRight : :- *(10) Project [cs_sold_date_sk#47, cs_bill_customer_sk#43, cs_item_sk#21, cs_order_number#49, cs_quantity#44, cs_wholesale_cost#45, cs_sales_price#46] : : +- *(10) Filter ((isnotnull(cs_sold_date_sk#47) && isnotnull(cs_item_sk#21)) && isnotnull(cs_bill_customer_sk#43)) - : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#47,cs_bill_customer_sk#43,cs_item_sk#21,cs_order_number#49,cs_quantity#44,cs_wholesale_cost#45,cs_sales_price#46] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct + : +- *(8) FileScan parquet default.catalog_returns[cr_item_sk#50,cr_order_number#48] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)], ReadSchema: struct +- ReusedExchange [d_date_sk#31, d_year#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/explain.txt index 8e43f862c..716a1b741 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/explain.txt @@ -13,20 +13,20 @@ TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_ : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight : : : :- *(4) Project [ss_sold_date_sk#17, ss_customer_sk#8, ss_hdemo_sk#13, ss_addr_sk#10, ss_store_sk#15, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12] : : : : +- *(4) Filter (((isnotnull(ss_sold_date_sk#17) && isnotnull(ss_store_sk#15)) && isnotnull(ss_hdemo_sk#13)) && isnotnull(ss_customer_sk#8)) - : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_customer_sk#8,ss_hdemo_sk#13,ss_addr_sk#10,ss_store_sk#15,ss_ticket_number#6,ss_coupon_amt#11,ss_net_profit#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_year#20,d_dow#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dow), EqualTo(d_dow,1), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [s_store_sk#16, s_city#3] : : +- *(2) Filter (((isnotnull(s_number_employees#21) && (s_number_employees#21 >= 200)) && (s_number_employees#21 <= 295)) && isnotnull(s_store_sk#16)) - : : +- *(2) FileScan parquet default.store[s_store_sk#16,s_number_employees#21,s_city#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_number_employees), GreaterThanOrEqual(s_number_employees,200), LessThanOrEqual(s_num..., ReadSchema: struct + : : +- *(2) FileScan parquet default.store[s_store_sk#16,s_number_employees#21,s_city#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_number_employees), GreaterThanOrEqual(s_number_employees,200), LessThanOrEqual(s_num..., ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(3) Project [hd_demo_sk#14] : +- *(3) Filter (((hd_dep_count#22 = 6) || (hd_vehicle_count#23 > 2)) && isnotnull(hd_demo_sk#14)) - : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#14,hd_dep_count#22,hd_vehicle_count#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,6),GreaterThan(hd_vehicle_count,2)), IsNotNull(hd_demo_sk)], ReadSchema: struct + : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#14,hd_dep_count#22,hd_vehicle_count#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,6),GreaterThan(hd_vehicle_count,2)), IsNotNull(hd_demo_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(5) Project [c_customer_sk#9, c_first_name#2, c_last_name#1] +- *(5) Filter isnotnull(c_customer_sk#9) - +- *(5) FileScan parquet default.customer[c_customer_sk#9,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct \ No newline at end of file + +- *(5) FileScan parquet default.customer[c_customer_sk#9,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/explain.txt index 482275fd7..7fe2f14b3 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/explain.txt @@ -11,15 +11,15 @@ TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST], outpu : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight : : :- *(8) Project [ss_sold_date_sk#8, ss_store_sk#6, ss_net_profit#3] : : : +- *(8) Filter (isnotnull(ss_sold_date_sk#8) && isnotnull(ss_store_sk#6)) - : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#8,ss_store_sk#6,ss_net_profit#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#8,ss_store_sk#6,ss_net_profit#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(1) Project [d_date_sk#9] : : +- *(1) Filter ((((isnotnull(d_qoy#10) && isnotnull(d_year#11)) && (d_qoy#10 = 2)) && (d_year#11 = 1998)) && isnotnull(d_date_sk#9)) - : : +- *(1) FileScan parquet default.date_dim[d_date_sk#9,d_year#11,d_qoy#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(1) FileScan parquet default.date_dim[d_date_sk#9,d_year#11,d_qoy#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(2) Project [s_store_sk#7, s_store_name#1, s_zip#4] : +- *(2) Filter (isnotnull(s_store_sk#7) && isnotnull(s_zip#4)) - : +- *(2) FileScan parquet default.store[s_store_sk#7,s_store_name#1,s_zip#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct + : +- *(2) FileScan parquet default.store[s_store_sk#7,s_store_name#1,s_zip#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(substring(input[0, string, true], 1, 2))) +- *(7) HashAggregate(keys=[ca_zip#5], functions=[]) +- Exchange hashpartitioning(ca_zip#5, 5) @@ -27,7 +27,7 @@ TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST], outpu +- *(6) BroadcastHashJoin [coalesce(ca_zip#5, )], [coalesce(ca_zip#12, )], LeftSemi, BuildRight, (ca_zip#5 <=> ca_zip#12) :- *(6) Project [substring(ca_zip#13, 1, 5) AS ca_zip#5] : +- *(6) Filter (substring(ca_zip#13, 1, 5) INSET (56910,69952,63792,39371,74351,11101,25003,97189,57834,73134,62377,51200,32754,22752,86379,14171,91110,40162,98569,28709,13394,66162,25733,25782,26065,18383,51949,87343,50298,83849,33786,64528,23470,67030,46136,25280,46820,77721,99076,18426,31880,17871,98235,45748,49156,18652,72013,51622,43848,78567,41248,13695,44165,67853,54917,53179,64034,10567,71791,68908,55565,59402,64147,85816,57855,61547,27700,68100,28810,58263,15723,83933,51103,58058,90578,82276,81096,81426,96451,77556,38607,76638,18906,62971,57047,48425,35576,11928,30625,83444,73520,51650,57647,60099,30122,94983,24128,10445,41368,26233,26859,21756,24676,19849,36420,38193,58470,39127,13595,87501,24317,15455,69399,98025,81019,48033,11376,39516,67875,92712,14867,38122,29741,42961,30469,51211,56458,15559,16021,33123,33282,33515,72823,54601,76698,56240,72175,60279,20004,68806,72325,28488,43933,50412,45200,22246,78668,79777,96765,67301,73273,49448,82636,23932,47305,29839,39192,18799,61265,37125,58943,64457,88424,24610,84935,89360,68893,30431,28898,10336,90257,59166,46081,26105,96888,36634,86284,35258,39972,22927,73241,53268,24206,27385,99543,31671,14663,30903,39861,24996,63089,88086,83921,21076,67897,66708,45721,60576,25103,52867,30450,36233,30010,96576,73171,56571,56575,64544,13955,78451,43285,18119,16725,83041,76107,79994,54364,35942,56691,19769,63435,34102,18845,22744,13354,75691,45549,23968,31387,83144,13375,15765,28577,88190,19736,73650,37930,25989,83926,94898,51798,39736,22437,55253,38415,71256,18376,42029,25858,44438,19515,38935,51649,71954,15882,18767,63193,25486,49130,37126,40604,34425,17043,12305,11634,26653,94167,36446,10516,67473,66864,72425,63981,18842,22461,42666,47770,69035,70372,28587,45266,15371,15798,45375,90225,16807,31016,68014,21337,19505,50016,10144,84093,21286,19430,34322,91068,94945,72305,24671,58048,65084,28545,21195,20548,22245,77191,96976,48583,76231,15734,61810,11356,68621,68786,98359,41367,26689,69913,76614,68101,88885,50308,79077,18270,28915,29178,53672,62878,10390,14922,68341,56529,41766,68309,56616,15126,61860,97789,11489,45692,41918,72151,72550,27156,36495,70738,17879,53535,17920,68880,78890,35850,14089,58078,65164,27068,26231,13376,57665,32213,77610,87816,21309,15146,86198,91137,55307,67467,40558,94627,82136,22351,89091,20260,23006,91393,47537,62496,98294,18840,71286,81312,31029,70466,35458,14060,22685,28286,25631,19512,40081,63837,14328,35474,22152,76232,51061,86057,17183) && isnotnull(substring(ca_zip#13, 1, 5))) - : +- *(6) FileScan parquet default.customer_address[ca_zip#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + : +- *(6) FileScan parquet default.customer_address[ca_zip#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ))) +- *(5) Project [ca_zip#12] +- *(5) Filter (count(1)#14 > 10) @@ -38,8 +38,8 @@ TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST], outpu +- *(4) BroadcastHashJoin [ca_address_sk#15], [c_current_addr_sk#16], Inner, BuildRight :- *(4) Project [ca_address_sk#15, ca_zip#13] : +- *(4) Filter isnotnull(ca_address_sk#15) - : +- *(4) FileScan parquet default.customer_address[ca_address_sk#15,ca_zip#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct + : +- *(4) FileScan parquet default.customer_address[ca_address_sk#15,ca_zip#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(3) Project [c_current_addr_sk#16] +- *(3) Filter ((isnotnull(c_preferred_cust_flag#17) && (c_preferred_cust_flag#17 = Y)) && isnotnull(c_current_addr_sk#16)) - +- *(3) FileScan parquet default.customer[c_current_addr_sk#16,c_preferred_cust_flag#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_preferred_cust_flag), EqualTo(c_preferred_cust_flag,Y), IsNotNull(c_current_addr_sk)], ReadSchema: struct \ No newline at end of file + +- *(3) FileScan parquet default.customer[c_current_addr_sk#16,c_preferred_cust_flag#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_preferred_cust_flag), EqualTo(c_preferred_cust_flag,Y), IsNotNull(c_current_addr_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/explain.txt index 41613515c..f6c20a1a4 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/explain.txt @@ -20,27 +20,27 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL : : : : : +- *(6) BroadcastHashJoin [cast(ss_item_sk#19 as bigint), cast(ss_ticket_number#25 as bigint)], [sr_item_sk#26, sr_ticket_number#27], LeftOuter, BuildRight : : : : : :- *(6) Project [ss_sold_date_sk#23, ss_item_sk#19, ss_store_sk#21, ss_promo_sk#17, ss_ticket_number#25, ss_ext_sales_price#13, ss_net_profit#15] : : : : : : +- *(6) Filter (((isnotnull(ss_sold_date_sk#23) && isnotnull(ss_store_sk#21)) && isnotnull(ss_item_sk#19)) && isnotnull(ss_promo_sk#17)) - : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#23,ss_item_sk#19,ss_store_sk#21,ss_promo_sk#17,ss_ticket_number#25,ss_ext_sales_price#13,ss_net_profit#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)], ReadSchema: struct= 11192)) && (d_date#28 <= 11222)) && isnotnull(d_date_sk#24)) - : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#24,d_date#28] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), Is..., ReadSchema: struct + : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#24,d_date#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), Is..., ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(3) Project [s_store_sk#22, s_store_id#12] : : : +- *(3) Filter isnotnull(s_store_sk#22) - : : : +- *(3) FileScan parquet default.store[s_store_sk#22,s_store_id#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct + : : : +- *(3) FileScan parquet default.store[s_store_sk#22,s_store_id#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(4) Project [i_item_sk#20] : : +- *(4) Filter ((isnotnull(i_current_price#29) && (i_current_price#29 > 50.00)) && isnotnull(i_item_sk#20)) - : : +- *(4) FileScan parquet default.item[i_item_sk#20,i_current_price#29] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)], ReadSchema: struct + : : +- *(4) FileScan parquet default.item[i_item_sk#20,i_current_price#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(5) Project [p_promo_sk#18] : +- *(5) Filter ((isnotnull(p_channel_tv#30) && (p_channel_tv#30 = N)) && isnotnull(p_promo_sk#18)) - : +- *(5) FileScan parquet default.promotion[p_promo_sk#18,p_channel_tv#30] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)], ReadSchema: struct + : +- *(5) FileScan parquet default.promotion[p_promo_sk#18,p_channel_tv#30] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)], ReadSchema: struct :- *(14) HashAggregate(keys=[cp_catalog_page_id#31], functions=[sum(UnscaledValue(cs_ext_sales_price#32)), sum(coalesce(cast(cr_return_amount#33 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#34 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#35 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) : +- Exchange hashpartitioning(cp_catalog_page_id#31, 5) : +- *(13) HashAggregate(keys=[cp_catalog_page_id#31], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#32)), partial_sum(coalesce(cast(cr_return_amount#33 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#34 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#35 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) @@ -56,16 +56,16 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL : : : : : +- *(13) BroadcastHashJoin [cs_item_sk#37, cs_order_number#41], [cr_item_sk#42, cr_order_number#43], LeftOuter, BuildRight : : : : : :- *(13) Project [cs_sold_date_sk#40, cs_catalog_page_sk#38, cs_item_sk#37, cs_promo_sk#36, cs_order_number#41, cs_ext_sales_price#32, cs_net_profit#34] : : : : : : +- *(13) Filter (((isnotnull(cs_sold_date_sk#40) && isnotnull(cs_catalog_page_sk#38)) && isnotnull(cs_item_sk#37)) && isnotnull(cs_promo_sk#36)) - : : : : : : +- *(13) FileScan parquet default.catalog_sales[cs_sold_date_sk#40,cs_catalog_page_sk#38,cs_item_sk#37,cs_promo_sk#36,cs_order_number#41,cs_ext_sales_price#32,cs_net_profit#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_p..., ReadSchema: struct + : : : : : +- *(8) FileScan parquet default.catalog_returns[cr_item_sk#42,cr_order_number#43,cr_return_amount#33,cr_net_loss#35] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct : : : : +- ReusedExchange [d_date_sk#24], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(10) Project [cp_catalog_page_sk#39, cp_catalog_page_id#31] : : : +- *(10) Filter isnotnull(cp_catalog_page_sk#39) - : : : +- *(10) FileScan parquet default.catalog_page[cp_catalog_page_sk#39,cp_catalog_page_id#31] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_page], PartitionFilters: [], PushedFilters: [IsNotNull(cp_catalog_page_sk)], ReadSchema: struct + : : : +- *(10) FileScan parquet default.catalog_page[cp_catalog_page_sk#39,cp_catalog_page_id#31] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_page], PartitionFilters: [], PushedFilters: [IsNotNull(cp_catalog_page_sk)], ReadSchema: struct : : +- ReusedExchange [i_item_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [p_promo_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(21) HashAggregate(keys=[web_site_id#44], functions=[sum(UnscaledValue(ws_ext_sales_price#45)), sum(coalesce(cast(wr_return_amt#46 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#47 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#48 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) @@ -83,15 +83,15 @@ TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NUL : : : : +- *(20) BroadcastHashJoin [cast(ws_item_sk#50 as bigint), cast(ws_order_number#54 as bigint)], [wr_item_sk#55, wr_order_number#56], LeftOuter, BuildRight : : : : :- *(20) Project [ws_sold_date_sk#53, ws_item_sk#50, ws_web_site_sk#51, ws_promo_sk#49, ws_order_number#54, ws_ext_sales_price#45, ws_net_profit#47] : : : : : +- *(20) Filter (((isnotnull(ws_sold_date_sk#53) && isnotnull(ws_web_site_sk#51)) && isnotnull(ws_item_sk#50)) && isnotnull(ws_promo_sk#49)) - : : : : : +- *(20) FileScan parquet default.web_sales[ws_sold_date_sk#53,ws_item_sk#50,ws_web_site_sk#51,ws_promo_sk#49,ws_order_number#54,ws_ext_sales_price#45,ws_net_profit#47] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo..., ReadSchema: struct + : : : : +- *(15) FileScan parquet default.web_returns[wr_item_sk#55,wr_order_number#56,wr_return_amt#46,wr_net_loss#48] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)], ReadSchema: struct : : : +- ReusedExchange [d_date_sk#24], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(17) Project [web_site_sk#52, web_site_id#44] : : +- *(17) Filter isnotnull(web_site_sk#52) - : : +- *(17) FileScan parquet default.web_site[web_site_sk#52,web_site_id#44] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct + : : +- *(17) FileScan parquet default.web_site[web_site_sk#52,web_site_id#44] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct : +- ReusedExchange [i_item_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- ReusedExchange [p_promo_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/explain.txt index 93900040c..9951d8388 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/explain.txt @@ -16,15 +16,15 @@ TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST,c_salu : : : : +- *(3) BroadcastHashJoin [cr_returned_date_sk#27], [d_date_sk#28], Inner, BuildRight : : : : :- *(3) Project [cr_returned_date_sk#27, cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] : : : : : +- *(3) Filter ((isnotnull(cr_returned_date_sk#27) && isnotnull(cr_returning_addr_sk#26)) && isnotnull(cr_returning_customer_sk#24)) - : : : : : +- *(3) FileScan parquet default.catalog_returns[cr_returned_date_sk#27,cr_returning_customer_sk#24,cr_returning_addr_sk#26,cr_return_amt_inc_tax#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk), IsNotNull(cr_returning_customer..., ReadSchema: struct + : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#28,d_year#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(2) Project [ca_address_sk#18, ca_state#11] : : : +- *(2) Filter (isnotnull(ca_address_sk#18) && isnotnull(ca_state#11)) - : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#18,ca_state#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)], ReadSchema: struct + : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#18,ca_state#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) : : +- *(8) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#23) : : +- *(8) HashAggregate(keys=[ctr_state#21], functions=[avg(ctr_total_return#16)]) @@ -39,14 +39,14 @@ TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST,c_salu : : : +- *(6) BroadcastHashJoin [cr_returned_date_sk#27], [d_date_sk#28], Inner, BuildRight : : : :- *(6) Project [cr_returned_date_sk#27, cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] : : : : +- *(6) Filter (isnotnull(cr_returned_date_sk#27) && isnotnull(cr_returning_addr_sk#26)) - : : : : +- *(6) FileScan parquet default.catalog_returns[cr_returned_date_sk#27,cr_returning_customer_sk#24,cr_returning_addr_sk#26,cr_return_amt_inc_tax#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk)], ReadSchema: struct= 62.00)) && (cast(i_current_price#3 as decimal(12,2)) <= 92.00)) && i_manufact_id#9 IN (129,270,821,423)) && isnotnull(i_item_sk#4)) - : : : +- *(4) FileScan parquet default.item[i_item_sk#4,i_item_id#1,i_item_desc#2,i_current_price#3,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,62.00), In(i_manufact_id, [129,27..., ReadSchema: struct= 100)) && (inv_quantity_on_hand#10 <= 500)) && isnotnull(inv_item_sk#8)) && isnotnull(inv_date_sk#6)) - : : +- *(1) FileScan parquet default.inventory[inv_date_sk#6,inv_item_sk#8,inv_quantity_on_hand#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(i..., ReadSchema: struct + : : +- *(1) FileScan parquet default.inventory[inv_date_sk#6,inv_item_sk#8,inv_quantity_on_hand#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(i..., ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(2) Project [d_date_sk#7] : +- *(2) Filter (((isnotnull(d_date#11) && (d_date#11 >= 11102)) && (d_date#11 <= 11162)) && isnotnull(d_date_sk#7)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_date#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-05-25), LessThanOrEqual(d_date,2000-07-24), Is..., ReadSchema: struct + : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_date#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-05-25), LessThanOrEqual(d_date,2000-07-24), Is..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(3) Project [ss_item_sk#5] +- *(3) Filter isnotnull(ss_item_sk#5) - +- *(3) FileScan parquet default.store_sales[ss_item_sk#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk)], ReadSchema: struct \ No newline at end of file + +- *(3) FileScan parquet default.store_sales[ss_item_sk#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/explain.txt index 8b674b50e..06ae0bc8a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/explain.txt @@ -13,25 +13,25 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,sr_item_qty# : : : +- *(5) BroadcastHashJoin [sr_item_sk#15], [cast(i_item_sk#16 as bigint)], Inner, BuildRight : : : :- *(5) Project [sr_returned_date_sk#13, sr_item_sk#15, sr_return_quantity#12] : : : : +- *(5) Filter (isnotnull(sr_item_sk#15) && isnotnull(sr_returned_date_sk#13)) - : : : : +- *(5) FileScan parquet default.store_returns[sr_returned_date_sk#13,sr_item_sk#15,sr_return_quantity#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)], ReadSchema: struct + : : : : +- *(5) FileScan parquet default.store_returns[sr_returned_date_sk#13,sr_item_sk#15,sr_return_quantity#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(1) Project [i_item_sk#16, i_item_id#11] : : : +- *(1) Filter (isnotnull(i_item_sk#16) && isnotnull(i_item_id#11)) - : : : +- *(1) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(4) Project [d_date_sk#14] : : +- *(4) BroadcastHashJoin [d_date#17], [d_date#17#18], LeftSemi, BuildRight : : :- *(4) Project [d_date_sk#14, d_date#17] : : : +- *(4) Filter isnotnull(d_date_sk#14) - : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) : : +- *(3) Project [d_date#17 AS d_date#17#18] : : +- *(3) BroadcastHashJoin [d_week_seq#19], [d_week_seq#19#20], LeftSemi, BuildRight - : : :- *(3) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + : : :- *(3) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [d_week_seq#19 AS d_week_seq#19#20] : : +- *(2) Filter cast(d_date#17 as string) IN (2000-06-30,2000-09-27,2000-11-17) - : : +- *(2) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + : : +- *(2) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) : +- *(11) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(cr_return_quantity#21 as bigint))]) : +- Exchange hashpartitioning(i_item_id#11, 5) @@ -42,17 +42,17 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,sr_item_qty# : : +- *(10) BroadcastHashJoin [cr_item_sk#23], [i_item_sk#16], Inner, BuildRight : : :- *(10) Project [cr_returned_date_sk#22, cr_item_sk#23, cr_return_quantity#21] : : : +- *(10) Filter (isnotnull(cr_item_sk#23) && isnotnull(cr_returned_date_sk#22)) - : : : +- *(10) FileScan parquet default.catalog_returns[cr_returned_date_sk#22,cr_item_sk#23,cr_return_quantity#21] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_returned_date_sk)], ReadSchema: struct + : : : +- *(10) FileScan parquet default.catalog_returns[cr_returned_date_sk#22,cr_item_sk#23,cr_return_quantity#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_returned_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(6) Project [i_item_sk#16, i_item_id#11] : : +- *(6) Filter (isnotnull(i_item_sk#16) && isnotnull(i_item_id#11)) - : : +- *(6) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct + : : +- *(6) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(9) Project [d_date_sk#14] : +- *(9) BroadcastHashJoin [d_date#17], [d_date#17#24], LeftSemi, BuildRight : :- *(9) Project [d_date_sk#14, d_date#17] : : +- *(9) Filter isnotnull(d_date_sk#14) - : : +- *(9) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(9) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct : +- ReusedExchange [d_date#17#24], BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) +- *(17) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(wr_return_quantity#25 as bigint))]) @@ -64,6 +64,6 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,sr_item_qty# : +- *(16) BroadcastHashJoin [wr_item_sk#27], [cast(i_item_sk#16 as bigint)], Inner, BuildRight : :- *(16) Project [wr_returned_date_sk#26, wr_item_sk#27, wr_return_quantity#25] : : +- *(16) Filter (isnotnull(wr_item_sk#27) && isnotnull(wr_returned_date_sk#26)) - : : +- *(16) FileScan parquet default.web_returns[wr_returned_date_sk#26,wr_item_sk#27,wr_return_quantity#25] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_returned_date_sk)], ReadSchema: struct + : : +- *(16) FileScan parquet default.web_returns[wr_returned_date_sk#26,wr_item_sk#27,wr_return_quantity#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_returned_date_sk)], ReadSchema: struct : +- ReusedExchange [i_item_sk#16, i_item_id#11], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- ReusedExchange [d_date_sk#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/explain.txt index c0cde7a5d..062eebe04 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/explain.txt @@ -12,24 +12,24 @@ TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST], outp : : : : +- *(6) BroadcastHashJoin [c_current_addr_sk#13], [ca_address_sk#14], Inner, BuildRight : : : : :- *(6) Project [c_customer_id#1, c_current_cdemo_sk#12, c_current_hdemo_sk#10, c_current_addr_sk#13, c_first_name#5, c_last_name#4] : : : : : +- *(6) Filter ((isnotnull(c_current_addr_sk#13) && isnotnull(c_current_cdemo_sk#12)) && isnotnull(c_current_hdemo_sk#10)) - : : : : : +- *(6) FileScan parquet default.customer[c_customer_id#1,c_current_cdemo_sk#12,c_current_hdemo_sk#10,c_current_addr_sk#13,c_first_name#5,c_last_name#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)], ReadSchema: struct + : : : : +- *(1) FileScan parquet default.customer_address[ca_address_sk#14,ca_city#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_city), EqualTo(ca_city,Edgewood), IsNotNull(ca_address_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(2) Project [cd_demo_sk#6] : : : +- *(2) Filter isnotnull(cd_demo_sk#6) - : : : +- *(2) FileScan parquet default.customer_demographics[cd_demo_sk#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + : : : +- *(2) FileScan parquet default.customer_demographics[cd_demo_sk#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(3) Project [hd_demo_sk#11, hd_income_band_sk#8] : : +- *(3) Filter (isnotnull(hd_demo_sk#11) && isnotnull(hd_income_band_sk#8)) - : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#11,hd_income_band_sk#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)], ReadSchema: struct + : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#11,hd_income_band_sk#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(4) Project [ib_income_band_sk#9] : +- *(4) Filter ((((isnotnull(ib_lower_bound#16) && isnotnull(ib_upper_bound#17)) && (ib_lower_bound#16 >= 38128)) && (ib_upper_bound#17 <= 88128)) && isnotnull(ib_income_band_sk#9)) - : +- *(4) FileScan parquet default.income_band[ib_income_band_sk#9,ib_lower_bound#16,ib_upper_bound#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/income_band], PartitionFilters: [], PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), ..., ReadSchema: struct + : +- *(4) FileScan parquet default.income_band[ib_income_band_sk#9,ib_lower_bound#16,ib_upper_bound#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/income_band], PartitionFilters: [], PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), ..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])) +- *(5) Project [sr_cdemo_sk#7] +- *(5) Filter isnotnull(sr_cdemo_sk#7) - +- *(5) FileScan parquet default.store_returns[sr_cdemo_sk#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_cdemo_sk)], ReadSchema: struct \ No newline at end of file + +- *(5) FileScan parquet default.store_returns[sr_cdemo_sk#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_cdemo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/explain.txt index 106b129ea..51e941c93 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/explain.txt @@ -19,32 +19,32 @@ TakeOrderedAndProject(limit=100, orderBy=[substring(r_reason_desc, 1, 20)#1 ASC : : : : : : +- *(8) BroadcastHashJoin [cast(ws_item_sk#29 as bigint), cast(ws_order_number#30 as bigint)], [wr_item_sk#31, wr_order_number#32], Inner, BuildRight : : : : : : :- *(8) Project [ws_sold_date_sk#12, ws_item_sk#29, ws_web_page_sk#27, ws_order_number#30, ws_quantity#7, ws_sales_price#26, ws_net_profit#17] : : : : : : : +- *(8) Filter (((isnotnull(ws_item_sk#29) && isnotnull(ws_order_number#30)) && isnotnull(ws_web_page_sk#27)) && isnotnull(ws_sold_date_sk#12)) - : : : : : : : +- *(8) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#29,ws_web_page_sk#27,ws_order_number#30,ws_quantity#7,ws_sales_price#26,ws_net_profit#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), IsNotNull(ws_sold_..., ReadSchema: struct + : : : : : +- *(2) FileScan parquet default.web_page[wp_web_page_sk#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_page], PartitionFilters: [], PushedFilters: [IsNotNull(wp_web_page_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(3) Project [cd_demo_sk#25, cd_marital_status#19, cd_education_status#20] : : : : +- *(3) Filter ((isnotnull(cd_demo_sk#25) && isnotnull(cd_education_status#20)) && isnotnull(cd_marital_status#19)) - : : : : +- *(3) FileScan parquet default.customer_demographics[cd_demo_sk#25,cd_marital_status#19,cd_education_status#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_education_status), IsNotNull(cd_marital_status)], ReadSchema: struct + : : : : +- *(3) FileScan parquet default.customer_demographics[cd_demo_sk#25,cd_marital_status#19,cd_education_status#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_education_status), IsNotNull(cd_marital_status)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint), input[1, string, true], input[2, string, true])) : : : +- *(4) Project [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] : : : +- *(4) Filter ((isnotnull(cd_marital_status#22) && isnotnull(cd_demo_sk#21)) && isnotnull(cd_education_status#23)) - : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#21,cd_marital_status#22,cd_education_status#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_marital_status), IsNotNull(cd_demo_sk), IsNotNull(cd_education_status)], ReadSchema: struct + : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#21,cd_marital_status#22,cd_education_status#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_marital_status), IsNotNull(cd_demo_sk), IsNotNull(cd_education_status)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(5) Project [ca_address_sk#15, ca_state#16] : : +- *(5) Filter ((isnotnull(ca_country#33) && (ca_country#33 = United States)) && isnotnull(ca_address_sk#15)) - : : +- *(5) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#16,ca_country#33] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct + : : +- *(5) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#16,ca_country#33] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(6) Project [d_date_sk#13] : +- *(6) Filter ((isnotnull(d_year#34) && (d_year#34 = 2000)) && isnotnull(d_date_sk#13)) - : +- *(6) FileScan parquet default.date_dim[d_date_sk#13,d_year#34] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct + : +- *(6) FileScan parquet default.date_dim[d_date_sk#13,d_year#34] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(7) Project [r_reason_sk#11, r_reason_desc#6] +- *(7) Filter isnotnull(r_reason_sk#11) - +- *(7) FileScan parquet default.reason[r_reason_sk#11,r_reason_desc#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk)], ReadSchema: struct \ No newline at end of file + +- *(7) FileScan parquet default.reason[r_reason_sk#11,r_reason_desc#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/explain.txt index 61285df23..a72090405 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/explain.txt @@ -14,12 +14,12 @@ TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WH : +- *(3) BroadcastHashJoin [ws_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight : :- *(3) Project [ws_sold_date_sk#17, ws_item_sk#15, ws_net_paid#10] : : +- *(3) Filter (isnotnull(ws_sold_date_sk#17) && isnotnull(ws_item_sk#15)) - : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#17,ws_item_sk#15,ws_net_paid#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#17,ws_item_sk#15,ws_net_paid#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(1) Project [d_date_sk#18] : +- *(1) Filter (((isnotnull(d_month_seq#19) && (d_month_seq#19 >= 1200)) && (d_month_seq#19 <= 1211)) && isnotnull(d_date_sk#18)) - : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_month_seq#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_month_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(2) Project [i_item_sk#16, i_class#14, i_category#13] +- *(2) Filter isnotnull(i_item_sk#16) - +- *(2) FileScan parquet default.item[i_item_sk#16,i_class#14,i_category#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file + +- *(2) FileScan parquet default.item[i_item_sk#16,i_class#14,i_category#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/explain.txt index c47ee7616..e3689bd7b 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/explain.txt @@ -17,15 +17,15 @@ : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight : : : :- *(3) Project [ss_sold_date_sk#12, ss_customer_sk#10] : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#12) && isnotnull(ss_customer_sk#10)) - : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_customer_sk#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct + : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_customer_sk#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(1) Project [d_date_sk#13, d_date#3] : : : +- *(1) Filter (((isnotnull(d_month_seq#14) && (d_month_seq#14 >= 1200)) && (d_month_seq#14 <= 1211)) && isnotnull(d_date_sk#13)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#13,d_date#3,d_month_seq#14] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#13,d_date#3,d_month_seq#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [c_customer_sk#11, c_first_name#2, c_last_name#1] : : +- *(2) Filter isnotnull(c_customer_sk#11) - : : +- *(2) FileScan parquet default.customer[c_customer_sk#11,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.customer[c_customer_sk#11,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) : +- *(7) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) : +- Exchange hashpartitioning(c_last_name#7, c_first_name#8, d_date#9, 5) @@ -36,7 +36,7 @@ : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight : : :- *(6) Project [cs_sold_date_sk#17, cs_bill_customer_sk#15] : : : +- *(6) Filter (isnotnull(cs_sold_date_sk#17) && isnotnull(cs_bill_customer_sk#15)) - : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#17,cs_bill_customer_sk#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct + : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#17,cs_bill_customer_sk#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct : : +- ReusedExchange [d_date_sk#18, d_date#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- ReusedExchange [c_customer_sk#16, c_first_name#8, c_last_name#7], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) @@ -49,6 +49,6 @@ : +- *(10) BroadcastHashJoin [ws_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight : :- *(10) Project [ws_sold_date_sk#21, ws_bill_customer_sk#19] : : +- *(10) Filter (isnotnull(ws_sold_date_sk#21) && isnotnull(ws_bill_customer_sk#19)) - : : +- *(10) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct + : : +- *(10) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct : +- ReusedExchange [d_date_sk#22, d_date#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- ReusedExchange [c_customer_sk#20, c_first_name#5, c_last_name#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/explain.txt index d729f0b3f..7eea370f8 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/explain.txt @@ -17,19 +17,19 @@ BroadcastNestedLoopJoin BuildRight, Inner : : : : : : : : : +- *(4) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight : : : : : : : : : :- *(4) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] : : : : : : : : : : +- *(4) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : : : : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : : : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : : : +- *(1) Project [hd_demo_sk#6] : : : : : : : : : +- *(1) Filter (((((hd_dep_count#7 = 4) && (hd_vehicle_count#8 <= 6)) || ((hd_dep_count#7 = 2) && (hd_vehicle_count#8 <= 4))) || ((hd_dep_count#7 = 0) && (hd_vehicle_count#8 <= 2))) && isnotnull(hd_demo_sk#6)) -: : : : : : : : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#6,hd_dep_count#7,hd_vehicle_count#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [Or(Or(And(EqualTo(hd_dep_count,4),LessThanOrEqual(hd_vehicle_count,6)),And(EqualTo(hd_dep_count,..., ReadSchema: struct +: : : : : : : : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#6,hd_dep_count#7,hd_vehicle_count#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [Or(Or(And(EqualTo(hd_dep_count,4),LessThanOrEqual(hd_vehicle_count,6)),And(EqualTo(hd_dep_count,..., ReadSchema: struct : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : : +- *(2) Project [t_time_sk#4] : : : : : : : : +- *(2) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 8)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) -: : : : : : : : +- *(2) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,8), GreaterThanOrEqual(t_minute,30), IsNo..., ReadSchema: struct +: : : : : : : : +- *(2) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,8), GreaterThanOrEqual(t_minute,30), IsNo..., ReadSchema: struct : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : +- *(3) Project [s_store_sk#2] : : : : : : : +- *(3) Filter ((isnotnull(s_store_name#11) && (s_store_name#11 = ese)) && isnotnull(s_store_sk#2)) -: : : : : : : +- *(3) FileScan parquet default.store[s_store_sk#2,s_store_name#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)], ReadSchema: struct +: : : : : : : +- *(3) FileScan parquet default.store[s_store_sk#2,s_store_name#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)], ReadSchema: struct : : : : : : +- BroadcastExchange IdentityBroadcastMode : : : : : : +- *(10) HashAggregate(keys=[], functions=[count(1)]) : : : : : : +- Exchange SinglePartition @@ -42,12 +42,12 @@ BroadcastNestedLoopJoin BuildRight, Inner : : : : : : : : +- *(9) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight : : : : : : : : :- *(9) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] : : : : : : : : : +- *(9) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : : : : : : +- *(9) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : : : : : +- *(9) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct : : : : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : +- *(7) Project [t_time_sk#4] : : : : : : : +- *(7) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 9)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) -: : : : : : : +- *(7) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), LessThan(t_minute,30), IsNotNull(t_ti..., ReadSchema: struct +: : : : : : : +- *(7) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), LessThan(t_minute,30), IsNotNull(t_ti..., ReadSchema: struct : : : : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- BroadcastExchange IdentityBroadcastMode : : : : : +- *(15) HashAggregate(keys=[], functions=[count(1)]) @@ -61,12 +61,12 @@ BroadcastNestedLoopJoin BuildRight, Inner : : : : : : : +- *(14) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight : : : : : : : :- *(14) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] : : : : : : : : +- *(14) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : : : : : +- *(14) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : : : : +- *(14) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct : : : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : +- *(12) Project [t_time_sk#4] : : : : : : +- *(12) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 9)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) -: : : : : : +- *(12) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), GreaterThanOrEqual(t_minute,30), IsNo..., ReadSchema: struct +: : : : : : +- *(12) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), GreaterThanOrEqual(t_minute,30), IsNo..., ReadSchema: struct : : : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- BroadcastExchange IdentityBroadcastMode : : : : +- *(20) HashAggregate(keys=[], functions=[count(1)]) @@ -80,12 +80,12 @@ BroadcastNestedLoopJoin BuildRight, Inner : : : : : : +- *(19) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight : : : : : : :- *(19) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] : : : : : : : +- *(19) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : : : : +- *(19) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : : : +- *(19) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct : : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : +- *(17) Project [t_time_sk#4] : : : : : +- *(17) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 10)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) -: : : : : +- *(17) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct +: : : : : +- *(17) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct : : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- BroadcastExchange IdentityBroadcastMode : : : +- *(25) HashAggregate(keys=[], functions=[count(1)]) @@ -99,12 +99,12 @@ BroadcastNestedLoopJoin BuildRight, Inner : : : : : +- *(24) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight : : : : : :- *(24) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] : : : : : : +- *(24) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : : : +- *(24) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : : +- *(24) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : +- *(22) Project [t_time_sk#4] : : : : +- *(22) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 10)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) -: : : : +- *(22) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct +: : : : +- *(22) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- BroadcastExchange IdentityBroadcastMode : : +- *(30) HashAggregate(keys=[], functions=[count(1)]) @@ -118,12 +118,12 @@ BroadcastNestedLoopJoin BuildRight, Inner : : : : +- *(29) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight : : : : :- *(29) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] : : : : : +- *(29) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : : +- *(29) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : : +- *(29) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(27) Project [t_time_sk#4] : : : +- *(27) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 11)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) -: : : +- *(27) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct +: : : +- *(27) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- BroadcastExchange IdentityBroadcastMode : +- *(35) HashAggregate(keys=[], functions=[count(1)]) @@ -137,12 +137,12 @@ BroadcastNestedLoopJoin BuildRight, Inner : : : +- *(34) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight : : : :- *(34) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] : : : : +- *(34) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : +- *(34) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct +: : : : +- *(34) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(32) Project [t_time_sk#4] : : +- *(32) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 11)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) -: : +- *(32) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct +: : +- *(32) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- BroadcastExchange IdentityBroadcastMode +- *(40) HashAggregate(keys=[], functions=[count(1)]) @@ -156,10 +156,10 @@ BroadcastNestedLoopJoin BuildRight, Inner : : +- *(39) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight : : :- *(39) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] : : : +- *(39) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) - : : : +- *(39) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : : +- *(39) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(37) Project [t_time_sk#4] : +- *(37) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 12)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) - : +- *(37) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,12), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct + : +- *(37) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,12), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/explain.txt index fb0d68dce..f34115720 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/explain.txt @@ -16,16 +16,16 @@ TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast( : : +- *(4) BroadcastHashJoin [i_item_sk#15], [ss_item_sk#16], Inner, BuildRight : : :- *(4) Project [i_item_sk#15, i_brand#6, i_class#5, i_category#4] : : : +- *(4) Filter (((i_category#4 IN (Books,Electronics,Sports) && i_class#5 IN (computers,stereo,football)) || (i_category#4 IN (Men,Jewelry,Women) && i_class#5 IN (shirts,birdal,dresses))) && isnotnull(i_item_sk#15)) - : : : +- *(4) FileScan parquet default.item[i_item_sk#15,i_brand#6,i_class#5,i_category#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [Or(And(In(i_category, [Books,Electronics,Sports]),In(i_class, [computers,stereo,football])),And(..., ReadSchema: struct + : : : +- *(4) FileScan parquet default.item[i_item_sk#15,i_brand#6,i_class#5,i_category#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [Or(And(In(i_category, [Books,Electronics,Sports]),In(i_class, [computers,stereo,football])),And(..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) : : +- *(1) Project [ss_sold_date_sk#13, ss_item_sk#16, ss_store_sk#11, ss_sales_price#10] : : +- *(1) Filter ((isnotnull(ss_item_sk#16) && isnotnull(ss_sold_date_sk#13)) && isnotnull(ss_store_sk#11)) - : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_item_sk#16,ss_store_sk#11,ss_sales_price#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_item_sk#16,ss_store_sk#11,ss_sales_price#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(2) Project [d_date_sk#14, d_moy#8] : +- *(2) Filter ((isnotnull(d_year#17) && (d_year#17 = 1999)) && isnotnull(d_date_sk#14)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#14,d_year#17,d_moy#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct + : +- *(2) FileScan parquet default.date_dim[d_date_sk#14,d_year#17,d_moy#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(3) Project [s_store_sk#12, s_store_name#3, s_company_name#7] +- *(3) Filter isnotnull(s_store_sk#12) - +- *(3) FileScan parquet default.store[s_store_sk#12,s_store_name#3,s_company_name#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file + +- *(3) FileScan parquet default.store[s_store_sk#12,s_store_name#3,s_company_name#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/explain.txt index 0ed6b1727..e822c1ed0 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/explain.txt @@ -6,104 +6,104 @@ : : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) : : +- *(1) Project : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct : :- Subquery subquery1151 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- *(1) Project [ss_ext_discount_amt#7] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct : :- Subquery subquery1152 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) : : +- *(1) Project [ss_net_paid#8] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct : :- Subquery subquery1154 : : +- *(2) HashAggregate(keys=[], functions=[count(1)]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) : : +- *(1) Project : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct : :- Subquery subquery1155 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- *(1) Project [ss_ext_discount_amt#7] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct : :- Subquery subquery1156 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) : : +- *(1) Project [ss_net_paid#8] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct : :- Subquery subquery1158 : : +- *(2) HashAggregate(keys=[], functions=[count(1)]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) : : +- *(1) Project : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct : :- Subquery subquery1159 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- *(1) Project [ss_ext_discount_amt#7] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct : :- Subquery subquery1160 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) : : +- *(1) Project [ss_net_paid#8] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct : :- Subquery subquery1162 : : +- *(2) HashAggregate(keys=[], functions=[count(1)]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) : : +- *(1) Project : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct : :- Subquery subquery1163 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- *(1) Project [ss_ext_discount_amt#7] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct : :- Subquery subquery1164 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) : : +- *(1) Project [ss_net_paid#8] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct : :- Subquery subquery1166 : : +- *(2) HashAggregate(keys=[], functions=[count(1)]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) : : +- *(1) Project : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct : :- Subquery subquery1167 : : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- Exchange SinglePartition : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) : : +- *(1) Project [ss_ext_discount_amt#7] : : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct +: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct : +- Subquery subquery1168 : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) : +- Exchange SinglePartition : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) : +- *(1) Project [ss_net_paid#8] : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) -: +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct +: +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct +- *(1) Filter (isnotnull(r_reason_sk#9) && (r_reason_sk#9 = 1)) - +- *(1) FileScan parquet default.reason[r_reason_sk#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk), EqualTo(r_reason_sk,1)], ReadSchema: struct \ No newline at end of file + +- *(1) FileScan parquet default.reason[r_reason_sk#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk), EqualTo(r_reason_sk,1)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/explain.txt index 0d0c65f33..909e73efa 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/explain.txt @@ -13,19 +13,19 @@ TakeOrderedAndProject(limit=100, orderBy=[am_pm_ratio#1 ASC NULLS FIRST], output : : : +- *(4) BroadcastHashJoin [ws_ship_hdemo_sk#8], [hd_demo_sk#9], Inner, BuildRight : : : :- *(4) Project [ws_sold_time_sk#6, ws_ship_hdemo_sk#8, ws_web_page_sk#4] : : : : +- *(4) Filter ((isnotnull(ws_ship_hdemo_sk#8) && isnotnull(ws_sold_time_sk#6)) && isnotnull(ws_web_page_sk#4)) - : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_time_sk#6,ws_ship_hdemo_sk#8,ws_web_page_sk#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct + : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_time_sk#6,ws_ship_hdemo_sk#8,ws_web_page_sk#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(1) Project [hd_demo_sk#9] : : : +- *(1) Filter ((isnotnull(hd_dep_count#10) && (hd_dep_count#10 = 6)) && isnotnull(hd_demo_sk#9)) - : : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#9,hd_dep_count#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,6), IsNotNull(hd_demo_sk)], ReadSchema: struct + : : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#9,hd_dep_count#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,6), IsNotNull(hd_demo_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [t_time_sk#7] : : +- *(2) Filter (((isnotnull(t_hour#11) && (t_hour#11 >= 8)) && (t_hour#11 <= 9)) && isnotnull(t_time_sk#7)) - : : +- *(2) FileScan parquet default.time_dim[t_time_sk#7,t_hour#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,8), LessThanOrEqual(t_hour,9), IsNotNull(t_time_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.time_dim[t_time_sk#7,t_hour#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,8), LessThanOrEqual(t_hour,9), IsNotNull(t_time_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(3) Project [wp_web_page_sk#5] : +- *(3) Filter (((isnotnull(wp_char_count#12) && (wp_char_count#12 >= 5000)) && (wp_char_count#12 <= 5200)) && isnotnull(wp_web_page_sk#5)) - : +- *(3) FileScan parquet default.web_page[wp_web_page_sk#5,wp_char_count#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_page], PartitionFilters: [], PushedFilters: [IsNotNull(wp_char_count), GreaterThanOrEqual(wp_char_count,5000), LessThanOrEqual(wp_char_count,..., ReadSchema: struct + : +- *(3) FileScan parquet default.web_page[wp_web_page_sk#5,wp_char_count#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_page], PartitionFilters: [], PushedFilters: [IsNotNull(wp_char_count), GreaterThanOrEqual(wp_char_count,5000), LessThanOrEqual(wp_char_count,..., ReadSchema: struct +- BroadcastExchange IdentityBroadcastMode +- *(10) HashAggregate(keys=[], functions=[count(1)]) +- Exchange SinglePartition @@ -38,10 +38,10 @@ TakeOrderedAndProject(limit=100, orderBy=[am_pm_ratio#1 ASC NULLS FIRST], output : : +- *(9) BroadcastHashJoin [ws_ship_hdemo_sk#8], [hd_demo_sk#9], Inner, BuildRight : : :- *(9) Project [ws_sold_time_sk#6, ws_ship_hdemo_sk#8, ws_web_page_sk#4] : : : +- *(9) Filter ((isnotnull(ws_ship_hdemo_sk#8) && isnotnull(ws_sold_time_sk#6)) && isnotnull(ws_web_page_sk#4)) - : : : +- *(9) FileScan parquet default.web_sales[ws_sold_time_sk#6,ws_ship_hdemo_sk#8,ws_web_page_sk#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct + : : : +- *(9) FileScan parquet default.web_sales[ws_sold_time_sk#6,ws_ship_hdemo_sk#8,ws_web_page_sk#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct : : +- ReusedExchange [hd_demo_sk#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(7) Project [t_time_sk#7] : +- *(7) Filter (((isnotnull(t_hour#11) && (t_hour#11 >= 19)) && (t_hour#11 <= 20)) && isnotnull(t_time_sk#7)) - : +- *(7) FileScan parquet default.time_dim[t_time_sk#7,t_hour#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,19), LessThanOrEqual(t_hour,20), IsNotNull(t_time_sk)], ReadSchema: struct + : +- *(7) FileScan parquet default.time_dim[t_time_sk#7,t_hour#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,19), LessThanOrEqual(t_hour,20), IsNotNull(t_time_sk)], ReadSchema: struct +- ReusedExchange [wp_web_page_sk#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/explain.txt index fd5605d2e..66e681cea 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/explain.txt @@ -18,28 +18,28 @@ : : : : : +- *(7) BroadcastHashJoin [cc_call_center_sk#18], [cr_call_center_sk#19], Inner, BuildRight : : : : : :- *(7) Project [cc_call_center_sk#18, cc_call_center_id#2, cc_name#3, cc_manager#4] : : : : : : +- *(7) Filter isnotnull(cc_call_center_sk#18) - : : : : : : +- *(7) FileScan parquet default.call_center[cc_call_center_sk#18,cc_call_center_id#2,cc_name#3,cc_manager#4] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk)], ReadSchema: struct + : : : : : : +- *(7) FileScan parquet default.call_center[cc_call_center_sk#18,cc_call_center_id#2,cc_name#3,cc_manager#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk)], ReadSchema: struct : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) : : : : : +- *(1) Project [cr_returned_date_sk#16, cr_returning_customer_sk#14, cr_call_center_sk#19, cr_net_loss#7] : : : : : +- *(1) Filter ((isnotnull(cr_call_center_sk#19) && isnotnull(cr_returned_date_sk#16)) && isnotnull(cr_returning_customer_sk#14)) - : : : : : +- *(1) FileScan parquet default.catalog_returns[cr_returned_date_sk#16,cr_returning_customer_sk#14,cr_call_center_sk#19,cr_net_loss#7] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_customer_sk)], ReadSchema: struct + : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#17,d_year#20,d_moy#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(3) Project [c_customer_sk#15, c_current_cdemo_sk#10, c_current_hdemo_sk#8, c_current_addr_sk#12] : : : +- *(3) Filter (((isnotnull(c_customer_sk#15) && isnotnull(c_current_addr_sk#12)) && isnotnull(c_current_cdemo_sk#10)) && isnotnull(c_current_hdemo_sk#8)) - : : : +- *(3) FileScan parquet default.customer[c_customer_sk#15,c_current_cdemo_sk#10,c_current_hdemo_sk#8,c_current_addr_sk#12] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull..., ReadSchema: struct + : : : +- *(3) FileScan parquet default.customer[c_customer_sk#15,c_current_cdemo_sk#10,c_current_hdemo_sk#8,c_current_addr_sk#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(4) Project [ca_address_sk#13] : : +- *(4) Filter ((isnotnull(ca_gmt_offset#22) && (ca_gmt_offset#22 = -7.00)) && isnotnull(ca_address_sk#13)) - : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#13,ca_gmt_offset#22] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)], ReadSchema: struct + : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#13,ca_gmt_offset#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(5) Project [cd_demo_sk#11, cd_marital_status#5, cd_education_status#6] : +- *(5) Filter ((((cd_marital_status#5 = M) && (cd_education_status#6 = Unknown)) || ((cd_marital_status#5 = W) && (cd_education_status#6 = Advanced Degree))) && isnotnull(cd_demo_sk#11)) - : +- *(5) FileScan parquet default.customer_demographics[cd_demo_sk#11,cd_marital_status#5,cd_education_status#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_demographics], PartitionFilters: [], PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital..., ReadSchema: struct + : +- *(5) FileScan parquet default.customer_demographics[cd_demo_sk#11,cd_marital_status#5,cd_education_status#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(6) Project [hd_demo_sk#9] +- *(6) Filter ((isnotnull(hd_buy_potential#23) && StartsWith(hd_buy_potential#23, Unknown)) && isnotnull(hd_demo_sk#9)) - +- *(6) FileScan parquet default.household_demographics[hd_demo_sk#9,hd_buy_potential#23] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)], ReadSchema: struct \ No newline at end of file + +- *(6) FileScan parquet default.household_demographics[hd_demo_sk#9,hd_buy_potential#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/explain.txt index c27934561..8a696f0e5 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/explain.txt @@ -11,11 +11,11 @@ TakeOrderedAndProject(limit=100, orderBy=[Excess Discount Amount #1 ASC NULLS FI : : +- *(6) BroadcastHashJoin [ws_item_sk#6], [i_item_sk#5], Inner, BuildRight : : :- *(6) Project [ws_sold_date_sk#3, ws_item_sk#6, ws_ext_discount_amt#2] : : : +- *(6) Filter ((isnotnull(ws_item_sk#6) && isnotnull(ws_ext_discount_amt#2)) && isnotnull(ws_sold_date_sk#3)) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#3,ws_item_sk#6,ws_ext_discount_amt#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_ext_discount_amt), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#3,ws_item_sk#6,ws_ext_discount_amt#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_ext_discount_amt), IsNotNull(ws_sold_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(1) Project [i_item_sk#5] : : +- *(1) Filter ((isnotnull(i_manufact_id#9) && (i_manufact_id#9 = 350)) && isnotnull(i_item_sk#5)) - : : +- *(1) FileScan parquet default.item[i_item_sk#5,i_manufact_id#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,350), IsNotNull(i_item_sk)], ReadSchema: struct + : : +- *(1) FileScan parquet default.item[i_item_sk#5,i_manufact_id#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,350), IsNotNull(i_item_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) : +- *(4) Filter isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#8) : +- *(4) HashAggregate(keys=[ws_item_sk#6], functions=[avg(UnscaledValue(ws_ext_discount_amt#2))]) @@ -25,9 +25,9 @@ TakeOrderedAndProject(limit=100, orderBy=[Excess Discount Amount #1 ASC NULLS FI : +- *(3) BroadcastHashJoin [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight : :- *(3) Project [ws_sold_date_sk#3, ws_item_sk#6, ws_ext_discount_amt#2] : : +- *(3) Filter (isnotnull(ws_sold_date_sk#3) && isnotnull(ws_item_sk#6)) - : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#3,ws_item_sk#6,ws_ext_discount_amt#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct + : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#3,ws_item_sk#6,ws_ext_discount_amt#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(2) Project [d_date_sk#4] : +- *(2) Filter (((isnotnull(d_date#10) && (cast(d_date#10 as string) >= 2000-01-27)) && (d_date#10 <= 11073)) && isnotnull(d_date_sk#4)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#4,d_date#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)], ReadSchema: struct + : +- *(2) FileScan parquet default.date_dim[d_date_sk#4,d_date#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)], ReadSchema: struct +- ReusedExchange [d_date_sk#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/explain.txt index 5cbc9a6c1..9da0e4c24 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/explain.txt @@ -7,12 +7,12 @@ TakeOrderedAndProject(limit=100, orderBy=[sumsales#1 ASC NULLS FIRST,ss_customer +- *(3) BroadcastHashJoin [sr_reason_sk#7], [cast(r_reason_sk#8 as bigint)], Inner, BuildRight :- *(3) Project [ss_customer_sk#2, ss_quantity#5, ss_sales_price#6, sr_reason_sk#7, sr_return_quantity#4] : +- *(3) BroadcastHashJoin [cast(ss_item_sk#9 as bigint), cast(ss_ticket_number#10 as bigint)], [sr_item_sk#11, sr_ticket_number#12], Inner, BuildRight - : :- *(3) FileScan parquet default.store_sales[ss_item_sk#9,ss_customer_sk#2,ss_ticket_number#10,ss_quantity#5,ss_sales_price#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + : +- *(1) FileScan parquet default.store_returns[sr_item_sk#11,sr_reason_sk#7,sr_ticket_number#12,sr_return_quantity#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_reason_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(2) Project [r_reason_sk#8] +- *(2) Filter ((isnotnull(r_reason_desc#13) && (r_reason_desc#13 = reason 28)) && isnotnull(r_reason_sk#8)) - +- *(2) FileScan parquet default.reason[r_reason_sk#8,r_reason_desc#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_desc), EqualTo(r_reason_desc,reason 28), IsNotNull(r_reason_sk)], ReadSchema: struct \ No newline at end of file + +- *(2) FileScan parquet default.reason[r_reason_sk#8,r_reason_desc#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_desc), EqualTo(r_reason_desc,reason 28), IsNotNull(r_reason_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/explain.txt index 2e6006488..424585ea5 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/explain.txt @@ -17,21 +17,21 @@ TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], outpu : : : : +- *(6) BroadcastHashJoin [ws_order_number#6], [ws_order_number#6#14], LeftSemi, BuildRight, NOT (ws_warehouse_sk#15 = ws_warehouse_sk#15#16) : : : : :- *(6) Project [ws_ship_date_sk#11, ws_ship_addr_sk#9, ws_web_site_sk#7, ws_warehouse_sk#15, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] : : : : : +- *(6) Filter ((isnotnull(ws_ship_date_sk#11) && isnotnull(ws_ship_addr_sk#9)) && isnotnull(ws_web_site_sk#7)) - : : : : : +- *(6) FileScan parquet default.web_sales[ws_ship_date_sk#11,ws_ship_addr_sk#9,ws_web_site_sk#7,ws_warehouse_sk#15,ws_order_number#6,ws_ext_ship_cost#4,ws_net_profit#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)], ReadSchema: struct + : : : : +- *(1) FileScan parquet default.web_sales[ws_warehouse_sk#15,ws_order_number#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])) - : : : +- *(2) FileScan parquet default.web_returns[wr_order_number#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + : : : +- *(2) FileScan parquet default.web_returns[wr_order_number#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_returns], PartitionFilters: [], PushedFilters: [], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(3) Project [d_date_sk#12] : : +- *(3) Filter (((isnotnull(d_date#17) && (cast(d_date#17 as string) >= 1999-02-01)) && (d_date#17 <= 10683)) && isnotnull(d_date_sk#12)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_date#17] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_date#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(4) Project [ca_address_sk#10] : +- *(4) Filter ((isnotnull(ca_state#18) && (ca_state#18 = IL)) && isnotnull(ca_address_sk#10)) - : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(5) Project [web_site_sk#8] +- *(5) Filter ((isnotnull(web_company_name#19) && (web_company_name#19 = pri)) && isnotnull(web_site_sk#8)) - +- *(5) FileScan parquet default.web_site[web_site_sk#8,web_company_name#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file + +- *(5) FileScan parquet default.web_site[web_site_sk#8,web_company_name#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/explain.txt index 2d497b75e..d25afb9da 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/explain.txt @@ -16,39 +16,39 @@ TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], outpu : : : :- *(9) BroadcastHashJoin [ws_order_number#6], [ws_order_number#6#14], LeftSemi, BuildRight : : : : :- *(9) Project [ws_ship_date_sk#11, ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] : : : : : +- *(9) Filter ((isnotnull(ws_ship_date_sk#11) && isnotnull(ws_ship_addr_sk#9)) && isnotnull(ws_web_site_sk#7)) - : : : : : +- *(9) FileScan parquet default.web_sales[ws_ship_date_sk#11,ws_ship_addr_sk#9,ws_web_site_sk#7,ws_order_number#6,ws_ext_ship_cost#4,ws_net_profit#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)], ReadSchema: struct + : : : : : +- *(2) FileScan parquet default.web_sales[ws_warehouse_sk#16,ws_order_number#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)], ReadSchema: struct : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) : : : : +- *(1) Project [ws_warehouse_sk#17, ws_order_number#15] : : : : +- *(1) Filter (isnotnull(ws_order_number#15) && isnotnull(ws_warehouse_sk#17)) - : : : : +- *(1) FileScan parquet default.web_sales[ws_warehouse_sk#17,ws_order_number#15] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)], ReadSchema: struct + : : : : +- *(1) FileScan parquet default.web_sales[ws_warehouse_sk#17,ws_order_number#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])) : : : +- *(5) Project [wr_order_number#13] : : : +- *(5) BroadcastHashJoin [wr_order_number#13], [cast(ws_order_number#6 as bigint)], Inner, BuildRight : : : :- *(5) Project [wr_order_number#13] : : : : +- *(5) Filter isnotnull(wr_order_number#13) - : : : : +- *(5) FileScan parquet default.web_returns[wr_order_number#13] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_order_number)], ReadSchema: struct + : : : : +- *(5) FileScan parquet default.web_returns[wr_order_number#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_order_number)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(4) Project [ws_order_number#6] : : : +- *(4) BroadcastHashJoin [ws_order_number#6], [ws_order_number#15], Inner, BuildRight, NOT (ws_warehouse_sk#16 = ws_warehouse_sk#17) : : : :- *(4) Project [ws_warehouse_sk#16, ws_order_number#6] : : : : +- *(4) Filter (isnotnull(ws_order_number#6) && isnotnull(ws_warehouse_sk#16)) - : : : : +- *(4) FileScan parquet default.web_sales[ws_warehouse_sk#16,ws_order_number#6] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)], ReadSchema: struct + : : : : +- *(4) FileScan parquet default.web_sales[ws_warehouse_sk#16,ws_order_number#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)], ReadSchema: struct : : : +- ReusedExchange [ws_warehouse_sk#17, ws_order_number#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(6) Project [d_date_sk#12] : : +- *(6) Filter (((isnotnull(d_date#18) && (cast(d_date#18 as string) >= 1999-02-01)) && (d_date#18 <= 10683)) && isnotnull(d_date_sk#12)) - : : +- *(6) FileScan parquet default.date_dim[d_date_sk#12,d_date#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)], ReadSchema: struct + : : +- *(6) FileScan parquet default.date_dim[d_date_sk#12,d_date#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(7) Project [ca_address_sk#10] : +- *(7) Filter ((isnotnull(ca_state#19) && (ca_state#19 = IL)) && isnotnull(ca_address_sk#10)) - : +- *(7) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#19] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)], ReadSchema: struct + : +- *(7) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(8) Project [web_site_sk#8] +- *(8) Filter ((isnotnull(web_company_name#20) && (web_company_name#20 = pri)) && isnotnull(web_site_sk#8)) - +- *(8) FileScan parquet default.web_site[web_site_sk#8,web_company_name#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file + +- *(8) FileScan parquet default.web_site[web_site_sk#8,web_company_name#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/explain.txt index c66e7e331..4700925d1 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/explain.txt @@ -11,16 +11,16 @@ TakeOrderedAndProject(limit=100, orderBy=[count(1)#1 ASC NULLS FIRST], output=[c : : +- *(4) BroadcastHashJoin [ss_hdemo_sk#6], [hd_demo_sk#7], Inner, BuildRight : : :- *(4) Project [ss_sold_time_sk#4, ss_hdemo_sk#6, ss_store_sk#2] : : : +- *(4) Filter ((isnotnull(ss_hdemo_sk#6) && isnotnull(ss_sold_time_sk#4)) && isnotnull(ss_store_sk#2)) - : : : +- *(4) FileScan parquet default.store_sales[ss_sold_time_sk#4,ss_hdemo_sk#6,ss_store_sk#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct + : : : +- *(4) FileScan parquet default.store_sales[ss_sold_time_sk#4,ss_hdemo_sk#6,ss_store_sk#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(1) Project [hd_demo_sk#7] : : +- *(1) Filter ((isnotnull(hd_dep_count#8) && (hd_dep_count#8 = 7)) && isnotnull(hd_demo_sk#7)) - : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#7,hd_dep_count#8] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,7), IsNotNull(hd_demo_sk)], ReadSchema: struct + : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#7,hd_dep_count#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,7), IsNotNull(hd_demo_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(2) Project [t_time_sk#5] : +- *(2) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 20)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#5)) - : +- *(2) FileScan parquet default.time_dim[t_time_sk#5,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,20), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct + : +- *(2) FileScan parquet default.time_dim[t_time_sk#5,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,20), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(3) Project [s_store_sk#3] +- *(3) Filter ((isnotnull(s_store_name#11) && (s_store_name#11 = ese)) && isnotnull(s_store_sk#3)) - +- *(3) FileScan parquet default.store[s_store_sk#3,s_store_name#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file + +- *(3) FileScan parquet default.store[s_store_sk#3,s_store_name#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/explain.txt index 0d528d336..fa7b0e6ae 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/explain.txt @@ -14,11 +14,11 @@ CollectLimit 100 : +- *(2) BroadcastHashJoin [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight : :- *(2) Project [ss_sold_date_sk#7, ss_item_sk#6, ss_customer_sk#5] : : +- *(2) Filter isnotnull(ss_sold_date_sk#7) - : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#7,ss_item_sk#6,ss_customer_sk#5] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#7,ss_item_sk#6,ss_customer_sk#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(1) Project [d_date_sk#8] : +- *(1) Filter (((isnotnull(d_month_seq#9) && (d_month_seq#9 >= 1200)) && (d_month_seq#9 <= 1211)) && isnotnull(d_date_sk#8)) - : +- *(1) FileScan parquet default.date_dim[d_date_sk#8,d_month_seq#9] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct + : +- *(1) FileScan parquet default.date_dim[d_date_sk#8,d_month_seq#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct +- *(8) Sort [customer_sk#2 ASC NULLS FIRST, item_sk#4 ASC NULLS FIRST], false, 0 +- Exchange hashpartitioning(customer_sk#2, item_sk#4, 5) +- *(7) HashAggregate(keys=[cs_bill_customer_sk#10, cs_item_sk#11], functions=[]) @@ -28,5 +28,5 @@ CollectLimit 100 +- *(6) BroadcastHashJoin [cs_sold_date_sk#12], [d_date_sk#8], Inner, BuildRight :- *(6) Project [cs_sold_date_sk#12, cs_bill_customer_sk#10, cs_item_sk#11] : +- *(6) Filter isnotnull(cs_sold_date_sk#12) - : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_bill_customer_sk#10,cs_item_sk#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct + : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_bill_customer_sk#10,cs_item_sk#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct +- ReusedExchange [d_date_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/explain.txt index 2ffc242b5..d93c3fdc2 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/explain.txt @@ -15,12 +15,12 @@ : +- *(3) BroadcastHashJoin [ss_item_sk#14], [i_item_sk#15], Inner, BuildRight : :- *(3) Project [ss_sold_date_sk#12, ss_item_sk#14, ss_ext_sales_price#11] : : +- *(3) Filter (isnotnull(ss_item_sk#14) && isnotnull(ss_sold_date_sk#12)) - : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_item_sk#14,ss_ext_sales_price#11] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_item_sk#14,ss_ext_sales_price#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(1) Project [i_item_sk#15, i_item_id#7, i_item_desc#1, i_current_price#4, i_class#3, i_category#2] : +- *(1) Filter (i_category#2 IN (Sports,Books,Home) && isnotnull(i_item_sk#15)) - : +- *(1) FileScan parquet default.item[i_item_sk#15,i_item_id#7,i_item_desc#1,i_current_price#4,i_class#3,i_category#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/item], PartitionFilters: [], PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)], ReadSchema: struct= 10644)) && (d_date#16 <= 10674)) && isnotnull(d_date_sk#13)) - +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file + +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/explain.txt index 2106af4de..09cba55e4 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/explain.txt @@ -13,20 +13,20 @@ TakeOrderedAndProject(limit=100, orderBy=[substring(w_warehouse_name, 1, 20)#1 A : : : +- *(5) BroadcastHashJoin [cs_warehouse_sk#18], [w_warehouse_sk#19], Inner, BuildRight : : : :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, cs_call_center_sk#14, cs_ship_mode_sk#16, cs_warehouse_sk#18] : : : : +- *(5) Filter (((isnotnull(cs_warehouse_sk#18) && isnotnull(cs_ship_mode_sk#16)) && isnotnull(cs_call_center_sk#14)) && isnotnull(cs_ship_date_sk#11)) - : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_ship_date_sk#11,cs_call_center_sk#14,cs_ship_mode_sk#16,cs_warehouse_sk#18] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_ship_mode_sk), IsNotNull(cs_call_center_sk), IsNotNull(..., ReadSchema: struct + : : : +- *(1) FileScan parquet default.warehouse[w_warehouse_sk#19,w_warehouse_name#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(2) Project [sm_ship_mode_sk#17, sm_type#2] : : +- *(2) Filter isnotnull(sm_ship_mode_sk#17) - : : +- *(2) FileScan parquet default.ship_mode[sm_ship_mode_sk#17,sm_type#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/ship_mode], PartitionFilters: [], PushedFilters: [IsNotNull(sm_ship_mode_sk)], ReadSchema: struct + : : +- *(2) FileScan parquet default.ship_mode[sm_ship_mode_sk#17,sm_type#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/ship_mode], PartitionFilters: [], PushedFilters: [IsNotNull(sm_ship_mode_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(3) Project [cc_call_center_sk#15, cc_name#3] : +- *(3) Filter isnotnull(cc_call_center_sk#15) - : +- *(3) FileScan parquet default.call_center[cc_call_center_sk#15,cc_name#3] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk)], ReadSchema: struct + : +- *(3) FileScan parquet default.call_center[cc_call_center_sk#15,cc_name#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(4) Project [d_date_sk#13] +- *(4) Filter (((isnotnull(d_month_seq#20) && (d_month_seq#20 >= 1200)) && (d_month_seq#20 <= 1211)) && isnotnull(d_date_sk#13)) - +- *(4) FileScan parquet default.date_dim[d_date_sk#13,d_month_seq#20] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/C:/Users/apdave/github/hyperspace-1/spark-warehouse/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct \ No newline at end of file + +- *(4) FileScan parquet default.date_dim[d_date_sk#13,d_month_seq#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct \ No newline at end of file diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala index e9227e097..fdfd1b086 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala @@ -240,7 +240,7 @@ trait PlanStabilitySuite extends TPCDSBase with Logging { private def normalizeLocation(plan: String): String = { plan.replaceAll( - s"Location.*$clsName/", + s"Location.*spark-warehouse/", "Location [not included in comparison]/{warehouse_dir}/") } From d5fbec859f685622e600150564308637af46d91e Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Tue, 16 Mar 2021 16:45:46 -0700 Subject: [PATCH 23/31] query plans for all queries --- .../approved-plans-v1_4/q29/explain.txt | 4 ++-- .../approved-plans-v1_4/q44/explain.txt | 8 ++++---- .../approved-plans-v1_4/q54/explain.txt | 10 +++++----- .../approved-plans-v1_4/q58/explain.txt | 18 +++++++++--------- .../approved-plans-v1_4/q64/explain.txt | 4 ++-- .../approved-plans-v1_4/q72/explain.txt | 4 ++-- .../approved-plans-v1_4/q78/explain.txt | 6 +++--- .../approved-plans-v1_4/q85/explain.txt | 4 ++-- .../goldstandard/PlanStabilitySuite.scala | 9 ++++----- 9 files changed, 33 insertions(+), 34 deletions(-) diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/explain.txt index 204dda3c3..1db3bf363 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/explain.txt @@ -34,8 +34,8 @@ TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_des : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_year#29,d_moy#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,9), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : +- *(4) Project [d_date_sk#18] - : : : +- *(4) Filter (((((isnotnull(d_year#30) && isnotnull(d_moy#31)) && (d_moy#31 >= 9)) && (d_moy#31 <= 12)) && (d_year#30 = 1999)) && isnotnull(d_date_sk#18)) - : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#30,d_moy#31] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), Equ..., ReadSchema: struct + : : : +- *(4) Filter (((((isnotnull(d_moy#30) && isnotnull(d_year#31)) && (d_moy#30 >= 9)) && (d_moy#30 <= 12)) && (d_year#31 = 1999)) && isnotnull(d_date_sk#18)) + : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), Equ..., ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(5) Project [d_date_sk#16] : : +- *(5) Filter (d_year#32 IN (1999,2000,2001) && isnotnull(d_date_sk#16)) diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt index 1abd6c8f3..aebba592c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt @@ -12,8 +12,8 @@ TakeOrderedAndProject(limit=100, orderBy=[rnk#1 ASC NULLS FIRST], output=[rnk#1, : : : +- *(3) Sort [rank_col#11 ASC NULLS FIRST], false, 0 : : : +- Exchange SinglePartition : : : +- *(2) Project [item_sk#8, rank_col#11] - : : : +- *(2) Filter (isnotnull(avg(ss_net_profit#12)#13) && (cast(avg(ss_net_profit#12)#13 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery7089)), DecimalType(13,7)))) - : : : : +- Subquery subquery7089 + : : : +- *(2) Filter (isnotnull(avg(ss_net_profit#12)#13) && (cast(avg(ss_net_profit#12)#13 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery6830)), DecimalType(13,7)))) + : : : : +- Subquery subquery6830 : : : : +- *(2) HashAggregate(keys=[ss_store_sk#14], functions=[avg(UnscaledValue(ss_net_profit#12))]) : : : : +- Exchange hashpartitioning(ss_store_sk#14, 5) : : : : +- *(1) HashAggregate(keys=[ss_store_sk#14], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) @@ -33,8 +33,8 @@ TakeOrderedAndProject(limit=100, orderBy=[rnk#1 ASC NULLS FIRST], output=[rnk#1, : : +- *(6) Sort [rank_col#17 DESC NULLS LAST], false, 0 : : +- Exchange SinglePartition : : +- *(5) Project [item_sk#6, rank_col#17] - : : +- *(5) Filter (isnotnull(avg(ss_net_profit#12)#18) && (cast(avg(ss_net_profit#12)#18 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery7094)), DecimalType(13,7)))) - : : : +- Subquery subquery7094 + : : +- *(5) Filter (isnotnull(avg(ss_net_profit#12)#18) && (cast(avg(ss_net_profit#12)#18 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery6835)), DecimalType(13,7)))) + : : : +- Subquery subquery6835 : : : +- *(2) HashAggregate(keys=[ss_store_sk#14], functions=[avg(UnscaledValue(ss_net_profit#12))]) : : : +- Exchange hashpartitioning(ss_store_sk#14, 5) : : : +- *(1) HashAggregate(keys=[ss_store_sk#14], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt index 91c20f804..90bf0d52e 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt @@ -56,15 +56,15 @@ TakeOrderedAndProject(limit=100, orderBy=[segment#1 ASC NULLS FIRST,num_customer : +- *(9) FileScan parquet default.store[s_county#10,s_state#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), IsNotNull(s_county)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) +- *(10) Project [d_date_sk#7] - +- *(10) Filter (((isnotnull(d_month_seq#32) && (d_month_seq#32 >= Subquery subquery9801)) && (d_month_seq#32 <= Subquery subquery9802)) && isnotnull(d_date_sk#7)) - : :- Subquery subquery9801 + +- *(10) Filter (((isnotnull(d_month_seq#32) && (d_month_seq#32 >= Subquery subquery9542)) && (d_month_seq#32 <= Subquery subquery9543)) && isnotnull(d_date_sk#7)) + : :- Subquery subquery9542 : : +- *(2) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) : : +- Exchange hashpartitioning((d_month_seq + 1)#33, 5) : : +- *(1) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) : : +- *(1) Project [(d_month_seq#32 + 1) AS (d_month_seq + 1)#33] : : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) : : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct - : +- Subquery subquery9802 + : +- Subquery subquery9543 : +- *(2) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) : +- Exchange hashpartitioning((d_month_seq + 3)#34, 5) : +- *(1) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) @@ -72,14 +72,14 @@ TakeOrderedAndProject(limit=100, orderBy=[segment#1 ASC NULLS FIRST,num_customer : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct +- *(10) FileScan parquet default.date_dim[d_date_sk#7,d_month_seq#32] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct - :- Subquery subquery9801 + :- Subquery subquery9542 : +- *(2) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) : +- Exchange hashpartitioning((d_month_seq + 1)#33, 5) : +- *(1) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) : +- *(1) Project [(d_month_seq#32 + 1) AS (d_month_seq + 1)#33] : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct - +- Subquery subquery9802 + +- Subquery subquery9543 +- *(2) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) +- Exchange hashpartitioning((d_month_seq + 3)#34, 5) +- *(1) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt index 9b8184978..f0a3a143c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt @@ -27,13 +27,13 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,ss_item_rev# : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) : : +- *(2) Project [d_date#17 AS d_date#17#18] - : : +- *(2) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12128)) - : : : +- Subquery subquery12128 + : : +- *(2) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery11869)) + : : : +- Subquery subquery11869 : : : +- *(1) Project [d_week_seq#19] : : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) : : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct : : +- *(2) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct - : : +- Subquery subquery12128 + : : +- Subquery subquery11869 : : +- *(1) Project [d_week_seq#19] : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct @@ -58,13 +58,13 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,ss_item_rev# : : +- *(7) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) : +- *(6) Project [d_date#17 AS d_date#17#23] - : +- *(6) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12132)) - : : +- Subquery subquery12132 + : +- *(6) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery11873)) + : : +- Subquery subquery11873 : : +- *(1) Project [d_week_seq#19] : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct : +- *(6) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct - : +- Subquery subquery12132 + : +- Subquery subquery11873 : +- *(1) Project [d_week_seq#19] : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct @@ -89,13 +89,13 @@ TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,ss_item_rev# : +- *(12) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) +- *(11) Project [d_date#17 AS d_date#17#27] - +- *(11) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery12136)) - : +- Subquery subquery12136 + +- *(11) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery11877)) + : +- Subquery subquery11877 : +- *(1) Project [d_week_seq#19] : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct +- *(11) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct - +- Subquery subquery12136 + +- Subquery subquery11877 +- *(1) Project [d_week_seq#19] +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt index c91d21f8b..5a211f617 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt @@ -68,8 +68,8 @@ : : : : : : : : : : : : : : : +- *(5) FileScan parquet default.date_dim[d_date_sk#74,d_year#38] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : : : : : : : : +- *(6) Project [s_store_sk#72, s_store_name#28, s_zip#29] - : : : : : : : : : : : : : : +- *(6) Filter ((isnotnull(s_store_sk#72) && isnotnull(s_zip#29)) && isnotnull(s_store_name#28)) - : : : : : : : : : : : : : : +- *(6) FileScan parquet default.store[s_store_sk#72,s_store_name#28,s_zip#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip), IsNotNull(s_store_name)], ReadSchema: struct + : : : : : : : : : : : : : : +- *(6) Filter ((isnotnull(s_store_sk#72) && isnotnull(s_store_name#28)) && isnotnull(s_zip#29)) + : : : : : : : : : : : : : : +- *(6) FileScan parquet default.store[s_store_sk#72,s_store_name#28,s_zip#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_zip)], ReadSchema: struct : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : : : : : : : : : : : : +- *(7) Project [c_customer_sk#70, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67] : : : : : : : : : : : : : +- *(7) Filter (((((isnotnull(c_customer_sk#70) && isnotnull(c_first_sales_date_sk#67)) && isnotnull(c_first_shipto_date_sk#65)) && isnotnull(c_current_cdemo_sk#59)) && isnotnull(c_current_hdemo_sk#53)) && isnotnull(c_current_addr_sk#49)) diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt index 09f911c81..99558702e 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt @@ -56,8 +56,8 @@ TakeOrderedAndProject(limit=100, orderBy=[total_cnt#1 DESC NULLS LAST,i_item_des : : : +- *(7) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(8) Project [d_date_sk#14, d_date#15] - : : +- *(8) Filter (isnotnull(d_date_sk#14) && isnotnull(d_date#15)) - : : +- *(8) FileScan parquet default.date_dim[d_date_sk#14,d_date#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_date)], ReadSchema: struct + : : +- *(8) Filter (isnotnull(d_date#15) && isnotnull(d_date_sk#14)) + : : +- *(8) FileScan parquet default.date_dim[d_date_sk#14,d_date#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)], ReadSchema: struct : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : +- *(9) Project [p_promo_sk#12] : +- *(9) Filter isnotnull(p_promo_sk#12) diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt index a10754cf1..3a21513d1 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt @@ -1,6 +1,6 @@ == Physical Plan == TakeOrderedAndProject(limit=100, orderBy=[ratio#1 ASC NULLS FIRST,ss_qty#2 DESC NULLS LAST,ss_wc#3 DESC NULLS LAST,ss_sp#4 DESC NULLS LAST,other_chan_qty#5 ASC NULLS FIRST,other_chan_wholesale_cost#6 ASC NULLS FIRST,other_chan_sales_price#7 ASC NULLS FIRST,round((cast(ss_qty#2 as double) / cast(coalesce((ws_qty#8 + cs_qty#9), 1) as double)), 2) ASC NULLS FIRST], output=[ratio#1,store_qty#10,store_wholesale_cost#11,store_sales_price#12,other_chan_qty#5,other_chan_wholesale_cost#6,other_chan_sales_price#7]) -+- *(12) Project [round((cast(ss_qty#2 as double) / cast(coalesce((ws_qty#8 + cs_qty#9), 1) as double)), 2) AS ratio#1, ss_qty#2 AS store_qty#10, ss_wc#3 AS store_wholesale_cost#11, ss_sp#4 AS store_sales_price#12, (coalesce(ws_qty#8, 0) + coalesce(cs_qty#9, 0)) AS other_chan_qty#5, CheckOverflow((promote_precision(cast(coalesce(ws_wc#13, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#14, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_wholesale_cost#6, CheckOverflow((promote_precision(cast(coalesce(ws_sp#15, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#16, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_sales_price#7, ss_wc#3, cs_qty#9, ss_sp#4, ss_qty#2, ws_qty#8] ++- *(12) Project [round((cast(ss_qty#2 as double) / cast(coalesce((ws_qty#8 + cs_qty#9), 1) as double)), 2) AS ratio#1, ss_qty#2 AS store_qty#10, ss_wc#3 AS store_wholesale_cost#11, ss_sp#4 AS store_sales_price#12, (coalesce(ws_qty#8, 0) + coalesce(cs_qty#9, 0)) AS other_chan_qty#5, CheckOverflow((promote_precision(cast(coalesce(ws_wc#13, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#14, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_wholesale_cost#6, CheckOverflow((promote_precision(cast(coalesce(ws_sp#15, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#16, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_sales_price#7, cs_qty#9, ss_wc#3, ss_qty#2, ss_sp#4, ws_qty#8] +- *(12) BroadcastHashJoin [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19], [cs_sold_year#20, cs_item_sk#21, cs_customer_sk#22], Inner, BuildRight :- *(12) Project [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19, ss_qty#2, ss_wc#3, ss_sp#4, ws_qty#8, ws_wc#13, ws_sp#15] : +- *(12) BroadcastHashJoin [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19], [ws_sold_year#23, ws_item_sk#24, ws_customer_sk#25], Inner, BuildRight @@ -34,8 +34,8 @@ TakeOrderedAndProject(limit=100, orderBy=[ratio#1 ASC NULLS FIRST,ss_qty#2 DESC : : +- *(6) Filter isnull(wr_order_number#40) : : +- *(6) BroadcastHashJoin [cast(ws_order_number#41 as bigint), cast(ws_item_sk#24 as bigint)], [wr_order_number#40, wr_item_sk#42], LeftOuter, BuildRight : : :- *(6) Project [ws_sold_date_sk#39, ws_item_sk#24, ws_bill_customer_sk#35, ws_order_number#41, ws_quantity#36, ws_wholesale_cost#37, ws_sales_price#38] - : : : +- *(6) Filter ((isnotnull(ws_sold_date_sk#39) && isnotnull(ws_item_sk#24)) && isnotnull(ws_bill_customer_sk#35)) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#39,ws_item_sk#24,ws_bill_customer_sk#35,ws_order_number#41,ws_quantity#36,ws_wholesale_cost#37,ws_sales_price#38] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint), input[1, string, true], input[2, string, true])) : : : +- *(4) Project [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] - : : : +- *(4) Filter ((isnotnull(cd_marital_status#22) && isnotnull(cd_demo_sk#21)) && isnotnull(cd_education_status#23)) - : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#21,cd_marital_status#22,cd_education_status#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_marital_status), IsNotNull(cd_demo_sk), IsNotNull(cd_education_status)], ReadSchema: struct + : : : +- *(4) Filter ((isnotnull(cd_demo_sk#21) && isnotnull(cd_marital_status#22)) && isnotnull(cd_education_status#23)) + : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#21,cd_marital_status#22,cd_education_status#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status)], ReadSchema: struct : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) : : +- *(5) Project [ca_address_sk#15, ca_state#16] : : +- *(5) Filter ((isnotnull(ca_country#33) && (ca_country#33 = United States)) && isnotnull(ca_address_sk#15)) diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala index 8136bf467..00358ebf4 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala @@ -5,7 +5,6 @@ * * The below license was copied from: https://github.com/FasterXML/jackson-module-scala/blob/2.10/src/main/resources/META-INF/LICENSE */ - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -272,10 +271,10 @@ class TPCDSV1_4_SparkPlanStabilitySuite extends PlanStabilitySuite { override val goldenFilePath: String = new File(baseResourcePath, "spark-2.4/approved-plans-v1_4").getAbsolutePath - // Enable cross join because some queries fail during query optimization phase. - withSQLConf("spark.sql.crossJoin.enabled" -> "true") { - tpcdsQueries.foreach { q => - test(s"check simplified (tpcds-v1.4/$q)") { + tpcdsQueries.foreach { q => + test(s"check simplified (tpcds-v1.4/$q)") { + // Enable cross join because some queries fail during query optimization phase. + withSQLConf("spark.sql.crossJoin.enabled" -> "true") { testQuery("tpcds/queries", q) } } From 3b515333ca1a8f0e06ca62c84951a9bb32616230 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Thu, 18 Mar 2021 10:52:21 -0700 Subject: [PATCH 24/31] sample query q0 with sample indexes --- .../approved-plans-v1_4/q0/explain.txt | 10 ++ .../approved-plans-v1_4/q0/simplified.txt | 14 +++ src/test/resources/tpcds/queries/q0.sql | 6 ++ .../goldstandard/IndexLogEntryCreator.scala | 94 +++++++++++++++++++ .../goldstandard/MockSignatureProvider.scala | 22 +++++ .../hyperspace/goldstandard/TPCDSBase.scala | 12 +-- .../goldstandard/TPCDS_Hyperspace.scala | 46 +++++++++ 7 files changed, 198 insertions(+), 6 deletions(-) create mode 100644 src/test/resources/tpcds/hyperspace/approved-plans-v1_4/q0/explain.txt create mode 100644 src/test/resources/tpcds/hyperspace/approved-plans-v1_4/q0/simplified.txt create mode 100644 src/test/resources/tpcds/queries/q0.sql create mode 100644 src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala create mode 100644 src/test/scala/com/microsoft/hyperspace/goldstandard/MockSignatureProvider.scala create mode 100644 src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala diff --git a/src/test/resources/tpcds/hyperspace/approved-plans-v1_4/q0/explain.txt b/src/test/resources/tpcds/hyperspace/approved-plans-v1_4/q0/explain.txt new file mode 100644 index 000000000..b17bdec96 --- /dev/null +++ b/src/test/resources/tpcds/hyperspace/approved-plans-v1_4/q0/explain.txt @@ -0,0 +1,10 @@ +== Physical Plan == +CollectLimit 100 ++- *(3) Project [d_year#1, ss_customer_sk#2] + +- *(3) SortMergeJoin [d_date_sk#3], [ss_sold_date_sk#4], Inner + :- *(1) Project [d_date_sk#3, d_year#1] + : +- *(1) Filter isnotnull(d_date_sk#3) + : +- *(1) FileScan Hyperspace(Type: CI, Name: dtindex, LogVersion: 0) default.date_dim[d_date_sk#3,d_year#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct, SelectedBucketsCount: 200 out of 200 + +- *(2) Project [ss_sold_date_sk#4, ss_customer_sk#2] + +- *(2) Filter isnotnull(ss_sold_date_sk#4) + +- *(2) FileScan Hyperspace(Type: CI, Name: ssIndex, LogVersion: 0) default.store_sales[ss_sold_date_sk#4,ss_customer_sk#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct, SelectedBucketsCount: 200 out of 200 \ No newline at end of file diff --git a/src/test/resources/tpcds/hyperspace/approved-plans-v1_4/q0/simplified.txt b/src/test/resources/tpcds/hyperspace/approved-plans-v1_4/q0/simplified.txt new file mode 100644 index 000000000..317695b4c --- /dev/null +++ b/src/test/resources/tpcds/hyperspace/approved-plans-v1_4/q0/simplified.txt @@ -0,0 +1,14 @@ +CollectLimit + WholeStageCodegen + Project [d_year,ss_customer_sk] + SortMergeJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_date_sk] + Scan Hyperspace(Type: CI, Name: dtindex, LogVersion: 0) default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + WholeStageCodegen + Project [ss_customer_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + Scan Hyperspace(Type: CI, Name: ssIndex, LogVersion: 0) default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] diff --git a/src/test/resources/tpcds/queries/q0.sql b/src/test/resources/tpcds/queries/q0.sql new file mode 100644 index 000000000..791f50ea6 --- /dev/null +++ b/src/test/resources/tpcds/queries/q0.sql @@ -0,0 +1,6 @@ +SELECT + date_dim.d_year, + store_sales.ss_customer_sk +FROM date_dim, store_sales +WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk +LIMIT 100 diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala new file mode 100644 index 000000000..31d742ce7 --- /dev/null +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala @@ -0,0 +1,94 @@ +package com.microsoft.hyperspace.goldstandard +// scalastyle:off +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, PartitioningAwareFileIndex} +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.{DataFrame, SparkSession} + +import com.microsoft.hyperspace.actions.Constants +import com.microsoft.hyperspace.example.SparkApp +import com.microsoft.hyperspace.index._ +import com.microsoft.hyperspace.util.{JsonUtils, PathUtils} + +object IndexLogEntryCreator extends SparkApp { + def createIndex(str: String): Unit = { + val splits = str.split(";") + val name = splits(0) + val indexCols = splits(2).split(",").toSeq + val includedCols = splits(3).split(",").toSeq + val config = IndexConfig(name, indexCols, includedCols) + val table = splits(1) + val indexPath = { + val baseResourcePath = { + // use the same way as `SQLQueryTestSuite` to get the resource path + java.nio.file.Paths.get("src", "test", "resources", "tpcds").toFile + } + PathUtils + .makeAbsolute( + new Path(baseResourcePath.toString, s"hyperspace/indexes/$name"), + new Configuration) + } + new IndexLogManagerImpl(indexPath) + .writeLog(0, toIndex(config, s"${indexPath.toString}/v__=0", table, spark)) + } + + def toRelation(sourceDf: DataFrame): Option[Relation] = { + val leafPlans = sourceDf.queryExecution.optimizedPlan.collectLeaves() + leafPlans.head match { + case LogicalRelation( + HadoopFsRelation(location: PartitioningAwareFileIndex, _, dataSchema, _, _, _), + _, + _, + _) => + val sourceDataProperties = + Hdfs.Properties(Content.fromDirectory(location.rootPaths.head, new FileIdTracker)) + val fileFormatName = "parquet" + + Some( + Relation( + location.rootPaths.map(_.toString), + Hdfs(sourceDataProperties), + dataSchema.json, + fileFormatName, + Map.empty)) + case _ => None + } + } + + def toIndex( + config: IndexConfig, + path: String, + tableName: String, + spark: SparkSession): IndexLogEntry = { + val sourceDf = spark.table(tableName) + val indexSchema = { + val allCols = config.indexedColumns ++ config.includedColumns + StructType(sourceDf.schema.filter(f => allCols.contains(f.name))) + } + val relation: Relation = toRelation(sourceDf).get + + val sourcePlanProperties = SparkPlan.Properties( + Seq(relation), + null, + null, + LogicalPlanFingerprint( + LogicalPlanFingerprint.Properties(Seq( + Signature("com.microsoft.hyperspace.goldstandard.MockSignatureProvider", tableName))))) + + val entry = IndexLogEntry( + config.indexName, + CoveringIndex( + CoveringIndex.Properties( + CoveringIndex.Properties + .Columns(config.indexedColumns, config.includedColumns), + IndexLogEntry.schemaString(indexSchema), + 200, + Map("hasParquetAsSourceFormat" -> "true"))), + Content(Directory.fromDirectory(new Path(path), new FileIdTracker)), + Source(SparkPlan(sourcePlanProperties)), + Map()) + entry.state = Constants.States.ACTIVE + entry + } +} diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/MockSignatureProvider.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/MockSignatureProvider.scala new file mode 100644 index 000000000..d08e5e4f5 --- /dev/null +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/MockSignatureProvider.scala @@ -0,0 +1,22 @@ +package com.microsoft.hyperspace.goldstandard + +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, PartitioningAwareFileIndex} + +import com.microsoft.hyperspace.index.LogicalPlanSignatureProvider + +class MockSignatureProvider extends LogicalPlanSignatureProvider { + override def signature(logicalPlan: LogicalPlan): Option[String] = { + val leafPlans = logicalPlan.collectLeaves() + if (leafPlans.size != 1) return None + + leafPlans.head match { + case LogicalRelation( + HadoopFsRelation(location: PartitioningAwareFileIndex, _, _, _, _, _), + _, + _, + _) => + Some(location.rootPaths.head.getName) + } + } +} diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala index 796f34e6b..b743db6dd 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala @@ -38,8 +38,8 @@ trait TPCDSBase extends SparkFunSuite with SparkInvolvedSuite { // The TPCDS queries below are based on v1.4. // TODO: Fix bulid pipeline for q49 and reenable q49. - val tpcdsQueries = Seq( - "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", + val tpcdsQueries = Seq("q0") + /* ("q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14a", "q14b", "q15", "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23a", "q23b", "q24a", "q24b", "q25", "q26", "q27", "q28", "q29", "q30", "q31", "q32", "q33", "q34", "q35", "q36", "q37", "q38", "q39a", "q39b", "q40", @@ -48,7 +48,7 @@ trait TPCDSBase extends SparkFunSuite with SparkInvolvedSuite { "q61", "q62", "q63", "q64", "q65", "q66", "q67", "q68", "q69", "q70", "q71", "q72", "q73", "q74", "q75", "q76", "q77", "q78", "q79", "q80", "q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90", - "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99") + "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99") */ private val tableColumns = Map( "store_sales" -> @@ -572,9 +572,9 @@ trait TPCDSBase extends SparkFunSuite with SparkInvolvedSuite { } override def afterAll(): Unit = { - tableNames.foreach { tableName => - spark.sessionState.catalog.dropTable(TableIdentifier(tableName), true, true) - } + //tableNames.foreach { tableName => + // spark.sessionState.catalog.dropTable(TableIdentifier(tableName), true, true) + //} super.afterAll() } } diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala new file mode 100644 index 000000000..e902bfbc7 --- /dev/null +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala @@ -0,0 +1,46 @@ +package com.microsoft.hyperspace.goldstandard + +import java.io.File + +import org.apache.hadoop.fs.Path + +import com.microsoft.hyperspace._ +import com.microsoft.hyperspace.goldstandard.IndexLogEntryCreator.createIndex +import com.microsoft.hyperspace.index.IndexConstants.INDEX_SYSTEM_PATH +import com.microsoft.hyperspace.util.FileUtils + +class TPCDS_Hyperspace extends PlanStabilitySuite { + + override val goldenFilePath: String = + new File(baseResourcePath, "hyperspace/approved-plans-v1_4").getAbsolutePath + + val indexSystemPath = new File(baseResourcePath, "hyperspace/indexes").toString + + override def beforeAll(): Unit = { + super.beforeAll() + spark.conf.set(INDEX_SYSTEM_PATH, indexSystemPath) + spark.enableHyperspace() + } + + override def afterAll(): Unit = { + FileUtils.delete(new Path(indexSystemPath)) + super.afterAll() + } + + tpcdsQueries.foreach { q => + test(s"check simplified (tpcds-v1.4/$q)") { + + val indexes = Seq( + "dtindex;date_dim;d_date_sk;d_year", + "ssIndex;store_sales;ss_sold_date_sk;ss_customer_sk") + indexes.foreach(createIndex) + + // Enable cross join because some queries fail during query optimization phase. + withSQLConf( + ("spark.sql.crossJoin.enabled" -> "true"), + ("spark.sql.autoBroadcastJoinThreshold" -> "-1")) { + testQuery("tpcds/queries", q) + } + } + } +} From fada1d626d11bdf44a532073dad66c43f59483c4 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Thu, 18 Mar 2021 13:49:59 -0700 Subject: [PATCH 25/31] revert changes from a dependency PR and depend directly on master --- .../approved-plans-v1_4/q0/explain.txt | 11 + .../approved-plans-v1_4/q0/simplified.txt | 13 + .../approved-plans-v1_4/q10/explain.txt | 326 ++++- .../approved-plans-v1_4/q10/simplified.txt | 83 +- .../approved-plans-v1_4/q11/explain.txt | 494 ++++++-- .../approved-plans-v1_4/q11/simplified.txt | 181 ++- .../approved-plans-v1_4/q12/explain.txt | 159 ++- .../approved-plans-v1_4/q12/simplified.txt | 48 +- .../approved-plans-v1_4/q13/explain.txt | 251 +++- .../approved-plans-v1_4/q13/simplified.txt | 78 +- .../approved-plans-v1_4/q14a/explain.txt | 988 ++++++++++++--- .../approved-plans-v1_4/q14a/simplified.txt | 377 +++--- .../approved-plans-v1_4/q14b/explain.txt | 927 +++++++++++--- .../approved-plans-v1_4/q14b/simplified.txt | 328 +++-- .../approved-plans-v1_4/q15/explain.txt | 174 ++- .../approved-plans-v1_4/q15/simplified.txt | 51 +- .../approved-plans-v1_4/q16/explain.txt | 270 +++- .../approved-plans-v1_4/q16/simplified.txt | 73 +- .../approved-plans-v1_4/q17/explain.txt | 317 ++++- .../approved-plans-v1_4/q17/simplified.txt | 107 +- .../approved-plans-v1_4/q18/explain.txt | 307 ++++- .../approved-plans-v1_4/q18/simplified.txt | 90 +- .../approved-plans-v1_4/q19/explain.txt | 257 +++- .../approved-plans-v1_4/q19/simplified.txt | 80 +- .../approved-plans-v1_4/q2/explain.txt | 252 +++- .../approved-plans-v1_4/q2/simplified.txt | 67 +- .../approved-plans-v1_4/q20/explain.txt | 159 ++- .../approved-plans-v1_4/q20/simplified.txt | 44 +- .../approved-plans-v1_4/q21/explain.txt | 180 ++- .../approved-plans-v1_4/q21/simplified.txt | 55 +- .../approved-plans-v1_4/q22/explain.txt | 180 ++- .../approved-plans-v1_4/q22/simplified.txt | 57 +- .../approved-plans-v1_4/q23a/explain.txt | 624 +++++++-- .../approved-plans-v1_4/q23a/simplified.txt | 260 ++-- .../approved-plans-v1_4/q23b/explain.txt | 788 ++++++++++-- .../approved-plans-v1_4/q23b/simplified.txt | 244 ++-- .../approved-plans-v1_4/q24a/explain.txt | 557 ++++++-- .../approved-plans-v1_4/q24a/simplified.txt | 188 +-- .../approved-plans-v1_4/q24b/explain.txt | 557 ++++++-- .../approved-plans-v1_4/q24b/simplified.txt | 188 +-- .../approved-plans-v1_4/q25/explain.txt | 317 ++++- .../approved-plans-v1_4/q25/simplified.txt | 107 +- .../approved-plans-v1_4/q26/explain.txt | 223 +++- .../approved-plans-v1_4/q26/simplified.txt | 58 +- .../approved-plans-v1_4/q27/explain.txt | 224 +++- .../approved-plans-v1_4/q27/simplified.txt | 69 +- .../approved-plans-v1_4/q28/explain.txt | 501 +++++++- .../approved-plans-v1_4/q28/simplified.txt | 120 +- .../approved-plans-v1_4/q29/explain.txt | 340 ++++- .../approved-plans-v1_4/q29/simplified.txt | 105 +- .../approved-plans-v1_4/q3/explain.txt | 140 +- .../approved-plans-v1_4/q3/simplified.txt | 41 +- .../approved-plans-v1_4/q30/explain.txt | 353 +++++- .../approved-plans-v1_4/q30/simplified.txt | 104 +- .../approved-plans-v1_4/q31/explain.txt | 661 ++++++++-- .../approved-plans-v1_4/q31/simplified.txt | 200 +-- .../approved-plans-v1_4/q32/explain.txt | 203 ++- .../approved-plans-v1_4/q32/simplified.txt | 46 +- .../approved-plans-v1_4/q33/explain.txt | 441 ++++++- .../approved-plans-v1_4/q33/simplified.txt | 128 +- .../approved-plans-v1_4/q34/explain.txt | 235 +++- .../approved-plans-v1_4/q34/simplified.txt | 64 +- .../approved-plans-v1_4/q35/explain.txt | 321 ++++- .../approved-plans-v1_4/q35/simplified.txt | 84 +- .../approved-plans-v1_4/q36/explain.txt | 209 ++- .../approved-plans-v1_4/q36/simplified.txt | 60 +- .../approved-plans-v1_4/q37/explain.txt | 184 ++- .../approved-plans-v1_4/q37/simplified.txt | 53 +- .../approved-plans-v1_4/q38/explain.txt | 376 +++++- .../approved-plans-v1_4/q38/simplified.txt | 151 +-- .../approved-plans-v1_4/q39a/explain.txt | 341 ++++- .../approved-plans-v1_4/q39a/simplified.txt | 104 +- .../approved-plans-v1_4/q39b/explain.txt | 341 ++++- .../approved-plans-v1_4/q39b/simplified.txt | 104 +- .../approved-plans-v1_4/q4/explain.txt | 728 +++++++++-- .../approved-plans-v1_4/q4/simplified.txt | 265 ++-- .../approved-plans-v1_4/q40/explain.txt | 213 +++- .../approved-plans-v1_4/q40/simplified.txt | 64 +- .../approved-plans-v1_4/q41/explain.txt | 137 +- .../approved-plans-v1_4/q41/simplified.txt | 26 +- .../approved-plans-v1_4/q42/explain.txt | 140 +- .../approved-plans-v1_4/q42/simplified.txt | 43 +- .../approved-plans-v1_4/q43/explain.txt | 140 +- .../approved-plans-v1_4/q43/simplified.txt | 41 +- .../approved-plans-v1_4/q44/explain.txt | 296 ++++- .../approved-plans-v1_4/q44/simplified.txt | 82 +- .../approved-plans-v1_4/q45/explain.txt | 263 +++- .../approved-plans-v1_4/q45/simplified.txt | 76 +- .../approved-plans-v1_4/q46/explain.txt | 280 +++- .../approved-plans-v1_4/q46/simplified.txt | 85 +- .../approved-plans-v1_4/q47/explain.txt | 327 ++++- .../approved-plans-v1_4/q47/simplified.txt | 143 ++- .../approved-plans-v1_4/q48/explain.txt | 212 +++- .../approved-plans-v1_4/q48/simplified.txt | 63 +- .../approved-plans-v1_4/q49/explain.txt | 433 +++++++ .../approved-plans-v1_4/q49/simplified.txt | 126 ++ .../approved-plans-v1_4/q5/explain.txt | 509 ++++++-- .../approved-plans-v1_4/q5/simplified.txt | 173 +-- .../approved-plans-v1_4/q50/explain.txt | 213 +++- .../approved-plans-v1_4/q50/simplified.txt | 70 +- .../approved-plans-v1_4/q51/explain.txt | 267 +++- .../approved-plans-v1_4/q51/simplified.txt | 104 +- .../approved-plans-v1_4/q52/explain.txt | 140 +- .../approved-plans-v1_4/q52/simplified.txt | 41 +- .../approved-plans-v1_4/q53/explain.txt | 209 ++- .../approved-plans-v1_4/q53/simplified.txt | 60 +- .../approved-plans-v1_4/q54/explain.txt | 545 ++++++-- .../approved-plans-v1_4/q54/simplified.txt | 160 ++- .../approved-plans-v1_4/q55/explain.txt | 140 +- .../approved-plans-v1_4/q55/simplified.txt | 41 +- .../approved-plans-v1_4/q56/explain.txt | 441 ++++++- .../approved-plans-v1_4/q56/simplified.txt | 128 +- .../approved-plans-v1_4/q57/explain.txt | 327 ++++- .../approved-plans-v1_4/q57/simplified.txt | 143 ++- .../approved-plans-v1_4/q58/explain.txt | 576 +++++++-- .../approved-plans-v1_4/q58/simplified.txt | 164 ++- .../approved-plans-v1_4/q59/explain.txt | 290 ++++- .../approved-plans-v1_4/q59/simplified.txt | 82 +- .../approved-plans-v1_4/q6/explain.txt | 357 +++++- .../approved-plans-v1_4/q6/simplified.txt | 99 +- .../approved-plans-v1_4/q60/explain.txt | 441 ++++++- .../approved-plans-v1_4/q60/simplified.txt | 126 +- .../approved-plans-v1_4/q61/explain.txt | 462 ++++++- .../approved-plans-v1_4/q61/simplified.txt | 123 +- .../approved-plans-v1_4/q62/explain.txt | 213 +++- .../approved-plans-v1_4/q62/simplified.txt | 70 +- .../approved-plans-v1_4/q63/explain.txt | 209 ++- .../approved-plans-v1_4/q63/simplified.txt | 60 +- .../approved-plans-v1_4/q64/explain.txt | 1086 +++++++++++++--- .../approved-plans-v1_4/q64/simplified.txt | 331 ++--- .../approved-plans-v1_4/q65/explain.txt | 285 ++++- .../approved-plans-v1_4/q65/simplified.txt | 88 +- .../approved-plans-v1_4/q66/explain.txt | 362 +++++- .../approved-plans-v1_4/q66/simplified.txt | 108 +- .../approved-plans-v1_4/q67/explain.txt | 204 ++- .../approved-plans-v1_4/q67/simplified.txt | 67 +- .../approved-plans-v1_4/q68/explain.txt | 280 +++- .../approved-plans-v1_4/q68/simplified.txt | 85 +- .../approved-plans-v1_4/q69/explain.txt | 320 ++++- .../approved-plans-v1_4/q69/simplified.txt | 83 +- .../approved-plans-v1_4/q7/explain.txt | 223 +++- .../approved-plans-v1_4/q7/simplified.txt | 64 +- .../approved-plans-v1_4/q70/explain.txt | 308 ++++- .../approved-plans-v1_4/q70/simplified.txt | 105 +- .../approved-plans-v1_4/q71/explain.txt | 270 +++- .../approved-plans-v1_4/q71/simplified.txt | 81 +- .../approved-plans-v1_4/q72/explain.txt | 457 ++++++- .../approved-plans-v1_4/q72/simplified.txt | 136 +- .../approved-plans-v1_4/q73/explain.txt | 235 +++- .../approved-plans-v1_4/q73/simplified.txt | 60 +- .../approved-plans-v1_4/q74/explain.txt | 488 +++++-- .../approved-plans-v1_4/q74/simplified.txt | 181 ++- .../approved-plans-v1_4/q75/explain.txt | 762 +++++++++-- .../approved-plans-v1_4/q75/simplified.txt | 259 ++-- .../approved-plans-v1_4/q76/explain.txt | 246 +++- .../approved-plans-v1_4/q76/simplified.txt | 85 +- .../approved-plans-v1_4/q77/explain.txt | 618 +++++++-- .../approved-plans-v1_4/q77/simplified.txt | 208 ++- .../approved-plans-v1_4/q78/explain.txt | 400 +++++- .../approved-plans-v1_4/q78/simplified.txt | 123 +- .../approved-plans-v1_4/q79/explain.txt | 223 +++- .../approved-plans-v1_4/q79/simplified.txt | 68 +- .../approved-plans-v1_4/q8/explain.txt | 315 ++++- .../approved-plans-v1_4/q8/simplified.txt | 99 +- .../approved-plans-v1_4/q80/explain.txt | 648 ++++++++-- .../approved-plans-v1_4/q80/simplified.txt | 195 +-- .../approved-plans-v1_4/q81/explain.txt | 348 ++++- .../approved-plans-v1_4/q81/simplified.txt | 101 +- .../approved-plans-v1_4/q82/explain.txt | 184 ++- .../approved-plans-v1_4/q82/simplified.txt | 51 +- .../approved-plans-v1_4/q83/explain.txt | 411 +++++- .../approved-plans-v1_4/q83/simplified.txt | 123 +- .../approved-plans-v1_4/q84/explain.txt | 233 +++- .../approved-plans-v1_4/q84/simplified.txt | 58 +- .../approved-plans-v1_4/q85/explain.txt | 335 ++++- .../approved-plans-v1_4/q85/simplified.txt | 106 +- .../approved-plans-v1_4/q86/explain.txt | 165 ++- .../approved-plans-v1_4/q86/simplified.txt | 50 +- .../approved-plans-v1_4/q87/explain.txt | 375 +++++- .../approved-plans-v1_4/q87/simplified.txt | 138 +- .../approved-plans-v1_4/q88/explain.txt | 1123 ++++++++++++++--- .../approved-plans-v1_4/q88/simplified.txt | 242 ++-- .../approved-plans-v1_4/q89/explain.txt | 204 ++- .../approved-plans-v1_4/q89/simplified.txt | 67 +- .../approved-plans-v1_4/q9/explain.txt | 825 ++++++++++-- .../approved-plans-v1_4/q9/simplified.txt | 186 +-- .../approved-plans-v1_4/q90/explain.txt | 325 ++++- .../approved-plans-v1_4/q90/simplified.txt | 74 +- .../approved-plans-v1_4/q91/explain.txt | 307 ++++- .../approved-plans-v1_4/q91/simplified.txt | 82 +- .../approved-plans-v1_4/q92/explain.txt | 227 +++- .../approved-plans-v1_4/q92/simplified.txt | 62 +- .../approved-plans-v1_4/q93/explain.txt | 127 +- .../approved-plans-v1_4/q93/simplified.txt | 37 +- .../approved-plans-v1_4/q94/explain.txt | 270 +++- .../approved-plans-v1_4/q94/simplified.txt | 73 +- .../approved-plans-v1_4/q95/explain.txt | 370 +++++- .../approved-plans-v1_4/q95/simplified.txt | 95 +- .../approved-plans-v1_4/q96/explain.txt | 184 ++- .../approved-plans-v1_4/q96/simplified.txt | 41 +- .../approved-plans-v1_4/q97/explain.txt | 204 ++- .../approved-plans-v1_4/q97/simplified.txt | 89 +- .../approved-plans-v1_4/q98/explain.txt | 171 ++- .../approved-plans-v1_4/q98/simplified.txt | 54 +- .../approved-plans-v1_4/q99/explain.txt | 213 +++- .../approved-plans-v1_4/q99/simplified.txt | 64 +- .../goldstandard/IndexLogEntryCreator.scala | 18 +- .../goldstandard/MockSignatureProvider.scala | 16 + .../goldstandard/TPCDS_Hyperspace.scala | 16 + 209 files changed, 36733 insertions(+), 10431 deletions(-) create mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q0/explain.txt create mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q0/simplified.txt create mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/explain.txt create mode 100644 src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/simplified.txt diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q0/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q0/explain.txt new file mode 100644 index 000000000..6798acb39 --- /dev/null +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q0/explain.txt @@ -0,0 +1,11 @@ +== Physical Plan == +CollectLimit 100 ++- *(2) Project [d_year#1, ss_customer_sk#2] + +- *(2) BroadcastHashJoin [d_date_sk#3], [ss_sold_date_sk#4], Inner, BuildRight + :- *(2) Project [d_date_sk#3, d_year#1] + : +- *(2) Filter isnotnull(d_date_sk#3) + : +- *(2) FileScan parquet default.date_dim[d_date_sk#3,d_year#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) + +- *(1) Project [ss_sold_date_sk#4, ss_customer_sk#2] + +- *(1) Filter isnotnull(ss_sold_date_sk#4) + +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#4,ss_customer_sk#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q0/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q0/simplified.txt new file mode 100644 index 000000000..7055dbafb --- /dev/null +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q0/simplified.txt @@ -0,0 +1,13 @@ +CollectLimit + WholeStageCodegen + Project [d_year,ss_customer_sk] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + Project [d_date_sk,d_year] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + BroadcastExchange #1 + WholeStageCodegen + Project [ss_customer_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/explain.txt index 1462b13af..509ce3ac4 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/explain.txt @@ -1,49 +1,279 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[cd_gender#1 ASC NULLS FIRST,cd_marital_status#2 ASC NULLS FIRST,cd_education_status#3 ASC NULLS FIRST,cd_purchase_estimate#4 ASC NULLS FIRST,cd_credit_rating#5 ASC NULLS FIRST,cd_dep_count#6 ASC NULLS FIRST,cd_dep_employed_count#7 ASC NULLS FIRST,cd_dep_college_count#8 ASC NULLS FIRST], output=[cd_gender#1,cd_marital_status#2,cd_education_status#3,cnt1#9,cd_purchase_estimate#4,cnt2#10,cd_credit_rating#5,cnt3#11,cd_dep_count#6,cnt4#12,cd_dep_employed_count#7,cnt5#13,cd_dep_college_count#8,cnt6#14]) -+- *(10) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8], functions=[count(1)]) - +- Exchange hashpartitioning(cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8, 5) - +- *(9) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8], functions=[partial_count(1)]) - +- *(9) Project [cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8] - +- *(9) BroadcastHashJoin [c_current_cdemo_sk#15], [cd_demo_sk#16], Inner, BuildRight - :- *(9) Project [c_current_cdemo_sk#15] - : +- *(9) BroadcastHashJoin [c_current_addr_sk#17], [ca_address_sk#18], Inner, BuildRight - : :- *(9) Project [c_current_cdemo_sk#15, c_current_addr_sk#17] - : : +- *(9) Filter (exists#19 || exists#20) - : : +- *(9) BroadcastHashJoin [c_customer_sk#21], [cs_ship_customer_sk#22], ExistenceJoin(exists#20), BuildRight - : : :- *(9) BroadcastHashJoin [c_customer_sk#21], [ws_bill_customer_sk#23], ExistenceJoin(exists#19), BuildRight - : : : :- *(9) BroadcastHashJoin [c_customer_sk#21], [ss_customer_sk#24], LeftSemi, BuildRight - : : : : :- *(9) Project [c_customer_sk#21, c_current_cdemo_sk#15, c_current_addr_sk#17] - : : : : : +- *(9) Filter (isnotnull(c_current_addr_sk#17) && isnotnull(c_current_cdemo_sk#15)) - : : : : : +- *(9) FileScan parquet default.customer[c_customer_sk#21,c_current_cdemo_sk#15,c_current_addr_sk#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [ss_customer_sk#24] - : : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#25], [d_date_sk#26], Inner, BuildRight - : : : : :- *(2) Project [ss_sold_date_sk#25, ss_customer_sk#24] - : : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#25) - : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#25,ss_customer_sk#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(1) Project [d_date_sk#26] - : : : : +- *(1) Filter (((((isnotnull(d_year#27) && isnotnull(d_moy#28)) && (d_year#27 = 2002)) && (d_moy#28 >= 1)) && (d_moy#28 <= 4)) && isnotnull(d_date_sk#26)) - : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#26,d_year#27,d_moy#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,1), LessThan..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(4) Project [ws_bill_customer_sk#23] - : : : +- *(4) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#26], Inner, BuildRight - : : : :- *(4) Project [ws_sold_date_sk#29, ws_bill_customer_sk#23] - : : : : +- *(4) Filter isnotnull(ws_sold_date_sk#29) - : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#29,ws_bill_customer_sk#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(6) Project [cs_ship_customer_sk#22] - : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#30], [d_date_sk#26], Inner, BuildRight - : : :- *(6) Project [cs_sold_date_sk#30, cs_ship_customer_sk#22] - : : : +- *(6) Filter isnotnull(cs_sold_date_sk#30) - : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#30,cs_ship_customer_sk#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(7) Project [ca_address_sk#18] - : +- *(7) Filter (ca_county#31 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) && isnotnull(ca_address_sk#18)) - : +- *(7) FileScan parquet default.customer_address[ca_address_sk#18,ca_county#31] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [In(ca_county, [Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County]), IsNo..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(8) Project [cd_demo_sk#16, cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, cd_dep_count#6, cd_dep_employed_count#7, cd_dep_college_count#8] - +- *(8) Filter isnotnull(cd_demo_sk#16) - +- *(8) FileScan parquet default.customer_demographics[cd_demo_sk#16,cd_gender#1,cd_marital_status#2,cd_education_status#3,cd_purchase_estimate#4,cd_credit_rating#5,cd_dep_count#6,cd_dep_employed_count#7,cd_dep_college_count#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(4) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#6, ss_customer_sk#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_sold_date_sk#6, ss_customer_sk#7] + +(6) Filter [codegen id : 2] +Input [2]: [ss_sold_date_sk#6, ss_customer_sk#7] +Condition : isnotnull(ss_sold_date_sk#6) + +(7) Scan parquet default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_moy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,1), LessThanOrEqual(d_moy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(9) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] +Condition : (((((isnotnull(d_year#9) AND isnotnull(d_moy#10)) AND (d_year#9 = 2002)) AND (d_moy#10 >= 1)) AND (d_moy#10 <= 4)) AND isnotnull(d_date_sk#8)) + +(10) Project [codegen id : 1] +Output [1]: [d_date_sk#8] +Input [3]: [d_date_sk#8, d_year#9, d_moy#10] + +(11) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(12) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(13) Project [codegen id : 2] +Output [1]: [ss_customer_sk#7] +Input [3]: [ss_sold_date_sk#6, ss_customer_sk#7, d_date_sk#8] + +(14) BroadcastExchange +Input [1]: [ss_customer_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(15) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#7] +Join condition: None + +(16) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 4] +Input [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] + +(18) Filter [codegen id : 4] +Input [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] +Condition : isnotnull(ws_sold_date_sk#13) + +(19) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#8] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#13] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(21) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#14] +Input [3]: [ws_sold_date_sk#13, ws_bill_customer_sk#14, d_date_sk#8] + +(22) BroadcastExchange +Input [1]: [ws_bill_customer_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(23) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#14] +Join condition: None + +(24) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 6] +Input [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] + +(26) Filter [codegen id : 6] +Input [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] +Condition : isnotnull(cs_sold_date_sk#16) + +(27) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#8] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(29) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#17] +Input [3]: [cs_sold_date_sk#16, cs_ship_customer_sk#17, d_date_sk#8] + +(30) BroadcastExchange +Input [1]: [cs_ship_customer_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(31) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#17] +Join condition: None + +(32) Filter [codegen id : 9] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(33) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(34) Scan parquet default.customer_address +Output [2]: [ca_address_sk#19, ca_county#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_county, [Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#19, ca_county#20] + +(36) Filter [codegen id : 7] +Input [2]: [ca_address_sk#19, ca_county#20] +Condition : (ca_county#20 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) AND isnotnull(ca_address_sk#19)) + +(37) Project [codegen id : 7] +Output [1]: [ca_address_sk#19] +Input [2]: [ca_address_sk#19, ca_county#20] + +(38) BroadcastExchange +Input [1]: [ca_address_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#19] +Join condition: None + +(40) Project [codegen id : 9] +Output [1]: [c_current_cdemo_sk#4] +Input [3]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#19] + +(41) Scan parquet default.customer_demographics +Output [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(42) ColumnarToRow [codegen id : 8] +Input [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] + +(43) Filter [codegen id : 8] +Input [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Condition : isnotnull(cd_demo_sk#22) + +(44) BroadcastExchange +Input [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] + +(45) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#22] +Join condition: None + +(46) Project [codegen id : 9] +Output [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Input [10]: [c_current_cdemo_sk#4, cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] + +(47) HashAggregate [codegen id : 9] +Input [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Keys [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#32] +Results [9]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#33] + +(48) Exchange +Input [9]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#33] +Arguments: hashpartitioning(cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, 5), true, [id=#34] + +(49) HashAggregate [codegen id : 10] +Input [9]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#33] +Keys [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#35] +Results [14]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, count(1)#35 AS cnt1#36, cd_purchase_estimate#26, count(1)#35 AS cnt2#37, cd_credit_rating#27, count(1)#35 AS cnt3#38, cd_dep_count#28, count(1)#35 AS cnt4#39, cd_dep_employed_count#29, count(1)#35 AS cnt5#40, cd_dep_college_count#30, count(1)#35 AS cnt6#41] + +(50) TakeOrderedAndProject +Input [14]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cnt1#36, cd_purchase_estimate#26, cnt2#37, cd_credit_rating#27, cnt3#38, cd_dep_count#28, cnt4#39, cd_dep_employed_count#29, cnt5#40, cd_dep_college_count#30, cnt6#41] +Arguments: 100, [cd_gender#23 ASC NULLS FIRST, cd_marital_status#24 ASC NULLS FIRST, cd_education_status#25 ASC NULLS FIRST, cd_purchase_estimate#26 ASC NULLS FIRST, cd_credit_rating#27 ASC NULLS FIRST, cd_dep_count#28 ASC NULLS FIRST, cd_dep_employed_count#29 ASC NULLS FIRST, cd_dep_college_count#30 ASC NULLS FIRST], [cd_gender#23, cd_marital_status#24, cd_education_status#25, cnt1#36, cd_purchase_estimate#26, cnt2#37, cd_credit_rating#27, cnt3#38, cd_dep_count#28, cnt4#39, cd_dep_employed_count#29, cnt5#40, cd_dep_college_count#30, cnt6#41] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/simplified.txt index bba77af4d..2eb1bd3c5 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q10/simplified.txt @@ -1,65 +1,74 @@ -TakeOrderedAndProject [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,cnt1,cnt2,cnt3,cnt4,cnt5,cnt6] - WholeStageCodegen - HashAggregate [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,count,count(1)] [cnt1,cnt2,cnt3,cnt4,cnt5,cnt6,count,count(1)] +TakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,cnt1,cnt2,cnt3,cnt4,cnt5,cnt6] + WholeStageCodegen (10) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count] [count(1),cnt1,cnt2,cnt3,cnt4,cnt5,cnt6,count] InputAdapter - Exchange [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] #1 - WholeStageCodegen - HashAggregate [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,count,count] [count,count] - Project [cd_credit_rating,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] + Exchange [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (9) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,count] + Project [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] Project [c_current_cdemo_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,c_current_cdemo_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] Filter [exists,exists] BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] - Filter [c_current_addr_sk,c_current_cdemo_sk] - Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (2) Project [ss_customer_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_customer_sk,ss_sold_date_sk] - Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (4) Project [ws_bill_customer_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_customer_sk,ws_sold_date_sk] - Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_sold_date_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 + ReusedExchange [d_date_sk] #3 InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (6) Project [cs_ship_customer_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_ship_customer_sk,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] [cs_ship_customer_sk,cs_sold_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_customer_sk] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 + ReusedExchange [d_date_sk] #3 InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (7) Project [ca_address_sk] - Filter [ca_address_sk,ca_county] - Scan parquet default.customer_address [ca_address_sk,ca_county] [ca_address_sk,ca_county] + Filter [ca_county,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county] InputAdapter BroadcastExchange #7 - WholeStageCodegen - Project [cd_credit_rating,cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] - Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_credit_rating,cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] [cd_credit_rating,cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q11/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q11/explain.txt index c17229a91..246a5d5b0 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q11/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q11/explain.txt @@ -1,81 +1,415 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[customer_preferred_cust_flag#1 ASC NULLS FIRST], output=[customer_preferred_cust_flag#1]) -+- *(17) Project [customer_preferred_cust_flag#1] - +- *(17) BroadcastHashJoin [customer_id#2], [customer_id#3], Inner, BuildRight, (CASE WHEN (year_total#4 > 0.00) THEN CheckOverflow((promote_precision(year_total#5) / promote_precision(year_total#4)), DecimalType(38,20)) ELSE null END > CASE WHEN (year_total#6 > 0.00) THEN CheckOverflow((promote_precision(year_total#7) / promote_precision(year_total#6)), DecimalType(38,20)) ELSE null END) - :- *(17) Project [customer_id#2, year_total#6, customer_preferred_cust_flag#1, year_total#7, year_total#4] - : +- *(17) BroadcastHashJoin [customer_id#2], [customer_id#8], Inner, BuildRight - : :- *(17) Project [customer_id#2, year_total#6, customer_preferred_cust_flag#1, year_total#7] - : : +- *(17) BroadcastHashJoin [customer_id#2], [customer_id#9], Inner, BuildRight - : : :- Union - : : : :- *(4) Filter (isnotnull(year_total#6) && (year_total#6 > 0.00)) - : : : : +- *(4) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) - : : : : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, 5) - : : : : +- *(3) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) - : : : : +- *(3) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_ext_discount_amt#19, ss_ext_list_price#18, d_year#13] - : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight - : : : : :- *(3) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_sold_date_sk#20, ss_ext_discount_amt#19, ss_ext_list_price#18] - : : : : : +- *(3) BroadcastHashJoin [c_customer_sk#22], [ss_customer_sk#23], Inner, BuildRight - : : : : : :- *(3) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] - : : : : : : +- *(3) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) - : : : : : : +- *(3) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - : : : +- LocalTableScan , [customer_id#24, year_total#25] - : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : : +- Union - : : :- *(8) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) - : : : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, 5) - : : : +- *(7) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, d_year#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#18 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#19 as decimal(8,2)))), DecimalType(8,2))))]) - : : : +- *(7) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_ext_discount_amt#19, ss_ext_list_price#18, d_year#13] - : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight - : : : :- *(7) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_sold_date_sk#20, ss_ext_discount_amt#19, ss_ext_list_price#18] - : : : : +- *(7) BroadcastHashJoin [c_customer_sk#22], [ss_customer_sk#23], Inner, BuildRight - : : : : :- *(7) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] - : : : : : +- *(7) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) - : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - : : +- LocalTableScan , [customer_id#24, customer_preferred_cust_flag#26, year_total#25] - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : +- Union - : :- LocalTableScan , [customer_id#8, year_total#4] - : +- *(12) Filter (isnotnull(year_total#25) && (year_total#25 > 0.00)) - : +- *(12) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) - : +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13, 5) - : +- *(11) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) - : +- *(11) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_ext_discount_amt#28, ws_ext_list_price#27, d_year#13] - : +- *(11) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#21], Inner, BuildRight - : :- *(11) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_sold_date_sk#29, ws_ext_discount_amt#28, ws_ext_list_price#27] - : : +- *(11) BroadcastHashJoin [c_customer_sk#22], [ws_bill_customer_sk#30], Inner, BuildRight - : : :- *(11) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] - : : : +- *(11) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) - : : : +- *(11) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#3, year_total#5] - +- *(16) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) - +- Exchange hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13, 5) - +- *(15) HashAggregate(keys=[c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#13], functions=[partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#28 as decimal(8,2)))), DecimalType(8,2))))]) - +- *(15) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_ext_discount_amt#28, ws_ext_list_price#27, d_year#13] - +- *(15) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#21], Inner, BuildRight - :- *(15) Project [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ws_sold_date_sk#29, ws_ext_discount_amt#28, ws_ext_list_price#27] - : +- *(15) BroadcastHashJoin [c_customer_sk#22], [ws_bill_customer_sk#30], Inner, BuildRight - : :- *(15) Project [c_customer_sk#22, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] - : : +- *(15) Filter (isnotnull(c_customer_sk#22) && isnotnull(c_customer_id#10)) - : : +- *(15) FileScan parquet default.customer[c_customer_sk#22,c_customer_id#10,c_first_name#11,c_last_name#12,c_preferred_cust_flag#14,c_birth_country#15,c_login#16,c_email_address#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(3) Filter [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(4) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] + +(6) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] +Condition : (isnotnull(ss_customer_sk#10) AND isnotnull(ss_sold_date_sk#9)) + +(7) BroadcastExchange +Input [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#13] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#10] +Join condition: None + +(9) Project [codegen id : 3] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_list_price#12] +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2001)) AND isnotnull(d_date_sk#14)) + +(13) BroadcastExchange +Input [2]: [d_date_sk#14, d_year#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#9] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(15) Project [codegen id : 3] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_list_price#12, d_year#15] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_list_price#12, d_date_sk#14, d_year#15] + +(16) HashAggregate [codegen id : 3] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_list_price#12, d_year#15] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#17] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] + +(17) Exchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), true, [id=#19] + +(18) HashAggregate [codegen id : 16] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))#20] +Results [2]: [c_customer_id#2 AS customer_id#21, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))#20,18,2) AS year_total#22] + +(19) Filter [codegen id : 16] +Input [2]: [customer_id#21, year_total#22] +Condition : (isnotnull(year_total#22) AND (year_total#22 > 0.00)) + +(20) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(22) Filter [codegen id : 6] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(23) ReusedExchange [Reuses operator id: 7] +Output [4]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#10] +Join condition: None + +(25) Project [codegen id : 6] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_list_price#12] +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_list_price#12] + +(26) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#14, d_year#15] + +(28) Filter [codegen id : 5] +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2002)) AND isnotnull(d_date_sk#14)) + +(29) BroadcastExchange +Input [2]: [d_date_sk#14, d_year#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#9] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(31) Project [codegen id : 6] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_list_price#12, d_year#15] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_list_price#12, d_date_sk#14, d_year#15] + +(32) HashAggregate [codegen id : 6] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_list_price#12, d_year#15] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#24] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#25] + +(33) Exchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#25] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), true, [id=#26] + +(34) HashAggregate [codegen id : 7] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#25] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))#27] +Results [3]: [c_customer_id#2 AS customer_id#28, c_preferred_cust_flag#5 AS customer_preferred_cust_flag#29, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#12 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(8,2)))), DecimalType(8,2), true)))#27,18,2) AS year_total#30] + +(35) BroadcastExchange +Input [3]: [customer_id#28, customer_preferred_cust_flag#29, year_total#30] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#31] + +(36) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#21] +Right keys [1]: [customer_id#28] +Join condition: None + +(37) Project [codegen id : 16] +Output [4]: [customer_id#21, year_total#22, customer_preferred_cust_flag#29, year_total#30] +Input [5]: [customer_id#21, year_total#22, customer_id#28, customer_preferred_cust_flag#29, year_total#30] + +(38) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 10] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(40) Filter [codegen id : 10] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(41) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(42) ColumnarToRow [codegen id : 8] +Input [4]: [ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] + +(43) Filter [codegen id : 8] +Input [4]: [ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] +Condition : (isnotnull(ws_bill_customer_sk#33) AND isnotnull(ws_sold_date_sk#32)) + +(44) BroadcastExchange +Input [4]: [ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#36] + +(45) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#33] +Join condition: None + +(46) Project [codegen id : 10] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#32, ws_ext_discount_amt#34, ws_ext_list_price#35] +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] + +(47) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#14, d_year#15] + +(48) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(49) Project [codegen id : 10] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#34, ws_ext_list_price#35, d_year#15] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#32, ws_ext_discount_amt#34, ws_ext_list_price#35, d_date_sk#14, d_year#15] + +(50) HashAggregate [codegen id : 10] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#34, ws_ext_list_price#35, d_year#15] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#37] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#38] + +(51) Exchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#38] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, 5), true, [id=#39] + +(52) HashAggregate [codegen id : 11] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#38] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))#40] +Results [2]: [c_customer_id#2 AS customer_id#41, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))#40,18,2) AS year_total#42] + +(53) Filter [codegen id : 11] +Input [2]: [customer_id#41, year_total#42] +Condition : (isnotnull(year_total#42) AND (year_total#42 > 0.00)) + +(54) Project [codegen id : 11] +Output [2]: [customer_id#41 AS customer_id#43, year_total#42 AS year_total#44] +Input [2]: [customer_id#41, year_total#42] + +(55) BroadcastExchange +Input [2]: [customer_id#43, year_total#44] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#45] + +(56) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#21] +Right keys [1]: [customer_id#43] +Join condition: None + +(57) Project [codegen id : 16] +Output [5]: [customer_id#21, year_total#22, customer_preferred_cust_flag#29, year_total#30, year_total#44] +Input [6]: [customer_id#21, year_total#22, customer_preferred_cust_flag#29, year_total#30, customer_id#43, year_total#44] + +(58) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(59) ColumnarToRow [codegen id : 14] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(60) Filter [codegen id : 14] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(61) ReusedExchange [Reuses operator id: 44] +Output [4]: [ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] + +(62) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#33] +Join condition: None + +(63) Project [codegen id : 14] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#32, ws_ext_discount_amt#34, ws_ext_list_price#35] +Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#32, ws_bill_customer_sk#33, ws_ext_discount_amt#34, ws_ext_list_price#35] + +(64) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#14, d_year#15] + +(65) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(66) Project [codegen id : 14] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#34, ws_ext_list_price#35, d_year#15] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#32, ws_ext_discount_amt#34, ws_ext_list_price#35, d_date_sk#14, d_year#15] + +(67) HashAggregate [codegen id : 14] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#34, ws_ext_list_price#35, d_year#15] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum#46] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#47] + +(68) Exchange +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#47] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, 5), true, [id=#48] + +(69) HashAggregate [codegen id : 15] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15, sum#47] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#15] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))#49] +Results [2]: [c_customer_id#2 AS customer_id#50, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#35 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#34 as decimal(8,2)))), DecimalType(8,2), true)))#49,18,2) AS year_total#51] + +(70) BroadcastExchange +Input [2]: [customer_id#50, year_total#51] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#52] + +(71) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#21] +Right keys [1]: [customer_id#50] +Join condition: (CASE WHEN (year_total#44 > 0.00) THEN CheckOverflow((promote_precision(year_total#51) / promote_precision(year_total#44)), DecimalType(38,20), true) ELSE null END > CASE WHEN (year_total#22 > 0.00) THEN CheckOverflow((promote_precision(year_total#30) / promote_precision(year_total#22)), DecimalType(38,20), true) ELSE null END) + +(72) Project [codegen id : 16] +Output [1]: [customer_preferred_cust_flag#29] +Input [7]: [customer_id#21, year_total#22, customer_preferred_cust_flag#29, year_total#30, year_total#44, customer_id#50, year_total#51] + +(73) TakeOrderedAndProject +Input [1]: [customer_preferred_cust_flag#29] +Arguments: 100, [customer_preferred_cust_flag#29 ASC NULLS FIRST], [customer_preferred_cust_flag#29] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q11/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q11/simplified.txt index b2c7f30e5..3850cea62 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q11/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q11/simplified.txt @@ -1,109 +1,108 @@ TakeOrderedAndProject [customer_preferred_cust_flag] - WholeStageCodegen + WholeStageCodegen (16) Project [customer_preferred_cust_flag] BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] - Project [customer_id,customer_preferred_cust_flag,year_total,year_total,year_total] + Project [customer_id,year_total,customer_preferred_cust_flag,year_total,year_total] BroadcastHashJoin [customer_id,customer_id] - Project [customer_id,customer_preferred_cust_flag,year_total,year_total] + Project [customer_id,year_total,customer_preferred_cust_flag,year_total] BroadcastHashJoin [customer_id,customer_id] - InputAdapter - Union - WholeStageCodegen - Filter [year_total] - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2))))] [customer_id,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),year_total] - InputAdapter - Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #1 - WholeStageCodegen - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,sum,sum] [sum,sum] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] - Filter [c_customer_id,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #1 + WholeStageCodegen (3) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_list_price,ss_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_sold_date_sk,ss_ext_discount_amt,ss_ext_list_price] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] - Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk,d_year] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - LocalTableScan [customer_id,year_total] [customer_id,year_total] + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #4 - Union - WholeStageCodegen - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2))))] [customer_id,customer_preferred_cust_flag,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),year_total] - InputAdapter - Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #5 - WholeStageCodegen - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,sum,sum] [sum,sum] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] - Filter [c_customer_id,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] - InputAdapter - ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_sold_date_sk] #2 - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [d_date_sk,d_year] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - LocalTableScan [customer_id,customer_preferred_cust_flag,year_total] [customer_id,customer_preferred_cust_flag,year_total] + WholeStageCodegen (7) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,customer_preferred_cust_flag,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] #5 + WholeStageCodegen (6) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_list_price,ss_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_list_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_sold_date_sk,ss_ext_discount_amt,ss_ext_list_price] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [ss_sold_date_sk,ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #7 - Union - LocalTableScan [customer_id,year_total] [customer_id,year_total] - WholeStageCodegen + WholeStageCodegen (11) + Project [customer_id,year_total] Filter [year_total] - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2))))] [customer_id,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,year_total,sum] InputAdapter - Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #8 - WholeStageCodegen - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum,ws_ext_discount_amt,ws_ext_list_price] [sum,sum] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #8 + WholeStageCodegen (10) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ws_ext_list_price,ws_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_sold_date_sk,ws_ext_discount_amt,ws_ext_list_price] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] - Filter [c_customer_id,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] InputAdapter BroadcastExchange #9 - WholeStageCodegen - Project [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] - Filter [ws_bill_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] + WholeStageCodegen (8) + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price] InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 + ReusedExchange [d_date_sk,d_year] #3 InputAdapter BroadcastExchange #10 - Union - LocalTableScan [customer_id,year_total] [customer_id,year_total] - WholeStageCodegen - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2))))] [customer_id,sum,sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2)))),year_total] - InputAdapter - Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #11 - WholeStageCodegen - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum,ws_ext_discount_amt,ws_ext_list_price] [sum,sum] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] - Filter [c_customer_id,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] - InputAdapter - ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_sold_date_sk] #9 - InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #6 + WholeStageCodegen (15) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum] [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(8,2)))), DecimalType(8,2), true))),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #11 + WholeStageCodegen (14) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ws_ext_list_price,ws_ext_discount_amt] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_list_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_sold_date_sk,ws_ext_discount_amt,ws_ext_list_price] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [ws_sold_date_sk,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/explain.txt index fd6e68bce..f786839e8 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/explain.txt @@ -1,24 +1,137 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_category#1 ASC NULLS FIRST,i_class#2 ASC NULLS FIRST,i_item_id#3 ASC NULLS FIRST,i_item_desc#4 ASC NULLS FIRST,revenueratio#5 ASC NULLS FIRST], output=[i_item_desc#4,i_category#1,i_class#2,i_current_price#6,itemrevenue#7,revenueratio#5]) -+- *(6) Project [i_item_desc#4, i_category#1, i_class#2, i_current_price#6, itemrevenue#7, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#8) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#9)), DecimalType(38,17)) AS revenueratio#5, i_item_id#3] - +- Window [sum(_w1#10) windowspecdefinition(i_class#2, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#9], [i_class#2] - +- *(5) Sort [i_class#2 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(i_class#2, 5) - +- *(4) HashAggregate(keys=[i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6], functions=[sum(UnscaledValue(ws_ext_sales_price#11))]) - +- Exchange hashpartitioning(i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6, 5) - +- *(3) HashAggregate(keys=[i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#11))]) - +- *(3) Project [ws_ext_sales_price#11, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] - +- *(3) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight - :- *(3) Project [ws_sold_date_sk#12, ws_ext_sales_price#11, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] - : +- *(3) BroadcastHashJoin [ws_item_sk#14], [i_item_sk#15], Inner, BuildRight - : :- *(3) Project [ws_sold_date_sk#12, ws_item_sk#14, ws_ext_sales_price#11] - : : +- *(3) Filter (isnotnull(ws_item_sk#14) && isnotnull(ws_sold_date_sk#12)) - : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#14,ws_ext_sales_price#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [i_item_sk#15, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] - : +- *(1) Filter (i_category#1 IN (Sports,Books,Home) && isnotnull(i_item_sk#15)) - : +- *(1) FileScan parquet default.item[i_item_sk#15,i_item_id#3,i_item_desc#4,i_current_price#6,i_class#2,i_category#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)], ReadSchema: struct= 10644)) && (d_date#16 <= 10674)) && isnotnull(d_date_sk#13)) - +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (24) ++- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.web_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet default.date_dim (10) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3] + +(3) Filter [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3] +Condition : (isnotnull(ws_item_sk#2) AND isnotnull(ws_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports,Books,Home) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [ws_sold_date_sk#1, ws_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_sales_price#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#11, d_date#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] +Condition : (((isnotnull(d_date#12) AND (d_date#12 >= 10644)) AND (d_date#12 <= 10674)) AND isnotnull(d_date_sk#11)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#12] + +(14) BroadcastExchange +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ws_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [ws_sold_date_sk#1, ws_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#11] + +(17) HashAggregate [codegen id : 3] +Input [6]: [ws_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), true, [id=#16] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#3))#17] +Results [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#3))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#3))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#3))#17,17,2) AS _w1#20, i_item_id#5] + +(20) Exchange +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), true, [id=#21] + +(21) Sort [codegen id : 5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: [sum(_w1#20) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23, i_item_id#5] +Input [9]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5, _we0#22] + +(24) TakeOrderedAndProject +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23, i_item_id#5] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/simplified.txt index b815e3d91..f67bfe65f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q12/simplified.txt @@ -1,34 +1,38 @@ -TakeOrderedAndProject [i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue,revenueratio] - WholeStageCodegen - Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (6) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] InputAdapter Window [_w1,i_class] - WholeStageCodegen + WholeStageCodegen (5) Sort [i_class] InputAdapter Exchange [i_class] #1 - WholeStageCodegen - HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(ws_ext_sales_price))] + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(ws_ext_sales_price)),itemrevenue,_w0,_w1,sum] InputAdapter - Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #2 - WholeStageCodegen - HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum,ws_ext_sales_price] [sum,sum] - Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ws_ext_sales_price] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ws_ext_sales_price,ws_sold_date_sk] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + WholeStageCodegen (3) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] - Filter [i_category,i_item_sk] - Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/explain.txt index bf01d0881..d0c4e0e49 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/explain.txt @@ -1,37 +1,216 @@ == Physical Plan == -*(7) HashAggregate(keys=[], functions=[avg(cast(ss_quantity#1 as bigint)), avg(UnscaledValue(ss_ext_sales_price#2)), avg(UnscaledValue(ss_ext_wholesale_cost#3)), sum(UnscaledValue(ss_ext_wholesale_cost#3))]) -+- Exchange SinglePartition - +- *(6) HashAggregate(keys=[], functions=[partial_avg(cast(ss_quantity#1 as bigint)), partial_avg(UnscaledValue(ss_ext_sales_price#2)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#3)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#3))]) - +- *(6) Project [ss_quantity#1, ss_ext_sales_price#2, ss_ext_wholesale_cost#3] - +- *(6) BroadcastHashJoin [ss_hdemo_sk#4], [hd_demo_sk#5], Inner, BuildRight, (((((((cd_marital_status#6 = M) && (cd_education_status#7 = Advanced Degree)) && (ss_sales_price#8 >= 100.00)) && (ss_sales_price#8 <= 150.00)) && (hd_dep_count#9 = 3)) || (((((cd_marital_status#6 = S) && (cd_education_status#7 = College)) && (ss_sales_price#8 >= 50.00)) && (ss_sales_price#8 <= 100.00)) && (hd_dep_count#9 = 1))) || (((((cd_marital_status#6 = W) && (cd_education_status#7 = 2 yr Degree)) && (ss_sales_price#8 >= 150.00)) && (ss_sales_price#8 <= 200.00)) && (hd_dep_count#9 = 1))) - :- *(6) Project [ss_hdemo_sk#4, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3, cd_marital_status#6, cd_education_status#7] - : +- *(6) BroadcastHashJoin [ss_cdemo_sk#10], [cd_demo_sk#11], Inner, BuildRight - : :- *(6) Project [ss_cdemo_sk#10, ss_hdemo_sk#4, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3] - : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight - : : :- *(6) Project [ss_sold_date_sk#12, ss_cdemo_sk#10, ss_hdemo_sk#4, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3] - : : : +- *(6) BroadcastHashJoin [ss_addr_sk#14], [ca_address_sk#15], Inner, BuildRight, ((((ca_state#16 IN (TX,OH) && (ss_net_profit#17 >= 100.00)) && (ss_net_profit#17 <= 200.00)) || ((ca_state#16 IN (OR,NM,KY) && (ss_net_profit#17 >= 150.00)) && (ss_net_profit#17 <= 300.00))) || ((ca_state#16 IN (VA,TX,MS) && (ss_net_profit#17 >= 50.00)) && (ss_net_profit#17 <= 250.00))) - : : : :- *(6) Project [ss_sold_date_sk#12, ss_cdemo_sk#10, ss_hdemo_sk#4, ss_addr_sk#14, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3, ss_net_profit#17] - : : : : +- *(6) BroadcastHashJoin [ss_store_sk#18], [s_store_sk#19], Inner, BuildRight - : : : : :- *(6) Project [ss_sold_date_sk#12, ss_cdemo_sk#10, ss_hdemo_sk#4, ss_addr_sk#14, ss_store_sk#18, ss_quantity#1, ss_sales_price#8, ss_ext_sales_price#2, ss_ext_wholesale_cost#3, ss_net_profit#17] - : : : : : +- *(6) Filter ((((isnotnull(ss_store_sk#18) && isnotnull(ss_addr_sk#14)) && isnotnull(ss_sold_date_sk#12)) && isnotnull(ss_cdemo_sk#10)) && isnotnull(ss_hdemo_sk#4)) - : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_cdemo_sk#10,ss_hdemo_sk#4,ss_addr_sk#14,ss_store_sk#18,ss_quantity#1,ss_sales_price#8,ss_ext_sales_price#2,ss_ext_wholesale_cost#3,ss_net_profit#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_cdemo_sk..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [ca_address_sk#15, ca_state#16] - : : : +- *(2) Filter ((isnotnull(ca_country#20) && (ca_country#20 = United States)) && isnotnull(ca_address_sk#15)) - : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#16,ca_country#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [d_date_sk#13] - : : +- *(3) Filter ((isnotnull(d_year#21) && (d_year#21 = 2001)) && isnotnull(d_date_sk#13)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#13,d_year#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [cd_demo_sk#11, cd_marital_status#6, cd_education_status#7] - : +- *(4) Filter isnotnull(cd_demo_sk#11) - : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#11,cd_marital_status#6,cd_education_status#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(5) Project [hd_demo_sk#5, hd_dep_count#9] - +- *(5) Filter isnotnull(hd_demo_sk#5) - +- *(5) FileScan parquet default.household_demographics[hd_demo_sk#5,hd_dep_count#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk)], ReadSchema: struct \ No newline at end of file +* HashAggregate (38) ++- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.store (4) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.customer_address (10) + : : +- BroadcastExchange (21) + : : +- * Project (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.date_dim (17) + : +- BroadcastExchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet default.customer_demographics (24) + +- BroadcastExchange (33) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet default.household_demographics (30) + + +(1) Scan parquet default.store_sales +Output [10]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_hdemo_sk), Or(Or(And(GreaterThanOrEqual(ss_net_profit,100.00),LessThanOrEqual(ss_net_profit,200.00)),And(GreaterThanOrEqual(ss_net_profit,150.00),LessThanOrEqual(ss_net_profit,300.00))),And(GreaterThanOrEqual(ss_net_profit,50.00),LessThanOrEqual(ss_net_profit,250.00))), Or(Or(And(GreaterThanOrEqual(ss_sales_price,100.00),LessThanOrEqual(ss_sales_price,150.00)),And(GreaterThanOrEqual(ss_sales_price,50.00),LessThanOrEqual(ss_sales_price,100.00))),And(GreaterThanOrEqual(ss_sales_price,150.00),LessThanOrEqual(ss_sales_price,200.00)))] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [10]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] + +(3) Filter [codegen id : 6] +Input [10]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] +Condition : ((((((isnotnull(ss_store_sk#5) AND isnotnull(ss_addr_sk#4)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_cdemo_sk#2)) AND isnotnull(ss_hdemo_sk#3)) AND ((((ss_net_profit#10 >= 100.00) AND (ss_net_profit#10 <= 200.00)) OR ((ss_net_profit#10 >= 150.00) AND (ss_net_profit#10 <= 300.00))) OR ((ss_net_profit#10 >= 50.00) AND (ss_net_profit#10 <= 250.00)))) AND ((((ss_sales_price#7 >= 100.00) AND (ss_sales_price#7 <= 150.00)) OR ((ss_sales_price#7 >= 50.00) AND (ss_sales_price#7 <= 100.00))) OR ((ss_sales_price#7 >= 150.00) AND (ss_sales_price#7 <= 200.00)))) + +(4) Scan parquet default.store +Output [1]: [s_store_sk#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [s_store_sk#11] + +(6) Filter [codegen id : 1] +Input [1]: [s_store_sk#11] +Condition : isnotnull(s_store_sk#11) + +(7) BroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#11] +Join condition: None + +(9) Project [codegen id : 6] +Output [9]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] +Input [11]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, s_store_sk#11] + +(10) Scan parquet default.customer_address +Output [3]: [ca_address_sk#13, ca_state#14, ca_country#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [TX,OH]),In(ca_state, [OR,NM,KY])),In(ca_state, [VA,TX,MS]))] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ca_address_sk#13, ca_state#14, ca_country#15] + +(12) Filter [codegen id : 2] +Input [3]: [ca_address_sk#13, ca_state#14, ca_country#15] +Condition : (((isnotnull(ca_country#15) AND (ca_country#15 = United States)) AND isnotnull(ca_address_sk#13)) AND ((ca_state#14 IN (TX,OH) OR ca_state#14 IN (OR,NM,KY)) OR ca_state#14 IN (VA,TX,MS))) + +(13) Project [codegen id : 2] +Output [2]: [ca_address_sk#13, ca_state#14] +Input [3]: [ca_address_sk#13, ca_state#14, ca_country#15] + +(14) BroadcastExchange +Input [2]: [ca_address_sk#13, ca_state#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(15) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_addr_sk#4] +Right keys [1]: [ca_address_sk#13] +Join condition: ((((ca_state#14 IN (TX,OH) AND (ss_net_profit#10 >= 100.00)) AND (ss_net_profit#10 <= 200.00)) OR ((ca_state#14 IN (OR,NM,KY) AND (ss_net_profit#10 >= 150.00)) AND (ss_net_profit#10 <= 300.00))) OR ((ca_state#14 IN (VA,TX,MS) AND (ss_net_profit#10 >= 50.00)) AND (ss_net_profit#10 <= 250.00))) + +(16) Project [codegen id : 6] +Output [7]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] +Input [11]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, ca_address_sk#13, ca_state#14] + +(17) Scan parquet default.date_dim +Output [2]: [d_date_sk#17, d_year#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#17, d_year#18] + +(19) Filter [codegen id : 3] +Input [2]: [d_date_sk#17, d_year#18] +Condition : ((isnotnull(d_year#18) AND (d_year#18 = 2001)) AND isnotnull(d_date_sk#17)) + +(20) Project [codegen id : 3] +Output [1]: [d_date_sk#17] +Input [2]: [d_date_sk#17, d_year#18] + +(21) BroadcastExchange +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] + +(22) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(23) Project [codegen id : 6] +Output [6]: [ss_cdemo_sk#2, ss_hdemo_sk#3, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] +Input [8]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, d_date_sk#17] + +(24) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree)),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree)))] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] + +(26) Filter [codegen id : 4] +Input [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] +Condition : (isnotnull(cd_demo_sk#20) AND ((((cd_marital_status#21 = M) AND (cd_education_status#22 = Advanced Degree)) OR ((cd_marital_status#21 = S) AND (cd_education_status#22 = College))) OR ((cd_marital_status#21 = W) AND (cd_education_status#22 = 2 yr Degree)))) + +(27) BroadcastExchange +Input [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#20] +Join condition: ((((((cd_marital_status#21 = M) AND (cd_education_status#22 = Advanced Degree)) AND (ss_sales_price#7 >= 100.00)) AND (ss_sales_price#7 <= 150.00)) OR ((((cd_marital_status#21 = S) AND (cd_education_status#22 = College)) AND (ss_sales_price#7 >= 50.00)) AND (ss_sales_price#7 <= 100.00))) OR ((((cd_marital_status#21 = W) AND (cd_education_status#22 = 2 yr Degree)) AND (ss_sales_price#7 >= 150.00)) AND (ss_sales_price#7 <= 200.00))) + +(29) Project [codegen id : 6] +Output [7]: [ss_hdemo_sk#3, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, cd_marital_status#21, cd_education_status#22] +Input [9]: [ss_cdemo_sk#2, ss_hdemo_sk#3, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] + +(30) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#24, hd_dep_count#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), Or(Or(EqualTo(hd_dep_count,3),EqualTo(hd_dep_count,1)),EqualTo(hd_dep_count,1))] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [hd_demo_sk#24, hd_dep_count#25] + +(32) Filter [codegen id : 5] +Input [2]: [hd_demo_sk#24, hd_dep_count#25] +Condition : (isnotnull(hd_demo_sk#24) AND (((hd_dep_count#25 = 3) OR (hd_dep_count#25 = 1)) OR (hd_dep_count#25 = 1))) + +(33) BroadcastExchange +Input [2]: [hd_demo_sk#24, hd_dep_count#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#24] +Join condition: (((((((cd_marital_status#21 = M) AND (cd_education_status#22 = Advanced Degree)) AND (ss_sales_price#7 >= 100.00)) AND (ss_sales_price#7 <= 150.00)) AND (hd_dep_count#25 = 3)) OR (((((cd_marital_status#21 = S) AND (cd_education_status#22 = College)) AND (ss_sales_price#7 >= 50.00)) AND (ss_sales_price#7 <= 100.00)) AND (hd_dep_count#25 = 1))) OR (((((cd_marital_status#21 = W) AND (cd_education_status#22 = 2 yr Degree)) AND (ss_sales_price#7 >= 150.00)) AND (ss_sales_price#7 <= 200.00)) AND (hd_dep_count#25 = 1))) + +(35) Project [codegen id : 6] +Output [3]: [ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] +Input [9]: [ss_hdemo_sk#3, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, cd_marital_status#21, cd_education_status#22, hd_demo_sk#24, hd_dep_count#25] + +(36) HashAggregate [codegen id : 6] +Input [3]: [ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] +Keys: [] +Functions [4]: [partial_avg(cast(ss_quantity#6 as bigint)), partial_avg(UnscaledValue(ss_ext_sales_price#8)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#9)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#9))] +Aggregate Attributes [7]: [sum#27, count#28, sum#29, count#30, sum#31, count#32, sum#33] +Results [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] + +(37) Exchange +Input [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] +Arguments: SinglePartition, true, [id=#41] + +(38) HashAggregate [codegen id : 7] +Input [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] +Keys: [] +Functions [4]: [avg(cast(ss_quantity#6 as bigint)), avg(UnscaledValue(ss_ext_sales_price#8)), avg(UnscaledValue(ss_ext_wholesale_cost#9)), sum(UnscaledValue(ss_ext_wholesale_cost#9))] +Aggregate Attributes [4]: [avg(cast(ss_quantity#6 as bigint))#42, avg(UnscaledValue(ss_ext_sales_price#8))#43, avg(UnscaledValue(ss_ext_wholesale_cost#9))#44, sum(UnscaledValue(ss_ext_wholesale_cost#9))#45] +Results [4]: [avg(cast(ss_quantity#6 as bigint))#42 AS avg(ss_quantity)#46, cast((avg(UnscaledValue(ss_ext_sales_price#8))#43 / 100.0) as decimal(11,6)) AS avg(ss_ext_sales_price)#47, cast((avg(UnscaledValue(ss_ext_wholesale_cost#9))#44 / 100.0) as decimal(11,6)) AS avg(ss_ext_wholesale_cost)#48, MakeDecimal(sum(UnscaledValue(ss_ext_wholesale_cost#9))#45,17,2) AS sum(ss_ext_wholesale_cost)#49] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/simplified.txt index cd146d06c..6e3a322d8 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q13/simplified.txt @@ -1,49 +1,57 @@ -WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_ext_sales_price)),avg(UnscaledValue(ss_ext_wholesale_cost)),avg(cast(ss_quantity as bigint)),count,count,count,sum,sum,sum,sum,sum(UnscaledValue(ss_ext_wholesale_cost))] [avg(UnscaledValue(ss_ext_sales_price)),avg(UnscaledValue(ss_ext_wholesale_cost)),avg(cast(ss_quantity as bigint)),avg(ss_ext_sales_price),avg(ss_ext_wholesale_cost),avg(ss_quantity),count,count,count,sum,sum,sum,sum,sum(UnscaledValue(ss_ext_wholesale_cost)),sum(ss_ext_wholesale_cost)] +WholeStageCodegen (7) + HashAggregate [sum,count,sum,count,sum,count,sum] [avg(cast(ss_quantity as bigint)),avg(UnscaledValue(ss_ext_sales_price)),avg(UnscaledValue(ss_ext_wholesale_cost)),sum(UnscaledValue(ss_ext_wholesale_cost)),avg(ss_quantity),avg(ss_ext_sales_price),avg(ss_ext_wholesale_cost),sum(ss_ext_wholesale_cost),sum,count,sum,count,sum,count,sum] InputAdapter Exchange #1 - WholeStageCodegen - HashAggregate [count,count,count,count,count,count,ss_ext_sales_price,ss_ext_wholesale_cost,ss_quantity,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] - Project [ss_ext_sales_price,ss_ext_wholesale_cost,ss_quantity] - BroadcastHashJoin [cd_education_status,cd_marital_status,hd_demo_sk,hd_dep_count,ss_hdemo_sk,ss_sales_price] - Project [cd_education_status,cd_marital_status,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] - Project [ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] - BroadcastHashJoin [ca_address_sk,ca_state,ss_addr_sk,ss_net_profit] - Project [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_addr_sk,ss_cdemo_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_addr_sk,ss_cdemo_sk,ss_ext_sales_price,ss_ext_wholesale_cost,ss_hdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + WholeStageCodegen (6) + HashAggregate [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] [sum,count,sum,count,sum,count,sum,sum,count,sum,count,sum,count,sum] + Project [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk,cd_marital_status,cd_education_status,ss_sales_price,hd_dep_count] + Project [ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,cd_marital_status,cd_education_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk,cd_marital_status,cd_education_status,ss_sales_price] + Project [ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_cdemo_sk,ss_hdemo_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] + Project [ss_sold_date_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_addr_sk,ss_sold_date_sk,ss_cdemo_sk,ss_hdemo_sk,ss_net_profit,ss_sales_price] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_sk] [s_store_sk] + WholeStageCodegen (1) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (2) Project [ca_address_sk,ca_state] - Filter [ca_address_sk,ca_country] - Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] [ca_address_sk,ca_country,ca_state] + Filter [ca_country,ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (3) Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [cd_demo_sk,cd_education_status,cd_marital_status] - Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] + WholeStageCodegen (4) + Filter [cd_demo_sk,cd_marital_status,cd_education_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #6 - WholeStageCodegen - Project [hd_demo_sk,hd_dep_count] - Filter [hd_demo_sk] - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] [hd_demo_sk,hd_dep_count] + WholeStageCodegen (5) + Filter [hd_demo_sk,hd_dep_count] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/explain.txt index 0e062f45f..3f0cc9e7a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/explain.txt @@ -1,192 +1,798 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 ASC NULLS FIRST,i_class_id#3 ASC NULLS FIRST,i_category_id#4 ASC NULLS FIRST], output=[channel#1,i_brand_id#2,i_class_id#3,i_category_id#4,sum(sales)#5,sum(number_sales)#6]) -+- *(80) HashAggregate(keys=[channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7], functions=[sum(sales#8), sum(number_sales#9)]) - +- Exchange hashpartitioning(channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7, 5) - +- *(79) HashAggregate(keys=[channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7], functions=[partial_sum(sales#8), partial_sum(number_sales#9)]) - +- *(79) Expand [List(sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, i_category_id#13, 0), List(sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, null, 1), List(sales#8, number_sales#9, channel#10, i_brand_id#11, null, null, 3), List(sales#8, number_sales#9, channel#10, null, null, null, 7), List(sales#8, number_sales#9, null, null, null, null, 15)], [sales#8, number_sales#9, channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, spark_grouping_id#7] - +- Union - :- *(26) Project [sales#8, number_sales#9, channel#10, i_brand_id#11, i_class_id#12, i_category_id#13] - : +- *(26) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2)))#16) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2)))#16 as decimal(32,6)) > cast(Subquery subquery1662 as decimal(32,6)))) - : : +- Subquery subquery1662 - : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) - : : +- Exchange SinglePartition - : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) - : : +- Union - : : :- *(2) Project [ss_quantity#14 AS quantity#17, ss_list_price#15 AS list_price#18] - : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight - : : : :- *(2) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15] - : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#19) - : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#20] - : : : +- *(1) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct - : : :- *(4) Project [cs_quantity#22 AS quantity#23, cs_list_price#24 AS list_price#25] - : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight - : : : :- *(4) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24] - : : : : +- *(4) Filter isnotnull(cs_sold_date_sk#26) - : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(6) Project [ws_quantity#27 AS quantity#28, ws_list_price#29 AS list_price#30] - : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight - : : :- *(6) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29] - : : : +- *(6) Filter isnotnull(ws_sold_date_sk#31) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(26) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2))), count(1)]) - : +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 5) - : +- *(25) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#14 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#15 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) - : +- *(25) Project [ss_quantity#14, ss_list_price#15, i_brand_id#11, i_class_id#12, i_category_id#13] - : +- *(25) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight - : :- *(25) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15, i_brand_id#11, i_class_id#12, i_category_id#13] - : : +- *(25) BroadcastHashJoin [ss_item_sk#32], [i_item_sk#33], Inner, BuildRight - : : :- *(25) BroadcastHashJoin [ss_item_sk#32], [ss_item_sk#34], LeftSemi, BuildRight - : : : :- *(25) Project [ss_sold_date_sk#19, ss_item_sk#32, ss_quantity#14, ss_list_price#15] - : : : : +- *(25) Filter (isnotnull(ss_item_sk#32) && isnotnull(ss_sold_date_sk#19)) - : : : : +- *(25) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#32,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(11) Project [i_item_sk#33 AS ss_item_sk#34] - : : : +- *(11) BroadcastHashJoin [i_brand_id#11, i_class_id#12, i_category_id#13], [brand_id#35, class_id#36, category_id#37], Inner, BuildRight - : : : :- *(11) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] - : : : : +- *(11) Filter ((isnotnull(i_class_id#12) && isnotnull(i_brand_id#11)) && isnotnull(i_category_id#13)) - : : : : +- *(11) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) - : : : +- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) - : : : +- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) - : : : +- *(10) BroadcastHashJoin [coalesce(brand_id#35, 0), coalesce(class_id#36, 0), coalesce(category_id#37, 0)], [coalesce(i_brand_id#11, 0), coalesce(i_class_id#12, 0), coalesce(i_category_id#13, 0)], LeftSemi, BuildRight, (((brand_id#35 <=> i_brand_id#11) && (class_id#36 <=> i_class_id#12)) && (category_id#37 <=> i_category_id#13)) - : : : :- *(10) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) - : : : : +- Exchange hashpartitioning(brand_id#35, class_id#36, category_id#37, 5) - : : : : +- *(6) HashAggregate(keys=[brand_id#35, class_id#36, category_id#37], functions=[]) - : : : : +- *(6) BroadcastHashJoin [coalesce(brand_id#35, 0), coalesce(class_id#36, 0), coalesce(category_id#37, 0)], [coalesce(i_brand_id#11, 0), coalesce(i_class_id#12, 0), coalesce(i_category_id#13, 0)], LeftSemi, BuildRight, (((brand_id#35 <=> i_brand_id#11) && (class_id#36 <=> i_class_id#12)) && (category_id#37 <=> i_category_id#13)) - : : : : :- *(6) Project [i_brand_id#11 AS brand_id#35, i_class_id#12 AS class_id#36, i_category_id#13 AS category_id#37] - : : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight - : : : : : :- *(6) Project [ss_sold_date_sk#19, i_brand_id#11, i_class_id#12, i_category_id#13] - : : : : : : +- *(6) BroadcastHashJoin [ss_item_sk#32], [i_item_sk#33], Inner, BuildRight - : : : : : : :- *(6) Project [ss_sold_date_sk#19, ss_item_sk#32] - : : : : : : : +- *(6) Filter (isnotnull(ss_item_sk#32) && isnotnull(ss_sold_date_sk#19)) - : : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#32] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : +- *(1) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] - : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#33) && isnotnull(i_class_id#12)) && isnotnull(i_brand_id#11)) && isnotnull(i_category_id#13)) - : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(2) Project [d_date_sk#20] - : : : : : +- *(2) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) - : : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) - : : : : +- *(5) Project [i_brand_id#11, i_class_id#12, i_category_id#13] - : : : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight - : : : : :- *(5) Project [cs_sold_date_sk#26, i_brand_id#11, i_class_id#12, i_category_id#13] - : : : : : +- *(5) BroadcastHashJoin [cs_item_sk#38], [i_item_sk#33], Inner, BuildRight - : : : : : :- *(5) Project [cs_sold_date_sk#26, cs_item_sk#38] - : : : : : : +- *(5) Filter (isnotnull(cs_item_sk#38) && isnotnull(cs_sold_date_sk#26)) - : : : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_item_sk#38] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(3) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] - : : : : : +- *(3) Filter isnotnull(i_item_sk#33) - : : : : : +- *(3) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) - : : : +- *(9) Project [i_brand_id#11, i_class_id#12, i_category_id#13] - : : : +- *(9) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight - : : : :- *(9) Project [ws_sold_date_sk#31, i_brand_id#11, i_class_id#12, i_category_id#13] - : : : : +- *(9) BroadcastHashJoin [ws_item_sk#39], [i_item_sk#33], Inner, BuildRight - : : : : :- *(9) Project [ws_sold_date_sk#31, ws_item_sk#39] - : : : : : +- *(9) Filter (isnotnull(ws_item_sk#39) && isnotnull(ws_sold_date_sk#31)) - : : : : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_item_sk#39] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : : : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(23) BroadcastHashJoin [i_item_sk#33], [ss_item_sk#34], LeftSemi, BuildRight - : : :- *(23) Project [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13] - : : : +- *(23) Filter isnotnull(i_item_sk#33) - : : : +- *(23) FileScan parquet default.item[i_item_sk#33,i_brand_id#11,i_class_id#12,i_category_id#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : : +- ReusedExchange [ss_item_sk#34], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(24) Project [d_date_sk#20] - : +- *(24) Filter ((((isnotnull(d_year#21) && isnotnull(d_moy#40)) && (d_year#21 = 2001)) && (d_moy#40 = 11)) && isnotnull(d_date_sk#20)) - : +- *(24) FileScan parquet default.date_dim[d_date_sk#20,d_year#21,d_moy#40] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct - :- *(52) Project [sales#41, number_sales#42, channel#43, i_brand_id#11, i_class_id#12, i_category_id#13] - : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44) && (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2)))#44 as decimal(32,6)) > cast(Subquery subquery1667 as decimal(32,6)))) - : : +- Subquery subquery1667 - : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) - : : +- Exchange SinglePartition - : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) - : : +- Union - : : :- *(2) Project [ss_quantity#14 AS quantity#17, ss_list_price#15 AS list_price#18] - : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight - : : : :- *(2) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15] - : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#19) - : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#20] - : : : +- *(1) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct - : : :- *(4) Project [cs_quantity#22 AS quantity#23, cs_list_price#24 AS list_price#25] - : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight - : : : :- *(4) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24] - : : : : +- *(4) Filter isnotnull(cs_sold_date_sk#26) - : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(6) Project [ws_quantity#27 AS quantity#28, ws_list_price#29 AS list_price#30] - : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight - : : :- *(6) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29] - : : : +- *(6) Filter isnotnull(ws_sold_date_sk#31) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(52) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2))), count(1)]) - : +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 5) - : +- *(51) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#22 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#24 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) - : +- *(51) Project [cs_quantity#22, cs_list_price#24, i_brand_id#11, i_class_id#12, i_category_id#13] - : +- *(51) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight - : :- *(51) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24, i_brand_id#11, i_class_id#12, i_category_id#13] - : : +- *(51) BroadcastHashJoin [cs_item_sk#38], [i_item_sk#33], Inner, BuildRight - : : :- *(51) BroadcastHashJoin [cs_item_sk#38], [ss_item_sk#34], LeftSemi, BuildRight - : : : :- *(51) Project [cs_sold_date_sk#26, cs_item_sk#38, cs_quantity#22, cs_list_price#24] - : : : : +- *(51) Filter (isnotnull(cs_item_sk#38) && isnotnull(cs_sold_date_sk#26)) - : : : : +- *(51) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_item_sk#38,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : : +- ReusedExchange [ss_item_sk#34], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(78) Project [sales#45, number_sales#46, channel#47, i_brand_id#11, i_class_id#12, i_category_id#13] - +- *(78) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2)))#48) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2)))#48 as decimal(32,6)) > cast(Subquery subquery1672 as decimal(32,6)))) - : +- Subquery subquery1672 - : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) - : +- Exchange SinglePartition - : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#17 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#18 as decimal(12,2)))), DecimalType(18,2)))]) - : +- Union - : :- *(2) Project [ss_quantity#14 AS quantity#17, ss_list_price#15 AS list_price#18] - : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight - : : :- *(2) Project [ss_sold_date_sk#19, ss_quantity#14, ss_list_price#15] - : : : +- *(2) Filter isnotnull(ss_sold_date_sk#19) - : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_quantity#14,ss_list_price#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [d_date_sk#20] - : : +- *(1) Filter (((isnotnull(d_year#21) && (d_year#21 >= 1999)) && (d_year#21 <= 2001)) && isnotnull(d_date_sk#20)) - : : +- *(1) FileScan parquet default.date_dim[d_date_sk#20,d_year#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct - : :- *(4) Project [cs_quantity#22 AS quantity#23, cs_list_price#24 AS list_price#25] - : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#20], Inner, BuildRight - : : :- *(4) Project [cs_sold_date_sk#26, cs_quantity#22, cs_list_price#24] - : : : +- *(4) Filter isnotnull(cs_sold_date_sk#26) - : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_quantity#22,cs_list_price#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(6) Project [ws_quantity#27 AS quantity#28, ws_list_price#29 AS list_price#30] - : +- *(6) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight - : :- *(6) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29] - : : +- *(6) Filter isnotnull(ws_sold_date_sk#31) - : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(78) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2))), count(1)]) - +- Exchange hashpartitioning(i_brand_id#11, i_class_id#12, i_category_id#13, 5) - +- *(77) HashAggregate(keys=[i_brand_id#11, i_class_id#12, i_category_id#13], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#29 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) - +- *(77) Project [ws_quantity#27, ws_list_price#29, i_brand_id#11, i_class_id#12, i_category_id#13] - +- *(77) BroadcastHashJoin [ws_sold_date_sk#31], [d_date_sk#20], Inner, BuildRight - :- *(77) Project [ws_sold_date_sk#31, ws_quantity#27, ws_list_price#29, i_brand_id#11, i_class_id#12, i_category_id#13] - : +- *(77) BroadcastHashJoin [ws_item_sk#39], [i_item_sk#33], Inner, BuildRight - : :- *(77) BroadcastHashJoin [ws_item_sk#39], [ss_item_sk#34], LeftSemi, BuildRight - : : :- *(77) Project [ws_sold_date_sk#31, ws_item_sk#39, ws_quantity#27, ws_list_price#29] - : : : +- *(77) Filter (isnotnull(ws_item_sk#39) && isnotnull(ws_sold_date_sk#31)) - : : : +- *(77) FileScan parquet default.web_sales[ws_sold_date_sk#31,ws_item_sk#39,ws_quantity#27,ws_list_price#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [ss_item_sk#34], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [i_item_sk#33, i_brand_id#11, i_class_id#12, i_category_id#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +TakeOrderedAndProject (115) ++- * HashAggregate (114) + +- Exchange (113) + +- * HashAggregate (112) + +- * Expand (111) + +- Union (110) + :- * Project (77) + : +- * Filter (76) + : +- * HashAggregate (75) + : +- Exchange (74) + : +- * HashAggregate (73) + : +- * Project (72) + : +- * BroadcastHashJoin Inner BuildRight (71) + : :- * Project (65) + : : +- * BroadcastHashJoin Inner BuildRight (64) + : : :- * BroadcastHashJoin LeftSemi BuildRight (57) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (56) + : : : +- * Project (55) + : : : +- * BroadcastHashJoin Inner BuildRight (54) + : : : :- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.item (4) + : : : +- BroadcastExchange (53) + : : : +- * HashAggregate (52) + : : : +- * HashAggregate (51) + : : : +- * HashAggregate (50) + : : : +- Exchange (49) + : : : +- * HashAggregate (48) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (47) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (36) + : : : : :- * Project (22) + : : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : : :- * Project (15) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : : :- * Filter (9) + : : : : : : : +- * ColumnarToRow (8) + : : : : : : : +- Scan parquet default.store_sales (7) + : : : : : : +- BroadcastExchange (13) + : : : : : : +- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet default.item (10) + : : : : : +- BroadcastExchange (20) + : : : : : +- * Project (19) + : : : : : +- * Filter (18) + : : : : : +- * ColumnarToRow (17) + : : : : : +- Scan parquet default.date_dim (16) + : : : : +- BroadcastExchange (35) + : : : : +- * Project (34) + : : : : +- * BroadcastHashJoin Inner BuildRight (33) + : : : : :- * Project (31) + : : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : : :- * Filter (25) + : : : : : : +- * ColumnarToRow (24) + : : : : : : +- Scan parquet default.catalog_sales (23) + : : : : : +- BroadcastExchange (29) + : : : : : +- * Filter (28) + : : : : : +- * ColumnarToRow (27) + : : : : : +- Scan parquet default.item (26) + : : : : +- ReusedExchange (32) + : : : +- BroadcastExchange (46) + : : : +- * Project (45) + : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : :- * Project (42) + : : : : +- * BroadcastHashJoin Inner BuildRight (41) + : : : : :- * Filter (39) + : : : : : +- * ColumnarToRow (38) + : : : : : +- Scan parquet default.web_sales (37) + : : : : +- ReusedExchange (40) + : : : +- ReusedExchange (43) + : : +- BroadcastExchange (63) + : : +- * BroadcastHashJoin LeftSemi BuildRight (62) + : : :- * Filter (60) + : : : +- * ColumnarToRow (59) + : : : +- Scan parquet default.item (58) + : : +- ReusedExchange (61) + : +- BroadcastExchange (70) + : +- * Project (69) + : +- * Filter (68) + : +- * ColumnarToRow (67) + : +- Scan parquet default.date_dim (66) + :- * Project (93) + : +- * Filter (92) + : +- * HashAggregate (91) + : +- Exchange (90) + : +- * HashAggregate (89) + : +- * Project (88) + : +- * BroadcastHashJoin Inner BuildRight (87) + : :- * Project (85) + : : +- * BroadcastHashJoin Inner BuildRight (84) + : : :- * BroadcastHashJoin LeftSemi BuildRight (82) + : : : :- * Filter (80) + : : : : +- * ColumnarToRow (79) + : : : : +- Scan parquet default.catalog_sales (78) + : : : +- ReusedExchange (81) + : : +- ReusedExchange (83) + : +- ReusedExchange (86) + +- * Project (109) + +- * Filter (108) + +- * HashAggregate (107) + +- Exchange (106) + +- * HashAggregate (105) + +- * Project (104) + +- * BroadcastHashJoin Inner BuildRight (103) + :- * Project (101) + : +- * BroadcastHashJoin Inner BuildRight (100) + : :- * BroadcastHashJoin LeftSemi BuildRight (98) + : : :- * Filter (96) + : : : +- * ColumnarToRow (95) + : : : +- Scan parquet default.web_sales (94) + : : +- ReusedExchange (97) + : +- ReusedExchange (99) + +- ReusedExchange (102) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 25] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] + +(3) Filter [codegen id : 25] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(6) Filter [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(7) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#1, ss_item_sk#2] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 9] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] + +(9) Filter [codegen id : 9] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(10) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(12) Filter [codegen id : 1] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : (((isnotnull(i_item_sk#5) AND isnotnull(i_brand_id#6)) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(13) BroadcastExchange +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(15) Project [codegen id : 9] +Output [4]: [ss_sold_date_sk#1, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(16) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_year#11] + +(18) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) + +(19) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(20) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(21) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(22) Project [codegen id : 9] +Output [3]: [i_brand_id#6 AS brand_id#13, i_class_id#7 AS class_id#14, i_category_id#8 AS category_id#15] +Input [5]: [ss_sold_date_sk#1, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(23) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#16, cs_item_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 5] +Input [2]: [cs_sold_date_sk#16, cs_item_sk#17] + +(25) Filter [codegen id : 5] +Input [2]: [cs_sold_date_sk#16, cs_item_sk#17] +Condition : (isnotnull(cs_item_sk#17) AND isnotnull(cs_sold_date_sk#16)) + +(26) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 3] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(28) Filter [codegen id : 3] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : isnotnull(i_item_sk#5) + +(29) BroadcastExchange +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] + +(30) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_item_sk#17] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(31) Project [codegen id : 5] +Output [4]: [cs_sold_date_sk#16, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [cs_sold_date_sk#16, cs_item_sk#17, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(32) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#10] + +(33) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(34) Project [codegen id : 5] +Output [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Input [5]: [cs_sold_date_sk#16, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(35) BroadcastExchange +Input [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#19] + +(36) BroadcastHashJoin [codegen id : 9] +Left keys [6]: [coalesce(brand_id#13, 0), isnull(brand_id#13), coalesce(class_id#14, 0), isnull(class_id#14), coalesce(category_id#15, 0), isnull(category_id#15)] +Right keys [6]: [coalesce(i_brand_id#6, 0), isnull(i_brand_id#6), coalesce(i_class_id#7, 0), isnull(i_class_id#7), coalesce(i_category_id#8, 0), isnull(i_category_id#8)] +Join condition: None + +(37) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#20, ws_item_sk#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 8] +Input [2]: [ws_sold_date_sk#20, ws_item_sk#21] + +(39) Filter [codegen id : 8] +Input [2]: [ws_sold_date_sk#20, ws_item_sk#21] +Condition : (isnotnull(ws_item_sk#21) AND isnotnull(ws_sold_date_sk#20)) + +(40) ReusedExchange [Reuses operator id: 29] +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_item_sk#21] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(42) Project [codegen id : 8] +Output [4]: [ws_sold_date_sk#20, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [ws_sold_date_sk#20, ws_item_sk#21, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(43) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#10] + +(44) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(45) Project [codegen id : 8] +Output [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Input [5]: [ws_sold_date_sk#20, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(46) BroadcastExchange +Input [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#22] + +(47) BroadcastHashJoin [codegen id : 9] +Left keys [6]: [coalesce(brand_id#13, 0), isnull(brand_id#13), coalesce(class_id#14, 0), isnull(class_id#14), coalesce(category_id#15, 0), isnull(category_id#15)] +Right keys [6]: [coalesce(i_brand_id#6, 0), isnull(i_brand_id#6), coalesce(i_class_id#7, 0), isnull(i_class_id#7), coalesce(i_category_id#8, 0), isnull(i_category_id#8)] +Join condition: None + +(48) HashAggregate [codegen id : 9] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(49) Exchange +Input [3]: [brand_id#13, class_id#14, category_id#15] +Arguments: hashpartitioning(brand_id#13, class_id#14, category_id#15, 5), true, [id=#23] + +(50) HashAggregate [codegen id : 10] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(51) HashAggregate [codegen id : 10] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(52) HashAggregate [codegen id : 10] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(53) BroadcastExchange +Input [3]: [brand_id#13, class_id#14, category_id#15] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#24] + +(54) BroadcastHashJoin [codegen id : 11] +Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Right keys [3]: [brand_id#13, class_id#14, category_id#15] +Join condition: None + +(55) Project [codegen id : 11] +Output [1]: [i_item_sk#5 AS ss_item_sk#25] +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#13, class_id#14, category_id#15] + +(56) BroadcastExchange +Input [1]: [ss_item_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] + +(57) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(58) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(59) ColumnarToRow [codegen id : 23] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(60) Filter [codegen id : 23] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : isnotnull(i_item_sk#5) + +(61) ReusedExchange [Reuses operator id: 56] +Output [1]: [ss_item_sk#25] + +(62) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [i_item_sk#5] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(63) BroadcastExchange +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(64) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(65) Project [codegen id : 25] +Output [6]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(66) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_moy#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(67) ColumnarToRow [codegen id : 24] +Input [3]: [d_date_sk#10, d_year#11, d_moy#28] + +(68) Filter [codegen id : 24] +Input [3]: [d_date_sk#10, d_year#11, d_moy#28] +Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#28)) AND (d_year#11 = 2001)) AND (d_moy#28 = 11)) AND isnotnull(d_date_sk#10)) + +(69) Project [codegen id : 24] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#11, d_moy#28] + +(70) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] + +(71) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(72) Project [codegen id : 25] +Output [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [7]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(73) HashAggregate [codegen id : 25] +Input [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#30, isEmpty#31, count#32] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, count#35] + +(74) Exchange +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, count#35] +Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#36] + +(75) HashAggregate [codegen id : 26] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, count#35] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37, count(1)#38] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sales#39, count(1)#38 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41] + +(76) Filter [codegen id : 26] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41 as decimal(32,6)) > cast(Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) + +(77) Project [codegen id : 26] +Output [6]: [sales#39, number_sales#40, store AS channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41] + +(78) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(79) ColumnarToRow [codegen id : 51] +Input [4]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46] + +(80) Filter [codegen id : 51] +Input [4]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46] +Condition : (isnotnull(cs_item_sk#17) AND isnotnull(cs_sold_date_sk#16)) + +(81) ReusedExchange [Reuses operator id: 56] +Output [1]: [ss_item_sk#25] + +(82) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_item_sk#17] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(83) ReusedExchange [Reuses operator id: 63] +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(84) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_item_sk#17] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(85) Project [codegen id : 51] +Output [6]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [8]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(86) ReusedExchange [Reuses operator id: 70] +Output [1]: [d_date_sk#10] + +(87) BroadcastHashJoin [codegen id : 51] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(88) Project [codegen id : 51] +Output [5]: [cs_quantity#45, cs_list_price#46, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [7]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(89) HashAggregate [codegen id : 51] +Input [5]: [cs_quantity#45, cs_list_price#46, i_brand_id#6, i_class_id#7, i_category_id#8] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#47, isEmpty#48, count#49] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, count#52] + +(90) Exchange +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, count#52] +Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#53] + +(91) HashAggregate [codegen id : 52] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, count#52] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54, count(1)#55] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sales#56, count(1)#55 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58] + +(92) Filter [codegen id : 52] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) + +(93) Project [codegen id : 52] +Output [6]: [sales#56, number_sales#57, catalog AS channel#59, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58] + +(94) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(95) ColumnarToRow [codegen id : 77] +Input [4]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61] + +(96) Filter [codegen id : 77] +Input [4]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61] +Condition : (isnotnull(ws_item_sk#21) AND isnotnull(ws_sold_date_sk#20)) + +(97) ReusedExchange [Reuses operator id: 56] +Output [1]: [ss_item_sk#25] + +(98) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_item_sk#21] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(99) ReusedExchange [Reuses operator id: 63] +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(100) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_item_sk#21] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(101) Project [codegen id : 77] +Output [6]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [8]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(102) ReusedExchange [Reuses operator id: 70] +Output [1]: [d_date_sk#10] + +(103) BroadcastHashJoin [codegen id : 77] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(104) Project [codegen id : 77] +Output [5]: [ws_quantity#60, ws_list_price#61, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [7]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(105) HashAggregate [codegen id : 77] +Input [5]: [ws_quantity#60, ws_list_price#61, i_brand_id#6, i_class_id#7, i_category_id#8] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#62, isEmpty#63, count#64] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, count#67] + +(106) Exchange +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, count#67] +Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#68] + +(107) HashAggregate [codegen id : 78] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, count#67] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69, count(1)#70] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sales#71, count(1)#70 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73] + +(108) Filter [codegen id : 78] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) + +(109) Project [codegen id : 78] +Output [6]: [sales#71, number_sales#72, web AS channel#74, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73] + +(110) Union + +(111) Expand [codegen id : 79] +Input [6]: [sales#39, number_sales#40, channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: [List(sales#39, number_sales#40, channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 0), List(sales#39, number_sales#40, channel#44, i_brand_id#6, i_class_id#7, null, 1), List(sales#39, number_sales#40, channel#44, i_brand_id#6, null, null, 3), List(sales#39, number_sales#40, channel#44, null, null, null, 7), List(sales#39, number_sales#40, null, null, null, null, 15)], [sales#39, number_sales#40, channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] + +(112) HashAggregate [codegen id : 79] +Input [7]: [sales#39, number_sales#40, channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] +Keys [5]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] +Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] +Aggregate Attributes [3]: [sum#80, isEmpty#81, sum#82] +Results [8]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79, sum#83, isEmpty#84, sum#85] + +(113) Exchange +Input [8]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79, sum#83, isEmpty#84, sum#85] +Arguments: hashpartitioning(channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79, 5), true, [id=#86] + +(114) HashAggregate [codegen id : 80] +Input [8]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79, sum#83, isEmpty#84, sum#85] +Keys [5]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] +Functions [2]: [sum(sales#39), sum(number_sales#40)] +Aggregate Attributes [2]: [sum(sales#39)#87, sum(number_sales#40)#88] +Results [6]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, sum(sales#39)#87 AS sum(sales)#89, sum(number_sales#40)#88 AS sum(number_sales)#90] + +(115) TakeOrderedAndProject +Input [6]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, sum(sales)#89, sum(number_sales)#90] +Arguments: 100, [channel#75 ASC NULLS FIRST, i_brand_id#76 ASC NULLS FIRST, i_class_id#77 ASC NULLS FIRST, i_category_id#78 ASC NULLS FIRST], [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, sum(sales)#89, sum(number_sales)#90] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#42, [id=#43] +* HashAggregate (141) ++- Exchange (140) + +- * HashAggregate (139) + +- Union (138) + :- * Project (125) + : +- * BroadcastHashJoin Inner BuildRight (124) + : :- * Filter (118) + : : +- * ColumnarToRow (117) + : : +- Scan parquet default.store_sales (116) + : +- BroadcastExchange (123) + : +- * Project (122) + : +- * Filter (121) + : +- * ColumnarToRow (120) + : +- Scan parquet default.date_dim (119) + :- * Project (131) + : +- * BroadcastHashJoin Inner BuildRight (130) + : :- * Filter (128) + : : +- * ColumnarToRow (127) + : : +- Scan parquet default.catalog_sales (126) + : +- ReusedExchange (129) + +- * Project (137) + +- * BroadcastHashJoin Inner BuildRight (136) + :- * Filter (134) + : +- * ColumnarToRow (133) + : +- Scan parquet default.web_sales (132) + +- ReusedExchange (135) + + +(116) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(117) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] + +(118) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Condition : isnotnull(ss_sold_date_sk#1) + +(119) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(120) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] + +(121) Filter [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) + +(122) Project [codegen id : 1] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(123) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#91] + +(124) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(125) Project [codegen id : 2] +Output [2]: [ss_quantity#3 AS quantity#92, ss_list_price#4 AS list_price#93] +Input [4]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, d_date_sk#10] + +(126) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(127) ColumnarToRow [codegen id : 4] +Input [3]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46] + +(128) Filter [codegen id : 4] +Input [3]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46] +Condition : isnotnull(cs_sold_date_sk#16) + +(129) ReusedExchange [Reuses operator id: 123] +Output [1]: [d_date_sk#10] + +(130) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(131) Project [codegen id : 4] +Output [2]: [cs_quantity#45 AS quantity#94, cs_list_price#46 AS list_price#95] +Input [4]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46, d_date_sk#10] + +(132) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(133) ColumnarToRow [codegen id : 6] +Input [3]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61] + +(134) Filter [codegen id : 6] +Input [3]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61] +Condition : isnotnull(ws_sold_date_sk#20) + +(135) ReusedExchange [Reuses operator id: 123] +Output [1]: [d_date_sk#10] + +(136) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(137) Project [codegen id : 6] +Output [2]: [ws_quantity#60 AS quantity#96, ws_list_price#61 AS list_price#97] +Input [4]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61, d_date_sk#10] + +(138) Union + +(139) HashAggregate [codegen id : 7] +Input [2]: [quantity#92, list_price#93] +Keys: [] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#92 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#93 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#98, count#99] +Results [2]: [sum#100, count#101] + +(140) Exchange +Input [2]: [sum#100, count#101] +Arguments: SinglePartition, true, [id=#102] + +(141) HashAggregate [codegen id : 8] +Input [2]: [sum#100, count#101] +Keys: [] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#92 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#93 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#92 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#93 as decimal(12,2)))), DecimalType(18,2), true))#103] +Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#92 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#93 as decimal(12,2)))), DecimalType(18,2), true))#103 AS average_sales#104] + +Subquery:2 Hosting operator id = 92 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] + +Subquery:3 Hosting operator id = 108 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] + + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/simplified.txt index 837757bfe..dfa8c1bcc 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14a/simplified.txt @@ -1,267 +1,214 @@ -TakeOrderedAndProject [channel,i_brand_id,i_category_id,i_class_id,sum(number_sales),sum(sales)] - WholeStageCodegen - HashAggregate [channel,i_brand_id,i_category_id,i_class_id,spark_grouping_id,sum,sum,sum(number_salesL),sum(sales)] [sum,sum,sum(number_salesL),sum(number_sales),sum(sales),sum(sales)] +TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),sum(number_sales)] + WholeStageCodegen (80) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum(sales),sum(number_sales),sum,isEmpty,sum] InputAdapter - Exchange [channel,i_brand_id,i_category_id,i_class_id,spark_grouping_id] #1 - WholeStageCodegen - HashAggregate [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales,spark_grouping_id,sum,sum,sum,sum] [sum,sum,sum,sum] - Expand [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] + Exchange [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id] #1 + WholeStageCodegen (79) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,spark_grouping_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + Expand [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] InputAdapter Union - WholeStageCodegen - Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] + WholeStageCodegen (26) + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #1 - WholeStageCodegen - HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] + WholeStageCodegen (8) + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] InputAdapter Exchange #13 - WholeStageCodegen - HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] + WholeStageCodegen (7) + HashAggregate [quantity,list_price] [sum,count,sum,count] InputAdapter Union - WholeStageCodegen - Project [ss_list_price,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_list_price,ss_quantity,ss_sold_date_sk] - Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] + WholeStageCodegen (2) + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] InputAdapter BroadcastExchange #14 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - WholeStageCodegen - Project [cs_list_price,cs_quantity] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (4) + Project [cs_quantity,cs_list_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_list_price,cs_quantity,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #14 - WholeStageCodegen - Project [ws_list_price,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_list_price,ws_quantity,ws_sold_date_sk] - Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] + ReusedExchange [d_date_sk] #14 + WholeStageCodegen (6) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #14 - HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] + ReusedExchange [d_date_sk] #14 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter - Exchange [i_brand_id,i_category_id,i_class_id] #2 - WholeStageCodegen - HashAggregate [count,count,i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,sum,sum] [count,count,sum,sum] - Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,ss_sold_date_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] + Exchange [i_brand_id,i_class_id,i_category_id] #2 + WholeStageCodegen (25) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] BroadcastHashJoin [ss_item_sk,ss_item_sk] - Project [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (11) Project [i_item_sk] - BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] - Project [i_brand_id,i_category_id,i_class_id,i_item_sk] - Filter [i_brand_id,i_category_id,i_class_id] - Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + Filter [i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter BroadcastExchange #4 - WholeStageCodegen - HashAggregate [brand_id,category_id,class_id] - HashAggregate [brand_id,category_id,class_id] - BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] - HashAggregate [brand_id,category_id,class_id] - InputAdapter - Exchange [brand_id,category_id,class_id] #5 - WholeStageCodegen - HashAggregate [brand_id,category_id,class_id] - BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] - Project [i_brand_id,i_category_id,i_class_id] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,ss_sold_date_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_item_sk,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] [ss_item_sk,ss_sold_date_sk] + WholeStageCodegen (10) + HashAggregate [brand_id,class_id,category_id] + HashAggregate [brand_id,class_id,category_id] + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #5 + WholeStageCodegen (9) + HashAggregate [brand_id,class_id,category_id] + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] InputAdapter BroadcastExchange #6 - WholeStageCodegen - Project [i_brand_id,i_category_id,i_class_id,i_item_sk] - Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] - Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] + WholeStageCodegen (1) + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter BroadcastExchange #7 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #8 - WholeStageCodegen - Project [i_brand_id,i_category_id,i_class_id] + WholeStageCodegen (5) + Project [i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,i_brand_id,i_category_id,i_class_id] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk,cs_sold_date_sk] - Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] [cs_item_sk,cs_sold_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk] InputAdapter BroadcastExchange #9 - WholeStageCodegen - Project [i_brand_id,i_category_id,i_class_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #7 - InputAdapter - BroadcastExchange #10 - WholeStageCodegen - Project [i_brand_id,i_category_id,i_class_id] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,ws_sold_date_sk] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_item_sk,ws_sold_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] [ws_item_sk,ws_sold_date_sk] - InputAdapter - ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #9 + ReusedExchange [d_date_sk] #7 InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #7 + BroadcastExchange #10 + WholeStageCodegen (8) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #9 + InputAdapter + ReusedExchange [d_date_sk] #7 InputAdapter BroadcastExchange #11 - WholeStageCodegen + WholeStageCodegen (23) BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_brand_id,i_category_id,i_class_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter - ReusedExchange [ss_item_sk] [ss_item_sk] #3 + ReusedExchange [ss_item_sk] #3 InputAdapter BroadcastExchange #12 - WholeStageCodegen + WholeStageCodegen (24) Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - WholeStageCodegen - Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] - Subquery #2 - WholeStageCodegen - HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] - InputAdapter - Exchange #13 - WholeStageCodegen - HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] - InputAdapter - Union - WholeStageCodegen - Project [ss_list_price,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_list_price,ss_quantity,ss_sold_date_sk] - Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] - InputAdapter - BroadcastExchange #14 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - WholeStageCodegen - Project [cs_list_price,cs_quantity] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_list_price,cs_quantity,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #14 - WholeStageCodegen - Project [ws_list_price,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_list_price,ws_quantity,ws_sold_date_sk] - Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #14 - HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (52) + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter - Exchange [i_brand_id,i_category_id,i_class_id] #15 - WholeStageCodegen - HashAggregate [count,count,cs_list_price,cs_quantity,i_brand_id,i_category_id,i_class_id,sum,sum] [count,count,sum,sum] - Project [cs_list_price,cs_quantity,i_brand_id,i_category_id,i_class_id] + Exchange [i_brand_id,i_class_id,i_category_id] #15 + WholeStageCodegen (51) + HashAggregate [i_brand_id,i_class_id,i_category_id,cs_quantity,cs_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_list_price,cs_quantity,cs_sold_date_sk,i_brand_id,i_category_id,i_class_id] + Project [cs_sold_date_sk,cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [cs_item_sk,i_item_sk] BroadcastHashJoin [cs_item_sk,ss_item_sk] - Project [cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] - Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] [cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] InputAdapter - ReusedExchange [ss_item_sk] [ss_item_sk] #3 + ReusedExchange [ss_item_sk] #3 InputAdapter - ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #11 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #12 - WholeStageCodegen - Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] - Subquery #3 - WholeStageCodegen - HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] - InputAdapter - Exchange #13 - WholeStageCodegen - HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] - InputAdapter - Union - WholeStageCodegen - Project [ss_list_price,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_list_price,ss_quantity,ss_sold_date_sk] - Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] - InputAdapter - BroadcastExchange #14 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - WholeStageCodegen - Project [cs_list_price,cs_quantity] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_list_price,cs_quantity,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #14 - WholeStageCodegen - Project [ws_list_price,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_list_price,ws_quantity,ws_sold_date_sk] - Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #14 - HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] + ReusedExchange [d_date_sk] #12 + WholeStageCodegen (78) + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter - Exchange [i_brand_id,i_category_id,i_class_id] #16 - WholeStageCodegen - HashAggregate [count,count,i_brand_id,i_category_id,i_class_id,sum,sum,ws_list_price,ws_quantity] [count,count,sum,sum] - Project [i_brand_id,i_category_id,i_class_id,ws_list_price,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,ws_list_price,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [i_item_sk,ws_item_sk] - BroadcastHashJoin [ss_item_sk,ws_item_sk] - Project [ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] [ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] + Exchange [i_brand_id,i_class_id,i_category_id] #16 + WholeStageCodegen (77) + HashAggregate [i_brand_id,i_class_id,i_category_id,ws_quantity,ws_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + BroadcastHashJoin [ws_item_sk,ss_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] InputAdapter - ReusedExchange [ss_item_sk] [ss_item_sk] #3 + ReusedExchange [ss_item_sk] #3 InputAdapter - ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #11 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #11 InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #12 + ReusedExchange [d_date_sk] #12 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/explain.txt index 4d49d4d7a..1f31ded51 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/explain.txt @@ -1,166 +1,763 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,i_brand_id#2 ASC NULLS FIRST,i_class_id#3 ASC NULLS FIRST,i_category_id#4 ASC NULLS FIRST], output=[channel#1,i_brand_id#2,i_class_id#3,i_category_id#4,sales#5,number_sales#6,channel#7,i_brand_id#8,i_class_id#9,i_category_id#10,sales#11,number_sales#12]) -+- *(52) BroadcastHashJoin [i_brand_id#2, i_class_id#3, i_category_id#4], [i_brand_id#8, i_class_id#9, i_category_id#10], Inner, BuildRight - :- *(52) Project [channel#1, i_brand_id#2, i_class_id#3, i_category_id#4, sales#5, number_sales#6] - : +- *(52) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#15) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#15 as decimal(32,6)) > cast(Subquery subquery1884 as decimal(32,6)))) - : : +- Subquery subquery1884 - : : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) - : : +- Exchange SinglePartition - : : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) - : : +- Union - : : :- *(2) Project [ss_quantity#13 AS quantity#16, ss_list_price#14 AS list_price#17] - : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight - : : : :- *(2) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14] - : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#18) - : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#19] - : : : +- *(1) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct - : : :- *(4) Project [cs_quantity#21 AS quantity#22, cs_list_price#23 AS list_price#24] - : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#19], Inner, BuildRight - : : : :- *(4) Project [cs_sold_date_sk#25, cs_quantity#21, cs_list_price#23] - : : : : +- *(4) Filter isnotnull(cs_sold_date_sk#25) - : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_quantity#21,cs_list_price#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(6) Project [ws_quantity#26 AS quantity#27, ws_list_price#28 AS list_price#29] - : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#30], [d_date_sk#19], Inner, BuildRight - : : :- *(6) Project [ws_sold_date_sk#30, ws_quantity#26, ws_list_price#28] - : : : +- *(6) Filter isnotnull(ws_sold_date_sk#30) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_quantity#26,ws_list_price#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(52) HashAggregate(keys=[i_brand_id#2, i_class_id#3, i_category_id#4], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), count(1)]) - : +- Exchange hashpartitioning(i_brand_id#2, i_class_id#3, i_category_id#4, 5) - : +- *(25) HashAggregate(keys=[i_brand_id#2, i_class_id#3, i_category_id#4], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) - : +- *(25) Project [ss_quantity#13, ss_list_price#14, i_brand_id#2, i_class_id#3, i_category_id#4] - : +- *(25) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight - : :- *(25) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14, i_brand_id#2, i_class_id#3, i_category_id#4] - : : +- *(25) BroadcastHashJoin [ss_item_sk#31], [i_item_sk#32], Inner, BuildRight - : : :- *(25) BroadcastHashJoin [ss_item_sk#31], [ss_item_sk#33], LeftSemi, BuildRight - : : : :- *(25) Project [ss_sold_date_sk#18, ss_item_sk#31, ss_quantity#13, ss_list_price#14] - : : : : +- *(25) Filter (isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#18)) - : : : : +- *(25) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(11) Project [i_item_sk#32 AS ss_item_sk#33] - : : : +- *(11) BroadcastHashJoin [i_brand_id#2, i_class_id#3, i_category_id#4], [brand_id#34, class_id#35, category_id#36], Inner, BuildRight - : : : :- *(11) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : : +- *(11) Filter ((isnotnull(i_class_id#3) && isnotnull(i_brand_id#2)) && isnotnull(i_category_id#4)) - : : : : +- *(11) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) - : : : +- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) - : : : +- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) - : : : +- *(10) BroadcastHashJoin [coalesce(brand_id#34, 0), coalesce(class_id#35, 0), coalesce(category_id#36, 0)], [coalesce(i_brand_id#2, 0), coalesce(i_class_id#3, 0), coalesce(i_category_id#4, 0)], LeftSemi, BuildRight, (((brand_id#34 <=> i_brand_id#2) && (class_id#35 <=> i_class_id#3)) && (category_id#36 <=> i_category_id#4)) - : : : :- *(10) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) - : : : : +- Exchange hashpartitioning(brand_id#34, class_id#35, category_id#36, 5) - : : : : +- *(6) HashAggregate(keys=[brand_id#34, class_id#35, category_id#36], functions=[]) - : : : : +- *(6) BroadcastHashJoin [coalesce(brand_id#34, 0), coalesce(class_id#35, 0), coalesce(category_id#36, 0)], [coalesce(i_brand_id#2, 0), coalesce(i_class_id#3, 0), coalesce(i_category_id#4, 0)], LeftSemi, BuildRight, (((brand_id#34 <=> i_brand_id#2) && (class_id#35 <=> i_class_id#3)) && (category_id#36 <=> i_category_id#4)) - : : : : :- *(6) Project [i_brand_id#2 AS brand_id#34, i_class_id#3 AS class_id#35, i_category_id#4 AS category_id#36] - : : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight - : : : : : :- *(6) Project [ss_sold_date_sk#18, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : : : : +- *(6) BroadcastHashJoin [ss_item_sk#31], [i_item_sk#32], Inner, BuildRight - : : : : : : :- *(6) Project [ss_sold_date_sk#18, ss_item_sk#31] - : : : : : : : +- *(6) Filter (isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#18)) - : : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : +- *(1) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : : : : +- *(1) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_class_id#3)) && isnotnull(i_brand_id#2)) && isnotnull(i_category_id#4)) - : : : : : : +- *(1) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_brand_id), IsNotNull(i_category_id)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(2) Project [d_date_sk#19] - : : : : : +- *(2) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) - : : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) - : : : : +- *(5) Project [i_brand_id#2, i_class_id#3, i_category_id#4] - : : : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#19], Inner, BuildRight - : : : : :- *(5) Project [cs_sold_date_sk#25, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : : : +- *(5) BroadcastHashJoin [cs_item_sk#37], [i_item_sk#32], Inner, BuildRight - : : : : : :- *(5) Project [cs_sold_date_sk#25, cs_item_sk#37] - : : : : : : +- *(5) Filter (isnotnull(cs_item_sk#37) && isnotnull(cs_sold_date_sk#25)) - : : : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_item_sk#37] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(3) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : : : +- *(3) Filter isnotnull(i_item_sk#32) - : : : : : +- *(3) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : : : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), coalesce(input[1, int, true], 0), coalesce(input[2, int, true], 0))) - : : : +- *(9) Project [i_brand_id#2, i_class_id#3, i_category_id#4] - : : : +- *(9) BroadcastHashJoin [ws_sold_date_sk#30], [d_date_sk#19], Inner, BuildRight - : : : :- *(9) Project [ws_sold_date_sk#30, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : : +- *(9) BroadcastHashJoin [ws_item_sk#38], [i_item_sk#32], Inner, BuildRight - : : : : :- *(9) Project [ws_sold_date_sk#30, ws_item_sk#38] - : : : : : +- *(9) Filter (isnotnull(ws_item_sk#38) && isnotnull(ws_sold_date_sk#30)) - : : : : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_item_sk#38] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : : : +- ReusedExchange [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(23) BroadcastHashJoin [i_item_sk#32], [ss_item_sk#33], LeftSemi, BuildRight - : : :- *(23) Project [i_item_sk#32, i_brand_id#2, i_class_id#3, i_category_id#4] - : : : +- *(23) Filter (((isnotnull(i_item_sk#32) && isnotnull(i_class_id#3)) && isnotnull(i_category_id#4)) && isnotnull(i_brand_id#2)) - : : : +- *(23) FileScan parquet default.item[i_item_sk#32,i_brand_id#2,i_class_id#3,i_category_id#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_brand_id)], ReadSchema: struct - : : +- ReusedExchange [ss_item_sk#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(24) Project [d_date_sk#19] - : +- *(24) Filter ((isnotnull(d_week_seq#39) && (d_week_seq#39 = Subquery subquery1883)) && isnotnull(d_date_sk#19)) - : : +- Subquery subquery1883 - : : +- *(1) Project [d_week_seq#39] - : : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - : : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct - : +- *(24) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#39] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct - : +- Subquery subquery1883 - : +- *(1) Project [d_week_seq#39] - : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 2000)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,12), ..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true])) - +- *(51) Project [channel#7, i_brand_id#8, i_class_id#9, i_category_id#10, sales#11, number_sales#12] - +- *(51) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2)))#42 as decimal(32,6)) > cast(Subquery subquery1890 as decimal(32,6)))) - : +- Subquery subquery1890 - : +- *(8) HashAggregate(keys=[], functions=[avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) - : +- Exchange SinglePartition - : +- *(7) HashAggregate(keys=[], functions=[partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#16 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#17 as decimal(12,2)))), DecimalType(18,2)))]) - : +- Union - : :- *(2) Project [ss_quantity#13 AS quantity#16, ss_list_price#14 AS list_price#17] - : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight - : : :- *(2) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14] - : : : +- *(2) Filter isnotnull(ss_sold_date_sk#18) - : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [d_date_sk#19] - : : +- *(1) Filter (((isnotnull(d_year#20) && (d_year#20 >= 1999)) && (d_year#20 <= 2001)) && isnotnull(d_date_sk#19)) - : : +- *(1) FileScan parquet default.date_dim[d_date_sk#19,d_year#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_da..., ReadSchema: struct - : :- *(4) Project [cs_quantity#21 AS quantity#22, cs_list_price#23 AS list_price#24] - : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#19], Inner, BuildRight - : : :- *(4) Project [cs_sold_date_sk#25, cs_quantity#21, cs_list_price#23] - : : : +- *(4) Filter isnotnull(cs_sold_date_sk#25) - : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_quantity#21,cs_list_price#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(6) Project [ws_quantity#26 AS quantity#27, ws_list_price#28 AS list_price#29] - : +- *(6) BroadcastHashJoin [ws_sold_date_sk#30], [d_date_sk#19], Inner, BuildRight - : :- *(6) Project [ws_sold_date_sk#30, ws_quantity#26, ws_list_price#28] - : : +- *(6) Filter isnotnull(ws_sold_date_sk#30) - : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#30,ws_quantity#26,ws_list_price#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(51) HashAggregate(keys=[i_brand_id#8, i_class_id#9, i_category_id#10], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), count(1)]) - +- Exchange hashpartitioning(i_brand_id#8, i_class_id#9, i_category_id#10, 5) - +- *(50) HashAggregate(keys=[i_brand_id#8, i_class_id#9, i_category_id#10], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#14 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)]) - +- *(50) Project [ss_quantity#13, ss_list_price#14, i_brand_id#8, i_class_id#9, i_category_id#10] - +- *(50) BroadcastHashJoin [ss_sold_date_sk#18], [d_date_sk#19], Inner, BuildRight - :- *(50) Project [ss_sold_date_sk#18, ss_quantity#13, ss_list_price#14, i_brand_id#8, i_class_id#9, i_category_id#10] - : +- *(50) BroadcastHashJoin [ss_item_sk#31], [i_item_sk#43], Inner, BuildRight - : :- *(50) BroadcastHashJoin [ss_item_sk#31], [ss_item_sk#33], LeftSemi, BuildRight - : : :- *(50) Project [ss_sold_date_sk#18, ss_item_sk#31, ss_quantity#13, ss_list_price#14] - : : : +- *(50) Filter (isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#18)) - : : : +- *(50) FileScan parquet default.store_sales[ss_sold_date_sk#18,ss_item_sk#31,ss_quantity#13,ss_list_price#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [ss_item_sk#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [i_item_sk#43, i_brand_id#8, i_class_id#9, i_category_id#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(49) Project [d_date_sk#19] - +- *(49) Filter ((isnotnull(d_week_seq#39) && (d_week_seq#39 = Subquery subquery1889)) && isnotnull(d_date_sk#19)) - : +- Subquery subquery1889 - : +- *(1) Project [d_week_seq#39] - : +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - : +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct - +- *(49) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#39] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct - +- Subquery subquery1889 - +- *(1) Project [d_week_seq#39] - +- *(1) Filter (((((isnotnull(d_year#20) && isnotnull(d_dom#40)) && isnotnull(d_moy#41)) && (d_year#20 = 1999)) && (d_moy#41 = 12)) && (d_dom#40 = 11)) - +- *(1) FileScan parquet default.date_dim[d_week_seq#39,d_year#20,d_moy#41,d_dom#40] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_dom), IsNotNull(d_moy), EqualTo(d_year,1999), EqualTo(d_moy,12), ..., ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (100) ++- * BroadcastHashJoin Inner BuildRight (99) + :- * Project (77) + : +- * Filter (76) + : +- * HashAggregate (75) + : +- Exchange (74) + : +- * HashAggregate (73) + : +- * Project (72) + : +- * BroadcastHashJoin Inner BuildRight (71) + : :- * Project (65) + : : +- * BroadcastHashJoin Inner BuildRight (64) + : : :- * BroadcastHashJoin LeftSemi BuildRight (57) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (56) + : : : +- * Project (55) + : : : +- * BroadcastHashJoin Inner BuildRight (54) + : : : :- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.item (4) + : : : +- BroadcastExchange (53) + : : : +- * HashAggregate (52) + : : : +- * HashAggregate (51) + : : : +- * HashAggregate (50) + : : : +- Exchange (49) + : : : +- * HashAggregate (48) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (47) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (36) + : : : : :- * Project (22) + : : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : : :- * Project (15) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : : :- * Filter (9) + : : : : : : : +- * ColumnarToRow (8) + : : : : : : : +- Scan parquet default.store_sales (7) + : : : : : : +- BroadcastExchange (13) + : : : : : : +- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet default.item (10) + : : : : : +- BroadcastExchange (20) + : : : : : +- * Project (19) + : : : : : +- * Filter (18) + : : : : : +- * ColumnarToRow (17) + : : : : : +- Scan parquet default.date_dim (16) + : : : : +- BroadcastExchange (35) + : : : : +- * Project (34) + : : : : +- * BroadcastHashJoin Inner BuildRight (33) + : : : : :- * Project (31) + : : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : : :- * Filter (25) + : : : : : : +- * ColumnarToRow (24) + : : : : : : +- Scan parquet default.catalog_sales (23) + : : : : : +- BroadcastExchange (29) + : : : : : +- * Filter (28) + : : : : : +- * ColumnarToRow (27) + : : : : : +- Scan parquet default.item (26) + : : : : +- ReusedExchange (32) + : : : +- BroadcastExchange (46) + : : : +- * Project (45) + : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : :- * Project (42) + : : : : +- * BroadcastHashJoin Inner BuildRight (41) + : : : : :- * Filter (39) + : : : : : +- * ColumnarToRow (38) + : : : : : +- Scan parquet default.web_sales (37) + : : : : +- ReusedExchange (40) + : : : +- ReusedExchange (43) + : : +- BroadcastExchange (63) + : : +- * BroadcastHashJoin LeftSemi BuildRight (62) + : : :- * Filter (60) + : : : +- * ColumnarToRow (59) + : : : +- Scan parquet default.item (58) + : : +- ReusedExchange (61) + : +- BroadcastExchange (70) + : +- * Project (69) + : +- * Filter (68) + : +- * ColumnarToRow (67) + : +- Scan parquet default.date_dim (66) + +- BroadcastExchange (98) + +- * Project (97) + +- * Filter (96) + +- * HashAggregate (95) + +- Exchange (94) + +- * HashAggregate (93) + +- * Project (92) + +- * BroadcastHashJoin Inner BuildRight (91) + :- * Project (85) + : +- * BroadcastHashJoin Inner BuildRight (84) + : :- * BroadcastHashJoin LeftSemi BuildRight (82) + : : :- * Filter (80) + : : : +- * ColumnarToRow (79) + : : : +- Scan parquet default.store_sales (78) + : : +- ReusedExchange (81) + : +- ReusedExchange (83) + +- BroadcastExchange (90) + +- * Project (89) + +- * Filter (88) + +- * ColumnarToRow (87) + +- Scan parquet default.date_dim (86) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 25] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] + +(3) Filter [codegen id : 25] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(6) Filter [codegen id : 11] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : ((isnotnull(i_brand_id#6) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(7) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#1, ss_item_sk#2] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 9] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] + +(9) Filter [codegen id : 9] +Input [2]: [ss_sold_date_sk#1, ss_item_sk#2] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(10) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(12) Filter [codegen id : 1] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : (((isnotnull(i_item_sk#5) AND isnotnull(i_brand_id#6)) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(13) BroadcastExchange +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(15) Project [codegen id : 9] +Output [4]: [ss_sold_date_sk#1, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(16) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_year#11] + +(18) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) + +(19) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(20) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(21) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(22) Project [codegen id : 9] +Output [3]: [i_brand_id#6 AS brand_id#13, i_class_id#7 AS class_id#14, i_category_id#8 AS category_id#15] +Input [5]: [ss_sold_date_sk#1, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(23) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#16, cs_item_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 5] +Input [2]: [cs_sold_date_sk#16, cs_item_sk#17] + +(25) Filter [codegen id : 5] +Input [2]: [cs_sold_date_sk#16, cs_item_sk#17] +Condition : (isnotnull(cs_item_sk#17) AND isnotnull(cs_sold_date_sk#16)) + +(26) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 3] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(28) Filter [codegen id : 3] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : isnotnull(i_item_sk#5) + +(29) BroadcastExchange +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] + +(30) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_item_sk#17] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(31) Project [codegen id : 5] +Output [4]: [cs_sold_date_sk#16, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [cs_sold_date_sk#16, cs_item_sk#17, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(32) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#10] + +(33) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(34) Project [codegen id : 5] +Output [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Input [5]: [cs_sold_date_sk#16, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(35) BroadcastExchange +Input [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#19] + +(36) BroadcastHashJoin [codegen id : 9] +Left keys [6]: [coalesce(brand_id#13, 0), isnull(brand_id#13), coalesce(class_id#14, 0), isnull(class_id#14), coalesce(category_id#15, 0), isnull(category_id#15)] +Right keys [6]: [coalesce(i_brand_id#6, 0), isnull(i_brand_id#6), coalesce(i_class_id#7, 0), isnull(i_class_id#7), coalesce(i_category_id#8, 0), isnull(i_category_id#8)] +Join condition: None + +(37) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#20, ws_item_sk#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 8] +Input [2]: [ws_sold_date_sk#20, ws_item_sk#21] + +(39) Filter [codegen id : 8] +Input [2]: [ws_sold_date_sk#20, ws_item_sk#21] +Condition : (isnotnull(ws_item_sk#21) AND isnotnull(ws_sold_date_sk#20)) + +(40) ReusedExchange [Reuses operator id: 29] +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_item_sk#21] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(42) Project [codegen id : 8] +Output [4]: [ws_sold_date_sk#20, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [ws_sold_date_sk#20, ws_item_sk#21, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(43) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#10] + +(44) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(45) Project [codegen id : 8] +Output [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Input [5]: [ws_sold_date_sk#20, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(46) BroadcastExchange +Input [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#22] + +(47) BroadcastHashJoin [codegen id : 9] +Left keys [6]: [coalesce(brand_id#13, 0), isnull(brand_id#13), coalesce(class_id#14, 0), isnull(class_id#14), coalesce(category_id#15, 0), isnull(category_id#15)] +Right keys [6]: [coalesce(i_brand_id#6, 0), isnull(i_brand_id#6), coalesce(i_class_id#7, 0), isnull(i_class_id#7), coalesce(i_category_id#8, 0), isnull(i_category_id#8)] +Join condition: None + +(48) HashAggregate [codegen id : 9] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(49) Exchange +Input [3]: [brand_id#13, class_id#14, category_id#15] +Arguments: hashpartitioning(brand_id#13, class_id#14, category_id#15, 5), true, [id=#23] + +(50) HashAggregate [codegen id : 10] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(51) HashAggregate [codegen id : 10] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(52) HashAggregate [codegen id : 10] +Input [3]: [brand_id#13, class_id#14, category_id#15] +Keys [3]: [brand_id#13, class_id#14, category_id#15] +Functions: [] +Aggregate Attributes: [] +Results [3]: [brand_id#13, class_id#14, category_id#15] + +(53) BroadcastExchange +Input [3]: [brand_id#13, class_id#14, category_id#15] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#24] + +(54) BroadcastHashJoin [codegen id : 11] +Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Right keys [3]: [brand_id#13, class_id#14, category_id#15] +Join condition: None + +(55) Project [codegen id : 11] +Output [1]: [i_item_sk#5 AS ss_item_sk#25] +Input [7]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8, brand_id#13, class_id#14, category_id#15] + +(56) BroadcastExchange +Input [1]: [ss_item_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] + +(57) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(58) Scan parquet default.item +Output [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] +ReadSchema: struct + +(59) ColumnarToRow [codegen id : 23] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(60) Filter [codegen id : 23] +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Condition : (((isnotnull(i_item_sk#5) AND isnotnull(i_brand_id#6)) AND isnotnull(i_class_id#7)) AND isnotnull(i_category_id#8)) + +(61) ReusedExchange [Reuses operator id: 56] +Output [1]: [ss_item_sk#25] + +(62) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [i_item_sk#5] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(63) BroadcastExchange +Input [4]: [i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(64) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(65) Project [codegen id : 25] +Output [6]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, i_item_sk#5, i_brand_id#6, i_class_id#7, i_category_id#8] + +(66) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_week_seq#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(67) ColumnarToRow [codegen id : 24] +Input [2]: [d_date_sk#10, d_week_seq#28] + +(68) Filter [codegen id : 24] +Input [2]: [d_date_sk#10, d_week_seq#28] +Condition : ((isnotnull(d_week_seq#28) AND (d_week_seq#28 = Subquery scalar-subquery#29, [id=#30])) AND isnotnull(d_date_sk#10)) + +(69) Project [codegen id : 24] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_week_seq#28] + +(70) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#31] + +(71) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(72) Project [codegen id : 25] +Output [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [7]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8, d_date_sk#10] + +(73) HashAggregate [codegen id : 25] +Input [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#6, i_class_id#7, i_category_id#8] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#32, isEmpty#33, count#34] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#35, isEmpty#36, count#37] + +(74) Exchange +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#35, isEmpty#36, count#37] +Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#38] + +(75) HashAggregate [codegen id : 52] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#35, isEmpty#36, count#37] +Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39, count(1)#40] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sales#41, count(1)#40 AS number_sales#42, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43] + +(76) Filter [codegen id : 52] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43 as decimal(32,6)) > cast(Subquery scalar-subquery#44, [id=#45] as decimal(32,6)))) + +(77) Project [codegen id : 52] +Output [6]: [store AS channel#46, i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43] + +(78) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(79) ColumnarToRow [codegen id : 50] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] + +(80) Filter [codegen id : 50] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(81) ReusedExchange [Reuses operator id: 56] +Output [1]: [ss_item_sk#25] + +(82) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [ss_item_sk#25] +Join condition: None + +(83) ReusedExchange [Reuses operator id: 63] +Output [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] + +(84) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#47] +Join condition: None + +(85) Project [codegen id : 50] +Output [6]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#48, i_class_id#49, i_category_id#50] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4, i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] + +(86) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_week_seq#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(87) ColumnarToRow [codegen id : 49] +Input [2]: [d_date_sk#10, d_week_seq#28] + +(88) Filter [codegen id : 49] +Input [2]: [d_date_sk#10, d_week_seq#28] +Condition : ((isnotnull(d_week_seq#28) AND (d_week_seq#28 = Subquery scalar-subquery#51, [id=#52])) AND isnotnull(d_date_sk#10)) + +(89) Project [codegen id : 49] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_week_seq#28] + +(90) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#53] + +(91) BroadcastHashJoin [codegen id : 50] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(92) Project [codegen id : 50] +Output [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#48, i_class_id#49, i_category_id#50] +Input [7]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, i_brand_id#48, i_class_id#49, i_category_id#50, d_date_sk#10] + +(93) HashAggregate [codegen id : 50] +Input [5]: [ss_quantity#3, ss_list_price#4, i_brand_id#48, i_class_id#49, i_category_id#50] +Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), partial_count(1)] +Aggregate Attributes [3]: [sum#54, isEmpty#55, count#56] +Results [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#57, isEmpty#58, count#59] + +(94) Exchange +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#57, isEmpty#58, count#59] +Arguments: hashpartitioning(i_brand_id#48, i_class_id#49, i_category_id#50, 5), true, [id=#60] + +(95) HashAggregate [codegen id : 51] +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#57, isEmpty#58, count#59] +Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61, count(1)#62] +Results [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#63, count(1)#62 AS number_sales#64, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65] + +(96) Filter [codegen id : 51] +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#44, [id=#45] as decimal(32,6)))) + +(97) Project [codegen id : 51] +Output [6]: [store AS channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65] + +(98) BroadcastExchange +Input [6]: [channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#67] + +(99) BroadcastHashJoin [codegen id : 52] +Left keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] +Right keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Join condition: None + +(100) TakeOrderedAndProject +Input [12]: [channel#46, i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] +Arguments: 100, [i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#46, i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#44, [id=#45] +* HashAggregate (126) ++- Exchange (125) + +- * HashAggregate (124) + +- Union (123) + :- * Project (110) + : +- * BroadcastHashJoin Inner BuildRight (109) + : :- * Filter (103) + : : +- * ColumnarToRow (102) + : : +- Scan parquet default.store_sales (101) + : +- BroadcastExchange (108) + : +- * Project (107) + : +- * Filter (106) + : +- * ColumnarToRow (105) + : +- Scan parquet default.date_dim (104) + :- * Project (116) + : +- * BroadcastHashJoin Inner BuildRight (115) + : :- * Filter (113) + : : +- * ColumnarToRow (112) + : : +- Scan parquet default.catalog_sales (111) + : +- ReusedExchange (114) + +- * Project (122) + +- * BroadcastHashJoin Inner BuildRight (121) + :- * Filter (119) + : +- * ColumnarToRow (118) + : +- Scan parquet default.web_sales (117) + +- ReusedExchange (120) + + +(101) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(102) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] + +(103) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] +Condition : isnotnull(ss_sold_date_sk#1) + +(104) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_year#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(105) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] + +(106) Filter [codegen id : 1] +Input [2]: [d_date_sk#10, d_year#11] +Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) + +(107) Project [codegen id : 1] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_year#11] + +(108) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#68] + +(109) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(110) Project [codegen id : 2] +Output [2]: [ss_quantity#3 AS quantity#69, ss_list_price#4 AS list_price#70] +Input [4]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, d_date_sk#10] + +(111) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#16, cs_quantity#71, cs_list_price#72] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(112) ColumnarToRow [codegen id : 4] +Input [3]: [cs_sold_date_sk#16, cs_quantity#71, cs_list_price#72] + +(113) Filter [codegen id : 4] +Input [3]: [cs_sold_date_sk#16, cs_quantity#71, cs_list_price#72] +Condition : isnotnull(cs_sold_date_sk#16) + +(114) ReusedExchange [Reuses operator id: 108] +Output [1]: [d_date_sk#10] + +(115) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(116) Project [codegen id : 4] +Output [2]: [cs_quantity#71 AS quantity#73, cs_list_price#72 AS list_price#74] +Input [4]: [cs_sold_date_sk#16, cs_quantity#71, cs_list_price#72, d_date_sk#10] + +(117) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#20, ws_quantity#75, ws_list_price#76] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(118) ColumnarToRow [codegen id : 6] +Input [3]: [ws_sold_date_sk#20, ws_quantity#75, ws_list_price#76] + +(119) Filter [codegen id : 6] +Input [3]: [ws_sold_date_sk#20, ws_quantity#75, ws_list_price#76] +Condition : isnotnull(ws_sold_date_sk#20) + +(120) ReusedExchange [Reuses operator id: 108] +Output [1]: [d_date_sk#10] + +(121) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(122) Project [codegen id : 6] +Output [2]: [ws_quantity#75 AS quantity#77, ws_list_price#76 AS list_price#78] +Input [4]: [ws_sold_date_sk#20, ws_quantity#75, ws_list_price#76, d_date_sk#10] + +(123) Union + +(124) HashAggregate [codegen id : 7] +Input [2]: [quantity#69, list_price#70] +Keys: [] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#70 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#79, count#80] +Results [2]: [sum#81, count#82] + +(125) Exchange +Input [2]: [sum#81, count#82] +Arguments: SinglePartition, true, [id=#83] + +(126) HashAggregate [codegen id : 8] +Input [2]: [sum#81, count#82] +Keys: [] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#70 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#84] +Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#69 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#70 as decimal(12,2)))), DecimalType(18,2), true))#84 AS average_sales#85] + +Subquery:2 Hosting operator id = 68 Hosting Expression = Subquery scalar-subquery#29, [id=#30] +* Project (130) ++- * Filter (129) + +- * ColumnarToRow (128) + +- Scan parquet default.date_dim (127) + + +(127) Scan parquet default.date_dim +Output [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,2000), EqualTo(d_moy,12), EqualTo(d_dom,11)] +ReadSchema: struct + +(128) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] + +(129) Filter [codegen id : 1] +Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] +Condition : (((((isnotnull(d_year#11) AND isnotnull(d_moy#86)) AND isnotnull(d_dom#87)) AND (d_year#11 = 2000)) AND (d_moy#86 = 12)) AND (d_dom#87 = 11)) + +(130) Project [codegen id : 1] +Output [1]: [d_week_seq#28] +Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] + +Subquery:3 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#44, [id=#45] + +Subquery:4 Hosting operator id = 88 Hosting Expression = Subquery scalar-subquery#51, [id=#52] +* Project (134) ++- * Filter (133) + +- * ColumnarToRow (132) + +- Scan parquet default.date_dim (131) + + +(131) Scan parquet default.date_dim +Output [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,11)] +ReadSchema: struct + +(132) ColumnarToRow [codegen id : 1] +Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] + +(133) Filter [codegen id : 1] +Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] +Condition : (((((isnotnull(d_year#11) AND isnotnull(d_moy#86)) AND isnotnull(d_dom#87)) AND (d_year#11 = 1999)) AND (d_moy#86 = 12)) AND (d_dom#87 = 11)) + +(134) Project [codegen id : 1] +Output [1]: [d_week_seq#28] +Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] + + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/simplified.txt index ee18cd267..7bbf83e3d 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q14b/simplified.txt @@ -1,226 +1,204 @@ -TakeOrderedAndProject [channel,channel,i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id,number_sales,number_sales,sales,sales] - WholeStageCodegen - BroadcastHashJoin [i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id] - Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] +TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + WholeStageCodegen (52) + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #2 - WholeStageCodegen - HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] + WholeStageCodegen (8) + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] InputAdapter Exchange #12 - WholeStageCodegen - HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] + WholeStageCodegen (7) + HashAggregate [quantity,list_price] [sum,count,sum,count] InputAdapter Union - WholeStageCodegen - Project [ss_list_price,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_list_price,ss_quantity,ss_sold_date_sk] - Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] + WholeStageCodegen (2) + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] InputAdapter BroadcastExchange #13 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - WholeStageCodegen - Project [cs_list_price,cs_quantity] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (4) + Project [cs_quantity,cs_list_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_list_price,cs_quantity,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #13 - WholeStageCodegen - Project [ws_list_price,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_list_price,ws_quantity,ws_sold_date_sk] - Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] + ReusedExchange [d_date_sk] #13 + WholeStageCodegen (6) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #13 - HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] + ReusedExchange [d_date_sk] #13 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter - Exchange [i_brand_id,i_category_id,i_class_id] #1 - WholeStageCodegen - HashAggregate [count,count,i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,sum,sum] [count,count,sum,sum] - Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,ss_sold_date_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] + Exchange [i_brand_id,i_class_id,i_category_id] #1 + WholeStageCodegen (25) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] BroadcastHashJoin [ss_item_sk,ss_item_sk] - Project [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (11) Project [i_item_sk] - BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] - Project [i_brand_id,i_category_id,i_class_id,i_item_sk] - Filter [i_brand_id,i_category_id,i_class_id] - Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + Filter [i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter BroadcastExchange #3 - WholeStageCodegen - HashAggregate [brand_id,category_id,class_id] - HashAggregate [brand_id,category_id,class_id] - BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] - HashAggregate [brand_id,category_id,class_id] - InputAdapter - Exchange [brand_id,category_id,class_id] #4 - WholeStageCodegen - HashAggregate [brand_id,category_id,class_id] - BroadcastHashJoin [brand_id,category_id,class_id,i_brand_id,i_category_id,i_class_id] - Project [i_brand_id,i_category_id,i_class_id] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,ss_sold_date_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_item_sk,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] [ss_item_sk,ss_sold_date_sk] + WholeStageCodegen (10) + HashAggregate [brand_id,class_id,category_id] + HashAggregate [brand_id,class_id,category_id] + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #4 + WholeStageCodegen (9) + HashAggregate [brand_id,class_id,category_id] + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [i_brand_id,i_category_id,i_class_id,i_item_sk] - Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] - Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] + WholeStageCodegen (1) + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #7 - WholeStageCodegen - Project [i_brand_id,i_category_id,i_class_id] + WholeStageCodegen (5) + Project [i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,i_brand_id,i_category_id,i_class_id] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk,cs_sold_date_sk] - Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_item_sk,cs_sold_date_sk] [cs_item_sk,cs_sold_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk] InputAdapter BroadcastExchange #8 - WholeStageCodegen - Project [i_brand_id,i_category_id,i_class_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #6 - InputAdapter - BroadcastExchange #9 - WholeStageCodegen - Project [i_brand_id,i_category_id,i_class_id] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,ws_sold_date_sk] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_item_sk,ws_sold_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_item_sk,ws_sold_date_sk] [ws_item_sk,ws_sold_date_sk] - InputAdapter - ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #8 + ReusedExchange [d_date_sk] #6 InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #6 + BroadcastExchange #9 + WholeStageCodegen (8) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #8 + InputAdapter + ReusedExchange [d_date_sk] #6 InputAdapter BroadcastExchange #10 - WholeStageCodegen + WholeStageCodegen (23) BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_brand_id,i_category_id,i_class_id,i_item_sk] - Filter [i_brand_id,i_category_id,i_class_id,i_item_sk] - Scan parquet default.item [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter - ReusedExchange [ss_item_sk] [ss_item_sk] #2 + ReusedExchange [ss_item_sk] #2 InputAdapter BroadcastExchange #11 - WholeStageCodegen + WholeStageCodegen (24) Project [d_date_sk] - Filter [d_date_sk,d_week_seq] + Filter [d_week_seq,d_date_sk] Subquery #1 - WholeStageCodegen + WholeStageCodegen (1) Project [d_week_seq] - Filter [d_dom,d_moy,d_year] - Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] [d_dom,d_moy,d_week_seq,d_year] - Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] - Subquery #1 - WholeStageCodegen - Project [d_week_seq] - Filter [d_dom,d_moy,d_year] - Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] [d_dom,d_moy,d_week_seq,d_year] + Filter [d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] InputAdapter BroadcastExchange #14 - WholeStageCodegen - Project [channel,i_brand_id,i_category_id,i_class_id,number_sales,sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] - Subquery #4 - WholeStageCodegen - HashAggregate [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] [average_sales,avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2))),count,sum] - InputAdapter - Exchange #12 - WholeStageCodegen - HashAggregate [count,count,list_price,quantity,sum,sum] [count,count,sum,sum] - InputAdapter - Union - WholeStageCodegen - Project [ss_list_price,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_list_price,ss_quantity,ss_sold_date_sk] - Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_list_price,ss_quantity,ss_sold_date_sk] [ss_list_price,ss_quantity,ss_sold_date_sk] - InputAdapter - BroadcastExchange #13 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - WholeStageCodegen - Project [cs_list_price,cs_quantity] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_list_price,cs_quantity,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_list_price,cs_quantity,cs_sold_date_sk] [cs_list_price,cs_quantity,cs_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #13 - WholeStageCodegen - Project [ws_list_price,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_list_price,ws_quantity,ws_sold_date_sk] - Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_list_price,ws_quantity,ws_sold_date_sk] [ws_list_price,ws_quantity,ws_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #13 - HashAggregate [count,count(1),i_brand_id,i_category_id,i_class_id,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] [channel,count,count(1),number_sales,sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2)))] + WholeStageCodegen (51) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #2 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter - Exchange [i_brand_id,i_category_id,i_class_id] #15 - WholeStageCodegen - HashAggregate [count,count,i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,sum,sum] [count,count,sum,sum] - Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,ss_list_price,ss_quantity,ss_sold_date_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] + Exchange [i_brand_id,i_class_id,i_category_id] #15 + WholeStageCodegen (50) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] BroadcastHashJoin [ss_item_sk,ss_item_sk] - Project [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] [ss_item_sk,ss_list_price,ss_quantity,ss_sold_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] InputAdapter - ReusedExchange [ss_item_sk] [ss_item_sk] #2 + ReusedExchange [ss_item_sk] #2 InputAdapter - ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk] [i_brand_id,i_category_id,i_class_id,i_item_sk] #10 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 InputAdapter BroadcastExchange #16 - WholeStageCodegen + WholeStageCodegen (49) Project [d_date_sk] - Filter [d_date_sk,d_week_seq] + Filter [d_week_seq,d_date_sk] Subquery #3 - WholeStageCodegen + WholeStageCodegen (1) Project [d_week_seq] - Filter [d_dom,d_moy,d_year] - Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] [d_dom,d_moy,d_week_seq,d_year] - Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] - Subquery #3 - WholeStageCodegen - Project [d_week_seq] - Filter [d_dom,d_moy,d_year] - Scan parquet default.date_dim [d_dom,d_moy,d_week_seq,d_year] [d_dom,d_moy,d_week_seq,d_year] + Filter [d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_week_seq,d_year,d_moy,d_dom] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/explain.txt index f77fe16bb..4dc0abf9a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/explain.txt @@ -1,26 +1,150 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[ca_zip#1 ASC NULLS FIRST], output=[ca_zip#1,sum(cs_sales_price)#2]) -+- *(5) HashAggregate(keys=[ca_zip#1], functions=[sum(UnscaledValue(cs_sales_price#3))]) - +- Exchange hashpartitioning(ca_zip#1, 5) - +- *(4) HashAggregate(keys=[ca_zip#1], functions=[partial_sum(UnscaledValue(cs_sales_price#3))]) - +- *(4) Project [cs_sales_price#3, ca_zip#1] - +- *(4) BroadcastHashJoin [cs_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight - :- *(4) Project [cs_sold_date_sk#4, cs_sales_price#3, ca_zip#1] - : +- *(4) BroadcastHashJoin [c_current_addr_sk#6], [ca_address_sk#7], Inner, BuildRight, ((substring(ca_zip#1, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) || ca_state#8 IN (CA,WA,GA)) || (cs_sales_price#3 > 500.00)) - : :- *(4) Project [cs_sold_date_sk#4, cs_sales_price#3, c_current_addr_sk#6] - : : +- *(4) BroadcastHashJoin [cs_bill_customer_sk#9], [c_customer_sk#10], Inner, BuildRight - : : :- *(4) Project [cs_sold_date_sk#4, cs_bill_customer_sk#9, cs_sales_price#3] - : : : +- *(4) Filter (isnotnull(cs_bill_customer_sk#9) && isnotnull(cs_sold_date_sk#4)) - : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#4,cs_bill_customer_sk#9,cs_sales_price#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [c_customer_sk#10, c_current_addr_sk#6] - : : +- *(1) Filter (isnotnull(c_customer_sk#10) && isnotnull(c_current_addr_sk#6)) - : : +- *(1) FileScan parquet default.customer[c_customer_sk#10,c_current_addr_sk#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [ca_address_sk#7, ca_state#8, ca_zip#1] - : +- *(2) Filter isnotnull(ca_address_sk#7) - : +- *(2) FileScan parquet default.customer_address[ca_address_sk#7,ca_state#8,ca_zip#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [d_date_sk#5] - +- *(3) Filter ((((isnotnull(d_qoy#11) && isnotnull(d_year#12)) && (d_qoy#11 = 2)) && (d_year#12 = 2001)) && isnotnull(d_date_sk#5)) - +- *(3) FileScan parquet default.date_dim[d_date_sk#5,d_year#12,d_qoy#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (26) ++- * HashAggregate (25) + +- Exchange (24) + +- * HashAggregate (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (15) + : +- * BroadcastHashJoin Inner BuildRight (14) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.catalog_sales (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.customer (4) + : +- BroadcastExchange (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet default.customer_address (10) + +- BroadcastExchange (20) + +- * Project (19) + +- * Filter (18) + +- * ColumnarToRow (17) + +- Scan parquet default.date_dim (16) + + +(1) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_sales_price#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_sales_price#3] + +(3) Filter [codegen id : 4] +Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_sales_price#3] +Condition : (isnotnull(cs_bill_customer_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.customer +Output [2]: [c_customer_sk#4, c_current_addr_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [c_customer_sk#4, c_current_addr_sk#5] + +(6) Filter [codegen id : 1] +Input [2]: [c_customer_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_customer_sk#4) AND isnotnull(c_current_addr_sk#5)) + +(7) BroadcastExchange +Input [2]: [c_customer_sk#4, c_current_addr_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#4] +Join condition: None + +(9) Project [codegen id : 4] +Output [3]: [cs_sold_date_sk#1, cs_sales_price#3, c_current_addr_sk#5] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_sales_price#3, c_customer_sk#4, c_current_addr_sk#5] + +(10) Scan parquet default.customer_address +Output [3]: [ca_address_sk#7, ca_state#8, ca_zip#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ca_address_sk#7, ca_state#8, ca_zip#9] + +(12) Filter [codegen id : 2] +Input [3]: [ca_address_sk#7, ca_state#8, ca_zip#9] +Condition : isnotnull(ca_address_sk#7) + +(13) BroadcastExchange +Input [3]: [ca_address_sk#7, ca_state#8, ca_zip#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#7] +Join condition: ((substr(ca_zip#9, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR ca_state#8 IN (CA,WA,GA)) OR (cs_sales_price#3 > 500.00)) + +(15) Project [codegen id : 4] +Output [3]: [cs_sold_date_sk#1, cs_sales_price#3, ca_zip#9] +Input [6]: [cs_sold_date_sk#1, cs_sales_price#3, c_current_addr_sk#5, ca_address_sk#7, ca_state#8, ca_zip#9] + +(16) Scan parquet default.date_dim +Output [3]: [d_date_sk#11, d_year#12, d_qoy#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] +Condition : ((((isnotnull(d_qoy#13) AND isnotnull(d_year#12)) AND (d_qoy#13 = 2)) AND (d_year#12 = 2001)) AND isnotnull(d_date_sk#11)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#11] +Input [3]: [d_date_sk#11, d_year#12, d_qoy#13] + +(20) BroadcastExchange +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(22) Project [codegen id : 4] +Output [2]: [cs_sales_price#3, ca_zip#9] +Input [4]: [cs_sold_date_sk#1, cs_sales_price#3, ca_zip#9, d_date_sk#11] + +(23) HashAggregate [codegen id : 4] +Input [2]: [cs_sales_price#3, ca_zip#9] +Keys [1]: [ca_zip#9] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#3))] +Aggregate Attributes [1]: [sum#15] +Results [2]: [ca_zip#9, sum#16] + +(24) Exchange +Input [2]: [ca_zip#9, sum#16] +Arguments: hashpartitioning(ca_zip#9, 5), true, [id=#17] + +(25) HashAggregate [codegen id : 5] +Input [2]: [ca_zip#9, sum#16] +Keys [1]: [ca_zip#9] +Functions [1]: [sum(UnscaledValue(cs_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#3))#18] +Results [2]: [ca_zip#9, MakeDecimal(sum(UnscaledValue(cs_sales_price#3))#18,17,2) AS sum(cs_sales_price)#19] + +(26) TakeOrderedAndProject +Input [2]: [ca_zip#9, sum(cs_sales_price)#19] +Arguments: 100, [ca_zip#9 ASC NULLS FIRST], [ca_zip#9, sum(cs_sales_price)#19] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/simplified.txt index aa1157759..9cac0b46f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q15/simplified.txt @@ -1,34 +1,39 @@ TakeOrderedAndProject [ca_zip,sum(cs_sales_price)] - WholeStageCodegen - HashAggregate [ca_zip,sum,sum(UnscaledValue(cs_sales_price))] [sum,sum(UnscaledValue(cs_sales_price)),sum(cs_sales_price)] + WholeStageCodegen (5) + HashAggregate [ca_zip,sum] [sum(UnscaledValue(cs_sales_price)),sum(cs_sales_price),sum] InputAdapter Exchange [ca_zip] #1 - WholeStageCodegen - HashAggregate [ca_zip,cs_sales_price,sum,sum] [sum,sum] - Project [ca_zip,cs_sales_price] + WholeStageCodegen (4) + HashAggregate [ca_zip,cs_sales_price] [sum,sum] + Project [cs_sales_price,ca_zip] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [ca_zip,cs_sales_price,cs_sold_date_sk] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_state,ca_zip,cs_sales_price] - Project [c_current_addr_sk,cs_sales_price,cs_sold_date_sk] - BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - Project [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] - Filter [cs_bill_customer_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] [cs_bill_customer_sk,cs_sales_price,cs_sold_date_sk] + Project [cs_sold_date_sk,cs_sales_price,ca_zip] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_zip,ca_state,cs_sales_price] + Project [cs_sold_date_sk,cs_sales_price,c_current_addr_sk] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Filter [cs_bill_customer_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_sales_price] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [c_current_addr_sk,c_customer_sk] - Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] + WholeStageCodegen (1) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [ca_address_sk,ca_state,ca_zip] - Filter [ca_address_sk] - Scan parquet default.customer_address [ca_address_sk,ca_state,ca_zip] [ca_address_sk,ca_state,ca_zip] + WholeStageCodegen (2) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state,ca_zip] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (3) Project [d_date_sk] - Filter [d_date_sk,d_qoy,d_year] - Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] + Filter [d_qoy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/explain.txt index ff12892ff..ea7e29839 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/explain.txt @@ -1,37 +1,235 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], output=[order count #1,total shipping cost #2,total net profit #3]) -+- *(8) HashAggregate(keys=[], functions=[sum(UnscaledValue(cs_ext_ship_cost#4)), sum(UnscaledValue(cs_net_profit#5)), count(distinct cs_order_number#6)]) - +- Exchange SinglePartition - +- *(7) HashAggregate(keys=[], functions=[merge_sum(UnscaledValue(cs_ext_ship_cost#4)), merge_sum(UnscaledValue(cs_net_profit#5)), partial_count(distinct cs_order_number#6)]) - +- *(7) HashAggregate(keys=[cs_order_number#6], functions=[merge_sum(UnscaledValue(cs_ext_ship_cost#4)), merge_sum(UnscaledValue(cs_net_profit#5))]) - +- Exchange hashpartitioning(cs_order_number#6, 5) - +- *(6) HashAggregate(keys=[cs_order_number#6], functions=[partial_sum(UnscaledValue(cs_ext_ship_cost#4)), partial_sum(UnscaledValue(cs_net_profit#5))]) - +- *(6) Project [cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] - +- *(6) BroadcastHashJoin [cs_call_center_sk#7], [cc_call_center_sk#8], Inner, BuildRight - :- *(6) Project [cs_call_center_sk#7, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] - : +- *(6) BroadcastHashJoin [cs_ship_addr_sk#9], [ca_address_sk#10], Inner, BuildRight - : :- *(6) Project [cs_ship_addr_sk#9, cs_call_center_sk#7, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] - : : +- *(6) BroadcastHashJoin [cs_ship_date_sk#11], [d_date_sk#12], Inner, BuildRight - : : :- *(6) BroadcastHashJoin [cs_order_number#6], [cr_order_number#13], LeftAnti, BuildRight - : : : :- *(6) Project [cs_ship_date_sk#11, cs_ship_addr_sk#9, cs_call_center_sk#7, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] - : : : : +- *(6) BroadcastHashJoin [cs_order_number#6], [cs_order_number#6#14], LeftSemi, BuildRight, NOT (cs_warehouse_sk#15 = cs_warehouse_sk#15#16) - : : : : :- *(6) Project [cs_ship_date_sk#11, cs_ship_addr_sk#9, cs_call_center_sk#7, cs_warehouse_sk#15, cs_order_number#6, cs_ext_ship_cost#4, cs_net_profit#5] - : : : : : +- *(6) Filter ((isnotnull(cs_ship_date_sk#11) && isnotnull(cs_ship_addr_sk#9)) && isnotnull(cs_call_center_sk#7)) - : : : : : +- *(6) FileScan parquet default.catalog_sales[cs_ship_date_sk#11,cs_ship_addr_sk#9,cs_call_center_sk#7,cs_warehouse_sk#15,cs_order_number#6,cs_ext_ship_cost#4,cs_net_profit#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_ship_date_sk), IsNotNull(cs_ship_addr_sk), IsNotNull(cs_call_center_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) FileScan parquet default.catalog_returns[cr_order_number#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_returns], PartitionFilters: [], PushedFilters: [], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [d_date_sk#12] - : : +- *(3) Filter (((isnotnull(d_date#17) && (cast(d_date#17 as string) >= 2002-02-01)) && (d_date#17 <= 11779)) && isnotnull(d_date_sk#12)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_date#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2002-04-02), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [ca_address_sk#10] - : +- *(4) Filter ((isnotnull(ca_state#18) && (ca_state#18 = GA)) && isnotnull(ca_address_sk#10)) - : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(5) Project [cc_call_center_sk#8] - +- *(5) Filter ((isnotnull(cc_county#19) && (cc_county#19 = Williamson County)) && isnotnull(cc_call_center_sk#8)) - +- *(5) FileScan parquet default.call_center[cc_call_center_sk#8,cc_county#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_county), EqualTo(cc_county,Williamson County), IsNotNull(cc_call_center_sk)], ReadSchema: struct \ No newline at end of file +* Sort (41) ++- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * HashAggregate (37) + +- Exchange (36) + +- * HashAggregate (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Project (27) + : +- * BroadcastHashJoin Inner BuildRight (26) + : :- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * BroadcastHashJoin LeftAnti BuildRight (13) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin LeftSemi BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Project (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.catalog_sales (4) + : : : +- BroadcastExchange (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.catalog_returns (10) + : : +- BroadcastExchange (18) + : : +- * Project (17) + : : +- * Filter (16) + : : +- * ColumnarToRow (15) + : : +- Scan parquet default.date_dim (14) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet default.customer_address (21) + +- BroadcastExchange (32) + +- * Project (31) + +- * Filter (30) + +- * ColumnarToRow (29) + +- Scan parquet default.call_center (28) + + +(1) Scan parquet default.catalog_sales +Output [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_ship_date_sk), IsNotNull(cs_ship_addr_sk), IsNotNull(cs_call_center_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(3) Filter [codegen id : 6] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Condition : ((isnotnull(cs_ship_date_sk#1) AND isnotnull(cs_ship_addr_sk#2)) AND isnotnull(cs_call_center_sk#3)) + +(4) Scan parquet default.catalog_sales +Output [2]: [cs_warehouse_sk#4, cs_order_number#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [cs_warehouse_sk#4, cs_order_number#5] + +(6) Project [codegen id : 1] +Output [2]: [cs_warehouse_sk#4 AS cs_warehouse_sk#4#8, cs_order_number#5 AS cs_order_number#5#9] +Input [2]: [cs_warehouse_sk#4, cs_order_number#5] + +(7) BroadcastExchange +Input [2]: [cs_warehouse_sk#4#8, cs_order_number#5#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_order_number#5] +Right keys [1]: [cs_order_number#5#9] +Join condition: NOT (cs_warehouse_sk#4 = cs_warehouse_sk#4#8) + +(9) Project [codegen id : 6] +Output [6]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] + +(10) Scan parquet default.catalog_returns +Output [1]: [cr_order_number#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [1]: [cr_order_number#11] + +(12) BroadcastExchange +Input [1]: [cr_order_number#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(13) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_order_number#5] +Right keys [1]: [cr_order_number#11] +Join condition: None + +(14) Scan parquet default.date_dim +Output [2]: [d_date_sk#13, d_date#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2002-02-01), LessThanOrEqual(d_date,2002-04-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#13, d_date#14] + +(16) Filter [codegen id : 3] +Input [2]: [d_date_sk#13, d_date#14] +Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 11719)) AND (d_date#14 <= 11779)) AND isnotnull(d_date_sk#13)) + +(17) Project [codegen id : 3] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_date#14] + +(18) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(19) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_ship_date_sk#1] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(20) Project [codegen id : 6] +Output [5]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, d_date_sk#13] + +(21) Scan parquet default.customer_address +Output [2]: [ca_address_sk#16, ca_state#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#16, ca_state#17] + +(23) Filter [codegen id : 4] +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : ((isnotnull(ca_state#17) AND (ca_state#17 = GA)) AND isnotnull(ca_address_sk#16)) + +(24) Project [codegen id : 4] +Output [1]: [ca_address_sk#16] +Input [2]: [ca_address_sk#16, ca_state#17] + +(25) BroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_ship_addr_sk#2] +Right keys [1]: [ca_address_sk#16] +Join condition: None + +(27) Project [codegen id : 6] +Output [4]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [6]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, ca_address_sk#16] + +(28) Scan parquet default.call_center +Output [2]: [cc_call_center_sk#19, cc_county#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_county), EqualTo(cc_county,Williamson County), IsNotNull(cc_call_center_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 5] +Input [2]: [cc_call_center_sk#19, cc_county#20] + +(30) Filter [codegen id : 5] +Input [2]: [cc_call_center_sk#19, cc_county#20] +Condition : ((isnotnull(cc_county#20) AND (cc_county#20 = Williamson County)) AND isnotnull(cc_call_center_sk#19)) + +(31) Project [codegen id : 5] +Output [1]: [cc_call_center_sk#19] +Input [2]: [cc_call_center_sk#19, cc_county#20] + +(32) BroadcastExchange +Input [1]: [cc_call_center_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_call_center_sk#3] +Right keys [1]: [cc_call_center_sk#19] +Join condition: None + +(34) Project [codegen id : 6] +Output [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Input [5]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cc_call_center_sk#19] + +(35) HashAggregate [codegen id : 6] +Input [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] +Keys [1]: [cs_order_number#5] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_ship_cost#6)), partial_sum(UnscaledValue(cs_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23] +Results [3]: [cs_order_number#5, sum#24, sum#25] + +(36) Exchange +Input [3]: [cs_order_number#5, sum#24, sum#25] +Arguments: hashpartitioning(cs_order_number#5, 5), true, [id=#26] + +(37) HashAggregate [codegen id : 7] +Input [3]: [cs_order_number#5, sum#24, sum#25] +Keys [1]: [cs_order_number#5] +Functions [2]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23] +Results [3]: [cs_order_number#5, sum#24, sum#25] + +(38) HashAggregate [codegen id : 7] +Input [3]: [cs_order_number#5, sum#24, sum#25] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7)), partial_count(distinct cs_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#27] +Results [3]: [sum#24, sum#25, count#28] + +(39) Exchange +Input [3]: [sum#24, sum#25, count#28] +Arguments: SinglePartition, true, [id=#29] + +(40) HashAggregate [codegen id : 8] +Input [3]: [sum#24, sum#25, count#28] +Keys: [] +Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net_profit#7)), count(distinct cs_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#27] +Results [3]: [count(cs_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#23,17,2) AS total net profit #32] + +(41) Sort [codegen id : 8] +Input [3]: [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/simplified.txt index fe14e6333..169f07c2d 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q16/simplified.txt @@ -1,51 +1,62 @@ -TakeOrderedAndProject [order count ,total net profit ,total shipping cost ] - WholeStageCodegen - HashAggregate [count,count(cs_order_number),sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] [count,count(cs_order_number),order count ,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),total net profit ,total shipping cost ] +WholeStageCodegen (8) + Sort [order count ] + HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 - WholeStageCodegen - HashAggregate [count,count(cs_order_number),cs_order_number,sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] [count,count,count(cs_order_number),sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] - HashAggregate [cs_order_number,sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] [sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] + WholeStageCodegen (7) + HashAggregate [cs_order_number] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),sum,sum,count,sum,sum,count] + HashAggregate [cs_order_number] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum] InputAdapter Exchange [cs_order_number] #2 - WholeStageCodegen - HashAggregate [cs_ext_ship_cost,cs_net_profit,cs_order_number,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] [sum,sum,sum,sum,sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit))] - Project [cs_ext_ship_cost,cs_net_profit,cs_order_number] - BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] - Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number] - BroadcastHashJoin [ca_address_sk,cs_ship_addr_sk] - Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk] + WholeStageCodegen (6) + HashAggregate [cs_order_number,cs_ext_ship_cost,cs_net_profit] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),sum,sum,sum,sum] + Project [cs_order_number,cs_ext_ship_cost,cs_net_profit] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] + BroadcastHashJoin [cs_ship_addr_sk,ca_address_sk] + Project [cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] BroadcastHashJoin [cs_ship_date_sk,d_date_sk] - BroadcastHashJoin [cr_order_number,cs_order_number] - Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk] + BroadcastHashJoin [cs_order_number,cr_order_number] + Project [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] BroadcastHashJoin [cs_order_number,cs_order_number,cs_warehouse_sk,cs_warehouse_sk] - Project [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk] - Filter [cs_call_center_sk,cs_ship_addr_sk,cs_ship_date_sk] - Scan parquet default.catalog_sales [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk] [cs_call_center_sk,cs_ext_ship_cost,cs_net_profit,cs_order_number,cs_ship_addr_sk,cs_ship_date_sk,cs_warehouse_sk] + Filter [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [cs_order_number,cs_warehouse_sk] - Scan parquet default.catalog_sales [cs_order_number,cs_warehouse_sk] [cs_order_number,cs_warehouse_sk] + WholeStageCodegen (1) + Project [cs_warehouse_sk,cs_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_warehouse_sk,cs_order_number] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Scan parquet default.catalog_returns [cr_order_number] [cr_order_number] + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_order_number] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (3) Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (4) Project [ca_address_sk] - Filter [ca_address_sk,ca_state] - Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] InputAdapter BroadcastExchange #7 - WholeStageCodegen + WholeStageCodegen (5) Project [cc_call_center_sk] - Filter [cc_call_center_sk,cc_county] - Scan parquet default.call_center [cc_call_center_sk,cc_county] [cc_call_center_sk,cc_county] + Filter [cc_county,cc_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet default.call_center [cc_call_center_sk,cc_county] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/explain.txt index 8c8502f7f..4085b4ab9 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/explain.txt @@ -1,50 +1,269 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST,s_state#3 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,s_state#3,store_sales_quantitycount#4,store_sales_quantityave#5,store_sales_quantitystdev#6,store_sales_quantitycov#7,as_store_returns_quantitycount#8,as_store_returns_quantityave#9,as_store_returns_quantitystdev#10,store_returns_quantitycov#11,catalog_sales_quantitycount#12,catalog_sales_quantityave#13,catalog_sales_quantitystdev#14,catalog_sales_quantitycov#15]) -+- *(9) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_state#3], functions=[count(ss_quantity#16), avg(cast(ss_quantity#16 as bigint)), stddev_samp(cast(ss_quantity#16 as double)), count(sr_return_quantity#17), avg(cast(sr_return_quantity#17 as bigint)), stddev_samp(cast(sr_return_quantity#17 as double)), count(cs_quantity#18), avg(cast(cs_quantity#18 as bigint)), stddev_samp(cast(cs_quantity#18 as double))]) - +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, s_state#3, 5) - +- *(8) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_state#3], functions=[partial_count(ss_quantity#16), partial_avg(cast(ss_quantity#16 as bigint)), partial_stddev_samp(cast(ss_quantity#16 as double)), partial_count(sr_return_quantity#17), partial_avg(cast(sr_return_quantity#17 as bigint)), partial_stddev_samp(cast(sr_return_quantity#17 as double)), partial_count(cs_quantity#18), partial_avg(cast(cs_quantity#18 as bigint)), partial_stddev_samp(cast(cs_quantity#18 as double))]) - +- *(8) Project [ss_quantity#16, sr_return_quantity#17, cs_quantity#18, s_state#3, i_item_id#1, i_item_desc#2] - +- *(8) BroadcastHashJoin [ss_item_sk#19], [i_item_sk#20], Inner, BuildRight - :- *(8) Project [ss_item_sk#19, ss_quantity#16, sr_return_quantity#17, cs_quantity#18, s_state#3] - : +- *(8) BroadcastHashJoin [ss_store_sk#21], [s_store_sk#22], Inner, BuildRight - : :- *(8) Project [ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_return_quantity#17, cs_quantity#18] - : : +- *(8) BroadcastHashJoin [cs_sold_date_sk#23], [d_date_sk#24], Inner, BuildRight - : : :- *(8) Project [ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_return_quantity#17, cs_sold_date_sk#23, cs_quantity#18] - : : : +- *(8) BroadcastHashJoin [sr_returned_date_sk#25], [cast(d_date_sk#26 as bigint)], Inner, BuildRight - : : : :- *(8) Project [ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_returned_date_sk#25, sr_return_quantity#17, cs_sold_date_sk#23, cs_quantity#18] - : : : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#27], [d_date_sk#28], Inner, BuildRight - : : : : :- *(8) Project [ss_sold_date_sk#27, ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_returned_date_sk#25, sr_return_quantity#17, cs_sold_date_sk#23, cs_quantity#18] - : : : : : +- *(8) BroadcastHashJoin [sr_customer_sk#29, sr_item_sk#30], [cast(cs_bill_customer_sk#31 as bigint), cast(cs_item_sk#32 as bigint)], Inner, BuildRight - : : : : : :- *(8) Project [ss_sold_date_sk#27, ss_item_sk#19, ss_store_sk#21, ss_quantity#16, sr_returned_date_sk#25, sr_item_sk#30, sr_customer_sk#29, sr_return_quantity#17] - : : : : : : +- *(8) BroadcastHashJoin [cast(ss_customer_sk#33 as bigint), cast(ss_item_sk#19 as bigint), cast(ss_ticket_number#34 as bigint)], [sr_customer_sk#29, sr_item_sk#30, sr_ticket_number#35], Inner, BuildRight - : : : : : : :- *(8) Project [ss_sold_date_sk#27, ss_item_sk#19, ss_customer_sk#33, ss_store_sk#21, ss_ticket_number#34, ss_quantity#16] - : : : : : : : +- *(8) Filter ((((isnotnull(ss_ticket_number#34) && isnotnull(ss_item_sk#19)) && isnotnull(ss_customer_sk#33)) && isnotnull(ss_sold_date_sk#27)) && isnotnull(ss_store_sk#21)) - : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#27,ss_item_sk#19,ss_customer_sk#33,ss_store_sk#21,ss_ticket_number#34,ss_quantity#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(3) Project [d_date_sk#28] - : : : : +- *(3) Filter ((isnotnull(d_quarter_name#36) && (d_quarter_name#36 = 2001Q1)) && isnotnull(d_date_sk#28)) - : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#28,d_quarter_name#36] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_quarter_name), EqualTo(d_quarter_name,2001Q1), IsNotNull(d_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(4) Project [d_date_sk#26] - : : : +- *(4) Filter (d_quarter_name#37 IN (2001Q1,2001Q2,2001Q3) && isnotnull(d_date_sk#26)) - : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#26,d_quarter_name#37] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(5) Project [d_date_sk#24] - : : +- *(5) Filter (d_quarter_name#38 IN (2001Q1,2001Q2,2001Q3) && isnotnull(d_date_sk#24)) - : : +- *(5) FileScan parquet default.date_dim[d_date_sk#24,d_quarter_name#38] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(6) Project [s_store_sk#22, s_state#3] - : +- *(6) Filter isnotnull(s_store_sk#22) - : +- *(6) FileScan parquet default.store[s_store_sk#22,s_state#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(7) Project [i_item_sk#20, i_item_id#1, i_item_desc#2] - +- *(7) Filter isnotnull(i_item_sk#20) - +- *(7) FileScan parquet default.item[i_item_sk#20,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (48) ++- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * Project (44) + +- * BroadcastHashJoin Inner BuildRight (43) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (32) + : : +- * BroadcastHashJoin Inner BuildRight (31) + : : :- * Project (29) + : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : :- * Project (22) + : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : :- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.store_returns (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet default.catalog_sales (10) + : : : : +- BroadcastExchange (20) + : : : : +- * Project (19) + : : : : +- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet default.date_dim (16) + : : : +- BroadcastExchange (27) + : : : +- * Project (26) + : : : +- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet default.date_dim (23) + : : +- ReusedExchange (30) + : +- BroadcastExchange (36) + : +- * Filter (35) + : +- * ColumnarToRow (34) + : +- Scan parquet default.store (33) + +- BroadcastExchange (42) + +- * Filter (41) + +- * ColumnarToRow (40) + +- Scan parquet default.item (39) + + +(1) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] + +(3) Filter [codegen id : 8] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] +Condition : ((((isnotnull(ss_customer_sk#3) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_ticket_number#5)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) + +(4) Scan parquet default.store_returns +Output [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] + +(6) Filter [codegen id : 1] +Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] +Condition : (((isnotnull(sr_customer_sk#9) AND isnotnull(sr_item_sk#8)) AND isnotnull(sr_ticket_number#10)) AND isnotnull(sr_returned_date_sk#7)) + +(7) BroadcastExchange +Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] +Arguments: HashedRelationBroadcastMode(List(input[2, bigint, false], input[1, bigint, false], input[3, bigint, false]),false), [id=#12] + +(8) BroadcastHashJoin [codegen id : 8] +Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] +Right keys [3]: [sr_customer_sk#9, sr_item_sk#8, sr_ticket_number#10] +Join condition: None + +(9) Project [codegen id : 8] +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_return_quantity#11] +Input [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] + +(10) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] + +(12) Filter [codegen id : 2] +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] +Condition : ((isnotnull(cs_bill_customer_sk#14) AND isnotnull(cs_item_sk#15)) AND isnotnull(cs_sold_date_sk#13)) + +(13) BroadcastExchange +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint), cast(input[2, int, false] as bigint)),false), [id=#17] + +(14) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [sr_customer_sk#9, sr_item_sk#8] +Right keys [2]: [cast(cs_bill_customer_sk#14 as bigint), cast(cs_item_sk#15 as bigint)] +Join condition: None + +(15) Project [codegen id : 8] +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_return_quantity#11, cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] + +(16) Scan parquet default.date_dim +Output [2]: [d_date_sk#18, d_quarter_name#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_quarter_name), EqualTo(d_quarter_name,2001Q1), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#18, d_quarter_name#19] + +(18) Filter [codegen id : 3] +Input [2]: [d_date_sk#18, d_quarter_name#19] +Condition : ((isnotnull(d_quarter_name#19) AND (d_quarter_name#19 = 2001Q1)) AND isnotnull(d_date_sk#18)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#18] +Input [2]: [d_date_sk#18, d_quarter_name#19] + +(20) BroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(21) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#18] +Join condition: None + +(22) Project [codegen id : 8] +Output [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16, d_date_sk#18] + +(23) Scan parquet default.date_dim +Output [2]: [d_date_sk#21, d_quarter_name#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#21, d_quarter_name#22] + +(25) Filter [codegen id : 4] +Input [2]: [d_date_sk#21, d_quarter_name#22] +Condition : (d_quarter_name#22 IN (2001Q1,2001Q2,2001Q3) AND isnotnull(d_date_sk#21)) + +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_quarter_name#22] + +(27) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] + +(28) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [sr_returned_date_sk#7] +Right keys [1]: [cast(d_date_sk#21 as bigint)] +Join condition: None + +(29) Project [codegen id : 8] +Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16] +Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16, d_date_sk#21] + +(30) ReusedExchange [Reuses operator id: 27] +Output [1]: [d_date_sk#24] + +(31) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#13] +Right keys [1]: [d_date_sk#24] +Join condition: None + +(32) Project [codegen id : 8] +Output [5]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_quantity#16] +Input [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16, d_date_sk#24] + +(33) Scan parquet default.store +Output [2]: [s_store_sk#25, s_state#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 6] +Input [2]: [s_store_sk#25, s_state#26] + +(35) Filter [codegen id : 6] +Input [2]: [s_store_sk#25, s_state#26] +Condition : isnotnull(s_store_sk#25) + +(36) BroadcastExchange +Input [2]: [s_store_sk#25, s_state#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(37) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#25] +Join condition: None + +(38) Project [codegen id : 8] +Output [5]: [ss_item_sk#2, ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_state#26] +Input [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_store_sk#25, s_state#26] + +(39) Scan parquet default.item +Output [3]: [i_item_sk#28, i_item_id#29, i_item_desc#30] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 7] +Input [3]: [i_item_sk#28, i_item_id#29, i_item_desc#30] + +(41) Filter [codegen id : 7] +Input [3]: [i_item_sk#28, i_item_id#29, i_item_desc#30] +Condition : isnotnull(i_item_sk#28) + +(42) BroadcastExchange +Input [3]: [i_item_sk#28, i_item_id#29, i_item_desc#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] + +(43) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#28] +Join condition: None + +(44) Project [codegen id : 8] +Output [6]: [ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_state#26, i_item_id#29, i_item_desc#30] +Input [8]: [ss_item_sk#2, ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_state#26, i_item_sk#28, i_item_id#29, i_item_desc#30] + +(45) HashAggregate [codegen id : 8] +Input [6]: [ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_state#26, i_item_id#29, i_item_desc#30] +Keys [3]: [i_item_id#29, i_item_desc#30, s_state#26] +Functions [9]: [partial_count(ss_quantity#6), partial_avg(cast(ss_quantity#6 as bigint)), partial_stddev_samp(cast(ss_quantity#6 as double)), partial_count(sr_return_quantity#11), partial_avg(cast(sr_return_quantity#11 as bigint)), partial_stddev_samp(cast(sr_return_quantity#11 as double)), partial_count(cs_quantity#16), partial_avg(cast(cs_quantity#16 as bigint)), partial_stddev_samp(cast(cs_quantity#16 as double))] +Aggregate Attributes [18]: [count#32, sum#33, count#34, n#35, avg#36, m2#37, count#38, sum#39, count#40, n#41, avg#42, m2#43, count#44, sum#45, count#46, n#47, avg#48, m2#49] +Results [21]: [i_item_id#29, i_item_desc#30, s_state#26, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61, count#62, sum#63, count#64, n#65, avg#66, m2#67] + +(46) Exchange +Input [21]: [i_item_id#29, i_item_desc#30, s_state#26, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61, count#62, sum#63, count#64, n#65, avg#66, m2#67] +Arguments: hashpartitioning(i_item_id#29, i_item_desc#30, s_state#26, 5), true, [id=#68] + +(47) HashAggregate [codegen id : 9] +Input [21]: [i_item_id#29, i_item_desc#30, s_state#26, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61, count#62, sum#63, count#64, n#65, avg#66, m2#67] +Keys [3]: [i_item_id#29, i_item_desc#30, s_state#26] +Functions [9]: [count(ss_quantity#6), avg(cast(ss_quantity#6 as bigint)), stddev_samp(cast(ss_quantity#6 as double)), count(sr_return_quantity#11), avg(cast(sr_return_quantity#11 as bigint)), stddev_samp(cast(sr_return_quantity#11 as double)), count(cs_quantity#16), avg(cast(cs_quantity#16 as bigint)), stddev_samp(cast(cs_quantity#16 as double))] +Aggregate Attributes [9]: [count(ss_quantity#6)#69, avg(cast(ss_quantity#6 as bigint))#70, stddev_samp(cast(ss_quantity#6 as double))#71, count(sr_return_quantity#11)#72, avg(cast(sr_return_quantity#11 as bigint))#73, stddev_samp(cast(sr_return_quantity#11 as double))#74, count(cs_quantity#16)#75, avg(cast(cs_quantity#16 as bigint))#76, stddev_samp(cast(cs_quantity#16 as double))#77] +Results [15]: [i_item_id#29, i_item_desc#30, s_state#26, count(ss_quantity#6)#69 AS store_sales_quantitycount#78, avg(cast(ss_quantity#6 as bigint))#70 AS store_sales_quantityave#79, stddev_samp(cast(ss_quantity#6 as double))#71 AS store_sales_quantitystdev#80, (stddev_samp(cast(ss_quantity#6 as double))#71 / avg(cast(ss_quantity#6 as bigint))#70) AS store_sales_quantitycov#81, count(sr_return_quantity#11)#72 AS as_store_returns_quantitycount#82, avg(cast(sr_return_quantity#11 as bigint))#73 AS as_store_returns_quantityave#83, stddev_samp(cast(sr_return_quantity#11 as double))#74 AS as_store_returns_quantitystdev#84, (stddev_samp(cast(sr_return_quantity#11 as double))#74 / avg(cast(sr_return_quantity#11 as bigint))#73) AS store_returns_quantitycov#85, count(cs_quantity#16)#75 AS catalog_sales_quantitycount#86, avg(cast(cs_quantity#16 as bigint))#76 AS catalog_sales_quantityave#87, (stddev_samp(cast(cs_quantity#16 as double))#77 / avg(cast(cs_quantity#16 as bigint))#76) AS catalog_sales_quantitystdev#88, (stddev_samp(cast(cs_quantity#16 as double))#77 / avg(cast(cs_quantity#16 as bigint))#76) AS catalog_sales_quantitycov#89] + +(48) TakeOrderedAndProject +Input [15]: [i_item_id#29, i_item_desc#30, s_state#26, store_sales_quantitycount#78, store_sales_quantityave#79, store_sales_quantitystdev#80, store_sales_quantitycov#81, as_store_returns_quantitycount#82, as_store_returns_quantityave#83, as_store_returns_quantitystdev#84, store_returns_quantitycov#85, catalog_sales_quantitycount#86, catalog_sales_quantityave#87, catalog_sales_quantitystdev#88, catalog_sales_quantitycov#89] +Arguments: 100, [i_item_id#29 ASC NULLS FIRST, i_item_desc#30 ASC NULLS FIRST, s_state#26 ASC NULLS FIRST], [i_item_id#29, i_item_desc#30, s_state#26, store_sales_quantitycount#78, store_sales_quantityave#79, store_sales_quantitystdev#80, store_sales_quantitycov#81, as_store_returns_quantitycount#82, as_store_returns_quantityave#83, as_store_returns_quantitystdev#84, store_returns_quantitycov#85, catalog_sales_quantitycount#86, catalog_sales_quantityave#87, catalog_sales_quantitystdev#88, catalog_sales_quantitycov#89] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/simplified.txt index 780e633e2..e9b95747c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q17/simplified.txt @@ -1,66 +1,71 @@ -TakeOrderedAndProject [as_store_returns_quantityave,as_store_returns_quantitycount,as_store_returns_quantitystdev,catalog_sales_quantityave,catalog_sales_quantitycount,catalog_sales_quantitycov,catalog_sales_quantitystdev,i_item_desc,i_item_id,s_state,store_returns_quantitycov,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitycov,store_sales_quantitystdev] - WholeStageCodegen - HashAggregate [avg,avg,avg,avg(cast(cs_quantity as bigint)),avg(cast(sr_return_quantity as bigint)),avg(cast(ss_quantity as bigint)),count,count,count,count,count,count,count(cs_quantity),count(sr_return_quantity),count(ss_quantity),i_item_desc,i_item_id,m2,m2,m2,n,n,n,s_state,stddev_samp(cast(cs_quantity as double)),stddev_samp(cast(sr_return_quantity as double)),stddev_samp(cast(ss_quantity as double)),sum,sum,sum] [as_store_returns_quantityave,as_store_returns_quantitycount,as_store_returns_quantitystdev,avg,avg,avg,avg(cast(cs_quantity as bigint)),avg(cast(sr_return_quantity as bigint)),avg(cast(ss_quantity as bigint)),catalog_sales_quantityave,catalog_sales_quantitycount,catalog_sales_quantitycov,catalog_sales_quantitystdev,count,count,count,count,count,count,count(cs_quantity),count(sr_return_quantity),count(ss_quantity),m2,m2,m2,n,n,n,stddev_samp(cast(cs_quantity as double)),stddev_samp(cast(sr_return_quantity as double)),stddev_samp(cast(ss_quantity as double)),store_returns_quantitycov,store_sales_quantityave,store_sales_quantitycount,store_sales_quantitycov,store_sales_quantitystdev,sum,sum,sum] +TakeOrderedAndProject [i_item_id,i_item_desc,s_state,store_sales_quantitycount,store_sales_quantityave,store_sales_quantitystdev,store_sales_quantitycov,as_store_returns_quantitycount,as_store_returns_quantityave,as_store_returns_quantitystdev,store_returns_quantitycov,catalog_sales_quantitycount,catalog_sales_quantityave,catalog_sales_quantitystdev,catalog_sales_quantitycov] + WholeStageCodegen (9) + HashAggregate [i_item_id,i_item_desc,s_state,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] [count(ss_quantity),avg(cast(ss_quantity as bigint)),stddev_samp(cast(ss_quantity as double)),count(sr_return_quantity),avg(cast(sr_return_quantity as bigint)),stddev_samp(cast(sr_return_quantity as double)),count(cs_quantity),avg(cast(cs_quantity as bigint)),stddev_samp(cast(cs_quantity as double)),store_sales_quantitycount,store_sales_quantityave,store_sales_quantitystdev,store_sales_quantitycov,as_store_returns_quantitycount,as_store_returns_quantityave,as_store_returns_quantitystdev,store_returns_quantitycov,catalog_sales_quantitycount,catalog_sales_quantityave,catalog_sales_quantitystdev,catalog_sales_quantitycov,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] InputAdapter - Exchange [i_item_desc,i_item_id,s_state] #1 - WholeStageCodegen - HashAggregate [avg,avg,avg,avg,avg,avg,count,count,count,count,count,count,count,count,count,count,count,count,cs_quantity,i_item_desc,i_item_id,m2,m2,m2,m2,m2,m2,n,n,n,n,n,n,s_state,sr_return_quantity,ss_quantity,sum,sum,sum,sum,sum,sum] [avg,avg,avg,avg,avg,avg,count,count,count,count,count,count,count,count,count,count,count,count,m2,m2,m2,m2,m2,m2,n,n,n,n,n,n,sum,sum,sum,sum,sum,sum] - Project [cs_quantity,i_item_desc,i_item_id,s_state,sr_return_quantity,ss_quantity] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [cs_quantity,s_state,sr_return_quantity,ss_item_sk,ss_quantity] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [cs_quantity,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] + Exchange [i_item_id,i_item_desc,s_state] #1 + WholeStageCodegen (8) + HashAggregate [i_item_id,i_item_desc,s_state,ss_quantity,sr_return_quantity,cs_quantity] [count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] + Project [ss_quantity,sr_return_quantity,cs_quantity,s_state,i_item_id,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] - Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,sr_customer_sk,sr_item_sk] - Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] - Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_sold_date_sk,cs_quantity] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,sr_returned_date_sk,sr_return_quantity,cs_sold_date_sk,cs_quantity] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_quantity,sr_returned_date_sk,sr_return_quantity,cs_sold_date_sk,cs_quantity] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_quantity,sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity] + BroadcastHashJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] - Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] + WholeStageCodegen (1) + Filter [sr_customer_sk,sr_item_sk,sr_ticket_number,sr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + WholeStageCodegen (2) + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (3) Project [d_date_sk] - Filter [d_date_sk,d_quarter_name] - Scan parquet default.date_dim [d_date_sk,d_quarter_name] [d_date_sk,d_quarter_name] + Filter [d_quarter_name,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_quarter_name] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (4) Project [d_date_sk] - Filter [d_date_sk,d_quarter_name] - Scan parquet default.date_dim [d_date_sk,d_quarter_name] [d_date_sk,d_quarter_name] + Filter [d_quarter_name,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_quarter_name] InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_quarter_name] - Scan parquet default.date_dim [d_date_sk,d_quarter_name] [d_date_sk,d_quarter_name] + ReusedExchange [d_date_sk] #5 InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [s_state,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_state] InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [i_item_desc,i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_desc,i_item_id,i_item_sk] [i_item_desc,i_item_id,i_item_sk] + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/explain.txt index c13386440..3b213efa6 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/explain.txt @@ -1,45 +1,264 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[ca_country#1 ASC NULLS FIRST,ca_state#2 ASC NULLS FIRST,ca_county#3 ASC NULLS FIRST,i_item_id#4 ASC NULLS FIRST], output=[i_item_id#4,ca_country#1,ca_state#2,ca_county#3,agg1#5,agg2#6,agg3#7,agg4#8,agg5#9,agg6#10,agg7#11]) -+- *(8) HashAggregate(keys=[i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12], functions=[avg(cast(cs_quantity#13 as decimal(12,2))), avg(cast(cs_list_price#14 as decimal(12,2))), avg(cast(cs_coupon_amt#15 as decimal(12,2))), avg(cast(cs_sales_price#16 as decimal(12,2))), avg(cast(cs_net_profit#17 as decimal(12,2))), avg(cast(c_birth_year#18 as decimal(12,2))), avg(cast(cd_dep_count#19 as decimal(12,2)))]) - +- Exchange hashpartitioning(i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12, 5) - +- *(7) HashAggregate(keys=[i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12], functions=[partial_avg(cast(cs_quantity#13 as decimal(12,2))), partial_avg(cast(cs_list_price#14 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#15 as decimal(12,2))), partial_avg(cast(cs_sales_price#16 as decimal(12,2))), partial_avg(cast(cs_net_profit#17 as decimal(12,2))), partial_avg(cast(c_birth_year#18 as decimal(12,2))), partial_avg(cast(cd_dep_count#19 as decimal(12,2)))]) - +- *(7) Expand [List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, ca_country#21, ca_state#22, ca_county#23, 0), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, ca_country#21, ca_state#22, null, 1), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, ca_country#21, null, null, 3), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#20, null, null, null, 7), List(cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, null, null, null, null, 15)], [cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#4, ca_country#1, ca_state#2, ca_county#3, spark_grouping_id#12] - +- *(7) Project [cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, i_item_id#24 AS i_item_id#20, ca_country#25 AS ca_country#21, ca_state#26 AS ca_state#22, ca_county#27 AS ca_county#23] - +- *(7) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#29], Inner, BuildRight - :- *(7) Project [cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, ca_county#27, ca_state#26, ca_country#25] - : +- *(7) BroadcastHashJoin [cs_sold_date_sk#30], [d_date_sk#31], Inner, BuildRight - : :- *(7) Project [cs_sold_date_sk#30, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_birth_year#18, ca_county#27, ca_state#26, ca_country#25] - : : +- *(7) BroadcastHashJoin [c_current_addr_sk#32], [ca_address_sk#33], Inner, BuildRight - : : :- *(7) Project [cs_sold_date_sk#30, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_current_addr_sk#32, c_birth_year#18] - : : : +- *(7) BroadcastHashJoin [c_current_cdemo_sk#34], [cd_demo_sk#35], Inner, BuildRight - : : : :- *(7) Project [cs_sold_date_sk#30, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19, c_current_cdemo_sk#34, c_current_addr_sk#32, c_birth_year#18] - : : : : +- *(7) BroadcastHashJoin [cs_bill_customer_sk#36], [c_customer_sk#37], Inner, BuildRight - : : : : :- *(7) Project [cs_sold_date_sk#30, cs_bill_customer_sk#36, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17, cd_dep_count#19] - : : : : : +- *(7) BroadcastHashJoin [cs_bill_cdemo_sk#38], [cd_demo_sk#39], Inner, BuildRight - : : : : : :- *(7) Project [cs_sold_date_sk#30, cs_bill_customer_sk#36, cs_bill_cdemo_sk#38, cs_item_sk#28, cs_quantity#13, cs_list_price#14, cs_sales_price#16, cs_coupon_amt#15, cs_net_profit#17] - : : : : : : +- *(7) Filter (((isnotnull(cs_bill_cdemo_sk#38) && isnotnull(cs_bill_customer_sk#36)) && isnotnull(cs_sold_date_sk#30)) && isnotnull(cs_item_sk#28)) - : : : : : : +- *(7) FileScan parquet default.catalog_sales[cs_sold_date_sk#30,cs_bill_customer_sk#36,cs_bill_cdemo_sk#38,cs_item_sk#28,cs_quantity#13,cs_list_price#14,cs_sales_price#16,cs_coupon_amt#15,cs_net_profit#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk), IsNotNu..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [c_customer_sk#37, c_current_cdemo_sk#34, c_current_addr_sk#32, c_birth_year#18] - : : : : +- *(2) Filter (((c_birth_month#42 IN (1,6,8,9,12,2) && isnotnull(c_customer_sk#37)) && isnotnull(c_current_cdemo_sk#34)) && isnotnull(c_current_addr_sk#32)) - : : : : +- *(2) FileScan parquet default.customer[c_customer_sk#37,c_current_cdemo_sk#34,c_current_addr_sk#32,c_birth_month#42,c_birth_year#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [In(c_birth_month, [1,6,8,9,12,2]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNo..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(4) Project [ca_address_sk#33, ca_county#27, ca_state#26, ca_country#25] - : : +- *(4) Filter (ca_state#26 IN (MS,IN,ND,OK,NM,VA) && isnotnull(ca_address_sk#33)) - : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#33,ca_county#27,ca_state#26,ca_country#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [In(ca_state, [MS,IN,ND,OK,NM,VA]), IsNotNull(ca_address_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(5) Project [d_date_sk#31] - : +- *(5) Filter ((isnotnull(d_year#43) && (d_year#43 = 1998)) && isnotnull(d_date_sk#31)) - : +- *(5) FileScan parquet default.date_dim[d_date_sk#31,d_year#43] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(6) Project [i_item_sk#29, i_item_id#24] - +- *(6) Filter isnotnull(i_item_sk#29) - +- *(6) FileScan parquet default.item[i_item_sk#29,i_item_id#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (47) ++- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Expand (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Project (23) + : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.customer_demographics (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.customer (11) + : : : +- BroadcastExchange (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.customer_demographics (18) + : : +- BroadcastExchange (27) + : : +- * Filter (26) + : : +- * ColumnarToRow (25) + : : +- Scan parquet default.customer_address (24) + : +- BroadcastExchange (34) + : +- * Project (33) + : +- * Filter (32) + : +- * ColumnarToRow (31) + : +- Scan parquet default.date_dim (30) + +- BroadcastExchange (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet default.item (37) + + +(1) Scan parquet default.catalog_sales +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] + +(3) Filter [codegen id : 7] +Input [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] +Condition : (((isnotnull(cs_bill_cdemo_sk#3) AND isnotnull(cs_bill_customer_sk#2)) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_item_sk#4)) + +(4) Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_education_status), EqualTo(cd_gender,F), EqualTo(cd_education_status,Unknown), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] +Condition : ((((isnotnull(cd_gender#11) AND isnotnull(cd_education_status#12)) AND (cd_gender#11 = F)) AND (cd_education_status#12 = Unknown)) AND isnotnull(cd_demo_sk#10)) + +(7) Project [codegen id : 1] +Output [2]: [cd_demo_sk#10, cd_dep_count#13] +Input [4]: [cd_demo_sk#10, cd_gender#11, cd_education_status#12, cd_dep_count#13] + +(8) BroadcastExchange +Input [2]: [cd_demo_sk#10, cd_dep_count#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(9) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_bill_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#10] +Join condition: None + +(10) Project [codegen id : 7] +Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13] +Input [11]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_demo_sk#10, cd_dep_count#13] + +(11) Scan parquet default.customer +Output [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [In(c_birth_month, [1,6,8,9,12,2]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] + +(13) Filter [codegen id : 2] +Input [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] +Condition : (((c_birth_month#18 IN (1,6,8,9,12,2) AND isnotnull(c_customer_sk#15)) AND isnotnull(c_current_cdemo_sk#16)) AND isnotnull(c_current_addr_sk#17)) + +(14) Project [codegen id : 2] +Output [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Input [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] + +(15) BroadcastExchange +Input [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(16) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#15] +Join condition: None + +(17) Project [codegen id : 7] +Output [11]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Input [13]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] + +(18) Scan parquet default.customer_demographics +Output [1]: [cd_demo_sk#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [1]: [cd_demo_sk#21] + +(20) Filter [codegen id : 3] +Input [1]: [cd_demo_sk#21] +Condition : isnotnull(cd_demo_sk#21) + +(21) BroadcastExchange +Input [1]: [cd_demo_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(22) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_cdemo_sk#16] +Right keys [1]: [cd_demo_sk#21] +Join condition: None + +(23) Project [codegen id : 7] +Output [10]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_addr_sk#17, c_birth_year#19] +Input [12]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19, cd_demo_sk#21] + +(24) Scan parquet default.customer_address +Output [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [MS,IN,ND,OK,NM,VA]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] + +(26) Filter [codegen id : 4] +Input [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] +Condition : (ca_state#25 IN (MS,IN,ND,OK,NM,VA) AND isnotnull(ca_address_sk#23)) + +(27) BroadcastExchange +Input [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(28) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#17] +Right keys [1]: [ca_address_sk#23] +Join condition: None + +(29) Project [codegen id : 7] +Output [12]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_county#24, ca_state#25, ca_country#26] +Input [14]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_current_addr_sk#17, c_birth_year#19, ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] + +(30) Scan parquet default.date_dim +Output [2]: [d_date_sk#28, d_year#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#28, d_year#29] + +(32) Filter [codegen id : 5] +Input [2]: [d_date_sk#28, d_year#29] +Condition : ((isnotnull(d_year#29) AND (d_year#29 = 1998)) AND isnotnull(d_date_sk#28)) + +(33) Project [codegen id : 5] +Output [1]: [d_date_sk#28] +Input [2]: [d_date_sk#28, d_year#29] + +(34) BroadcastExchange +Input [1]: [d_date_sk#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] + +(35) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#28] +Join condition: None + +(36) Project [codegen id : 7] +Output [11]: [cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_county#24, ca_state#25, ca_country#26] +Input [13]: [cs_sold_date_sk#1, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_county#24, ca_state#25, ca_country#26, d_date_sk#28] + +(37) Scan parquet default.item +Output [2]: [i_item_sk#31, i_item_id#32] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 6] +Input [2]: [i_item_sk#31, i_item_id#32] + +(39) Filter [codegen id : 6] +Input [2]: [i_item_sk#31, i_item_id#32] +Condition : isnotnull(i_item_sk#31) + +(40) BroadcastExchange +Input [2]: [i_item_sk#31, i_item_id#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] + +(41) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cs_item_sk#4] +Right keys [1]: [i_item_sk#31] +Join condition: None + +(42) Project [codegen id : 7] +Output [11]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#32, ca_country#26, ca_state#25, ca_county#24] +Input [13]: [cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, ca_county#24, ca_state#25, ca_country#26, i_item_sk#31, i_item_id#32] + +(43) Expand [codegen id : 7] +Input [11]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#32, ca_country#26, ca_state#25, ca_county#24] +Arguments: [List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#32, ca_country#26, ca_state#25, ca_county#24, 0), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#32, ca_country#26, ca_state#25, null, 1), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#32, ca_country#26, null, null, 3), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#32, null, null, null, 7), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, null, null, null, null, 15)], [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38] + +(44) HashAggregate [codegen id : 7] +Input [12]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#19, i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38] +Keys [5]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38] +Functions [7]: [partial_avg(cast(cs_quantity#5 as decimal(12,2))), partial_avg(cast(cs_list_price#6 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#8 as decimal(12,2))), partial_avg(cast(cs_sales_price#7 as decimal(12,2))), partial_avg(cast(cs_net_profit#9 as decimal(12,2))), partial_avg(cast(c_birth_year#19 as decimal(12,2))), partial_avg(cast(cd_dep_count#13 as decimal(12,2)))] +Aggregate Attributes [14]: [sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46, sum#47, count#48, sum#49, count#50, sum#51, count#52] +Results [19]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66] + +(45) Exchange +Input [19]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66] +Arguments: hashpartitioning(i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38, 5), true, [id=#67] + +(46) HashAggregate [codegen id : 8] +Input [19]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66] +Keys [5]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, spark_grouping_id#38] +Functions [7]: [avg(cast(cs_quantity#5 as decimal(12,2))), avg(cast(cs_list_price#6 as decimal(12,2))), avg(cast(cs_coupon_amt#8 as decimal(12,2))), avg(cast(cs_sales_price#7 as decimal(12,2))), avg(cast(cs_net_profit#9 as decimal(12,2))), avg(cast(c_birth_year#19 as decimal(12,2))), avg(cast(cd_dep_count#13 as decimal(12,2)))] +Aggregate Attributes [7]: [avg(cast(cs_quantity#5 as decimal(12,2)))#68, avg(cast(cs_list_price#6 as decimal(12,2)))#69, avg(cast(cs_coupon_amt#8 as decimal(12,2)))#70, avg(cast(cs_sales_price#7 as decimal(12,2)))#71, avg(cast(cs_net_profit#9 as decimal(12,2)))#72, avg(cast(c_birth_year#19 as decimal(12,2)))#73, avg(cast(cd_dep_count#13 as decimal(12,2)))#74] +Results [11]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, avg(cast(cs_quantity#5 as decimal(12,2)))#68 AS agg1#75, avg(cast(cs_list_price#6 as decimal(12,2)))#69 AS agg2#76, avg(cast(cs_coupon_amt#8 as decimal(12,2)))#70 AS agg3#77, avg(cast(cs_sales_price#7 as decimal(12,2)))#71 AS agg4#78, avg(cast(cs_net_profit#9 as decimal(12,2)))#72 AS agg5#79, avg(cast(c_birth_year#19 as decimal(12,2)))#73 AS agg6#80, avg(cast(cd_dep_count#13 as decimal(12,2)))#74 AS agg7#81] + +(47) TakeOrderedAndProject +Input [11]: [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, agg1#75, agg2#76, agg3#77, agg4#78, agg5#79, agg6#80, agg7#81] +Arguments: 100, [ca_country#35 ASC NULLS FIRST, ca_state#36 ASC NULLS FIRST, ca_county#37 ASC NULLS FIRST, i_item_id#34 ASC NULLS FIRST], [i_item_id#34, ca_country#35, ca_state#36, ca_county#37, agg1#75, agg2#76, agg3#77, agg4#78, agg5#79, agg6#80, agg7#81] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/simplified.txt index 5a4e9d503..0d4c8cb7f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q18/simplified.txt @@ -1,59 +1,69 @@ -TakeOrderedAndProject [agg1,agg2,agg3,agg4,agg5,agg6,agg7,ca_country,ca_county,ca_state,i_item_id] - WholeStageCodegen - HashAggregate [avg(cast(c_birth_year as decimal(12,2))),avg(cast(cd_dep_count as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),avg(cast(cs_list_price as decimal(12,2))),avg(cast(cs_net_profit as decimal(12,2))),avg(cast(cs_quantity as decimal(12,2))),avg(cast(cs_sales_price as decimal(12,2))),ca_country,ca_county,ca_state,count,count,count,count,count,count,count,i_item_id,spark_grouping_id,sum,sum,sum,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,agg5,agg6,agg7,avg(cast(c_birth_year as decimal(12,2))),avg(cast(cd_dep_count as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),avg(cast(cs_list_price as decimal(12,2))),avg(cast(cs_net_profit as decimal(12,2))),avg(cast(cs_quantity as decimal(12,2))),avg(cast(cs_sales_price as decimal(12,2))),count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum] +TakeOrderedAndProject [ca_country,ca_state,ca_county,i_item_id,agg1,agg2,agg3,agg4,agg5,agg6,agg7] + WholeStageCodegen (8) + HashAggregate [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(cast(cs_quantity as decimal(12,2))),avg(cast(cs_list_price as decimal(12,2))),avg(cast(cs_coupon_amt as decimal(12,2))),avg(cast(cs_sales_price as decimal(12,2))),avg(cast(cs_net_profit as decimal(12,2))),avg(cast(c_birth_year as decimal(12,2))),avg(cast(cd_dep_count as decimal(12,2))),agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] InputAdapter - Exchange [ca_country,ca_county,ca_state,i_item_id,spark_grouping_id] #1 - WholeStageCodegen - HashAggregate [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id,spark_grouping_id,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Expand [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] - Project [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,i_item_id] + Exchange [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id] #1 + WholeStageCodegen (7) + HashAggregate [i_item_id,ca_country,ca_state,ca_county,spark_grouping_id,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price,cs_net_profit,c_birth_year,cd_dep_count] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Expand [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,i_item_id,ca_country,ca_state,ca_county] + Project [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,i_item_id,ca_country,ca_state,ca_county] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price] + Project [cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [c_birth_year,ca_country,ca_county,ca_state,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_birth_year,ca_county,ca_state,ca_country] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_birth_year,c_current_addr_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_current_addr_sk,c_birth_year] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] - Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,cd_dep_count,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] - BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - Project [cd_dep_count,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] - BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] - Project [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] - Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_net_profit,cs_quantity,cs_sales_price,cs_sold_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit,cd_dep_count] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Filter [cs_bill_cdemo_sk,cs_bill_customer_sk,cs_sold_date_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_bill_cdemo_sk,cs_item_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,cs_net_profit] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (1) Project [cd_demo_sk,cd_dep_count] - Filter [cd_demo_sk,cd_education_status,cd_gender] - Scan parquet default.customer_demographics [cd_demo_sk,cd_dep_count,cd_education_status,cd_gender] [cd_demo_sk,cd_dep_count,cd_education_status,cd_gender] + Filter [cd_gender,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_education_status,cd_dep_count] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] - Filter [c_birth_month,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] - Scan parquet default.customer [c_birth_month,c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] [c_birth_month,c_birth_year,c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + WholeStageCodegen (2) + Project [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] + Filter [c_birth_month,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [cd_demo_sk] - Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_demo_sk] [cd_demo_sk] + WholeStageCodegen (3) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [ca_address_sk,ca_country,ca_county,ca_state] - Filter [ca_address_sk,ca_state] - Scan parquet default.customer_address [ca_address_sk,ca_country,ca_county,ca_state] [ca_address_sk,ca_country,ca_county,ca_state] + WholeStageCodegen (4) + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (5) Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #7 - WholeStageCodegen - Project [i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/explain.txt index 00e737c79..cb75374a8 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/explain.txt @@ -1,38 +1,221 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[ext_price#1 DESC NULLS LAST,brand#2 ASC NULLS FIRST,brand_id#3 ASC NULLS FIRST,i_manufact_id#4 ASC NULLS FIRST,i_manufact#5 ASC NULLS FIRST], output=[brand_id#3,brand#2,i_manufact_id#4,i_manufact#5,ext_price#1]) -+- *(7) HashAggregate(keys=[i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5], functions=[sum(UnscaledValue(ss_ext_sales_price#8))]) - +- Exchange hashpartitioning(i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5, 5) - +- *(6) HashAggregate(keys=[i_brand#6, i_brand_id#7, i_manufact_id#4, i_manufact#5], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#8))]) - +- *(6) Project [ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5] - +- *(6) BroadcastHashJoin [ss_store_sk#9], [s_store_sk#10], Inner, BuildRight, NOT (substring(ca_zip#11, 1, 5) = substring(s_zip#12, 1, 5)) - :- *(6) Project [ss_store_sk#9, ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5, ca_zip#11] - : +- *(6) BroadcastHashJoin [c_current_addr_sk#13], [ca_address_sk#14], Inner, BuildRight - : :- *(6) Project [ss_store_sk#9, ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5, c_current_addr_sk#13] - : : +- *(6) BroadcastHashJoin [ss_customer_sk#15], [c_customer_sk#16], Inner, BuildRight - : : :- *(6) Project [ss_customer_sk#15, ss_store_sk#9, ss_ext_sales_price#8, i_brand_id#7, i_brand#6, i_manufact_id#4, i_manufact#5] - : : : +- *(6) BroadcastHashJoin [ss_item_sk#17], [i_item_sk#18], Inner, BuildRight - : : : :- *(6) Project [ss_item_sk#17, ss_customer_sk#15, ss_store_sk#9, ss_ext_sales_price#8] - : : : : +- *(6) BroadcastHashJoin [d_date_sk#19], [ss_sold_date_sk#20], Inner, BuildRight - : : : : :- *(6) Project [d_date_sk#19] - : : : : : +- *(6) Filter ((((isnotnull(d_moy#21) && isnotnull(d_year#22)) && (d_moy#21 = 11)) && (d_year#22 = 1998)) && isnotnull(d_date_sk#19)) - : : : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#19,d_year#22,d_moy#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(1) Project [ss_sold_date_sk#20, ss_item_sk#17, ss_customer_sk#15, ss_store_sk#9, ss_ext_sales_price#8] - : : : : +- *(1) Filter (((isnotnull(ss_sold_date_sk#20) && isnotnull(ss_item_sk#17)) && isnotnull(ss_customer_sk#15)) && isnotnull(ss_store_sk#9)) - : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#20,ss_item_sk#17,ss_customer_sk#15,ss_store_sk#9,ss_ext_sales_price#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [ca_address_sk#14, ca_zip#11] - : +- *(4) Filter (isnotnull(ca_address_sk#14) && isnotnull(ca_zip#11)) - : +- *(4) FileScan parquet default.customer_address[ca_address_sk#14,ca_zip#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(5) Project [s_store_sk#10, s_zip#12] - +- *(5) Filter (isnotnull(s_zip#12) && isnotnull(s_store_sk#10)) - +- *(5) FileScan parquet default.store[s_store_sk#10,s_zip#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (39) ++- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (23) + : : +- * BroadcastHashJoin Inner BuildRight (22) + : : :- * Project (17) + : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.date_dim (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Filter (7) + : : : : +- * ColumnarToRow (6) + : : : : +- Scan parquet default.store_sales (5) + : : : +- BroadcastExchange (15) + : : : +- * Project (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.item (11) + : : +- BroadcastExchange (21) + : : +- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.customer (18) + : +- BroadcastExchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet default.customer_address (24) + +- BroadcastExchange (33) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet default.store (30) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 6] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1998)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 6] +Output [1]: [d_date_sk#1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [5]: [ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] + +(7) Filter [codegen id : 1] +Input [5]: [ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] +Condition : (((isnotnull(ss_sold_date_sk#4) AND isnotnull(ss_item_sk#5)) AND isnotnull(ss_customer_sk#6)) AND isnotnull(ss_store_sk#7)) + +(8) BroadcastExchange +Input [5]: [ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 6] +Output [4]: [ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] +Input [6]: [d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] + +(11) Scan parquet default.item +Output [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,8), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] + +(13) Filter [codegen id : 2] +Input [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] +Condition : ((isnotnull(i_manager_id#15) AND (i_manager_id#15 = 8)) AND isnotnull(i_item_sk#10)) + +(14) Project [codegen id : 2] +Output [5]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] +Input [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] + +(15) BroadcastExchange +Input [5]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#10] +Join condition: None + +(17) Project [codegen id : 6] +Output [7]: [ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] +Input [9]: [ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8, i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] + +(18) Scan parquet default.customer +Output [2]: [c_customer_sk#17, c_current_addr_sk#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [c_customer_sk#17, c_current_addr_sk#18] + +(20) Filter [codegen id : 3] +Input [2]: [c_customer_sk#17, c_current_addr_sk#18] +Condition : (isnotnull(c_customer_sk#17) AND isnotnull(c_current_addr_sk#18)) + +(21) BroadcastExchange +Input [2]: [c_customer_sk#17, c_current_addr_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(22) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#6] +Right keys [1]: [c_customer_sk#17] +Join condition: None + +(23) Project [codegen id : 6] +Output [7]: [ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, c_current_addr_sk#18] +Input [9]: [ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, c_customer_sk#17, c_current_addr_sk#18] + +(24) Scan parquet default.customer_address +Output [2]: [ca_address_sk#20, ca_zip#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#20, ca_zip#21] + +(26) Filter [codegen id : 4] +Input [2]: [ca_address_sk#20, ca_zip#21] +Condition : (isnotnull(ca_address_sk#20) AND isnotnull(ca_zip#21)) + +(27) BroadcastExchange +Input [2]: [ca_address_sk#20, ca_zip#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_current_addr_sk#18] +Right keys [1]: [ca_address_sk#20] +Join condition: None + +(29) Project [codegen id : 6] +Output [7]: [ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, ca_zip#21] +Input [9]: [ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, c_current_addr_sk#18, ca_address_sk#20, ca_zip#21] + +(30) Scan parquet default.store +Output [2]: [s_store_sk#23, s_zip#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [s_store_sk#23, s_zip#24] + +(32) Filter [codegen id : 5] +Input [2]: [s_store_sk#23, s_zip#24] +Condition : (isnotnull(s_zip#24) AND isnotnull(s_store_sk#23)) + +(33) BroadcastExchange +Input [2]: [s_store_sk#23, s_zip#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#23] +Join condition: NOT (substr(ca_zip#21, 1, 5) = substr(s_zip#24, 1, 5)) + +(35) Project [codegen id : 6] +Output [5]: [ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] +Input [9]: [ss_store_sk#7, ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, ca_zip#21, s_store_sk#23, s_zip#24] + +(36) HashAggregate [codegen id : 6] +Input [5]: [ss_ext_sales_price#8, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] +Keys [4]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#8))] +Aggregate Attributes [1]: [sum#26] +Results [5]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, sum#27] + +(37) Exchange +Input [5]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, sum#27] +Arguments: hashpartitioning(i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, 5), true, [id=#28] + +(38) HashAggregate [codegen id : 7] +Input [5]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, sum#27] +Keys [4]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#8))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#8))#29] +Results [5]: [i_brand_id#11 AS brand_id#30, i_brand#12 AS brand#31, i_manufact_id#13, i_manufact#14, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#8))#29,17,2) AS ext_price#32] + +(39) TakeOrderedAndProject +Input [5]: [brand_id#30, brand#31, i_manufact_id#13, i_manufact#14, ext_price#32] +Arguments: 100, [ext_price#32 DESC NULLS LAST, brand#31 ASC NULLS FIRST, brand_id#30 ASC NULLS FIRST, i_manufact_id#13 ASC NULLS FIRST, i_manufact#14 ASC NULLS FIRST], [brand_id#30, brand#31, i_manufact_id#13, i_manufact#14, ext_price#32] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/simplified.txt index fb69c76b7..1bbbf35e4 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q19/simplified.txt @@ -1,50 +1,58 @@ -TakeOrderedAndProject [brand,brand_id,ext_price,i_manufact,i_manufact_id] - WholeStageCodegen - HashAggregate [i_brand,i_brand_id,i_manufact,i_manufact_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] +TakeOrderedAndProject [ext_price,brand,brand_id,i_manufact_id,i_manufact] + WholeStageCodegen (7) + HashAggregate [i_brand,i_brand_id,i_manufact_id,i_manufact,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,ext_price,sum] InputAdapter - Exchange [i_brand,i_brand_id,i_manufact,i_manufact_id] #1 - WholeStageCodegen - HashAggregate [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price] - BroadcastHashJoin [ca_zip,s_store_sk,s_zip,ss_store_sk] - Project [ca_zip,i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price,ss_store_sk] + Exchange [i_brand,i_brand_id,i_manufact_id,i_manufact] #1 + WholeStageCodegen (6) + HashAggregate [i_brand,i_brand_id,i_manufact_id,i_manufact,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] + BroadcastHashJoin [ss_store_sk,s_store_sk,ca_zip,s_zip] + Project [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,ca_zip] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,i_brand,i_brand_id,i_manufact,i_manufact_id,ss_ext_sales_price,ss_store_sk] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [i_brand,i_brand_id,i_manufact,i_manufact_id,ss_customer_sk,ss_ext_sales_price,ss_store_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_store_sk] + Project [ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,c_current_addr_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] - Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] + WholeStageCodegen (1) + Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [i_brand,i_brand_id,i_item_sk,i_manufact,i_manufact_id] - Filter [i_item_sk,i_manager_id] - Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id,i_manufact,i_manufact_id] [i_brand,i_brand_id,i_item_sk,i_manager_id,i_manufact,i_manufact_id] + WholeStageCodegen (2) + Project [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact] + Filter [i_manager_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact,i_manager_id] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [c_current_addr_sk,c_customer_sk] - Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] + WholeStageCodegen (3) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [ca_address_sk,ca_zip] - Filter [ca_address_sk,ca_zip] - Scan parquet default.customer_address [ca_address_sk,ca_zip] [ca_address_sk,ca_zip] + WholeStageCodegen (4) + Filter [ca_address_sk,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_zip] InputAdapter BroadcastExchange #6 - WholeStageCodegen - Project [s_store_sk,s_zip] - Filter [s_store_sk,s_zip] - Scan parquet default.store [s_store_sk,s_zip] [s_store_sk,s_zip] + WholeStageCodegen (5) + Filter [s_zip,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_zip] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/explain.txt index 6e455f962..be37495fd 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/explain.txt @@ -1,36 +1,218 @@ == Physical Plan == -*(13) Sort [d_week_seq1#1 ASC NULLS FIRST], true, 0 -+- Exchange rangepartitioning(d_week_seq1#1 ASC NULLS FIRST, 5) - +- *(12) Project [d_week_seq1#1, round(CheckOverflow((promote_precision(sun_sales1#2) / promote_precision(sun_sales2#3)), DecimalType(37,20)), 2) AS round((sun_sales1 / sun_sales2), 2)#4, round(CheckOverflow((promote_precision(mon_sales1#5) / promote_precision(mon_sales2#6)), DecimalType(37,20)), 2) AS round((mon_sales1 / mon_sales2), 2)#7, round(CheckOverflow((promote_precision(tue_sales1#8) / promote_precision(tue_sales2#9)), DecimalType(37,20)), 2) AS round((tue_sales1 / tue_sales2), 2)#10, round(CheckOverflow((promote_precision(wed_sales1#11) / promote_precision(wed_sales2#12)), DecimalType(37,20)), 2) AS round((wed_sales1 / wed_sales2), 2)#13, round(CheckOverflow((promote_precision(thu_sales1#14) / promote_precision(thu_sales2#15)), DecimalType(37,20)), 2) AS round((thu_sales1 / thu_sales2), 2)#16, round(CheckOverflow((promote_precision(fri_sales1#17) / promote_precision(fri_sales2#18)), DecimalType(37,20)), 2) AS round((fri_sales1 / fri_sales2), 2)#19, round(CheckOverflow((promote_precision(sat_sales1#20) / promote_precision(sat_sales2#21)), DecimalType(37,20)), 2) AS round((sat_sales1 / sat_sales2), 2)#22] - +- *(12) BroadcastHashJoin [d_week_seq1#1], [(d_week_seq2#23 - 53)], Inner, BuildRight - :- *(12) Project [d_week_seq#24 AS d_week_seq1#1, sun_sales#25 AS sun_sales1#2, mon_sales#26 AS mon_sales1#5, tue_sales#27 AS tue_sales1#8, wed_sales#28 AS wed_sales1#11, thu_sales#29 AS thu_sales1#14, fri_sales#30 AS fri_sales1#17, sat_sales#31 AS sat_sales1#20] - : +- *(12) BroadcastHashJoin [d_week_seq#24], [d_week_seq#32], Inner, BuildRight - : :- *(12) HashAggregate(keys=[d_week_seq#24], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#33 = Sunday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Monday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Tuesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Wednesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Thursday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Friday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Saturday) THEN sales_price#34 ELSE null END))]) - : : +- Exchange hashpartitioning(d_week_seq#24, 5) - : : +- *(4) HashAggregate(keys=[d_week_seq#24], functions=[partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Sunday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Monday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Tuesday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Wednesday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Thursday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Friday) THEN sales_price#34 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#33 = Saturday) THEN sales_price#34 ELSE null END))]) - : : +- *(4) Project [sales_price#34, d_week_seq#24, d_day_name#33] - : : +- *(4) BroadcastHashJoin [sold_date_sk#35], [d_date_sk#36], Inner, BuildRight - : : :- Union - : : : :- *(1) Project [ws_sold_date_sk#37 AS sold_date_sk#35, ws_ext_sales_price#38 AS sales_price#34] - : : : : +- *(1) Filter isnotnull(ws_sold_date_sk#37) - : : : : +- *(1) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_ext_sales_price#38] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : : +- *(2) Project [cs_sold_date_sk#39 AS sold_date_sk#40, cs_ext_sales_price#41 AS sales_price#42] - : : : +- *(2) Filter isnotnull(cs_sold_date_sk#39) - : : : +- *(2) FileScan parquet default.catalog_sales[cs_sold_date_sk#39,cs_ext_sales_price#41] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [d_date_sk#36, d_week_seq#24, d_day_name#33] - : : +- *(3) Filter (isnotnull(d_date_sk#36) && isnotnull(d_week_seq#24)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#36,d_week_seq#24,d_day_name#33] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(5) Project [d_week_seq#32] - : +- *(5) Filter ((isnotnull(d_year#43) && (d_year#43 = 2001)) && isnotnull(d_week_seq#32)) - : +- *(5) FileScan parquet default.date_dim[d_week_seq#32,d_year#43] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_week_seq)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint))) - +- *(11) Project [d_week_seq#24 AS d_week_seq2#23, sun_sales#25 AS sun_sales2#3, mon_sales#26 AS mon_sales2#6, tue_sales#27 AS tue_sales2#9, wed_sales#28 AS wed_sales2#12, thu_sales#29 AS thu_sales2#15, fri_sales#30 AS fri_sales2#18, sat_sales#31 AS sat_sales2#21] - +- *(11) BroadcastHashJoin [d_week_seq#24], [d_week_seq#44], Inner, BuildRight - :- *(11) HashAggregate(keys=[d_week_seq#24], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#33 = Sunday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Monday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Tuesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Wednesday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Thursday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Friday) THEN sales_price#34 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#33 = Saturday) THEN sales_price#34 ELSE null END))]) - : +- ReusedExchange [d_week_seq#24, sum#45, sum#46, sum#47, sum#48, sum#49, sum#50, sum#51], Exchange hashpartitioning(d_week_seq#24, 5) - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(10) Project [d_week_seq#44] - +- *(10) Filter ((isnotnull(d_year#52) && (d_year#52 = 2002)) && isnotnull(d_week_seq#44)) - +- *(10) FileScan parquet default.date_dim[d_week_seq#44,d_year#52] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)], ReadSchema: struct \ No newline at end of file +* Sort (39) ++- Exchange (38) + +- * Project (37) + +- * BroadcastHashJoin Inner BuildRight (36) + :- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * HashAggregate (18) + : : +- Exchange (17) + : : +- * HashAggregate (16) + : : +- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- Union (9) + : : : :- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- * Project (8) + : : : +- * Filter (7) + : : : +- * ColumnarToRow (6) + : : : +- Scan parquet default.catalog_sales (5) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.date_dim (10) + : +- BroadcastExchange (23) + : +- * Project (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.date_dim (19) + +- BroadcastExchange (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * HashAggregate (27) + : +- ReusedExchange (26) + +- BroadcastExchange (32) + +- * Project (31) + +- * Filter (30) + +- * ColumnarToRow (29) + +- Scan parquet default.date_dim (28) + + +(1) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#1, ws_ext_sales_price#2] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [2]: [ws_sold_date_sk#1, ws_ext_sales_price#2] + +(3) Filter [codegen id : 1] +Input [2]: [ws_sold_date_sk#1, ws_ext_sales_price#2] +Condition : isnotnull(ws_sold_date_sk#1) + +(4) Project [codegen id : 1] +Output [2]: [ws_sold_date_sk#1 AS sold_date_sk#3, ws_ext_sales_price#2 AS sales_price#4] +Input [2]: [ws_sold_date_sk#1, ws_ext_sales_price#2] + +(5) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#5, cs_ext_sales_price#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 2] +Input [2]: [cs_sold_date_sk#5, cs_ext_sales_price#6] + +(7) Filter [codegen id : 2] +Input [2]: [cs_sold_date_sk#5, cs_ext_sales_price#6] +Condition : isnotnull(cs_sold_date_sk#5) + +(8) Project [codegen id : 2] +Output [2]: [cs_sold_date_sk#5 AS sold_date_sk#7, cs_ext_sales_price#6 AS sales_price#8] +Input [2]: [cs_sold_date_sk#5, cs_ext_sales_price#6] + +(9) Union + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] + +(12) Filter [codegen id : 3] +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Condition : (isnotnull(d_date_sk#9) AND isnotnull(d_week_seq#10)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [sold_date_sk#3] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(15) Project [codegen id : 4] +Output [3]: [sales_price#4, d_week_seq#10, d_day_name#11] +Input [5]: [sold_date_sk#3, sales_price#4, d_date_sk#9, d_week_seq#10, d_day_name#11] + +(16) HashAggregate [codegen id : 4] +Input [3]: [sales_price#4, d_week_seq#10, d_day_name#11] +Keys [1]: [d_week_seq#10] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 ELSE null END))] +Aggregate Attributes [7]: [sum#13, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19] +Results [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] + +(17) Exchange +Input [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Arguments: hashpartitioning(d_week_seq#10, 5), true, [id=#27] + +(18) HashAggregate [codegen id : 12] +Input [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Keys [1]: [d_week_seq#10] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 ELSE null END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 ELSE null END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 ELSE null END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 ELSE null END))#32, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 ELSE null END))#33, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 ELSE null END))#34] +Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 ELSE null END))#28,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 ELSE null END))#29,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 ELSE null END))#30,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#31,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 ELSE null END))#32,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 ELSE null END))#33,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 ELSE null END))#34,17,2) AS sat_sales#41] + +(19) Scan parquet default.date_dim +Output [2]: [d_week_seq#42, d_year#43] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_week_seq)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 5] +Input [2]: [d_week_seq#42, d_year#43] + +(21) Filter [codegen id : 5] +Input [2]: [d_week_seq#42, d_year#43] +Condition : ((isnotnull(d_year#43) AND (d_year#43 = 2001)) AND isnotnull(d_week_seq#42)) + +(22) Project [codegen id : 5] +Output [1]: [d_week_seq#42] +Input [2]: [d_week_seq#42, d_year#43] + +(23) BroadcastExchange +Input [1]: [d_week_seq#42] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#44] + +(24) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [d_week_seq#10] +Right keys [1]: [d_week_seq#42] +Join condition: None + +(25) Project [codegen id : 12] +Output [8]: [d_week_seq#10 AS d_week_seq1#45, sun_sales#35 AS sun_sales1#46, mon_sales#36 AS mon_sales1#47, tue_sales#37 AS tue_sales1#48, wed_sales#38 AS wed_sales1#49, thu_sales#39 AS thu_sales1#50, fri_sales#40 AS fri_sales1#51, sat_sales#41 AS sat_sales1#52] +Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#42] + +(26) ReusedExchange [Reuses operator id: 17] +Output [8]: [d_week_seq#10, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] + +(27) HashAggregate [codegen id : 11] +Input [8]: [d_week_seq#10, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] +Keys [1]: [d_week_seq#10] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 ELSE null END))#60, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 ELSE null END))#61, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 ELSE null END))#62, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#63, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 ELSE null END))#64, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 ELSE null END))#65, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 ELSE null END))#66] +Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 ELSE null END))#60,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 ELSE null END))#61,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 ELSE null END))#62,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#63,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 ELSE null END))#64,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 ELSE null END))#65,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 ELSE null END))#66,17,2) AS sat_sales#41] + +(28) Scan parquet default.date_dim +Output [2]: [d_week_seq#67, d_year#68] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 10] +Input [2]: [d_week_seq#67, d_year#68] + +(30) Filter [codegen id : 10] +Input [2]: [d_week_seq#67, d_year#68] +Condition : ((isnotnull(d_year#68) AND (d_year#68 = 2002)) AND isnotnull(d_week_seq#67)) + +(31) Project [codegen id : 10] +Output [1]: [d_week_seq#67] +Input [2]: [d_week_seq#67, d_year#68] + +(32) BroadcastExchange +Input [1]: [d_week_seq#67] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#69] + +(33) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [d_week_seq#10] +Right keys [1]: [d_week_seq#67] +Join condition: None + +(34) Project [codegen id : 11] +Output [8]: [d_week_seq#10 AS d_week_seq2#70, sun_sales#35 AS sun_sales2#71, mon_sales#36 AS mon_sales2#72, tue_sales#37 AS tue_sales2#73, wed_sales#38 AS wed_sales2#74, thu_sales#39 AS thu_sales2#75, fri_sales#40 AS fri_sales2#76, sat_sales#41 AS sat_sales2#77] +Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#67] + +(35) BroadcastExchange +Input [8]: [d_week_seq2#70, sun_sales2#71, mon_sales2#72, tue_sales2#73, wed_sales2#74, thu_sales2#75, fri_sales2#76, sat_sales2#77] +Arguments: HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint)),false), [id=#78] + +(36) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [d_week_seq1#45] +Right keys [1]: [(d_week_seq2#70 - 53)] +Join condition: None + +(37) Project [codegen id : 12] +Output [8]: [d_week_seq1#45, round(CheckOverflow((promote_precision(sun_sales1#46) / promote_precision(sun_sales2#71)), DecimalType(37,20), true), 2) AS round((sun_sales1 / sun_sales2), 2)#79, round(CheckOverflow((promote_precision(mon_sales1#47) / promote_precision(mon_sales2#72)), DecimalType(37,20), true), 2) AS round((mon_sales1 / mon_sales2), 2)#80, round(CheckOverflow((promote_precision(tue_sales1#48) / promote_precision(tue_sales2#73)), DecimalType(37,20), true), 2) AS round((tue_sales1 / tue_sales2), 2)#81, round(CheckOverflow((promote_precision(wed_sales1#49) / promote_precision(wed_sales2#74)), DecimalType(37,20), true), 2) AS round((wed_sales1 / wed_sales2), 2)#82, round(CheckOverflow((promote_precision(thu_sales1#50) / promote_precision(thu_sales2#75)), DecimalType(37,20), true), 2) AS round((thu_sales1 / thu_sales2), 2)#83, round(CheckOverflow((promote_precision(fri_sales1#51) / promote_precision(fri_sales2#76)), DecimalType(37,20), true), 2) AS round((fri_sales1 / fri_sales2), 2)#84, round(CheckOverflow((promote_precision(sat_sales1#52) / promote_precision(sat_sales2#77)), DecimalType(37,20), true), 2) AS round((sat_sales1 / sat_sales2), 2)#85] +Input [16]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52, d_week_seq2#70, sun_sales2#71, mon_sales2#72, tue_sales2#73, wed_sales2#74, thu_sales2#75, fri_sales2#76, sat_sales2#77] + +(38) Exchange +Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#79, round((mon_sales1 / mon_sales2), 2)#80, round((tue_sales1 / tue_sales2), 2)#81, round((wed_sales1 / wed_sales2), 2)#82, round((thu_sales1 / thu_sales2), 2)#83, round((fri_sales1 / fri_sales2), 2)#84, round((sat_sales1 / sat_sales2), 2)#85] +Arguments: rangepartitioning(d_week_seq1#45 ASC NULLS FIRST, 5), true, [id=#86] + +(39) Sort [codegen id : 13] +Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#79, round((mon_sales1 / mon_sales2), 2)#80, round((tue_sales1 / tue_sales2), 2)#81, round((wed_sales1 / wed_sales2), 2)#82, round((thu_sales1 / thu_sales2), 2)#83, round((fri_sales1 / fri_sales2), 2)#84, round((sat_sales1 / sat_sales2), 2)#85] +Arguments: [d_week_seq1#45 ASC NULLS FIRST], true, 0 + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/simplified.txt index 7e604b292..f8028aa54 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q2/simplified.txt @@ -1,52 +1,61 @@ -WholeStageCodegen +WholeStageCodegen (13) Sort [d_week_seq1] InputAdapter Exchange [d_week_seq1] #1 - WholeStageCodegen - Project [d_week_seq1,fri_sales1,fri_sales2,mon_sales1,mon_sales2,sat_sales1,sat_sales2,sun_sales1,sun_sales2,thu_sales1,thu_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2] + WholeStageCodegen (12) + Project [d_week_seq1,sun_sales1,sun_sales2,mon_sales1,mon_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2,thu_sales1,thu_sales2,fri_sales1,fri_sales2,sat_sales1,sat_sales2] BroadcastHashJoin [d_week_seq1,d_week_seq2] - Project [d_week_seq,fri_sales,mon_sales,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] InputAdapter Exchange [d_week_seq] #2 - WholeStageCodegen - HashAggregate [d_day_name,d_week_seq,sales_price,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [d_day_name,d_week_seq,sales_price] - BroadcastHashJoin [d_date_sk,sold_date_sk] + WholeStageCodegen (4) + HashAggregate [d_week_seq,d_day_name,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,d_week_seq,d_day_name] + BroadcastHashJoin [sold_date_sk,d_date_sk] InputAdapter Union - WholeStageCodegen - Project [ws_ext_sales_price,ws_sold_date_sk] + WholeStageCodegen (1) + Project [ws_sold_date_sk,ws_ext_sales_price] Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_sold_date_sk] [ws_ext_sales_price,ws_sold_date_sk] - WholeStageCodegen - Project [cs_ext_sales_price,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_ext_sales_price] + WholeStageCodegen (2) + Project [cs_sold_date_sk,cs_ext_sales_price] Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ext_sales_price,cs_sold_date_sk] [cs_ext_sales_price,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ext_sales_price] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk,d_day_name,d_week_seq] - Filter [d_date_sk,d_week_seq] - Scan parquet default.date_dim [d_date_sk,d_day_name,d_week_seq] [d_date_sk,d_day_name,d_week_seq] + WholeStageCodegen (3) + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq,d_day_name] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (5) Project [d_week_seq] - Filter [d_week_seq,d_year] - Scan parquet default.date_dim [d_week_seq,d_year] [d_week_seq,d_year] + Filter [d_year,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_week_seq,d_year] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [d_week_seq,fri_sales,mon_sales,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + WholeStageCodegen (11) + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] InputAdapter - ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 + ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (10) Project [d_week_seq] - Filter [d_week_seq,d_year] - Scan parquet default.date_dim [d_week_seq,d_year] [d_week_seq,d_year] + Filter [d_year,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_week_seq,d_year] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/explain.txt index c1f528c67..4234fba2b 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/explain.txt @@ -1,24 +1,137 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_category#1 ASC NULLS FIRST,i_class#2 ASC NULLS FIRST,i_item_id#3 ASC NULLS FIRST,i_item_desc#4 ASC NULLS FIRST,revenueratio#5 ASC NULLS FIRST], output=[i_item_desc#4,i_category#1,i_class#2,i_current_price#6,itemrevenue#7,revenueratio#5]) -+- *(6) Project [i_item_desc#4, i_category#1, i_class#2, i_current_price#6, itemrevenue#7, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#8) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#9)), DecimalType(38,17)) AS revenueratio#5, i_item_id#3] - +- Window [sum(_w1#10) windowspecdefinition(i_class#2, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#9], [i_class#2] - +- *(5) Sort [i_class#2 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(i_class#2, 5) - +- *(4) HashAggregate(keys=[i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6], functions=[sum(UnscaledValue(cs_ext_sales_price#11))]) - +- Exchange hashpartitioning(i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6, 5) - +- *(3) HashAggregate(keys=[i_item_id#3, i_item_desc#4, i_category#1, i_class#2, i_current_price#6], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#11))]) - +- *(3) Project [cs_ext_sales_price#11, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] - +- *(3) BroadcastHashJoin [cs_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight - :- *(3) Project [cs_sold_date_sk#12, cs_ext_sales_price#11, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] - : +- *(3) BroadcastHashJoin [cs_item_sk#14], [i_item_sk#15], Inner, BuildRight - : :- *(3) Project [cs_sold_date_sk#12, cs_item_sk#14, cs_ext_sales_price#11] - : : +- *(3) Filter (isnotnull(cs_item_sk#14) && isnotnull(cs_sold_date_sk#12)) - : : +- *(3) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_item_sk#14,cs_ext_sales_price#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [i_item_sk#15, i_item_id#3, i_item_desc#4, i_current_price#6, i_class#2, i_category#1] - : +- *(1) Filter (i_category#1 IN (Sports,Books,Home) && isnotnull(i_item_sk#15)) - : +- *(1) FileScan parquet default.item[i_item_sk#15,i_item_id#3,i_item_desc#4,i_current_price#6,i_class#2,i_category#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)], ReadSchema: struct= 10644)) && (d_date#16 <= 10674)) && isnotnull(d_date_sk#13)) - +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (24) ++- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.catalog_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet default.date_dim (10) + + +(1) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3] + +(3) Filter [codegen id : 3] +Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports,Books,Home) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [cs_sold_date_sk#1, cs_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_sales_price#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#11, d_date#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] +Condition : (((isnotnull(d_date#12) AND (d_date#12 >= 10644)) AND (d_date#12 <= 10674)) AND isnotnull(d_date_sk#11)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#12] + +(14) BroadcastExchange +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [cs_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [cs_sold_date_sk#1, cs_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#11] + +(17) HashAggregate [codegen id : 3] +Input [6]: [cs_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), true, [id=#16] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#3))#17] +Results [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#3))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#3))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#3))#17,17,2) AS _w1#20, i_item_id#5] + +(20) Exchange +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), true, [id=#21] + +(21) Sort [codegen id : 5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: [sum(_w1#20) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23, i_item_id#5] +Input [9]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5, _we0#22] + +(24) TakeOrderedAndProject +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23, i_item_id#5] +Arguments: 100, [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/simplified.txt index a6a8b7457..6259c1d53 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q20/simplified.txt @@ -1,34 +1,38 @@ -TakeOrderedAndProject [i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue,revenueratio] - WholeStageCodegen - Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] +TakeOrderedAndProject [i_category,i_class,i_item_id,i_item_desc,revenueratio,i_current_price,itemrevenue] + WholeStageCodegen (6) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] InputAdapter Window [_w1,i_class] - WholeStageCodegen + WholeStageCodegen (5) Sort [i_class] InputAdapter Exchange [i_class] #1 - WholeStageCodegen - HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(cs_ext_sales_price))] + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(cs_ext_sales_price)),itemrevenue,_w0,_w1,sum] InputAdapter - Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #2 - WholeStageCodegen - HashAggregate [cs_ext_sales_price,i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum] [sum,sum] - Project [cs_ext_sales_price,i_category,i_class,i_current_price,i_item_desc,i_item_id] + Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #2 + WholeStageCodegen (3) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,cs_ext_sales_price] [sum,sum] + Project [cs_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_ext_sales_price,cs_sold_date_sk,i_category,i_class,i_current_price,i_item_desc,i_item_id] + Project [cs_sold_date_sk,cs_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] - Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] - Filter [i_category,i_item_sk] - Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/explain.txt index 86adb689b..788d1affd 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/explain.txt @@ -1,27 +1,155 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[w_warehouse_name#1 ASC NULLS FIRST,i_item_id#2 ASC NULLS FIRST], output=[w_warehouse_name#1,i_item_id#2,inv_before#3,inv_after#4]) -+- *(5) Filter ((CASE WHEN (inv_before#3 > 0) THEN (cast(inv_after#4 as double) / cast(inv_before#3 as double)) ELSE null END >= 0.666667) && (CASE WHEN (inv_before#3 > 0) THEN (cast(inv_after#4 as double) / cast(inv_before#3 as double)) ELSE null END <= 1.5)) - +- *(5) HashAggregate(keys=[w_warehouse_name#1, i_item_id#2], functions=[sum(cast(CASE WHEN (d_date#5 < 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint)), sum(cast(CASE WHEN (d_date#5 >= 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint))]) - +- Exchange hashpartitioning(w_warehouse_name#1, i_item_id#2, 5) - +- *(4) HashAggregate(keys=[w_warehouse_name#1, i_item_id#2], functions=[partial_sum(cast(CASE WHEN (d_date#5 < 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (d_date#5 >= 11027) THEN inv_quantity_on_hand#6 ELSE 0 END as bigint))]) - +- *(4) Project [inv_quantity_on_hand#6, w_warehouse_name#1, i_item_id#2, d_date#5] - +- *(4) BroadcastHashJoin [inv_date_sk#7], [d_date_sk#8], Inner, BuildRight - :- *(4) Project [inv_date_sk#7, inv_quantity_on_hand#6, w_warehouse_name#1, i_item_id#2] - : +- *(4) BroadcastHashJoin [inv_item_sk#9], [i_item_sk#10], Inner, BuildRight - : :- *(4) Project [inv_date_sk#7, inv_item_sk#9, inv_quantity_on_hand#6, w_warehouse_name#1] - : : +- *(4) BroadcastHashJoin [inv_warehouse_sk#11], [w_warehouse_sk#12], Inner, BuildRight - : : :- *(4) Project [inv_date_sk#7, inv_item_sk#9, inv_warehouse_sk#11, inv_quantity_on_hand#6] - : : : +- *(4) Filter ((isnotnull(inv_warehouse_sk#11) && isnotnull(inv_item_sk#9)) && isnotnull(inv_date_sk#7)) - : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#7,inv_item_sk#9,inv_warehouse_sk#11,inv_quantity_on_hand#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_warehouse_sk), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [w_warehouse_sk#12, w_warehouse_name#1] - : : +- *(1) Filter isnotnull(w_warehouse_sk#12) - : : +- *(1) FileScan parquet default.warehouse[w_warehouse_sk#12,w_warehouse_name#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [i_item_sk#10, i_item_id#2] - : +- *(2) Filter (((isnotnull(i_current_price#13) && (i_current_price#13 >= 0.99)) && (i_current_price#13 <= 1.49)) && isnotnull(i_item_sk#10)) - : +- *(2) FileScan parquet default.item[i_item_sk#10,i_item_id#2,i_current_price#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [d_date_sk#8, d_date#5] - +- *(3) Filter (((isnotnull(d_date#5) && (d_date#5 >= 10997)) && (d_date#5 <= 11057)) && isnotnull(d_date_sk#8)) - +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_date#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), Is..., ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (27) ++- * Filter (26) + +- * HashAggregate (25) + +- Exchange (24) + +- * HashAggregate (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.inventory (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.warehouse (4) + : +- BroadcastExchange (14) + : +- * Project (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet default.item (10) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet default.date_dim (17) + + +(1) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/inventory] +PushedFilters: [IsNotNull(inv_warehouse_sk), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_warehouse_sk#3) AND isnotnull(inv_item_sk#2)) AND isnotnull(inv_date_sk#1)) + +(4) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] + +(6) Filter [codegen id : 1] +Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Condition : isnotnull(w_warehouse_sk#5) + +(7) BroadcastExchange +Input [2]: [w_warehouse_sk#5, w_warehouse_name#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#5] +Join condition: None + +(9) Project [codegen id : 4] +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_quantity_on_hand#4, w_warehouse_name#6] +Input [6]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, w_warehouse_sk#5, w_warehouse_name#6] + +(10) Scan parquet default.item +Output [3]: [i_item_sk#8, i_item_id#9, i_current_price#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_price,1.49), IsNotNull(i_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#8, i_item_id#9, i_current_price#10] + +(12) Filter [codegen id : 2] +Input [3]: [i_item_sk#8, i_item_id#9, i_current_price#10] +Condition : (((isnotnull(i_current_price#10) AND (i_current_price#10 >= 0.99)) AND (i_current_price#10 <= 1.49)) AND isnotnull(i_item_sk#8)) + +(13) Project [codegen id : 2] +Output [2]: [i_item_sk#8, i_item_id#9] +Input [3]: [i_item_sk#8, i_item_id#9, i_current_price#10] + +(14) BroadcastExchange +Input [2]: [i_item_sk#8, i_item_id#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(16) Project [codegen id : 4] +Output [4]: [inv_date_sk#1, inv_quantity_on_hand#4, w_warehouse_name#6, i_item_id#9] +Input [6]: [inv_date_sk#1, inv_item_sk#2, inv_quantity_on_hand#4, w_warehouse_name#6, i_item_sk#8, i_item_id#9] + +(17) Scan parquet default.date_dim +Output [2]: [d_date_sk#12, d_date#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#12, d_date#13] + +(19) Filter [codegen id : 3] +Input [2]: [d_date_sk#12, d_date#13] +Condition : (((isnotnull(d_date#13) AND (d_date#13 >= 10997)) AND (d_date#13 <= 11057)) AND isnotnull(d_date_sk#12)) + +(20) BroadcastExchange +Input [2]: [d_date_sk#12, d_date#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#12] +Join condition: None + +(22) Project [codegen id : 4] +Output [4]: [inv_quantity_on_hand#4, w_warehouse_name#6, i_item_id#9, d_date#13] +Input [6]: [inv_date_sk#1, inv_quantity_on_hand#4, w_warehouse_name#6, i_item_id#9, d_date_sk#12, d_date#13] + +(23) HashAggregate [codegen id : 4] +Input [4]: [inv_quantity_on_hand#4, w_warehouse_name#6, i_item_id#9, d_date#13] +Keys [2]: [w_warehouse_name#6, i_item_id#9] +Functions [2]: [partial_sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))] +Aggregate Attributes [2]: [sum#15, sum#16] +Results [4]: [w_warehouse_name#6, i_item_id#9, sum#17, sum#18] + +(24) Exchange +Input [4]: [w_warehouse_name#6, i_item_id#9, sum#17, sum#18] +Arguments: hashpartitioning(w_warehouse_name#6, i_item_id#9, 5), true, [id=#19] + +(25) HashAggregate [codegen id : 5] +Input [4]: [w_warehouse_name#6, i_item_id#9, sum#17, sum#18] +Keys [2]: [w_warehouse_name#6, i_item_id#9] +Functions [2]: [sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint)), sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))] +Aggregate Attributes [2]: [sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#20, sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#21] +Results [4]: [w_warehouse_name#6, i_item_id#9, sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#20 AS inv_before#22, sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#21 AS inv_after#23] + +(26) Filter [codegen id : 5] +Input [4]: [w_warehouse_name#6, i_item_id#9, inv_before#22, inv_after#23] +Condition : ((CASE WHEN (inv_before#22 > 0) THEN (cast(inv_after#23 as double) / cast(inv_before#22 as double)) ELSE null END >= 0.666667) AND (CASE WHEN (inv_before#22 > 0) THEN (cast(inv_after#23 as double) / cast(inv_before#22 as double)) ELSE null END <= 1.5)) + +(27) TakeOrderedAndProject +Input [4]: [w_warehouse_name#6, i_item_id#9, inv_before#22, inv_after#23] +Arguments: 100, [w_warehouse_name#6 ASC NULLS FIRST, i_item_id#9 ASC NULLS FIRST], [w_warehouse_name#6, i_item_id#9, inv_before#22, inv_after#23] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/simplified.txt index 472388330..9b5483bd7 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q21/simplified.txt @@ -1,35 +1,40 @@ -TakeOrderedAndProject [i_item_id,inv_after,inv_before,w_warehouse_name] - WholeStageCodegen - Filter [inv_after,inv_before] - HashAggregate [i_item_id,sum,sum,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),w_warehouse_name] [inv_after,inv_before,sum,sum,sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint))] +TakeOrderedAndProject [w_warehouse_name,i_item_id,inv_before,inv_after] + WholeStageCodegen (5) + Filter [inv_before,inv_after] + HashAggregate [w_warehouse_name,i_item_id,sum,sum] [sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),inv_before,inv_after,sum,sum] InputAdapter - Exchange [i_item_id,w_warehouse_name] #1 - WholeStageCodegen - HashAggregate [d_date,i_item_id,inv_quantity_on_hand,sum,sum,sum,sum,w_warehouse_name] [sum,sum,sum,sum] - Project [d_date,i_item_id,inv_quantity_on_hand,w_warehouse_name] - BroadcastHashJoin [d_date_sk,inv_date_sk] - Project [i_item_id,inv_date_sk,inv_quantity_on_hand,w_warehouse_name] - BroadcastHashJoin [i_item_sk,inv_item_sk] + Exchange [w_warehouse_name,i_item_id] #1 + WholeStageCodegen (4) + HashAggregate [w_warehouse_name,i_item_id,d_date,inv_quantity_on_hand] [sum,sum,sum,sum] + Project [inv_quantity_on_hand,w_warehouse_name,i_item_id,d_date] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_date_sk,inv_quantity_on_hand,w_warehouse_name,i_item_id] + BroadcastHashJoin [inv_item_sk,i_item_sk] Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Filter [inv_warehouse_sk,inv_item_sk,inv_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [w_warehouse_name,w_warehouse_sk] - Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] + WholeStageCodegen (1) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [i_item_id,i_item_sk] + WholeStageCodegen (2) + Project [i_item_sk,i_item_id] Filter [i_current_price,i_item_sk] - Scan parquet default.item [i_current_price,i_item_id,i_item_sk] [i_current_price,i_item_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id,i_current_price] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [d_date,d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + WholeStageCodegen (3) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/explain.txt index 1ec1ce194..6aae0b0c8 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/explain.txt @@ -1,27 +1,155 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[qoh#1 ASC NULLS FIRST,i_product_name#2 ASC NULLS FIRST,i_brand#3 ASC NULLS FIRST,i_class#4 ASC NULLS FIRST,i_category#5 ASC NULLS FIRST], output=[i_product_name#2,i_brand#3,i_class#4,i_category#5,qoh#1]) -+- *(5) HashAggregate(keys=[i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6], functions=[avg(cast(inv_quantity_on_hand#7 as bigint))]) - +- Exchange hashpartitioning(i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6, 5) - +- *(4) HashAggregate(keys=[i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6], functions=[partial_avg(cast(inv_quantity_on_hand#7 as bigint))]) - +- *(4) Expand [List(inv_quantity_on_hand#7, i_product_name#8, i_brand#9, i_class#10, i_category#11, 0), List(inv_quantity_on_hand#7, i_product_name#8, i_brand#9, i_class#10, null, 1), List(inv_quantity_on_hand#7, i_product_name#8, i_brand#9, null, null, 3), List(inv_quantity_on_hand#7, i_product_name#8, null, null, null, 7), List(inv_quantity_on_hand#7, null, null, null, null, 15)], [inv_quantity_on_hand#7, i_product_name#2, i_brand#3, i_class#4, i_category#5, spark_grouping_id#6] - +- *(4) Project [inv_quantity_on_hand#7, i_product_name#12 AS i_product_name#8, i_brand#13 AS i_brand#9, i_class#14 AS i_class#10, i_category#15 AS i_category#11] - +- *(4) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#17], Inner, BuildRight - :- *(4) Project [inv_warehouse_sk#16, inv_quantity_on_hand#7, i_brand#13, i_class#14, i_category#15, i_product_name#12] - : +- *(4) BroadcastHashJoin [inv_item_sk#18], [i_item_sk#19], Inner, BuildRight - : :- *(4) Project [inv_item_sk#18, inv_warehouse_sk#16, inv_quantity_on_hand#7] - : : +- *(4) BroadcastHashJoin [inv_date_sk#20], [d_date_sk#21], Inner, BuildRight - : : :- *(4) Project [inv_date_sk#20, inv_item_sk#18, inv_warehouse_sk#16, inv_quantity_on_hand#7] - : : : +- *(4) Filter ((isnotnull(inv_date_sk#20) && isnotnull(inv_item_sk#18)) && isnotnull(inv_warehouse_sk#16)) - : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#20,inv_item_sk#18,inv_warehouse_sk#16,inv_quantity_on_hand#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_date_sk), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [d_date_sk#21] - : : +- *(1) Filter (((isnotnull(d_month_seq#22) && (d_month_seq#22 >= 1200)) && (d_month_seq#22 <= 1211)) && isnotnull(d_date_sk#21)) - : : +- *(1) FileScan parquet default.date_dim[d_date_sk#21,d_month_seq#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [i_item_sk#19, i_brand#13, i_class#14, i_category#15, i_product_name#12] - : +- *(2) Filter isnotnull(i_item_sk#19) - : +- *(2) FileScan parquet default.item[i_item_sk#19,i_brand#13,i_class#14,i_category#15,i_product_name#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [w_warehouse_sk#17] - +- *(3) Filter isnotnull(w_warehouse_sk#17) - +- *(3) FileScan parquet default.warehouse[w_warehouse_sk#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (27) ++- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Expand (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.inventory (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.item (11) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet default.warehouse (17) + + +(1) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/inventory] +PushedFilters: [IsNotNull(inv_date_sk), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_date_sk#1) AND isnotnull(inv_item_sk#2)) AND isnotnull(inv_warehouse_sk#3)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_month_seq#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_month_seq#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1200)) AND (d_month_seq#6 <= 1211)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_month_seq#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 4] +Output [3]: [inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Input [5]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, d_date_sk#5] + +(11) Scan parquet default.item +Output [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] + +(13) Filter [codegen id : 2] +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Condition : isnotnull(i_item_sk#8) + +(14) BroadcastExchange +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(16) Project [codegen id : 4] +Output [6]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Input [8]: [inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] + +(17) Scan parquet default.warehouse +Output [1]: [w_warehouse_sk#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [1]: [w_warehouse_sk#14] + +(19) Filter [codegen id : 3] +Input [1]: [w_warehouse_sk#14] +Condition : isnotnull(w_warehouse_sk#14) + +(20) BroadcastExchange +Input [1]: [w_warehouse_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#14] +Join condition: None + +(22) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#4, i_product_name#12, i_brand#9, i_class#10, i_category#11] +Input [7]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, i_brand#9, i_class#10, i_category#11, i_product_name#12, w_warehouse_sk#14] + +(23) Expand [codegen id : 4] +Input [5]: [inv_quantity_on_hand#4, i_product_name#12, i_brand#9, i_class#10, i_category#11] +Arguments: [List(inv_quantity_on_hand#4, i_product_name#12, i_brand#9, i_class#10, i_category#11, 0), List(inv_quantity_on_hand#4, i_product_name#12, i_brand#9, i_class#10, null, 1), List(inv_quantity_on_hand#4, i_product_name#12, i_brand#9, null, null, 3), List(inv_quantity_on_hand#4, i_product_name#12, null, null, null, 7), List(inv_quantity_on_hand#4, null, null, null, null, 15)], [inv_quantity_on_hand#4, i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20] + +(24) HashAggregate [codegen id : 4] +Input [6]: [inv_quantity_on_hand#4, i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20] +Keys [5]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20] +Functions [1]: [partial_avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [2]: [sum#21, count#22] +Results [7]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20, sum#23, count#24] + +(25) Exchange +Input [7]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20, sum#23, count#24] +Arguments: hashpartitioning(i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20, 5), true, [id=#25] + +(26) HashAggregate [codegen id : 5] +Input [7]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20, sum#23, count#24] +Keys [5]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20] +Functions [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [1]: [avg(cast(inv_quantity_on_hand#4 as bigint))#26] +Results [5]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, avg(cast(inv_quantity_on_hand#4 as bigint))#26 AS qoh#27] + +(27) TakeOrderedAndProject +Input [5]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, qoh#27] +Arguments: 100, [qoh#27 ASC NULLS FIRST, i_product_name#16 ASC NULLS FIRST, i_brand#17 ASC NULLS FIRST, i_class#18 ASC NULLS FIRST, i_category#19 ASC NULLS FIRST], [i_product_name#16, i_brand#17, i_class#18, i_category#19, qoh#27] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/simplified.txt index d10f9d4ef..233babdf3 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q22/simplified.txt @@ -1,35 +1,40 @@ -TakeOrderedAndProject [i_brand,i_category,i_class,i_product_name,qoh] - WholeStageCodegen - HashAggregate [avg(cast(inv_quantity_on_hand as bigint)),count,i_brand,i_category,i_class,i_product_name,spark_grouping_id,sum] [avg(cast(inv_quantity_on_hand as bigint)),count,qoh,sum] +TakeOrderedAndProject [qoh,i_product_name,i_brand,i_class,i_category] + WholeStageCodegen (5) + HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,sum,count] [avg(cast(inv_quantity_on_hand as bigint)),qoh,sum,count] InputAdapter - Exchange [i_brand,i_category,i_class,i_product_name,spark_grouping_id] #1 - WholeStageCodegen - HashAggregate [count,count,i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand,spark_grouping_id,sum,sum] [count,count,sum,sum] - Expand [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] - Project [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand] + Exchange [i_product_name,i_brand,i_class,i_category,spark_grouping_id] #1 + WholeStageCodegen (4) + HashAggregate [i_product_name,i_brand,i_class,i_category,spark_grouping_id,inv_quantity_on_hand] [sum,count,sum,count] + Expand [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] + Project [inv_quantity_on_hand,i_product_name,i_brand,i_class,i_category] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [i_brand,i_category,i_class,i_product_name,inv_quantity_on_hand,inv_warehouse_sk] - BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - BroadcastHashJoin [d_date_sk,inv_date_sk] - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Project [inv_warehouse_sk,inv_quantity_on_hand,i_brand,i_class,i_category,i_product_name] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Project [inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [i_brand,i_category,i_class,i_item_sk,i_product_name] - Filter [i_item_sk] - Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_product_name] [i_brand,i_category,i_class,i_item_sk,i_product_name] + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [w_warehouse_sk] - Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_sk] [w_warehouse_sk] + WholeStageCodegen (3) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/explain.txt index b415920d5..15ae5bfe2 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/explain.txt @@ -1,89 +1,537 @@ == Physical Plan == -CollectLimit 100 -+- *(20) HashAggregate(keys=[], functions=[sum(sales#1)]) - +- Exchange SinglePartition - +- *(19) HashAggregate(keys=[], functions=[partial_sum(sales#1)]) - +- Union - :- *(9) Project [CheckOverflow((promote_precision(cast(cast(cs_quantity#2 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#3 as decimal(12,2)))), DecimalType(18,2)) AS sales#1] - : +- *(9) BroadcastHashJoin [cs_sold_date_sk#4], [d_date_sk#5], Inner, BuildRight - : :- *(9) Project [cs_sold_date_sk#4, cs_quantity#2, cs_list_price#3] - : : +- *(9) BroadcastHashJoin [cs_bill_customer_sk#6], [c_customer_sk#7], LeftSemi, BuildRight - : : :- *(9) Project [cs_sold_date_sk#4, cs_bill_customer_sk#6, cs_quantity#2, cs_list_price#3] - : : : +- *(9) BroadcastHashJoin [cs_item_sk#8], [item_sk#9], LeftSemi, BuildRight - : : : :- *(9) Project [cs_sold_date_sk#4, cs_bill_customer_sk#6, cs_item_sk#8, cs_quantity#2, cs_list_price#3] - : : : : +- *(9) Filter isnotnull(cs_sold_date_sk#4) - : : : : +- *(9) FileScan parquet default.catalog_sales[cs_sold_date_sk#4,cs_bill_customer_sk#6,cs_item_sk#8,cs_quantity#2,cs_list_price#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct 4) - : : : +- *(4) HashAggregate(keys=[substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14], functions=[count(1)]) - : : : +- Exchange hashpartitioning(substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14, 5) - : : : +- *(3) HashAggregate(keys=[substring(i_item_desc#11, 1, 30) AS substring(i_item_desc#11, 1, 30)#12, i_item_sk#13, d_date#14], functions=[partial_count(1)]) - : : : +- *(3) Project [d_date#14, i_item_sk#13, i_item_desc#11] - : : : +- *(3) BroadcastHashJoin [ss_item_sk#15], [i_item_sk#13], Inner, BuildRight - : : : :- *(3) Project [ss_item_sk#15, d_date#14] - : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#5], Inner, BuildRight - : : : : :- *(3) Project [ss_sold_date_sk#16, ss_item_sk#15] - : : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#16) && isnotnull(ss_item_sk#15)) - : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_item_sk#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(1) Project [d_date_sk#5, d_date#14] - : : : : +- *(1) Filter (d_year#17 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#5)) - : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#5,d_date#14,d_year#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [i_item_sk#13, i_item_desc#11] - : : : +- *(2) Filter isnotnull(i_item_sk#13) - : : : +- *(2) FileScan parquet default.item[i_item_sk#13,i_item_desc#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(7) Project [c_customer_sk#7] - : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))#20 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery3052 as decimal(32,6)))), DecimalType(38,8)))) - : : : +- Subquery subquery3052 - : : : +- *(5) HashAggregate(keys=[], functions=[max(csales#21)]) - : : : +- Exchange SinglePartition - : : : +- *(4) HashAggregate(keys=[], functions=[partial_max(csales#21)]) - : : : +- *(4) HashAggregate(keys=[c_customer_sk#7], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) - : : : +- Exchange hashpartitioning(c_customer_sk#7, 5) - : : : +- *(3) HashAggregate(keys=[c_customer_sk#7], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) - : : : +- *(3) Project [ss_quantity#18, ss_sales_price#19, c_customer_sk#7] - : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#5], Inner, BuildRight - : : : :- *(3) Project [ss_sold_date_sk#16, ss_quantity#18, ss_sales_price#19, c_customer_sk#7] - : : : : +- *(3) BroadcastHashJoin [ss_customer_sk#22], [c_customer_sk#7], Inner, BuildRight - : : : : :- *(3) Project [ss_sold_date_sk#16, ss_customer_sk#22, ss_quantity#18, ss_sales_price#19] - : : : : : +- *(3) Filter (isnotnull(ss_customer_sk#22) && isnotnull(ss_sold_date_sk#16)) - : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_customer_sk#22,ss_quantity#18,ss_sales_price#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(1) Project [c_customer_sk#7] - : : : : +- *(1) Filter isnotnull(c_customer_sk#7) - : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [d_date_sk#5] - : : : +- *(2) Filter (d_year#17 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#5)) - : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#5,d_year#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct - : : +- *(7) HashAggregate(keys=[c_customer_sk#7], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) - : : +- Exchange hashpartitioning(c_customer_sk#7, 5) - : : +- *(6) HashAggregate(keys=[c_customer_sk#7], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#18 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#19 as decimal(12,2)))), DecimalType(18,2)))]) - : : +- *(6) Project [ss_quantity#18, ss_sales_price#19, c_customer_sk#7] - : : +- *(6) BroadcastHashJoin [ss_customer_sk#22], [c_customer_sk#7], Inner, BuildRight - : : :- *(6) Project [ss_customer_sk#22, ss_quantity#18, ss_sales_price#19] - : : : +- *(6) Filter isnotnull(ss_customer_sk#22) - : : : +- *(6) FileScan parquet default.store_sales[ss_customer_sk#22,ss_quantity#18,ss_sales_price#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(5) Project [c_customer_sk#7] - : : +- *(5) Filter isnotnull(c_customer_sk#7) - : : +- *(5) FileScan parquet default.customer[c_customer_sk#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(8) Project [d_date_sk#5] - : +- *(8) Filter ((((isnotnull(d_year#17) && isnotnull(d_moy#23)) && (d_year#17 = 2000)) && (d_moy#23 = 2)) && isnotnull(d_date_sk#5)) - : +- *(8) FileScan parquet default.date_dim[d_date_sk#5,d_year#17,d_moy#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct - +- *(18) Project [CheckOverflow((promote_precision(cast(cast(ws_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#25 as decimal(12,2)))), DecimalType(18,2)) AS sales#26] - +- *(18) BroadcastHashJoin [ws_sold_date_sk#27], [d_date_sk#5], Inner, BuildRight - :- *(18) Project [ws_sold_date_sk#27, ws_quantity#24, ws_list_price#25] - : +- *(18) BroadcastHashJoin [ws_bill_customer_sk#28], [c_customer_sk#7], LeftSemi, BuildRight - : :- *(18) Project [ws_sold_date_sk#27, ws_bill_customer_sk#28, ws_quantity#24, ws_list_price#25] - : : +- *(18) BroadcastHashJoin [ws_item_sk#29], [item_sk#9], LeftSemi, BuildRight - : : :- *(18) Project [ws_sold_date_sk#27, ws_item_sk#29, ws_bill_customer_sk#28, ws_quantity#24, ws_list_price#25] - : : : +- *(18) Filter isnotnull(ws_sold_date_sk#27) - : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#27,ws_item_sk#29,ws_bill_customer_sk#28,ws_quantity#24,ws_list_price#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] + +(3) Filter [codegen id : 9] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] +Condition : isnotnull(cs_sold_date_sk#1) + +(4) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#6, ss_item_sk#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 3] +Input [2]: [ss_sold_date_sk#6, ss_item_sk#7] + +(6) Filter [codegen id : 3] +Input [2]: [ss_sold_date_sk#6, ss_item_sk#7] +Condition : (isnotnull(ss_sold_date_sk#6) AND isnotnull(ss_item_sk#7)) + +(7) Scan parquet default.date_dim +Output [3]: [d_date_sk#8, d_date#9, d_year#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] + +(9) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) + +(10) Project [codegen id : 1] +Output [2]: [d_date_sk#8, d_date#9] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] + +(11) BroadcastExchange +Input [2]: [d_date_sk#8, d_date#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(12) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(13) Project [codegen id : 3] +Output [2]: [ss_item_sk#7, d_date#9] +Input [4]: [ss_sold_date_sk#6, ss_item_sk#7, d_date_sk#8, d_date#9] + +(14) Scan parquet default.item +Output [2]: [i_item_sk#12, i_item_desc#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 2] +Input [2]: [i_item_sk#12, i_item_desc#13] + +(16) Filter [codegen id : 2] +Input [2]: [i_item_sk#12, i_item_desc#13] +Condition : isnotnull(i_item_sk#12) + +(17) BroadcastExchange +Input [2]: [i_item_sk#12, i_item_desc#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(18) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#7] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(19) Project [codegen id : 3] +Output [3]: [d_date#9, i_item_sk#12, i_item_desc#13] +Input [4]: [ss_item_sk#7, d_date#9, i_item_sk#12, i_item_desc#13] + +(20) HashAggregate [codegen id : 3] +Input [3]: [d_date#9, i_item_sk#12, i_item_desc#13] +Keys [3]: [substr(i_item_desc#13, 1, 30) AS substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#16] +Results [4]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, count#17] + +(21) Exchange +Input [4]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, count#17] +Arguments: hashpartitioning(substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, 5), true, [id=#18] + +(22) HashAggregate [codegen id : 4] +Input [4]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, count#17] +Keys [3]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#19] +Results [2]: [i_item_sk#12 AS item_sk#20, count(1)#19 AS count(1)#21] + +(23) Filter [codegen id : 4] +Input [2]: [item_sk#20, count(1)#21] +Condition : (count(1)#21 > 4) + +(24) Project [codegen id : 4] +Output [1]: [item_sk#20] +Input [2]: [item_sk#20, count(1)#21] + +(25) BroadcastExchange +Input [1]: [item_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] + +(26) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [item_sk#20] +Join condition: None + +(27) Project [codegen id : 9] +Output [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] + +(28) Scan parquet default.store_sales +Output [3]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 6] +Input [3]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] + +(30) Filter [codegen id : 6] +Input [3]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] +Condition : isnotnull(ss_customer_sk#23) + +(31) Scan parquet default.customer +Output [1]: [c_customer_sk#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 5] +Input [1]: [c_customer_sk#26] + +(33) Filter [codegen id : 5] +Input [1]: [c_customer_sk#26] +Condition : isnotnull(c_customer_sk#26) + +(34) BroadcastExchange +Input [1]: [c_customer_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(35) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#23] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(36) Project [codegen id : 6] +Output [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Input [4]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] + +(37) HashAggregate [codegen id : 6] +Input [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Keys [1]: [c_customer_sk#26] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#28, isEmpty#29] +Results [3]: [c_customer_sk#26, sum#30, isEmpty#31] + +(38) Exchange +Input [3]: [c_customer_sk#26, sum#30, isEmpty#31] +Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#32] + +(39) HashAggregate [codegen id : 7] +Input [3]: [c_customer_sk#26, sum#30, isEmpty#31] +Keys [1]: [c_customer_sk#26] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#33] +Results [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#33 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] + +(40) Filter [codegen id : 7] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#35, [id=#36] as decimal(32,6)))), DecimalType(38,8), true))) + +(41) Project [codegen id : 7] +Output [1]: [c_customer_sk#26] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] + +(42) BroadcastExchange +Input [1]: [c_customer_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#37] + +(43) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(44) Project [codegen id : 9] +Output [3]: [cs_sold_date_sk#1, cs_quantity#4, cs_list_price#5] +Input [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] + +(45) Scan parquet default.date_dim +Output [3]: [d_date_sk#8, d_year#10, d_moy#38] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(46) ColumnarToRow [codegen id : 8] +Input [3]: [d_date_sk#8, d_year#10, d_moy#38] + +(47) Filter [codegen id : 8] +Input [3]: [d_date_sk#8, d_year#10, d_moy#38] +Condition : ((((isnotnull(d_year#10) AND isnotnull(d_moy#38)) AND (d_year#10 = 2000)) AND (d_moy#38 = 2)) AND isnotnull(d_date_sk#8)) + +(48) Project [codegen id : 8] +Output [1]: [d_date_sk#8] +Input [3]: [d_date_sk#8, d_year#10, d_moy#38] + +(49) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#39] + +(50) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(51) Project [codegen id : 9] +Output [1]: [CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true) AS sales#40] +Input [4]: [cs_sold_date_sk#1, cs_quantity#4, cs_list_price#5, d_date_sk#8] + +(52) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#41, ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(53) ColumnarToRow [codegen id : 18] +Input [5]: [ws_sold_date_sk#41, ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] + +(54) Filter [codegen id : 18] +Input [5]: [ws_sold_date_sk#41, ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] +Condition : isnotnull(ws_sold_date_sk#41) + +(55) ReusedExchange [Reuses operator id: 25] +Output [1]: [item_sk#20] + +(56) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_item_sk#42] +Right keys [1]: [item_sk#20] +Join condition: None + +(57) Project [codegen id : 18] +Output [4]: [ws_sold_date_sk#41, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] +Input [5]: [ws_sold_date_sk#41, ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] + +(58) ReusedExchange [Reuses operator id: 38] +Output [3]: [c_customer_sk#26, sum#46, isEmpty#47] + +(59) HashAggregate [codegen id : 16] +Input [3]: [c_customer_sk#26, sum#46, isEmpty#47] +Keys [1]: [c_customer_sk#26] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#48] +Results [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#48 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#49] + +(60) Filter [codegen id : 16] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#49] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#49) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#49 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#35, [id=#36] as decimal(32,6)))), DecimalType(38,8), true))) + +(61) Project [codegen id : 16] +Output [1]: [c_customer_sk#26] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#49] + +(62) BroadcastExchange +Input [1]: [c_customer_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] + +(63) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_bill_customer_sk#43] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(64) Project [codegen id : 18] +Output [3]: [ws_sold_date_sk#41, ws_quantity#44, ws_list_price#45] +Input [4]: [ws_sold_date_sk#41, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] + +(65) ReusedExchange [Reuses operator id: 49] +Output [1]: [d_date_sk#8] + +(66) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_sold_date_sk#41] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(67) Project [codegen id : 18] +Output [1]: [CheckOverflow((promote_precision(cast(cast(ws_quantity#44 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#45 as decimal(12,2)))), DecimalType(18,2), true) AS sales#51] +Input [4]: [ws_sold_date_sk#41, ws_quantity#44, ws_list_price#45, d_date_sk#8] + +(68) Union + +(69) HashAggregate [codegen id : 19] +Input [1]: [sales#40] +Keys: [] +Functions [1]: [partial_sum(sales#40)] +Aggregate Attributes [2]: [sum#52, isEmpty#53] +Results [2]: [sum#54, isEmpty#55] + +(70) Exchange +Input [2]: [sum#54, isEmpty#55] +Arguments: SinglePartition, true, [id=#56] + +(71) HashAggregate [codegen id : 20] +Input [2]: [sum#54, isEmpty#55] +Keys: [] +Functions [1]: [sum(sales#40)] +Aggregate Attributes [1]: [sum(sales#40)#57] +Results [1]: [sum(sales#40)#57 AS sum(sales)#58] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 40 Hosting Expression = Subquery scalar-subquery#35, [id=#36] +* HashAggregate (93) ++- Exchange (92) + +- * HashAggregate (91) + +- * HashAggregate (90) + +- Exchange (89) + +- * HashAggregate (88) + +- * Project (87) + +- * BroadcastHashJoin Inner BuildRight (86) + :- * Project (80) + : +- * BroadcastHashJoin Inner BuildRight (79) + : :- * Filter (74) + : : +- * ColumnarToRow (73) + : : +- Scan parquet default.store_sales (72) + : +- BroadcastExchange (78) + : +- * Filter (77) + : +- * ColumnarToRow (76) + : +- Scan parquet default.customer (75) + +- BroadcastExchange (85) + +- * Project (84) + +- * Filter (83) + +- * ColumnarToRow (82) + +- Scan parquet default.date_dim (81) + + +(72) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(73) ColumnarToRow [codegen id : 3] +Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] + +(74) Filter [codegen id : 3] +Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] +Condition : (isnotnull(ss_customer_sk#23) AND isnotnull(ss_sold_date_sk#6)) + +(75) Scan parquet default.customer +Output [1]: [c_customer_sk#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 1] +Input [1]: [c_customer_sk#26] + +(77) Filter [codegen id : 1] +Input [1]: [c_customer_sk#26] +Condition : isnotnull(c_customer_sk#26) + +(78) BroadcastExchange +Input [1]: [c_customer_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#59] + +(79) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_customer_sk#23] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(80) Project [codegen id : 3] +Output [4]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Input [5]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] + +(81) Scan parquet default.date_dim +Output [2]: [d_date_sk#8, d_year#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(82) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#8, d_year#10] + +(83) Filter [codegen id : 2] +Input [2]: [d_date_sk#8, d_year#10] +Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) + +(84) Project [codegen id : 2] +Output [1]: [d_date_sk#8] +Input [2]: [d_date_sk#8, d_year#10] + +(85) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#60] + +(86) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(87) Project [codegen id : 3] +Output [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Input [5]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26, d_date_sk#8] + +(88) HashAggregate [codegen id : 3] +Input [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Keys [1]: [c_customer_sk#26] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#61, isEmpty#62] +Results [3]: [c_customer_sk#26, sum#63, isEmpty#64] + +(89) Exchange +Input [3]: [c_customer_sk#26, sum#63, isEmpty#64] +Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#65] + +(90) HashAggregate [codegen id : 4] +Input [3]: [c_customer_sk#26, sum#63, isEmpty#64] +Keys [1]: [c_customer_sk#26] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#66] +Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#66 AS csales#67] + +(91) HashAggregate [codegen id : 4] +Input [1]: [csales#67] +Keys: [] +Functions [1]: [partial_max(csales#67)] +Aggregate Attributes [1]: [max#68] +Results [1]: [max#69] + +(92) Exchange +Input [1]: [max#69] +Arguments: SinglePartition, true, [id=#70] + +(93) HashAggregate [codegen id : 5] +Input [1]: [max#69] +Keys: [] +Functions [1]: [max(csales#67)] +Aggregate Attributes [1]: [max(csales#67)#71] +Results [1]: [max(csales#67)#71 AS tpcds_cmax#72] + +Subquery:2 Hosting operator id = 60 Hosting Expression = ReusedSubquery Subquery scalar-subquery#35, [id=#36] + + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/simplified.txt index a75e4e5f9..aebe2bd3e 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23a/simplified.txt @@ -1,122 +1,142 @@ -CollectLimit - WholeStageCodegen - HashAggregate [sum,sum(sales)] [sum,sum(sales),sum(sales)] - InputAdapter - Exchange #1 - WholeStageCodegen - HashAggregate [sales,sum,sum] [sum,sum] - InputAdapter - Union - WholeStageCodegen - Project [cs_list_price,cs_quantity] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_list_price,cs_quantity,cs_sold_date_sk] - BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - Project [cs_bill_customer_sk,cs_list_price,cs_quantity,cs_sold_date_sk] - BroadcastHashJoin [cs_item_sk,item_sk] - Project [cs_bill_customer_sk,cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [item_sk] - Filter [count(1)] - HashAggregate [count,count(1),d_date,i_item_sk,substring(i_item_desc, 1, 30)] [count,count(1),count(1),item_sk] - InputAdapter - Exchange [d_date,i_item_sk,substring(i_item_desc, 1, 30)] #3 - WholeStageCodegen - HashAggregate [count,count,d_date,i_item_desc,i_item_sk,substring(i_item_desc, 1, 30)] [count,count,substring(i_item_desc, 1, 30)] - Project [d_date,i_item_desc,i_item_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [d_date,ss_item_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_item_sk,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] [ss_item_sk,ss_sold_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date,d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date,d_date_sk,d_year] [d_date,d_date_sk,d_year] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [i_item_desc,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_desc,i_item_sk] [i_item_desc,i_item_sk] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] - Subquery #1 - WholeStageCodegen - HashAggregate [max,max(csales)] [max,max(csales),tpcds_cmax] - InputAdapter - Exchange #9 - WholeStageCodegen - HashAggregate [csales,max,max] [max,max] - HashAggregate [c_customer_sk,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] [csales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] - InputAdapter - Exchange [c_customer_sk] #10 - WholeStageCodegen - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price,sum,sum] [sum,sum] - Project [c_customer_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [c_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] - Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] +WholeStageCodegen (20) + HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] + InputAdapter + Exchange #1 + WholeStageCodegen (19) + HashAggregate [sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (9) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] + BroadcastHashJoin [cs_item_sk,item_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (4) + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + InputAdapter + Exchange [substr(i_item_desc, 1, 30),i_item_sk,d_date] #3 + WholeStageCodegen (3) + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_year,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [c_customer_sk] - Filter [c_customer_sk] - Scan parquet default.customer [c_customer_sk] [c_customer_sk] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - HashAggregate [c_customer_sk,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] [sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] - InputAdapter - Exchange [c_customer_sk] #7 - WholeStageCodegen - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price,sum,sum] [sum,sum] - Project [c_customer_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [ss_customer_sk,ss_quantity,ss_sales_price] - Filter [ss_customer_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] [ss_customer_sk,ss_quantity,ss_sales_price] + Scan parquet default.date_dim [d_date_sk,d_date,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_desc] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (5) + HashAggregate [max] [max(csales),tpcds_cmax,max] + InputAdapter + Exchange #9 + WholeStageCodegen (4) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [c_customer_sk] - Filter [c_customer_sk] - Scan parquet default.customer [c_customer_sk] [c_customer_sk] - InputAdapter - BroadcastExchange #12 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - WholeStageCodegen - Project [ws_list_price,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_list_price,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [item_sk,ws_item_sk] - Project [ws_bill_customer_sk,ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] - Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] [ws_bill_customer_sk,ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] - InputAdapter - ReusedExchange [item_sk] [item_sk] #2 - InputAdapter - ReusedExchange [c_customer_sk] [c_customer_sk] #6 - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #12 + Exchange [c_customer_sk] #10 + WholeStageCodegen (3) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (1) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (6) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (8) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (18) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + BroadcastHashJoin [ws_item_sk,item_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [item_sk] #2 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (16) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + InputAdapter + ReusedExchange [d_date_sk] #13 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23b/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23b/explain.txt index 2d94d466d..b5213786c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23b/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23b/explain.txt @@ -1,101 +1,689 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_name#2 ASC NULLS FIRST,sales#3 ASC NULLS FIRST], output=[c_last_name#1,c_first_name#2,sales#3]) -+- Union - :- *(14) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2)))]) - : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, 5) - : +- *(13) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2)))]) - : +- *(13) Project [cs_quantity#4, cs_list_price#5, c_first_name#2, c_last_name#1] - : +- *(13) BroadcastHashJoin [cs_sold_date_sk#6], [d_date_sk#7], Inner, BuildRight - : :- *(13) Project [cs_sold_date_sk#6, cs_quantity#4, cs_list_price#5, c_first_name#2, c_last_name#1] - : : +- *(13) BroadcastHashJoin [cs_bill_customer_sk#8], [c_customer_sk#9], Inner, BuildRight - : : :- *(13) BroadcastHashJoin [cs_bill_customer_sk#8], [c_customer_sk#9#10], LeftSemi, BuildRight - : : : :- *(13) Project [cs_sold_date_sk#6, cs_bill_customer_sk#8, cs_quantity#4, cs_list_price#5] - : : : : +- *(13) BroadcastHashJoin [cs_item_sk#11], [item_sk#12], LeftSemi, BuildRight - : : : : :- *(13) Project [cs_sold_date_sk#6, cs_bill_customer_sk#8, cs_item_sk#11, cs_quantity#4, cs_list_price#5] - : : : : : +- *(13) Filter (isnotnull(cs_bill_customer_sk#8) && isnotnull(cs_sold_date_sk#6)) - : : : : : +- *(13) FileScan parquet default.catalog_sales[cs_sold_date_sk#6,cs_bill_customer_sk#8,cs_item_sk#11,cs_quantity#4,cs_list_price#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct 4) - : : : : +- *(4) HashAggregate(keys=[substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17], functions=[count(1)]) - : : : : +- Exchange hashpartitioning(substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17, 5) - : : : : +- *(3) HashAggregate(keys=[substring(i_item_desc#14, 1, 30) AS substring(i_item_desc#14, 1, 30)#15, i_item_sk#16, d_date#17], functions=[partial_count(1)]) - : : : : +- *(3) Project [d_date#17, i_item_sk#16, i_item_desc#14] - : : : : +- *(3) BroadcastHashJoin [ss_item_sk#18], [i_item_sk#16], Inner, BuildRight - : : : : :- *(3) Project [ss_item_sk#18, d_date#17] - : : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#7], Inner, BuildRight - : : : : : :- *(3) Project [ss_sold_date_sk#19, ss_item_sk#18] - : : : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#19) && isnotnull(ss_item_sk#18)) - : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(1) Project [d_date_sk#7, d_date#17] - : : : : : +- *(1) Filter (d_year#20 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#7)) - : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#7,d_date#17,d_year#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [i_item_sk#16, i_item_desc#14] - : : : : +- *(2) Filter isnotnull(i_item_sk#16) - : : : : +- *(2) FileScan parquet default.item[i_item_sk#16,i_item_desc#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(7) Project [c_customer_sk#9 AS c_customer_sk#9#10] - : : : +- *(7) Filter (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23) && (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#23 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery subquery3144 as decimal(32,6)))), DecimalType(38,8)))) - : : : : +- Subquery subquery3144 - : : : : +- *(5) HashAggregate(keys=[], functions=[max(csales#24)]) - : : : : +- Exchange SinglePartition - : : : : +- *(4) HashAggregate(keys=[], functions=[partial_max(csales#24)]) - : : : : +- *(4) HashAggregate(keys=[c_customer_sk#9], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) - : : : : +- Exchange hashpartitioning(c_customer_sk#9, 5) - : : : : +- *(3) HashAggregate(keys=[c_customer_sk#9], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) - : : : : +- *(3) Project [ss_quantity#21, ss_sales_price#22, c_customer_sk#9] - : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#7], Inner, BuildRight - : : : : :- *(3) Project [ss_sold_date_sk#19, ss_quantity#21, ss_sales_price#22, c_customer_sk#9] - : : : : : +- *(3) BroadcastHashJoin [ss_customer_sk#25], [c_customer_sk#9], Inner, BuildRight - : : : : : :- *(3) Project [ss_sold_date_sk#19, ss_customer_sk#25, ss_quantity#21, ss_sales_price#22] - : : : : : : +- *(3) Filter (isnotnull(ss_customer_sk#25) && isnotnull(ss_sold_date_sk#19)) - : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_customer_sk#25,ss_quantity#21,ss_sales_price#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(1) Project [c_customer_sk#9] - : : : : : +- *(1) Filter isnotnull(c_customer_sk#9) - : : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [d_date_sk#7] - : : : : +- *(2) Filter (d_year#20 IN (2000,2001,2002,2003) && isnotnull(d_date_sk#7)) - : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_year#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)], ReadSchema: struct - : : : +- *(7) HashAggregate(keys=[c_customer_sk#9], functions=[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) - : : : +- Exchange hashpartitioning(c_customer_sk#9, 5) - : : : +- *(6) HashAggregate(keys=[c_customer_sk#9], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#21 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))]) - : : : +- *(6) Project [ss_quantity#21, ss_sales_price#22, c_customer_sk#9] - : : : +- *(6) BroadcastHashJoin [ss_customer_sk#25], [c_customer_sk#9], Inner, BuildRight - : : : :- *(6) Project [ss_customer_sk#25, ss_quantity#21, ss_sales_price#22] - : : : : +- *(6) Filter isnotnull(ss_customer_sk#25) - : : : : +- *(6) FileScan parquet default.store_sales[ss_customer_sk#25,ss_quantity#21,ss_sales_price#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(5) Project [c_customer_sk#9] - : : : +- *(5) Filter isnotnull(c_customer_sk#9) - : : : +- *(5) FileScan parquet default.customer[c_customer_sk#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(11) BroadcastHashJoin [c_customer_sk#9], [c_customer_sk#9#10], LeftSemi, BuildRight - : : :- *(11) Project [c_customer_sk#9, c_first_name#2, c_last_name#1] - : : : +- *(11) Filter isnotnull(c_customer_sk#9) - : : : +- *(11) FileScan parquet default.customer[c_customer_sk#9,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct - : : +- ReusedExchange [c_customer_sk#9#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(12) Project [d_date_sk#7] - : +- *(12) Filter ((((isnotnull(d_year#20) && isnotnull(d_moy#26)) && (d_year#20 = 2000)) && (d_moy#26 = 2)) && isnotnull(d_date_sk#7)) - : +- *(12) FileScan parquet default.date_dim[d_date_sk#7,d_year#20,d_moy#26] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct - +- *(28) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#28 as decimal(12,2)))), DecimalType(18,2)))]) - +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, 5) - +- *(27) HashAggregate(keys=[c_last_name#1, c_first_name#2], functions=[partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#27 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#28 as decimal(12,2)))), DecimalType(18,2)))]) - +- *(27) Project [ws_quantity#27, ws_list_price#28, c_first_name#2, c_last_name#1] - +- *(27) BroadcastHashJoin [ws_sold_date_sk#29], [d_date_sk#7], Inner, BuildRight - :- *(27) Project [ws_sold_date_sk#29, ws_quantity#27, ws_list_price#28, c_first_name#2, c_last_name#1] - : +- *(27) BroadcastHashJoin [ws_bill_customer_sk#30], [c_customer_sk#9], Inner, BuildRight - : :- *(27) BroadcastHashJoin [ws_bill_customer_sk#30], [c_customer_sk#9#31], LeftSemi, BuildRight - : : :- *(27) Project [ws_sold_date_sk#29, ws_bill_customer_sk#30, ws_quantity#27, ws_list_price#28] - : : : +- *(27) BroadcastHashJoin [ws_item_sk#32], [item_sk#12], LeftSemi, BuildRight - : : : :- *(27) Project [ws_sold_date_sk#29, ws_item_sk#32, ws_bill_customer_sk#30, ws_quantity#27, ws_list_price#28] - : : : : +- *(27) Filter (isnotnull(ws_bill_customer_sk#30) && isnotnull(ws_sold_date_sk#29)) - : : : : +- *(27) FileScan parquet default.web_sales[ws_sold_date_sk#29,ws_item_sk#32,ws_bill_customer_sk#30,ws_quantity#27,ws_list_price#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct + +(2) ColumnarToRow [codegen id : 13] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] + +(3) Filter [codegen id : 13] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] +Condition : (isnotnull(cs_bill_customer_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#6, ss_item_sk#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 3] +Input [2]: [ss_sold_date_sk#6, ss_item_sk#7] + +(6) Filter [codegen id : 3] +Input [2]: [ss_sold_date_sk#6, ss_item_sk#7] +Condition : (isnotnull(ss_sold_date_sk#6) AND isnotnull(ss_item_sk#7)) + +(7) Scan parquet default.date_dim +Output [3]: [d_date_sk#8, d_date#9, d_year#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] + +(9) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] +Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) + +(10) Project [codegen id : 1] +Output [2]: [d_date_sk#8, d_date#9] +Input [3]: [d_date_sk#8, d_date#9, d_year#10] + +(11) BroadcastExchange +Input [2]: [d_date_sk#8, d_date#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(12) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(13) Project [codegen id : 3] +Output [2]: [ss_item_sk#7, d_date#9] +Input [4]: [ss_sold_date_sk#6, ss_item_sk#7, d_date_sk#8, d_date#9] + +(14) Scan parquet default.item +Output [2]: [i_item_sk#12, i_item_desc#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 2] +Input [2]: [i_item_sk#12, i_item_desc#13] + +(16) Filter [codegen id : 2] +Input [2]: [i_item_sk#12, i_item_desc#13] +Condition : isnotnull(i_item_sk#12) + +(17) BroadcastExchange +Input [2]: [i_item_sk#12, i_item_desc#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(18) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#7] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(19) Project [codegen id : 3] +Output [3]: [d_date#9, i_item_sk#12, i_item_desc#13] +Input [4]: [ss_item_sk#7, d_date#9, i_item_sk#12, i_item_desc#13] + +(20) HashAggregate [codegen id : 3] +Input [3]: [d_date#9, i_item_sk#12, i_item_desc#13] +Keys [3]: [substr(i_item_desc#13, 1, 30) AS substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#16] +Results [4]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, count#17] + +(21) Exchange +Input [4]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, count#17] +Arguments: hashpartitioning(substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, 5), true, [id=#18] + +(22) HashAggregate [codegen id : 4] +Input [4]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9, count#17] +Keys [3]: [substr(i_item_desc#13, 1, 30)#15, i_item_sk#12, d_date#9] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#19] +Results [2]: [i_item_sk#12 AS item_sk#20, count(1)#19 AS count(1)#21] + +(23) Filter [codegen id : 4] +Input [2]: [item_sk#20, count(1)#21] +Condition : (count(1)#21 > 4) + +(24) Project [codegen id : 4] +Output [1]: [item_sk#20] +Input [2]: [item_sk#20, count(1)#21] + +(25) BroadcastExchange +Input [1]: [item_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] + +(26) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [item_sk#20] +Join condition: None + +(27) Project [codegen id : 13] +Output [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] +Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] + +(28) Scan parquet default.store_sales +Output [3]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 6] +Input [3]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] + +(30) Filter [codegen id : 6] +Input [3]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] +Condition : isnotnull(ss_customer_sk#23) + +(31) Scan parquet default.customer +Output [1]: [c_customer_sk#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 5] +Input [1]: [c_customer_sk#26] + +(33) Filter [codegen id : 5] +Input [1]: [c_customer_sk#26] +Condition : isnotnull(c_customer_sk#26) + +(34) BroadcastExchange +Input [1]: [c_customer_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(35) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#23] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(36) Project [codegen id : 6] +Output [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Input [4]: [ss_customer_sk#23, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] + +(37) HashAggregate [codegen id : 6] +Input [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Keys [1]: [c_customer_sk#26] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#28, isEmpty#29] +Results [3]: [c_customer_sk#26, sum#30, isEmpty#31] + +(38) Exchange +Input [3]: [c_customer_sk#26, sum#30, isEmpty#31] +Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#32] + +(39) HashAggregate [codegen id : 7] +Input [3]: [c_customer_sk#26, sum#30, isEmpty#31] +Keys [1]: [c_customer_sk#26] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#33] +Results [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#33 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] + +(40) Filter [codegen id : 7] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#35, [id=#36] as decimal(32,6)))), DecimalType(38,8), true))) + +(41) Project [codegen id : 7] +Output [1]: [c_customer_sk#26] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] + +(42) BroadcastExchange +Input [1]: [c_customer_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#37] + +(43) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(44) Scan parquet default.customer +Output [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(45) ColumnarToRow [codegen id : 11] +Input [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] + +(46) Filter [codegen id : 11] +Input [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] +Condition : isnotnull(c_customer_sk#26) + +(47) ReusedExchange [Reuses operator id: 38] +Output [3]: [c_customer_sk#26, sum#30, isEmpty#31] + +(48) HashAggregate [codegen id : 10] +Input [3]: [c_customer_sk#26, sum#30, isEmpty#31] +Keys [1]: [c_customer_sk#26] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#33] +Results [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#33 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] + +(49) Filter [codegen id : 10] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#35, [id=#36] as decimal(32,6)))), DecimalType(38,8), true))) + +(50) Project [codegen id : 10] +Output [1]: [c_customer_sk#26 AS c_customer_sk#26#40] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#34] + +(51) BroadcastExchange +Input [1]: [c_customer_sk#26#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#41] + +(52) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_customer_sk#26] +Right keys [1]: [c_customer_sk#26#40] +Join condition: None + +(53) BroadcastExchange +Input [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#42] + +(54) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_bill_customer_sk#2] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(55) Project [codegen id : 13] +Output [5]: [cs_sold_date_sk#1, cs_quantity#4, cs_list_price#5, c_first_name#38, c_last_name#39] +Input [7]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5, c_customer_sk#26, c_first_name#38, c_last_name#39] + +(56) Scan parquet default.date_dim +Output [3]: [d_date_sk#8, d_year#10, d_moy#43] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 12] +Input [3]: [d_date_sk#8, d_year#10, d_moy#43] + +(58) Filter [codegen id : 12] +Input [3]: [d_date_sk#8, d_year#10, d_moy#43] +Condition : ((((isnotnull(d_year#10) AND isnotnull(d_moy#43)) AND (d_year#10 = 2000)) AND (d_moy#43 = 2)) AND isnotnull(d_date_sk#8)) + +(59) Project [codegen id : 12] +Output [1]: [d_date_sk#8] +Input [3]: [d_date_sk#8, d_year#10, d_moy#43] + +(60) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#44] + +(61) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(62) Project [codegen id : 13] +Output [4]: [cs_quantity#4, cs_list_price#5, c_first_name#38, c_last_name#39] +Input [6]: [cs_sold_date_sk#1, cs_quantity#4, cs_list_price#5, c_first_name#38, c_last_name#39, d_date_sk#8] + +(63) HashAggregate [codegen id : 13] +Input [4]: [cs_quantity#4, cs_list_price#5, c_first_name#38, c_last_name#39] +Keys [2]: [c_last_name#39, c_first_name#38] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#45, isEmpty#46] +Results [4]: [c_last_name#39, c_first_name#38, sum#47, isEmpty#48] + +(64) Exchange +Input [4]: [c_last_name#39, c_first_name#38, sum#47, isEmpty#48] +Arguments: hashpartitioning(c_last_name#39, c_first_name#38, 5), true, [id=#49] + +(65) HashAggregate [codegen id : 14] +Input [4]: [c_last_name#39, c_first_name#38, sum#47, isEmpty#48] +Keys [2]: [c_last_name#39, c_first_name#38] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))#50] +Results [3]: [c_last_name#39, c_first_name#38, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))#50 AS sales#51] + +(66) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#52, ws_item_sk#53, ws_bill_customer_sk#54, ws_quantity#55, ws_list_price#56] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(67) ColumnarToRow [codegen id : 27] +Input [5]: [ws_sold_date_sk#52, ws_item_sk#53, ws_bill_customer_sk#54, ws_quantity#55, ws_list_price#56] + +(68) Filter [codegen id : 27] +Input [5]: [ws_sold_date_sk#52, ws_item_sk#53, ws_bill_customer_sk#54, ws_quantity#55, ws_list_price#56] +Condition : (isnotnull(ws_bill_customer_sk#54) AND isnotnull(ws_sold_date_sk#52)) + +(69) ReusedExchange [Reuses operator id: 25] +Output [1]: [item_sk#20] + +(70) BroadcastHashJoin [codegen id : 27] +Left keys [1]: [ws_item_sk#53] +Right keys [1]: [item_sk#20] +Join condition: None + +(71) Project [codegen id : 27] +Output [4]: [ws_sold_date_sk#52, ws_bill_customer_sk#54, ws_quantity#55, ws_list_price#56] +Input [5]: [ws_sold_date_sk#52, ws_item_sk#53, ws_bill_customer_sk#54, ws_quantity#55, ws_list_price#56] + +(72) ReusedExchange [Reuses operator id: 38] +Output [3]: [c_customer_sk#26, sum#57, isEmpty#58] + +(73) HashAggregate [codegen id : 21] +Input [3]: [c_customer_sk#26, sum#57, isEmpty#58] +Keys [1]: [c_customer_sk#26] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#59] +Results [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#59 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60] + +(74) Filter [codegen id : 21] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#35, [id=#36] as decimal(32,6)))), DecimalType(38,8), true))) + +(75) Project [codegen id : 21] +Output [1]: [c_customer_sk#26] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60] + +(76) BroadcastExchange +Input [1]: [c_customer_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#61] + +(77) BroadcastHashJoin [codegen id : 27] +Left keys [1]: [ws_bill_customer_sk#54] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(78) Scan parquet default.customer +Output [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(79) ColumnarToRow [codegen id : 25] +Input [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] + +(80) Filter [codegen id : 25] +Input [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] +Condition : isnotnull(c_customer_sk#26) + +(81) ReusedExchange [Reuses operator id: 38] +Output [3]: [c_customer_sk#26, sum#57, isEmpty#58] + +(82) HashAggregate [codegen id : 24] +Input [3]: [c_customer_sk#26, sum#57, isEmpty#58] +Keys [1]: [c_customer_sk#26] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#59] +Results [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#59 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60] + +(83) Filter [codegen id : 24] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#35, [id=#36] as decimal(32,6)))), DecimalType(38,8), true))) + +(84) Project [codegen id : 24] +Output [1]: [c_customer_sk#26 AS c_customer_sk#26#62] +Input [2]: [c_customer_sk#26, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#60] + +(85) BroadcastExchange +Input [1]: [c_customer_sk#26#62] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#63] + +(86) BroadcastHashJoin [codegen id : 25] +Left keys [1]: [c_customer_sk#26] +Right keys [1]: [c_customer_sk#26#62] +Join condition: None + +(87) BroadcastExchange +Input [3]: [c_customer_sk#26, c_first_name#38, c_last_name#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#64] + +(88) BroadcastHashJoin [codegen id : 27] +Left keys [1]: [ws_bill_customer_sk#54] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(89) Project [codegen id : 27] +Output [5]: [ws_sold_date_sk#52, ws_quantity#55, ws_list_price#56, c_first_name#38, c_last_name#39] +Input [7]: [ws_sold_date_sk#52, ws_bill_customer_sk#54, ws_quantity#55, ws_list_price#56, c_customer_sk#26, c_first_name#38, c_last_name#39] + +(90) ReusedExchange [Reuses operator id: 60] +Output [1]: [d_date_sk#8] + +(91) BroadcastHashJoin [codegen id : 27] +Left keys [1]: [ws_sold_date_sk#52] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(92) Project [codegen id : 27] +Output [4]: [ws_quantity#55, ws_list_price#56, c_first_name#38, c_last_name#39] +Input [6]: [ws_sold_date_sk#52, ws_quantity#55, ws_list_price#56, c_first_name#38, c_last_name#39, d_date_sk#8] + +(93) HashAggregate [codegen id : 27] +Input [4]: [ws_quantity#55, ws_list_price#56, c_first_name#38, c_last_name#39] +Keys [2]: [c_last_name#39, c_first_name#38] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#55 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#56 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#65, isEmpty#66] +Results [4]: [c_last_name#39, c_first_name#38, sum#67, isEmpty#68] + +(94) Exchange +Input [4]: [c_last_name#39, c_first_name#38, sum#67, isEmpty#68] +Arguments: hashpartitioning(c_last_name#39, c_first_name#38, 5), true, [id=#69] + +(95) HashAggregate [codegen id : 28] +Input [4]: [c_last_name#39, c_first_name#38, sum#67, isEmpty#68] +Keys [2]: [c_last_name#39, c_first_name#38] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#55 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#56 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#55 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#56 as decimal(12,2)))), DecimalType(18,2), true))#70] +Results [3]: [c_last_name#39, c_first_name#38, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#55 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#56 as decimal(12,2)))), DecimalType(18,2), true))#70 AS sales#71] + +(96) Union + +(97) TakeOrderedAndProject +Input [3]: [c_last_name#39, c_first_name#38, sales#51] +Arguments: 100, [c_last_name#39 ASC NULLS FIRST, c_first_name#38 ASC NULLS FIRST, sales#51 ASC NULLS FIRST], [c_last_name#39, c_first_name#38, sales#51] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 40 Hosting Expression = Subquery scalar-subquery#35, [id=#36] +* HashAggregate (119) ++- Exchange (118) + +- * HashAggregate (117) + +- * HashAggregate (116) + +- Exchange (115) + +- * HashAggregate (114) + +- * Project (113) + +- * BroadcastHashJoin Inner BuildRight (112) + :- * Project (106) + : +- * BroadcastHashJoin Inner BuildRight (105) + : :- * Filter (100) + : : +- * ColumnarToRow (99) + : : +- Scan parquet default.store_sales (98) + : +- BroadcastExchange (104) + : +- * Filter (103) + : +- * ColumnarToRow (102) + : +- Scan parquet default.customer (101) + +- BroadcastExchange (111) + +- * Project (110) + +- * Filter (109) + +- * ColumnarToRow (108) + +- Scan parquet default.date_dim (107) + + +(98) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(99) ColumnarToRow [codegen id : 3] +Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] + +(100) Filter [codegen id : 3] +Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] +Condition : (isnotnull(ss_customer_sk#23) AND isnotnull(ss_sold_date_sk#6)) + +(101) Scan parquet default.customer +Output [1]: [c_customer_sk#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(102) ColumnarToRow [codegen id : 1] +Input [1]: [c_customer_sk#26] + +(103) Filter [codegen id : 1] +Input [1]: [c_customer_sk#26] +Condition : isnotnull(c_customer_sk#26) + +(104) BroadcastExchange +Input [1]: [c_customer_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#72] + +(105) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_customer_sk#23] +Right keys [1]: [c_customer_sk#26] +Join condition: None + +(106) Project [codegen id : 3] +Output [4]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Input [5]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] + +(107) Scan parquet default.date_dim +Output [2]: [d_date_sk#8, d_year#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(108) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#8, d_year#10] + +(109) Filter [codegen id : 2] +Input [2]: [d_date_sk#8, d_year#10] +Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) + +(110) Project [codegen id : 2] +Output [1]: [d_date_sk#8] +Input [2]: [d_date_sk#8, d_year#10] + +(111) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#73] + +(112) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(113) Project [codegen id : 3] +Output [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Input [5]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26, d_date_sk#8] + +(114) HashAggregate [codegen id : 3] +Input [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] +Keys [1]: [c_customer_sk#26] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#74, isEmpty#75] +Results [3]: [c_customer_sk#26, sum#76, isEmpty#77] + +(115) Exchange +Input [3]: [c_customer_sk#26, sum#76, isEmpty#77] +Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#78] + +(116) HashAggregate [codegen id : 4] +Input [3]: [c_customer_sk#26, sum#76, isEmpty#77] +Keys [1]: [c_customer_sk#26] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#79] +Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#79 AS csales#80] + +(117) HashAggregate [codegen id : 4] +Input [1]: [csales#80] +Keys: [] +Functions [1]: [partial_max(csales#80)] +Aggregate Attributes [1]: [max#81] +Results [1]: [max#82] + +(118) Exchange +Input [1]: [max#82] +Arguments: SinglePartition, true, [id=#83] + +(119) HashAggregate [codegen id : 5] +Input [1]: [max#82] +Keys: [] +Functions [1]: [max(csales#80)] +Aggregate Attributes [1]: [max(csales#80)#84] +Results [1]: [max(csales#80)#84 AS tpcds_cmax#85] + +Subquery:2 Hosting operator id = 49 Hosting Expression = ReusedSubquery Subquery scalar-subquery#35, [id=#36] + +Subquery:3 Hosting operator id = 74 Hosting Expression = ReusedSubquery Subquery scalar-subquery#35, [id=#36] + +Subquery:4 Hosting operator id = 83 Hosting Expression = ReusedSubquery Subquery scalar-subquery#35, [id=#36] + + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23b/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23b/simplified.txt index 7880be764..f879f38d5 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23b/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q23b/simplified.txt @@ -1,138 +1,182 @@ -TakeOrderedAndProject [c_first_name,c_last_name,sales] +TakeOrderedAndProject [c_last_name,c_first_name,sales] Union - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] [sales,sum,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2)))] + WholeStageCodegen (14) + HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sales,sum,isEmpty] InputAdapter - Exchange [c_first_name,c_last_name] #1 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,cs_list_price,cs_quantity,sum,sum] [sum,sum] - Project [c_first_name,c_last_name,cs_list_price,cs_quantity] + Exchange [c_last_name,c_first_name] #1 + WholeStageCodegen (13) + HashAggregate [c_last_name,c_first_name,cs_quantity,cs_list_price] [sum,isEmpty,sum,isEmpty] + Project [cs_quantity,cs_list_price,c_first_name,c_last_name] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [c_first_name,c_last_name,cs_list_price,cs_quantity,cs_sold_date_sk] - BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - Project [cs_bill_customer_sk,cs_list_price,cs_quantity,cs_sold_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price,c_first_name,c_last_name] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] BroadcastHashJoin [cs_item_sk,item_sk] - Project [cs_bill_customer_sk,cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] - Filter [cs_bill_customer_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_list_price,cs_quantity,cs_sold_date_sk] + Filter [cs_bill_customer_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (4) Project [item_sk] Filter [count(1)] - HashAggregate [count,count(1),d_date,i_item_sk,substring(i_item_desc, 1, 30)] [count,count(1),count(1),item_sk] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] InputAdapter - Exchange [d_date,i_item_sk,substring(i_item_desc, 1, 30)] #3 - WholeStageCodegen - HashAggregate [count,count,d_date,i_item_desc,i_item_sk,substring(i_item_desc, 1, 30)] [count,count,substring(i_item_desc, 1, 30)] - Project [d_date,i_item_desc,i_item_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [d_date,ss_item_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_item_sk,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sold_date_sk] [ss_item_sk,ss_sold_date_sk] + Exchange [substr(i_item_desc, 1, 30),i_item_sk,d_date] #3 + WholeStageCodegen (3) + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [d_date,d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date,d_date_sk,d_year] [d_date,d_date_sk,d_year] + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_year] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [i_item_desc,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_desc,i_item_sk] [i_item_desc,i_item_sk] + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_desc] InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (7) Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #1 - WholeStageCodegen - HashAggregate [max,max(csales)] [max,max(csales),tpcds_cmax] + WholeStageCodegen (5) + HashAggregate [max] [max(csales),tpcds_cmax,max] InputAdapter Exchange #9 - WholeStageCodegen - HashAggregate [csales,max,max] [max,max] - HashAggregate [c_customer_sk,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] [csales,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] + WholeStageCodegen (4) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] InputAdapter Exchange [c_customer_sk] #10 - WholeStageCodegen - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price,sum,sum] [sum,sum] - Project [c_customer_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [c_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] - Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] [ss_customer_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + WholeStageCodegen (3) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [c_customer_sk] - Filter [c_customer_sk] - Scan parquet default.customer [c_customer_sk] [c_customer_sk] + BroadcastExchange #11 + WholeStageCodegen (1) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] InputAdapter - BroadcastExchange #11 - WholeStageCodegen + BroadcastExchange #12 + WholeStageCodegen (2) Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - HashAggregate [c_customer_sk,sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] [sum,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2))),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2)))] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] InputAdapter Exchange [c_customer_sk] #7 - WholeStageCodegen - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price,sum,sum] [sum,sum] - Project [c_customer_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [ss_customer_sk,ss_quantity,ss_sales_price] - Filter [ss_customer_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] [ss_customer_sk,ss_quantity,ss_sales_price] + WholeStageCodegen (6) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] InputAdapter BroadcastExchange #8 - WholeStageCodegen - Project [c_customer_sk] - Filter [c_customer_sk] - Scan parquet default.customer [c_customer_sk] [c_customer_sk] + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] InputAdapter - BroadcastExchange #12 - WholeStageCodegen + BroadcastExchange #13 + WholeStageCodegen (11) BroadcastHashJoin [c_customer_sk,c_customer_sk] - Project [c_customer_sk,c_first_name,c_last_name] - Filter [c_customer_sk] - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] InputAdapter - ReusedExchange [c_customer_sk] [c_customer_sk] #6 + BroadcastExchange #14 + WholeStageCodegen (10) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_sk,sum,isEmpty] #7 InputAdapter - BroadcastExchange #13 - WholeStageCodegen + BroadcastExchange #15 + WholeStageCodegen (12) Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] [sales,sum,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2)))] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (28) + HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sales,sum,isEmpty] InputAdapter - Exchange [c_first_name,c_last_name] #14 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,sum,sum,ws_list_price,ws_quantity] [sum,sum] - Project [c_first_name,c_last_name,ws_list_price,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [c_first_name,c_last_name,ws_list_price,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [ws_bill_customer_sk,ws_list_price,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [item_sk,ws_item_sk] - Project [ws_bill_customer_sk,ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] - Filter [ws_bill_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] [ws_bill_customer_sk,ws_item_sk,ws_list_price,ws_quantity,ws_sold_date_sk] + Exchange [c_last_name,c_first_name] #16 + WholeStageCodegen (27) + HashAggregate [c_last_name,c_first_name,ws_quantity,ws_list_price] [sum,isEmpty,sum,isEmpty] + Project [ws_quantity,ws_list_price,c_first_name,c_last_name] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price,c_first_name,c_last_name] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + BroadcastHashJoin [ws_item_sk,item_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] InputAdapter - ReusedExchange [item_sk] [item_sk] #2 + ReusedExchange [item_sk] #2 InputAdapter - ReusedExchange [c_customer_sk] [c_customer_sk] #6 + BroadcastExchange #17 + WholeStageCodegen (21) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_sk,sum,isEmpty] #7 InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #12 + BroadcastExchange #18 + WholeStageCodegen (25) + BroadcastHashJoin [c_customer_sk,c_customer_sk] + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #19 + WholeStageCodegen (24) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_sk,sum,isEmpty] #7 InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #13 + ReusedExchange [d_date_sk] #15 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/explain.txt index 4cb8ce151..ac5580565 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/explain.txt @@ -1,82 +1,477 @@ == Physical Plan == -*(8) Project [c_last_name#1, c_first_name#2, s_store_name#3, paid#4] -+- *(8) Filter (isnotnull(sum(netpaid#5)#6) && (cast(sum(netpaid#5)#6 as decimal(33,8)) > cast(Subquery subquery3246 as decimal(33,8)))) - : +- Subquery subquery3246 - : +- *(8) HashAggregate(keys=[], functions=[avg(netpaid#5)]) - : +- Exchange SinglePartition - : +- *(7) HashAggregate(keys=[], functions=[partial_avg(netpaid#5)]) - : +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) - : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 5) - : +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) - : +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] - : +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight - : :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] - : : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight - : : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] - : : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight - : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] - : : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight - : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] - : : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight - : : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] - : : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) - : : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] - : : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) - : : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] - : : : +- *(3) Filter isnotnull(i_item_sk#22) - : : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) - : +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] - : +- *(5) Filter isnotnull(ca_zip#18) - : +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct - +- *(8) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[sum(netpaid#5)]) - +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, 5) - +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[partial_sum(netpaid#5)]) - +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) - +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 5) - +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) - +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] - +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight - :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] - : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight - : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] - : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight - : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] - : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight - : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] - : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight - : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] - : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) - : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] - : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) - : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] - : : +- *(3) Filter ((isnotnull(i_color#9) && (i_color#9 = pale)) && isnotnull(i_item_sk#22)) - : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale), IsNotNull(i_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) - +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] - +- *(5) Filter isnotnull(ca_zip#18) - +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct \ No newline at end of file +* Project (42) ++- * Filter (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * HashAggregate (37) + +- Exchange (36) + +- * HashAggregate (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.store_returns (4) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.store (10) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.item (17) + : +- BroadcastExchange (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.customer (23) + +- BroadcastExchange (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet default.customer_address (29) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(3) Filter [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.store_returns +Output [2]: [sr_item_sk#6, sr_ticket_number#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] + +(6) Filter [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Condition : (isnotnull(sr_ticket_number#7) AND isnotnull(sr_item_sk#6)) + +(7) BroadcastExchange +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint)] +Right keys [2]: [sr_ticket_number#7, sr_item_sk#6] +Join condition: None + +(9) Project [codegen id : 6] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#6, sr_ticket_number#7] + +(10) Scan parquet default.store +Output [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(12) Filter [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Condition : (((isnotnull(s_market_id#11) AND (s_market_id#11 = 8)) AND isnotnull(s_store_sk#9)) AND isnotnull(s_zip#13)) + +(13) Project [codegen id : 2] +Output [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(14) BroadcastExchange +Input [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(15) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join condition: None + +(16) Project [codegen id : 6] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] + +(17) Scan parquet default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale), IsNotNull(i_item_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(19) Filter [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = pale)) AND isnotnull(i_item_sk#15)) + +(20) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] + +(21) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join condition: None + +(22) Project [codegen id : 6] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(23) Scan parquet default.customer +Output [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(25) Filter [codegen id : 4] +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_birth_country#25)) + +(26) BroadcastExchange +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(28) Project [codegen id : 6] +Output [12]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(29) Scan parquet default.customer_address +Output [3]: [ca_state#27, ca_zip#28, ca_country#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] + +(31) Filter [codegen id : 5] +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] +Condition : (isnotnull(ca_country#29) AND isnotnull(ca_zip#28)) + +(32) BroadcastExchange +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [id=#30] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [c_birth_country#25, s_zip#13] +Right keys [2]: [upper(ca_country#29), ca_zip#28] +Join condition: None + +(34) Project [codegen id : 6] +Output [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Input [15]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25, ca_state#27, ca_zip#28, ca_country#29] + +(35) HashAggregate [codegen id : 6] +Input [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#31] +Results [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#32] + +(36) Exchange +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#32] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), true, [id=#33] + +(37) HashAggregate [codegen id : 7] +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#32] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#34] +Results [4]: [c_last_name#24, c_first_name#23, s_store_name#10, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#34,17,2) AS netpaid#35] + +(38) HashAggregate [codegen id : 7] +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#10, netpaid#35] +Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#10] +Functions [1]: [partial_sum(netpaid#35)] +Aggregate Attributes [2]: [sum#36, isEmpty#37] +Results [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum#38, isEmpty#39] + +(39) Exchange +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum#38, isEmpty#39] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#10, 5), true, [id=#40] + +(40) HashAggregate [codegen id : 8] +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum#38, isEmpty#39] +Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#10] +Functions [1]: [sum(netpaid#35)] +Aggregate Attributes [1]: [sum(netpaid#35)#41] +Results [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum(netpaid#35)#41 AS paid#42, sum(netpaid#35)#41 AS sum(netpaid#35)#43] + +(41) Filter [codegen id : 8] +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, paid#42, sum(netpaid#35)#43] +Condition : (isnotnull(sum(netpaid#35)#43) AND (cast(sum(netpaid#35)#43 as decimal(33,8)) > cast(Subquery scalar-subquery#44, [id=#45] as decimal(33,8)))) + +(42) Project [codegen id : 8] +Output [4]: [c_last_name#24, c_first_name#23, s_store_name#10, paid#42] +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, paid#42, sum(netpaid#35)#43] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 41 Hosting Expression = Subquery scalar-subquery#44, [id=#45] +* HashAggregate (82) ++- Exchange (81) + +- * HashAggregate (80) + +- * HashAggregate (79) + +- Exchange (78) + +- * HashAggregate (77) + +- * Project (76) + +- * BroadcastHashJoin Inner BuildRight (75) + :- * Project (70) + : +- * BroadcastHashJoin Inner BuildRight (69) + : :- * Project (64) + : : +- * BroadcastHashJoin Inner BuildRight (63) + : : :- * Project (58) + : : : +- * BroadcastHashJoin Inner BuildRight (57) + : : : :- * Project (51) + : : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : : :- * Filter (45) + : : : : : +- * ColumnarToRow (44) + : : : : : +- Scan parquet default.store_sales (43) + : : : : +- BroadcastExchange (49) + : : : : +- * Filter (48) + : : : : +- * ColumnarToRow (47) + : : : : +- Scan parquet default.store_returns (46) + : : : +- BroadcastExchange (56) + : : : +- * Project (55) + : : : +- * Filter (54) + : : : +- * ColumnarToRow (53) + : : : +- Scan parquet default.store (52) + : : +- BroadcastExchange (62) + : : +- * Filter (61) + : : +- * ColumnarToRow (60) + : : +- Scan parquet default.item (59) + : +- BroadcastExchange (68) + : +- * Filter (67) + : +- * ColumnarToRow (66) + : +- Scan parquet default.customer (65) + +- BroadcastExchange (74) + +- * Filter (73) + +- * ColumnarToRow (72) + +- Scan parquet default.customer_address (71) + + +(43) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(45) Filter [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(46) Scan parquet default.store_returns +Output [2]: [sr_item_sk#6, sr_ticket_number#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] + +(48) Filter [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Condition : (isnotnull(sr_ticket_number#7) AND isnotnull(sr_item_sk#6)) + +(49) BroadcastExchange +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#46] + +(50) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint)] +Right keys [2]: [sr_ticket_number#7, sr_item_sk#6] +Join condition: None + +(51) Project [codegen id : 6] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#6, sr_ticket_number#7] + +(52) Scan parquet default.store +Output [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(53) ColumnarToRow [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(54) Filter [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Condition : (((isnotnull(s_market_id#11) AND (s_market_id#11 = 8)) AND isnotnull(s_store_sk#9)) AND isnotnull(s_zip#13)) + +(55) Project [codegen id : 2] +Output [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(56) BroadcastExchange +Input [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#47] + +(57) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join condition: None + +(58) Project [codegen id : 6] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] + +(59) Scan parquet default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(60) ColumnarToRow [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(61) Filter [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : isnotnull(i_item_sk#15) + +(62) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#48] + +(63) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join condition: None + +(64) Project [codegen id : 6] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(65) Scan parquet default.customer +Output [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(66) ColumnarToRow [codegen id : 4] +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(67) Filter [codegen id : 4] +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_birth_country#25)) + +(68) BroadcastExchange +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#49] + +(69) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(70) Project [codegen id : 6] +Output [12]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(71) Scan parquet default.customer_address +Output [3]: [ca_state#27, ca_zip#28, ca_country#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(72) ColumnarToRow [codegen id : 5] +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] + +(73) Filter [codegen id : 5] +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] +Condition : (isnotnull(ca_country#29) AND isnotnull(ca_zip#28)) + +(74) BroadcastExchange +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [id=#50] + +(75) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [c_birth_country#25, s_zip#13] +Right keys [2]: [upper(ca_country#29), ca_zip#28] +Join condition: None + +(76) Project [codegen id : 6] +Output [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Input [15]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25, ca_state#27, ca_zip#28, ca_country#29] + +(77) HashAggregate [codegen id : 6] +Input [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#51] +Results [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#52] + +(78) Exchange +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#52] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), true, [id=#53] + +(79) HashAggregate [codegen id : 7] +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#52] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#54] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#54,17,2) AS netpaid#35] + +(80) HashAggregate [codegen id : 7] +Input [1]: [netpaid#35] +Keys: [] +Functions [1]: [partial_avg(netpaid#35)] +Aggregate Attributes [2]: [sum#55, count#56] +Results [2]: [sum#57, count#58] + +(81) Exchange +Input [2]: [sum#57, count#58] +Arguments: SinglePartition, true, [id=#59] + +(82) HashAggregate [codegen id : 8] +Input [2]: [sum#57, count#58] +Keys: [] +Functions [1]: [avg(netpaid#35)] +Aggregate Attributes [1]: [avg(netpaid#35)#60] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#35)#60)), DecimalType(24,8), true) AS (CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6)))#61] + + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/simplified.txt index d8bda2e69..58b57dfaa 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24a/simplified.txt @@ -1,111 +1,125 @@ -WholeStageCodegen - Project [c_first_name,c_last_name,paid,s_store_name] +WholeStageCodegen (8) + Project [c_last_name,c_first_name,s_store_name,paid] Filter [sum(netpaid)] Subquery #1 - WholeStageCodegen - HashAggregate [avg(netpaid),count,sum] [(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),avg(netpaid),count,sum] + WholeStageCodegen (8) + HashAggregate [sum,count] [avg(netpaid),(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),sum,count] InputAdapter Exchange #8 - WholeStageCodegen - HashAggregate [count,count,netpaid,sum,sum] [count,count,sum,sum] - HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum,sum(UnscaledValue(ss_net_paid))] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] + WholeStageCodegen (7) + HashAggregate [netpaid] [sum,count,sum,count] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] InputAdapter - Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #9 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid,sum,sum] [sum,sum] - Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] - BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] - Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #9 + WholeStageCodegen (6) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [sr_item_sk,sr_ticket_number] - Filter [sr_item_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] + BroadcastExchange #10 + WholeStageCodegen (1) + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [s_state,s_store_name,s_store_sk,s_zip] + BroadcastExchange #11 + WholeStageCodegen (2) + Project [s_store_sk,s_store_name,s_state,s_zip] Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] [s_market_id,s_state,s_store_name,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] InputAdapter - BroadcastExchange #10 - WholeStageCodegen - Project [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] - Filter [i_item_sk] - Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + BroadcastExchange #12 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [c_birth_country,c_customer_sk,c_first_name,c_last_name] - Filter [c_birth_country,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] [c_birth_country,c_customer_sk,c_first_name,c_last_name] + BroadcastExchange #13 + WholeStageCodegen (4) + Filter [c_customer_sk,c_birth_country] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [ca_country,ca_state,ca_zip] - Filter [ca_zip] - Scan parquet default.customer_address [ca_country,ca_state,ca_zip] [ca_country,ca_state,ca_zip] - HashAggregate [c_first_name,c_last_name,s_store_name,sum,sum(netpaid)] [paid,sum,sum(netpaid),sum(netpaid)] + BroadcastExchange #14 + WholeStageCodegen (5) + Filter [ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_state,ca_zip,ca_country] + HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum(netpaid),sum,isEmpty] InputAdapter - Exchange [c_first_name,c_last_name,s_store_name] #1 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,netpaid,s_store_name,sum,sum] [sum,sum] - HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum,sum(UnscaledValue(ss_net_paid))] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] + Exchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen (7) + HashAggregate [c_last_name,c_first_name,s_store_name,netpaid] [sum,isEmpty,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] InputAdapter - Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #2 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid,sum,sum] [sum,sum] - Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] - BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] - Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #2 + WholeStageCodegen (6) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [sr_item_sk,sr_ticket_number] - Filter [sr_item_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] + WholeStageCodegen (1) + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [s_state,s_store_name,s_store_sk,s_zip] + WholeStageCodegen (2) + Project [s_store_sk,s_store_name,s_state,s_zip] Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] [s_market_id,s_state,s_store_name,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] - Filter [i_color,i_item_sk] - Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + WholeStageCodegen (3) + Filter [i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] InputAdapter BroadcastExchange #6 - WholeStageCodegen - Project [c_birth_country,c_customer_sk,c_first_name,c_last_name] - Filter [c_birth_country,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] [c_birth_country,c_customer_sk,c_first_name,c_last_name] + WholeStageCodegen (4) + Filter [c_customer_sk,c_birth_country] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] InputAdapter BroadcastExchange #7 - WholeStageCodegen - Project [ca_country,ca_state,ca_zip] - Filter [ca_zip] - Scan parquet default.customer_address [ca_country,ca_state,ca_zip] [ca_country,ca_state,ca_zip] + WholeStageCodegen (5) + Filter [ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_state,ca_zip,ca_country] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/explain.txt index dbaafd47b..0cf8c16a0 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/explain.txt @@ -1,82 +1,477 @@ == Physical Plan == -*(8) Project [c_last_name#1, c_first_name#2, s_store_name#3, paid#4] -+- *(8) Filter (isnotnull(sum(netpaid#5)#6) && (cast(sum(netpaid#5)#6 as decimal(33,8)) > cast(Subquery subquery3290 as decimal(33,8)))) - : +- Subquery subquery3290 - : +- *(8) HashAggregate(keys=[], functions=[avg(netpaid#5)]) - : +- Exchange SinglePartition - : +- *(7) HashAggregate(keys=[], functions=[partial_avg(netpaid#5)]) - : +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) - : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 5) - : +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) - : +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] - : +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight - : :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] - : : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight - : : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] - : : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight - : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] - : : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight - : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] - : : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight - : : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] - : : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) - : : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] - : : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) - : : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] - : : : +- *(3) Filter isnotnull(i_item_sk#22) - : : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) - : +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] - : +- *(5) Filter isnotnull(ca_zip#18) - : +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct - +- *(8) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[sum(netpaid#5)]) - +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, 5) - +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3], functions=[partial_sum(netpaid#5)]) - +- *(7) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[sum(UnscaledValue(ss_net_paid#14))]) - +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13, 5) - +- *(6) HashAggregate(keys=[c_last_name#1, c_first_name#2, s_store_name#3, ca_state#7, s_state#8, i_color#9, i_current_price#10, i_manager_id#11, i_units#12, i_size#13], functions=[partial_sum(UnscaledValue(ss_net_paid#14))]) - +- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, ca_state#7] - +- *(6) BroadcastHashJoin [c_birth_country#15, s_zip#16], [upper(ca_country#17), ca_zip#18], Inner, BuildRight - :- *(6) Project [ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11, c_first_name#2, c_last_name#1, c_birth_country#15] - : +- *(6) BroadcastHashJoin [ss_customer_sk#19], [c_customer_sk#20], Inner, BuildRight - : :- *(6) Project [ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] - : : +- *(6) BroadcastHashJoin [ss_item_sk#21], [i_item_sk#22], Inner, BuildRight - : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_net_paid#14, s_store_name#3, s_state#8, s_zip#16] - : : : +- *(6) BroadcastHashJoin [ss_store_sk#23], [s_store_sk#24], Inner, BuildRight - : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_net_paid#14] - : : : : +- *(6) BroadcastHashJoin [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#21 as bigint)], [sr_ticket_number#26, sr_item_sk#27], Inner, BuildRight - : : : : :- *(6) Project [ss_item_sk#21, ss_customer_sk#19, ss_store_sk#23, ss_ticket_number#25, ss_net_paid#14] - : : : : : +- *(6) Filter (((isnotnull(ss_ticket_number#25) && isnotnull(ss_item_sk#21)) && isnotnull(ss_store_sk#23)) && isnotnull(ss_customer_sk#19)) - : : : : : +- *(6) FileScan parquet default.store_sales[ss_item_sk#21,ss_customer_sk#19,ss_store_sk#23,ss_ticket_number#25,ss_net_paid#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_custome..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [s_store_sk#24, s_store_name#3, s_state#8, s_zip#16] - : : : +- *(2) Filter (((isnotnull(s_market_id#28) && (s_market_id#28 = 8)) && isnotnull(s_store_sk#24)) && isnotnull(s_zip#16)) - : : : +- *(2) FileScan parquet default.store[s_store_sk#24,s_store_name#3,s_market_id#28,s_state#8,s_zip#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [i_item_sk#22, i_current_price#10, i_size#13, i_color#9, i_units#12, i_manager_id#11] - : : +- *(3) Filter ((isnotnull(i_color#9) && (i_color#9 = chiffon)) && isnotnull(i_item_sk#22)) - : : +- *(3) FileScan parquet default.item[i_item_sk#22,i_current_price#10,i_size#13,i_color#9,i_units#12,i_manager_id#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_color), EqualTo(i_color,chiffon), IsNotNull(i_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(upper(input[2, string, true]), input[1, string, true])) - +- *(5) Project [ca_state#7, ca_zip#18, ca_country#17] - +- *(5) Filter isnotnull(ca_zip#18) - +- *(5) FileScan parquet default.customer_address[ca_state#7,ca_zip#18,ca_country#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_zip)], ReadSchema: struct \ No newline at end of file +* Project (42) ++- * Filter (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * HashAggregate (37) + +- Exchange (36) + +- * HashAggregate (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.store_returns (4) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.store (10) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.item (17) + : +- BroadcastExchange (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.customer (23) + +- BroadcastExchange (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet default.customer_address (29) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(3) Filter [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.store_returns +Output [2]: [sr_item_sk#6, sr_ticket_number#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] + +(6) Filter [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Condition : (isnotnull(sr_ticket_number#7) AND isnotnull(sr_item_sk#6)) + +(7) BroadcastExchange +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint)] +Right keys [2]: [sr_ticket_number#7, sr_item_sk#6] +Join condition: None + +(9) Project [codegen id : 6] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#6, sr_ticket_number#7] + +(10) Scan parquet default.store +Output [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(12) Filter [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Condition : (((isnotnull(s_market_id#11) AND (s_market_id#11 = 8)) AND isnotnull(s_store_sk#9)) AND isnotnull(s_zip#13)) + +(13) Project [codegen id : 2] +Output [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(14) BroadcastExchange +Input [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(15) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join condition: None + +(16) Project [codegen id : 6] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] + +(17) Scan parquet default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_color), EqualTo(i_color,chiffon), IsNotNull(i_item_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(19) Filter [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = chiffon)) AND isnotnull(i_item_sk#15)) + +(20) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] + +(21) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join condition: None + +(22) Project [codegen id : 6] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(23) Scan parquet default.customer +Output [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(25) Filter [codegen id : 4] +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_birth_country#25)) + +(26) BroadcastExchange +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(28) Project [codegen id : 6] +Output [12]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(29) Scan parquet default.customer_address +Output [3]: [ca_state#27, ca_zip#28, ca_country#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] + +(31) Filter [codegen id : 5] +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] +Condition : (isnotnull(ca_country#29) AND isnotnull(ca_zip#28)) + +(32) BroadcastExchange +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [id=#30] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [c_birth_country#25, s_zip#13] +Right keys [2]: [upper(ca_country#29), ca_zip#28] +Join condition: None + +(34) Project [codegen id : 6] +Output [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Input [15]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25, ca_state#27, ca_zip#28, ca_country#29] + +(35) HashAggregate [codegen id : 6] +Input [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#31] +Results [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#32] + +(36) Exchange +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#32] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), true, [id=#33] + +(37) HashAggregate [codegen id : 7] +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#32] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#34] +Results [4]: [c_last_name#24, c_first_name#23, s_store_name#10, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#34,17,2) AS netpaid#35] + +(38) HashAggregate [codegen id : 7] +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#10, netpaid#35] +Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#10] +Functions [1]: [partial_sum(netpaid#35)] +Aggregate Attributes [2]: [sum#36, isEmpty#37] +Results [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum#38, isEmpty#39] + +(39) Exchange +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum#38, isEmpty#39] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#10, 5), true, [id=#40] + +(40) HashAggregate [codegen id : 8] +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum#38, isEmpty#39] +Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#10] +Functions [1]: [sum(netpaid#35)] +Aggregate Attributes [1]: [sum(netpaid#35)#41] +Results [5]: [c_last_name#24, c_first_name#23, s_store_name#10, sum(netpaid#35)#41 AS paid#42, sum(netpaid#35)#41 AS sum(netpaid#35)#43] + +(41) Filter [codegen id : 8] +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, paid#42, sum(netpaid#35)#43] +Condition : (isnotnull(sum(netpaid#35)#43) AND (cast(sum(netpaid#35)#43 as decimal(33,8)) > cast(Subquery scalar-subquery#44, [id=#45] as decimal(33,8)))) + +(42) Project [codegen id : 8] +Output [4]: [c_last_name#24, c_first_name#23, s_store_name#10, paid#42] +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#10, paid#42, sum(netpaid#35)#43] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 41 Hosting Expression = Subquery scalar-subquery#44, [id=#45] +* HashAggregate (82) ++- Exchange (81) + +- * HashAggregate (80) + +- * HashAggregate (79) + +- Exchange (78) + +- * HashAggregate (77) + +- * Project (76) + +- * BroadcastHashJoin Inner BuildRight (75) + :- * Project (70) + : +- * BroadcastHashJoin Inner BuildRight (69) + : :- * Project (64) + : : +- * BroadcastHashJoin Inner BuildRight (63) + : : :- * Project (58) + : : : +- * BroadcastHashJoin Inner BuildRight (57) + : : : :- * Project (51) + : : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : : :- * Filter (45) + : : : : : +- * ColumnarToRow (44) + : : : : : +- Scan parquet default.store_sales (43) + : : : : +- BroadcastExchange (49) + : : : : +- * Filter (48) + : : : : +- * ColumnarToRow (47) + : : : : +- Scan parquet default.store_returns (46) + : : : +- BroadcastExchange (56) + : : : +- * Project (55) + : : : +- * Filter (54) + : : : +- * ColumnarToRow (53) + : : : +- Scan parquet default.store (52) + : : +- BroadcastExchange (62) + : : +- * Filter (61) + : : +- * ColumnarToRow (60) + : : +- Scan parquet default.item (59) + : +- BroadcastExchange (68) + : +- * Filter (67) + : +- * ColumnarToRow (66) + : +- Scan parquet default.customer (65) + +- BroadcastExchange (74) + +- * Filter (73) + +- * ColumnarToRow (72) + +- Scan parquet default.customer_address (71) + + +(43) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(45) Filter [codegen id : 6] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(46) Scan parquet default.store_returns +Output [2]: [sr_item_sk#6, sr_ticket_number#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] + +(48) Filter [codegen id : 1] +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Condition : (isnotnull(sr_ticket_number#7) AND isnotnull(sr_item_sk#6)) + +(49) BroadcastExchange +Input [2]: [sr_item_sk#6, sr_ticket_number#7] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#46] + +(50) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint)] +Right keys [2]: [sr_ticket_number#7, sr_item_sk#6] +Join condition: None + +(51) Project [codegen id : 6] +Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#6, sr_ticket_number#7] + +(52) Scan parquet default.store +Output [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(53) ColumnarToRow [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(54) Filter [codegen id : 2] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] +Condition : (((isnotnull(s_market_id#11) AND (s_market_id#11 = 8)) AND isnotnull(s_store_sk#9)) AND isnotnull(s_zip#13)) + +(55) Project [codegen id : 2] +Output [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Input [5]: [s_store_sk#9, s_store_name#10, s_market_id#11, s_state#12, s_zip#13] + +(56) BroadcastExchange +Input [4]: [s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#47] + +(57) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#9] +Join condition: None + +(58) Project [codegen id : 6] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#9, s_store_name#10, s_state#12, s_zip#13] + +(59) Scan parquet default.item +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(60) ColumnarToRow [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(61) Filter [codegen id : 3] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : isnotnull(i_item_sk#15) + +(62) BroadcastExchange +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#48] + +(63) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#15] +Join condition: None + +(64) Project [codegen id : 6] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] + +(65) Scan parquet default.customer +Output [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct + +(66) ColumnarToRow [codegen id : 4] +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(67) Filter [codegen id : 4] +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_birth_country#25)) + +(68) BroadcastExchange +Input [4]: [c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#49] + +(69) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#22] +Join condition: None + +(70) Project [codegen id : 6] +Output [12]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] + +(71) Scan parquet default.customer_address +Output [3]: [ca_state#27, ca_zip#28, ca_country#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] +ReadSchema: struct + +(72) ColumnarToRow [codegen id : 5] +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] + +(73) Filter [codegen id : 5] +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] +Condition : (isnotnull(ca_country#29) AND isnotnull(ca_zip#28)) + +(74) BroadcastExchange +Input [3]: [ca_state#27, ca_zip#28, ca_country#29] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [id=#50] + +(75) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [c_birth_country#25, s_zip#13] +Right keys [2]: [upper(ca_country#29), ca_zip#28] +Join condition: None + +(76) Project [codegen id : 6] +Output [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Input [15]: [ss_net_paid#5, s_store_name#10, s_state#12, s_zip#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, c_birth_country#25, ca_state#27, ca_zip#28, ca_country#29] + +(77) HashAggregate [codegen id : 6] +Input [11]: [ss_net_paid#5, s_store_name#10, s_state#12, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum#51] +Results [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#52] + +(78) Exchange +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#52] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), true, [id=#53] + +(79) HashAggregate [codegen id : 7] +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#52] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#10, ca_state#27, s_state#12, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] +Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#54] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#54,17,2) AS netpaid#35] + +(80) HashAggregate [codegen id : 7] +Input [1]: [netpaid#35] +Keys: [] +Functions [1]: [partial_avg(netpaid#35)] +Aggregate Attributes [2]: [sum#55, count#56] +Results [2]: [sum#57, count#58] + +(81) Exchange +Input [2]: [sum#57, count#58] +Arguments: SinglePartition, true, [id=#59] + +(82) HashAggregate [codegen id : 8] +Input [2]: [sum#57, count#58] +Keys: [] +Functions [1]: [avg(netpaid#35)] +Aggregate Attributes [1]: [avg(netpaid#35)#60] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#35)#60)), DecimalType(24,8), true) AS (CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6)))#61] + + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/simplified.txt index d8bda2e69..58b57dfaa 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q24b/simplified.txt @@ -1,111 +1,125 @@ -WholeStageCodegen - Project [c_first_name,c_last_name,paid,s_store_name] +WholeStageCodegen (8) + Project [c_last_name,c_first_name,s_store_name,paid] Filter [sum(netpaid)] Subquery #1 - WholeStageCodegen - HashAggregate [avg(netpaid),count,sum] [(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),avg(netpaid),count,sum] + WholeStageCodegen (8) + HashAggregate [sum,count] [avg(netpaid),(CAST(0.05 AS DECIMAL(21,6)) * CAST(avg(netpaid) AS DECIMAL(21,6))),sum,count] InputAdapter Exchange #8 - WholeStageCodegen - HashAggregate [count,count,netpaid,sum,sum] [count,count,sum,sum] - HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum,sum(UnscaledValue(ss_net_paid))] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] + WholeStageCodegen (7) + HashAggregate [netpaid] [sum,count,sum,count] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] InputAdapter - Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #9 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid,sum,sum] [sum,sum] - Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] - BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] - Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #9 + WholeStageCodegen (6) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [sr_item_sk,sr_ticket_number] - Filter [sr_item_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] + BroadcastExchange #10 + WholeStageCodegen (1) + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [s_state,s_store_name,s_store_sk,s_zip] + BroadcastExchange #11 + WholeStageCodegen (2) + Project [s_store_sk,s_store_name,s_state,s_zip] Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] [s_market_id,s_state,s_store_name,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] InputAdapter - BroadcastExchange #10 - WholeStageCodegen - Project [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] - Filter [i_item_sk] - Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + BroadcastExchange #12 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [c_birth_country,c_customer_sk,c_first_name,c_last_name] - Filter [c_birth_country,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] [c_birth_country,c_customer_sk,c_first_name,c_last_name] + BroadcastExchange #13 + WholeStageCodegen (4) + Filter [c_customer_sk,c_birth_country] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [ca_country,ca_state,ca_zip] - Filter [ca_zip] - Scan parquet default.customer_address [ca_country,ca_state,ca_zip] [ca_country,ca_state,ca_zip] - HashAggregate [c_first_name,c_last_name,s_store_name,sum,sum(netpaid)] [paid,sum,sum(netpaid),sum(netpaid)] + BroadcastExchange #14 + WholeStageCodegen (5) + Filter [ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_state,ca_zip,ca_country] + HashAggregate [c_last_name,c_first_name,s_store_name,sum,isEmpty] [sum(netpaid),paid,sum(netpaid),sum,isEmpty] InputAdapter - Exchange [c_first_name,c_last_name,s_store_name] #1 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,netpaid,s_store_name,sum,sum] [sum,sum] - HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,sum,sum(UnscaledValue(ss_net_paid))] [netpaid,sum,sum(UnscaledValue(ss_net_paid))] + Exchange [c_last_name,c_first_name,s_store_name] #1 + WholeStageCodegen (7) + HashAggregate [c_last_name,c_first_name,s_store_name,netpaid] [sum,isEmpty,sum,isEmpty] + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,sum] [sum(UnscaledValue(ss_net_paid)),netpaid,sum] InputAdapter - Exchange [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name] #2 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid,sum,sum] [sum,sum] - Project [c_first_name,c_last_name,ca_state,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,ss_net_paid] - BroadcastHashJoin [c_birth_country,ca_country,ca_zip,s_zip] - Project [c_birth_country,c_first_name,c_last_name,i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_net_paid] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [i_color,i_current_price,i_manager_id,i_size,i_units,s_state,s_store_name,s_zip,ss_customer_sk,ss_net_paid] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [s_state,s_store_name,s_zip,ss_customer_sk,ss_item_sk,ss_net_paid] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_paid,ss_store_sk,ss_ticket_number] + Exchange [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size] #2 + WholeStageCodegen (6) + HashAggregate [c_last_name,c_first_name,s_store_name,ca_state,s_state,i_color,i_current_price,i_manager_id,i_units,i_size,ss_net_paid] [sum,sum] + Project [ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + BroadcastHashJoin [c_birth_country,s_zip,ca_country,ca_zip] + Project [ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_net_paid] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [sr_item_sk,sr_ticket_number] - Filter [sr_item_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] + WholeStageCodegen (1) + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [s_state,s_store_name,s_store_sk,s_zip] + WholeStageCodegen (2) + Project [s_store_sk,s_store_name,s_state,s_zip] Filter [s_market_id,s_store_sk,s_zip] - Scan parquet default.store [s_market_id,s_state,s_store_name,s_store_sk,s_zip] [s_market_id,s_state,s_store_name,s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] - Filter [i_color,i_item_sk] - Scan parquet default.item [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] [i_color,i_current_price,i_item_sk,i_manager_id,i_size,i_units] + WholeStageCodegen (3) + Filter [i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] InputAdapter BroadcastExchange #6 - WholeStageCodegen - Project [c_birth_country,c_customer_sk,c_first_name,c_last_name] - Filter [c_birth_country,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_customer_sk,c_first_name,c_last_name] [c_birth_country,c_customer_sk,c_first_name,c_last_name] + WholeStageCodegen (4) + Filter [c_customer_sk,c_birth_country] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] InputAdapter BroadcastExchange #7 - WholeStageCodegen - Project [ca_country,ca_state,ca_zip] - Filter [ca_zip] - Scan parquet default.customer_address [ca_country,ca_state,ca_zip] [ca_country,ca_state,ca_zip] + WholeStageCodegen (5) + Filter [ca_country,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_state,ca_zip,ca_country] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/explain.txt index 1a7d675da..6bdd709a7 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/explain.txt @@ -1,50 +1,269 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST,s_store_id#3 ASC NULLS FIRST,s_store_name#4 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,s_store_id#3,s_store_name#4,store_sales_profit#5,store_returns_loss#6,catalog_sales_profit#7]) -+- *(9) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4], functions=[sum(UnscaledValue(ss_net_profit#8)), sum(UnscaledValue(sr_net_loss#9)), sum(UnscaledValue(cs_net_profit#10))]) - +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4, 5) - +- *(8) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4], functions=[partial_sum(UnscaledValue(ss_net_profit#8)), partial_sum(UnscaledValue(sr_net_loss#9)), partial_sum(UnscaledValue(cs_net_profit#10))]) - +- *(8) Project [ss_net_profit#8, sr_net_loss#9, cs_net_profit#10, s_store_id#3, s_store_name#4, i_item_id#1, i_item_desc#2] - +- *(8) BroadcastHashJoin [ss_item_sk#11], [i_item_sk#12], Inner, BuildRight - :- *(8) Project [ss_item_sk#11, ss_net_profit#8, sr_net_loss#9, cs_net_profit#10, s_store_id#3, s_store_name#4] - : +- *(8) BroadcastHashJoin [ss_store_sk#13], [s_store_sk#14], Inner, BuildRight - : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_net_loss#9, cs_net_profit#10] - : : +- *(8) BroadcastHashJoin [cs_sold_date_sk#15], [d_date_sk#16], Inner, BuildRight - : : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_net_loss#9, cs_sold_date_sk#15, cs_net_profit#10] - : : : +- *(8) BroadcastHashJoin [sr_returned_date_sk#17], [cast(d_date_sk#18 as bigint)], Inner, BuildRight - : : : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_returned_date_sk#17, sr_net_loss#9, cs_sold_date_sk#15, cs_net_profit#10] - : : : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight - : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_returned_date_sk#17, sr_net_loss#9, cs_sold_date_sk#15, cs_net_profit#10] - : : : : : +- *(8) BroadcastHashJoin [sr_customer_sk#21, sr_item_sk#22], [cast(cs_bill_customer_sk#23 as bigint), cast(cs_item_sk#24 as bigint)], Inner, BuildRight - : : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_store_sk#13, ss_net_profit#8, sr_returned_date_sk#17, sr_item_sk#22, sr_customer_sk#21, sr_net_loss#9] - : : : : : : +- *(8) BroadcastHashJoin [cast(ss_customer_sk#25 as bigint), cast(ss_item_sk#11 as bigint), cast(ss_ticket_number#26 as bigint)], [sr_customer_sk#21, sr_item_sk#22, sr_ticket_number#27], Inner, BuildRight - : : : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_customer_sk#25, ss_store_sk#13, ss_ticket_number#26, ss_net_profit#8] - : : : : : : : +- *(8) Filter ((((isnotnull(ss_ticket_number#26) && isnotnull(ss_item_sk#11)) && isnotnull(ss_customer_sk#25)) && isnotnull(ss_sold_date_sk#19)) && isnotnull(ss_store_sk#13)) - : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#11,ss_customer_sk#25,ss_store_sk#13,ss_ticket_number#26,ss_net_profit#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(3) Project [d_date_sk#20] - : : : : +- *(3) Filter ((((isnotnull(d_moy#28) && isnotnull(d_year#29)) && (d_moy#28 = 4)) && (d_year#29 = 2001)) && isnotnull(d_date_sk#20)) - : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_year#29,d_moy#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,4), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(4) Project [d_date_sk#18] - : : : +- *(4) Filter (((((isnotnull(d_year#30) && isnotnull(d_moy#31)) && (d_moy#31 >= 4)) && (d_moy#31 <= 10)) && (d_year#30 = 2001)) && isnotnull(d_date_sk#18)) - : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#30,d_moy#31] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(5) Project [d_date_sk#16] - : : +- *(5) Filter (((((isnotnull(d_year#32) && isnotnull(d_moy#33)) && (d_moy#33 >= 4)) && (d_moy#33 <= 10)) && (d_year#32 = 2001)) && isnotnull(d_date_sk#16)) - : : +- *(5) FileScan parquet default.date_dim[d_date_sk#16,d_year#32,d_moy#33] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), Equ..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(6) Project [s_store_sk#14, s_store_id#3, s_store_name#4] - : +- *(6) Filter isnotnull(s_store_sk#14) - : +- *(6) FileScan parquet default.store[s_store_sk#14,s_store_id#3,s_store_name#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(7) Project [i_item_sk#12, i_item_id#1, i_item_desc#2] - +- *(7) Filter isnotnull(i_item_sk#12) - +- *(7) FileScan parquet default.item[i_item_sk#12,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (48) ++- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * Project (44) + +- * BroadcastHashJoin Inner BuildRight (43) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (32) + : : +- * BroadcastHashJoin Inner BuildRight (31) + : : :- * Project (29) + : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : :- * Project (22) + : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : :- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.store_returns (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet default.catalog_sales (10) + : : : : +- BroadcastExchange (20) + : : : : +- * Project (19) + : : : : +- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet default.date_dim (16) + : : : +- BroadcastExchange (27) + : : : +- * Project (26) + : : : +- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet default.date_dim (23) + : : +- ReusedExchange (30) + : +- BroadcastExchange (36) + : +- * Filter (35) + : +- * ColumnarToRow (34) + : +- Scan parquet default.store (33) + +- BroadcastExchange (42) + +- * Filter (41) + +- * ColumnarToRow (40) + +- Scan parquet default.item (39) + + +(1) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_net_profit#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_net_profit#6] + +(3) Filter [codegen id : 8] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_net_profit#6] +Condition : ((((isnotnull(ss_customer_sk#3) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_ticket_number#5)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) + +(4) Scan parquet default.store_returns +Output [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_net_loss#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_net_loss#11] + +(6) Filter [codegen id : 1] +Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_net_loss#11] +Condition : (((isnotnull(sr_customer_sk#9) AND isnotnull(sr_item_sk#8)) AND isnotnull(sr_ticket_number#10)) AND isnotnull(sr_returned_date_sk#7)) + +(7) BroadcastExchange +Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_net_loss#11] +Arguments: HashedRelationBroadcastMode(List(input[2, bigint, false], input[1, bigint, false], input[3, bigint, false]),false), [id=#12] + +(8) BroadcastHashJoin [codegen id : 8] +Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] +Right keys [3]: [sr_customer_sk#9, sr_item_sk#8, sr_ticket_number#10] +Join condition: None + +(9) Project [codegen id : 8] +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_net_loss#11] +Input [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_net_profit#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_net_loss#11] + +(10) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16] + +(12) Filter [codegen id : 2] +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16] +Condition : ((isnotnull(cs_bill_customer_sk#14) AND isnotnull(cs_item_sk#15)) AND isnotnull(cs_sold_date_sk#13)) + +(13) BroadcastExchange +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint), cast(input[2, int, false] as bigint)),false), [id=#17] + +(14) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [sr_customer_sk#9, sr_item_sk#8] +Right keys [2]: [cast(cs_bill_customer_sk#14 as bigint), cast(cs_item_sk#15 as bigint)] +Join condition: None + +(15) Project [codegen id : 8] +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_returned_date_sk#7, sr_net_loss#11, cs_sold_date_sk#13, cs_net_profit#16] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_net_loss#11, cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16] + +(16) Scan parquet default.date_dim +Output [3]: [d_date_sk#18, d_year#19, d_moy#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,4), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#18, d_year#19, d_moy#20] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#18, d_year#19, d_moy#20] +Condition : ((((isnotnull(d_moy#20) AND isnotnull(d_year#19)) AND (d_moy#20 = 4)) AND (d_year#19 = 2001)) AND isnotnull(d_date_sk#18)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#18] +Input [3]: [d_date_sk#18, d_year#19, d_moy#20] + +(20) BroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(21) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#18] +Join condition: None + +(22) Project [codegen id : 8] +Output [7]: [ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_returned_date_sk#7, sr_net_loss#11, cs_sold_date_sk#13, cs_net_profit#16] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_returned_date_sk#7, sr_net_loss#11, cs_sold_date_sk#13, cs_net_profit#16, d_date_sk#18] + +(23) Scan parquet default.date_dim +Output [3]: [d_date_sk#22, d_year#23, d_moy#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] + +(25) Filter [codegen id : 4] +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] +Condition : (((((isnotnull(d_moy#24) AND isnotnull(d_year#23)) AND (d_moy#24 >= 4)) AND (d_moy#24 <= 10)) AND (d_year#23 = 2001)) AND isnotnull(d_date_sk#22)) + +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#22] +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] + +(27) BroadcastExchange +Input [1]: [d_date_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] + +(28) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [sr_returned_date_sk#7] +Right keys [1]: [cast(d_date_sk#22 as bigint)] +Join condition: None + +(29) Project [codegen id : 8] +Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_net_loss#11, cs_sold_date_sk#13, cs_net_profit#16] +Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_returned_date_sk#7, sr_net_loss#11, cs_sold_date_sk#13, cs_net_profit#16, d_date_sk#22] + +(30) ReusedExchange [Reuses operator id: 27] +Output [1]: [d_date_sk#26] + +(31) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#13] +Right keys [1]: [d_date_sk#26] +Join condition: None + +(32) Project [codegen id : 8] +Output [5]: [ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_net_loss#11, cs_net_profit#16] +Input [7]: [ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_net_loss#11, cs_sold_date_sk#13, cs_net_profit#16, d_date_sk#26] + +(33) Scan parquet default.store +Output [3]: [s_store_sk#27, s_store_id#28, s_store_name#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 6] +Input [3]: [s_store_sk#27, s_store_id#28, s_store_name#29] + +(35) Filter [codegen id : 6] +Input [3]: [s_store_sk#27, s_store_id#28, s_store_name#29] +Condition : isnotnull(s_store_sk#27) + +(36) BroadcastExchange +Input [3]: [s_store_sk#27, s_store_id#28, s_store_name#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#30] + +(37) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#27] +Join condition: None + +(38) Project [codegen id : 8] +Output [6]: [ss_item_sk#2, ss_net_profit#6, sr_net_loss#11, cs_net_profit#16, s_store_id#28, s_store_name#29] +Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_net_profit#6, sr_net_loss#11, cs_net_profit#16, s_store_sk#27, s_store_id#28, s_store_name#29] + +(39) Scan parquet default.item +Output [3]: [i_item_sk#31, i_item_id#32, i_item_desc#33] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 7] +Input [3]: [i_item_sk#31, i_item_id#32, i_item_desc#33] + +(41) Filter [codegen id : 7] +Input [3]: [i_item_sk#31, i_item_id#32, i_item_desc#33] +Condition : isnotnull(i_item_sk#31) + +(42) BroadcastExchange +Input [3]: [i_item_sk#31, i_item_id#32, i_item_desc#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#34] + +(43) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#31] +Join condition: None + +(44) Project [codegen id : 8] +Output [7]: [ss_net_profit#6, sr_net_loss#11, cs_net_profit#16, s_store_id#28, s_store_name#29, i_item_id#32, i_item_desc#33] +Input [9]: [ss_item_sk#2, ss_net_profit#6, sr_net_loss#11, cs_net_profit#16, s_store_id#28, s_store_name#29, i_item_sk#31, i_item_id#32, i_item_desc#33] + +(45) HashAggregate [codegen id : 8] +Input [7]: [ss_net_profit#6, sr_net_loss#11, cs_net_profit#16, s_store_id#28, s_store_name#29, i_item_id#32, i_item_desc#33] +Keys [4]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29] +Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#6)), partial_sum(UnscaledValue(sr_net_loss#11)), partial_sum(UnscaledValue(cs_net_profit#16))] +Aggregate Attributes [3]: [sum#35, sum#36, sum#37] +Results [7]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, sum#38, sum#39, sum#40] + +(46) Exchange +Input [7]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, sum#38, sum#39, sum#40] +Arguments: hashpartitioning(i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, 5), true, [id=#41] + +(47) HashAggregate [codegen id : 9] +Input [7]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, sum#38, sum#39, sum#40] +Keys [4]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29] +Functions [3]: [sum(UnscaledValue(ss_net_profit#6)), sum(UnscaledValue(sr_net_loss#11)), sum(UnscaledValue(cs_net_profit#16))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#6))#42, sum(UnscaledValue(sr_net_loss#11))#43, sum(UnscaledValue(cs_net_profit#16))#44] +Results [7]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, MakeDecimal(sum(UnscaledValue(ss_net_profit#6))#42,17,2) AS store_sales_profit#45, MakeDecimal(sum(UnscaledValue(sr_net_loss#11))#43,17,2) AS store_returns_loss#46, MakeDecimal(sum(UnscaledValue(cs_net_profit#16))#44,17,2) AS catalog_sales_profit#47] + +(48) TakeOrderedAndProject +Input [7]: [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, store_sales_profit#45, store_returns_loss#46, catalog_sales_profit#47] +Arguments: 100, [i_item_id#32 ASC NULLS FIRST, i_item_desc#33 ASC NULLS FIRST, s_store_id#28 ASC NULLS FIRST, s_store_name#29 ASC NULLS FIRST], [i_item_id#32, i_item_desc#33, s_store_id#28, s_store_name#29, store_sales_profit#45, store_returns_loss#46, catalog_sales_profit#47] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/simplified.txt index df34b61ae..4a40bdaff 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q25/simplified.txt @@ -1,66 +1,71 @@ -TakeOrderedAndProject [catalog_sales_profit,i_item_desc,i_item_id,s_store_id,s_store_name,store_returns_loss,store_sales_profit] - WholeStageCodegen - HashAggregate [i_item_desc,i_item_id,s_store_id,s_store_name,sum,sum,sum,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(ss_net_profit))] [catalog_sales_profit,store_returns_loss,store_sales_profit,sum,sum,sum,sum(UnscaledValue(cs_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(ss_net_profit))] +TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_profit,store_returns_loss,catalog_sales_profit] + WholeStageCodegen (9) + HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,sum,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(cs_net_profit)),store_sales_profit,store_returns_loss,catalog_sales_profit,sum,sum,sum] InputAdapter - Exchange [i_item_desc,i_item_id,s_store_id,s_store_name] #1 - WholeStageCodegen - HashAggregate [cs_net_profit,i_item_desc,i_item_id,s_store_id,s_store_name,sr_net_loss,ss_net_profit,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Project [cs_net_profit,i_item_desc,i_item_id,s_store_id,s_store_name,sr_net_loss,ss_net_profit] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [cs_net_profit,s_store_id,s_store_name,sr_net_loss,ss_item_sk,ss_net_profit] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [cs_net_profit,sr_net_loss,ss_item_sk,ss_net_profit,ss_store_sk] + Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 + WholeStageCodegen (8) + HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,ss_net_profit,sr_net_loss,cs_net_profit] [sum,sum,sum,sum,sum,sum] + Project [ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_net_profit] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_net_profit,cs_sold_date_sk,sr_net_loss,ss_item_sk,ss_net_profit,ss_store_sk] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] - Project [cs_net_profit,cs_sold_date_sk,sr_net_loss,sr_returned_date_sk,ss_item_sk,ss_net_profit,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [cs_net_profit,cs_sold_date_sk,sr_net_loss,sr_returned_date_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,sr_customer_sk,sr_item_sk] - Project [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] - Project [ss_customer_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Project [ss_item_sk,ss_store_sk,ss_net_profit,sr_net_loss,cs_sold_date_sk,cs_net_profit] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_net_profit,sr_returned_date_sk,sr_net_loss,cs_sold_date_sk,cs_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_net_profit,sr_returned_date_sk,sr_net_loss,cs_sold_date_sk,cs_net_profit] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_net_profit,sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_net_loss] + BroadcastHashJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,sr_ticket_number] - Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,sr_ticket_number] [sr_customer_sk,sr_item_sk,sr_net_loss,sr_returned_date_sk,sr_ticket_number] + WholeStageCodegen (1) + Filter [sr_customer_sk,sr_item_sk,sr_ticket_number,sr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_net_profit,cs_sold_date_sk] + WholeStageCodegen (2) + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (3) Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (4) Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + ReusedExchange [d_date_sk] #5 InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [s_store_id,s_store_name,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_id,s_store_name,s_store_sk] [s_store_id,s_store_name,s_store_sk] + BroadcastExchange #6 + WholeStageCodegen (6) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id,s_store_name] InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [i_item_desc,i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_desc,i_item_id,i_item_sk] [i_item_desc,i_item_id,i_item_sk] + BroadcastExchange #7 + WholeStageCodegen (7) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/explain.txt index 447971a14..12e953427 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/explain.txt @@ -1,32 +1,193 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[i_item_id#1,agg1#2,agg2#3,agg3#4,agg4#5]) -+- *(6) HashAggregate(keys=[i_item_id#1], functions=[avg(cast(cs_quantity#6 as bigint)), avg(UnscaledValue(cs_list_price#7)), avg(UnscaledValue(cs_coupon_amt#8)), avg(UnscaledValue(cs_sales_price#9))]) - +- Exchange hashpartitioning(i_item_id#1, 5) - +- *(5) HashAggregate(keys=[i_item_id#1], functions=[partial_avg(cast(cs_quantity#6 as bigint)), partial_avg(UnscaledValue(cs_list_price#7)), partial_avg(UnscaledValue(cs_coupon_amt#8)), partial_avg(UnscaledValue(cs_sales_price#9))]) - +- *(5) Project [cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8, i_item_id#1] - +- *(5) BroadcastHashJoin [cs_promo_sk#10], [p_promo_sk#11], Inner, BuildRight - :- *(5) Project [cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8, i_item_id#1] - : +- *(5) BroadcastHashJoin [cs_item_sk#12], [i_item_sk#13], Inner, BuildRight - : :- *(5) Project [cs_item_sk#12, cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8] - : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight - : : :- *(5) Project [cs_sold_date_sk#14, cs_item_sk#12, cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8] - : : : +- *(5) BroadcastHashJoin [cs_bill_cdemo_sk#16], [cd_demo_sk#17], Inner, BuildRight - : : : :- *(5) Project [cs_sold_date_sk#14, cs_bill_cdemo_sk#16, cs_item_sk#12, cs_promo_sk#10, cs_quantity#6, cs_list_price#7, cs_sales_price#9, cs_coupon_amt#8] - : : : : +- *(5) Filter (((isnotnull(cs_bill_cdemo_sk#16) && isnotnull(cs_sold_date_sk#14)) && isnotnull(cs_item_sk#12)) && isnotnull(cs_promo_sk#10)) - : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#14,cs_bill_cdemo_sk#16,cs_item_sk#12,cs_promo_sk#10,cs_quantity#6,cs_list_price#7,cs_sales_price#9,cs_coupon_amt#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_pro..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [d_date_sk#15] - : : +- *(2) Filter ((isnotnull(d_year#21) && (d_year#21 = 2000)) && isnotnull(d_date_sk#15)) - : : +- *(2) FileScan parquet default.date_dim[d_date_sk#15,d_year#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [i_item_sk#13, i_item_id#1] - : +- *(3) Filter isnotnull(i_item_sk#13) - : +- *(3) FileScan parquet default.item[i_item_sk#13,i_item_id#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(4) Project [p_promo_sk#11] - +- *(4) Filter (((p_channel_email#22 = N) || (p_channel_event#23 = N)) && isnotnull(p_promo_sk#11)) - +- *(4) FileScan parquet default.promotion[p_promo_sk#11,p_channel_email#22,p_channel_event#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/promotion], PartitionFilters: [], PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.customer_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.date_dim (11) + : +- BroadcastExchange (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.item (18) + +- BroadcastExchange (28) + +- * Project (27) + +- * Filter (26) + +- * ColumnarToRow (25) + +- Scan parquet default.promotion (24) + + +(1) Scan parquet default.catalog_sales +Output [8]: [cs_sold_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [cs_sold_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] + +(3) Filter [codegen id : 5] +Input [8]: [cs_sold_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] +Condition : (((isnotnull(cs_bill_cdemo_sk#2) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_item_sk#3)) AND isnotnull(cs_promo_sk#4)) + +(4) Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College)) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_bill_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#9] +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [cs_sold_date_sk#1, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] +Input [9]: [cs_sold_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cd_demo_sk#9] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2000)) AND isnotnull(d_date_sk#14)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#15] + +(15) BroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8] +Input [8]: [cs_sold_date_sk#1, cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, d_date_sk#14] + +(18) Scan parquet default.item +Output [2]: [i_item_sk#17, i_item_id#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#17, i_item_id#18] + +(20) Filter [codegen id : 3] +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) + +(21) BroadcastExchange +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#17] +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, i_item_id#18] +Input [8]: [cs_item_sk#3, cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, i_item_sk#17, i_item_id#18] + +(24) Scan parquet default.promotion +Output [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] + +(26) Filter [codegen id : 4] +Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] +Condition : (((p_channel_email#21 = N) OR (p_channel_event#22 = N)) AND isnotnull(p_promo_sk#20)) + +(27) Project [codegen id : 4] +Output [1]: [p_promo_sk#20] +Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] + +(28) BroadcastExchange +Input [1]: [p_promo_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_promo_sk#4] +Right keys [1]: [p_promo_sk#20] +Join condition: None + +(30) Project [codegen id : 5] +Output [5]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, i_item_id#18] +Input [7]: [cs_promo_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, i_item_id#18, p_promo_sk#20] + +(31) HashAggregate [codegen id : 5] +Input [5]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, i_item_id#18] +Keys [1]: [i_item_id#18] +Functions [4]: [partial_avg(cast(cs_quantity#5 as bigint)), partial_avg(UnscaledValue(cs_list_price#6)), partial_avg(UnscaledValue(cs_coupon_amt#8)), partial_avg(UnscaledValue(cs_sales_price#7))] +Aggregate Attributes [8]: [sum#24, count#25, sum#26, count#27, sum#28, count#29, sum#30, count#31] +Results [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] + +(32) Exchange +Input [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] +Arguments: hashpartitioning(i_item_id#18, 5), true, [id=#40] + +(33) HashAggregate [codegen id : 6] +Input [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] +Keys [1]: [i_item_id#18] +Functions [4]: [avg(cast(cs_quantity#5 as bigint)), avg(UnscaledValue(cs_list_price#6)), avg(UnscaledValue(cs_coupon_amt#8)), avg(UnscaledValue(cs_sales_price#7))] +Aggregate Attributes [4]: [avg(cast(cs_quantity#5 as bigint))#41, avg(UnscaledValue(cs_list_price#6))#42, avg(UnscaledValue(cs_coupon_amt#8))#43, avg(UnscaledValue(cs_sales_price#7))#44] +Results [5]: [i_item_id#18, avg(cast(cs_quantity#5 as bigint))#41 AS agg1#45, cast((avg(UnscaledValue(cs_list_price#6))#42 / 100.0) as decimal(11,6)) AS agg2#46, cast((avg(UnscaledValue(cs_coupon_amt#8))#43 / 100.0) as decimal(11,6)) AS agg3#47, cast((avg(UnscaledValue(cs_sales_price#7))#44 / 100.0) as decimal(11,6)) AS agg4#48] + +(34) TakeOrderedAndProject +Input [5]: [i_item_id#18, agg1#45, agg2#46, agg3#47, agg4#48] +Arguments: 100, [i_item_id#18 ASC NULLS FIRST], [i_item_id#18, agg1#45, agg2#46, agg3#47, agg4#48] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/simplified.txt index bfb96b05c..94ce760c2 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q26/simplified.txt @@ -1,42 +1,50 @@ -TakeOrderedAndProject [agg1,agg2,agg3,agg4,i_item_id] - WholeStageCodegen - HashAggregate [avg(UnscaledValue(cs_coupon_amt)),avg(UnscaledValue(cs_list_price)),avg(UnscaledValue(cs_sales_price)),avg(cast(cs_quantity as bigint)),count,count,count,count,i_item_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(cs_coupon_amt)),avg(UnscaledValue(cs_list_price)),avg(UnscaledValue(cs_sales_price)),avg(cast(cs_quantity as bigint)),count,count,count,count,sum,sum,sum,sum] +TakeOrderedAndProject [i_item_id,agg1,agg2,agg3,agg4] + WholeStageCodegen (6) + HashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count] [avg(cast(cs_quantity as bigint)),avg(UnscaledValue(cs_list_price)),avg(UnscaledValue(cs_coupon_amt)),avg(UnscaledValue(cs_sales_price)),agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] InputAdapter Exchange [i_item_id] #1 - WholeStageCodegen - HashAggregate [count,count,count,count,count,count,count,count,cs_coupon_amt,cs_list_price,cs_quantity,cs_sales_price,i_item_id,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] - Project [cs_coupon_amt,cs_list_price,cs_quantity,cs_sales_price,i_item_id] + WholeStageCodegen (5) + HashAggregate [i_item_id,cs_quantity,cs_list_price,cs_coupon_amt,cs_sales_price] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_id] BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_coupon_amt,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,i_item_id] + Project [cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price] + Project [cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] - BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] - Project [cs_bill_cdemo_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] - Filter [cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] [cs_bill_cdemo_sk,cs_coupon_amt,cs_item_sk,cs_list_price,cs_promo_sk,cs_quantity,cs_sales_price,cs_sold_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Filter [cs_bill_cdemo_sk,cs_sold_date_sk,cs_item_sk,cs_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_quantity,cs_list_price,cs_sales_price,cs_coupon_amt] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (1) Project [cd_demo_sk] - Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] - Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (4) Project [p_promo_sk] Filter [p_channel_email,p_channel_event,p_promo_sk] - Scan parquet default.promotion [p_channel_email,p_channel_event,p_promo_sk] [p_channel_email,p_channel_event,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk,p_channel_email,p_channel_event] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/explain.txt index 62dd35f34..16aeff7a2 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/explain.txt @@ -1,33 +1,193 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,s_state#2 ASC NULLS FIRST], output=[i_item_id#1,s_state#2,g_state#3,agg1#4,agg2#5,agg3#6,agg4#7]) -+- *(6) HashAggregate(keys=[i_item_id#1, s_state#2, spark_grouping_id#8], functions=[avg(cast(ss_quantity#9 as bigint)), avg(UnscaledValue(ss_list_price#10)), avg(UnscaledValue(ss_coupon_amt#11)), avg(UnscaledValue(ss_sales_price#12))]) - +- Exchange hashpartitioning(i_item_id#1, s_state#2, spark_grouping_id#8, 5) - +- *(5) HashAggregate(keys=[i_item_id#1, s_state#2, spark_grouping_id#8], functions=[partial_avg(cast(ss_quantity#9 as bigint)), partial_avg(UnscaledValue(ss_list_price#10)), partial_avg(UnscaledValue(ss_coupon_amt#11)), partial_avg(UnscaledValue(ss_sales_price#12))]) - +- *(5) Expand [List(ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#13, s_state#14, 0), List(ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#13, null, 1), List(ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, null, null, 3)], [ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#1, s_state#2, spark_grouping_id#8] - +- *(5) Project [ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, i_item_id#15 AS i_item_id#13, s_state#16 AS s_state#14] - +- *(5) BroadcastHashJoin [ss_item_sk#17], [i_item_sk#18], Inner, BuildRight - :- *(5) Project [ss_item_sk#17, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11, s_state#16] - : +- *(5) BroadcastHashJoin [ss_store_sk#19], [s_store_sk#20], Inner, BuildRight - : :- *(5) Project [ss_item_sk#17, ss_store_sk#19, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11] - : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight - : : :- *(5) Project [ss_sold_date_sk#21, ss_item_sk#17, ss_store_sk#19, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11] - : : : +- *(5) BroadcastHashJoin [ss_cdemo_sk#23], [cd_demo_sk#24], Inner, BuildRight - : : : :- *(5) Project [ss_sold_date_sk#21, ss_item_sk#17, ss_cdemo_sk#23, ss_store_sk#19, ss_quantity#9, ss_list_price#10, ss_sales_price#12, ss_coupon_amt#11] - : : : : +- *(5) Filter (((isnotnull(ss_cdemo_sk#23) && isnotnull(ss_sold_date_sk#21)) && isnotnull(ss_store_sk#19)) && isnotnull(ss_item_sk#17)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#21,ss_item_sk#17,ss_cdemo_sk#23,ss_store_sk#19,ss_quantity#9,ss_list_price#10,ss_sales_price#12,ss_coupon_amt#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [d_date_sk#22] - : : +- *(2) Filter ((isnotnull(d_year#28) && (d_year#28 = 2002)) && isnotnull(d_date_sk#22)) - : : +- *(2) FileScan parquet default.date_dim[d_date_sk#22,d_year#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [s_store_sk#20, s_state#16] - : +- *(3) Filter ((isnotnull(s_state#16) && (s_state#16 = TN)) && isnotnull(s_store_sk#20)) - : +- *(3) FileScan parquet default.store[s_store_sk#20,s_state#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(4) Project [i_item_sk#18, i_item_id#15] - +- *(4) Filter isnotnull(i_item_sk#18) - +- *(4) FileScan parquet default.item[i_item_sk#18,i_item_id#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Expand (30) + +- * Project (29) + +- * BroadcastHashJoin Inner BuildRight (28) + :- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.customer_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.date_dim (11) + : +- BroadcastExchange (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.store (18) + +- BroadcastExchange (27) + +- * Filter (26) + +- * ColumnarToRow (25) + +- Scan parquet default.item (24) + + +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((isnotnull(ss_cdemo_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College)) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#9] +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#9] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2002)) AND isnotnull(d_date_sk#14)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#15] + +(15) BroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#14] + +(18) Scan parquet default.store +Output [2]: [s_store_sk#17, s_state#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#17, s_state#18] + +(20) Filter [codegen id : 3] +Input [2]: [s_store_sk#17, s_state#18] +Condition : ((isnotnull(s_state#18) AND (s_state#18 = TN)) AND isnotnull(s_store_sk#17)) + +(21) BroadcastExchange +Input [2]: [s_store_sk#17, s_state#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#17] +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_state#18] +Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17, s_state#18] + +(24) Scan parquet default.item +Output [2]: [i_item_sk#20, i_item_id#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#20, i_item_id#21] + +(26) Filter [codegen id : 4] +Input [2]: [i_item_sk#20, i_item_id#21] +Condition : isnotnull(i_item_sk#20) + +(27) BroadcastExchange +Input [2]: [i_item_sk#20, i_item_id#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#20] +Join condition: None + +(29) Project [codegen id : 5] +Output [6]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#21, s_state#18] +Input [8]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_state#18, i_item_sk#20, i_item_id#21] + +(30) Expand [codegen id : 5] +Input [6]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#21, s_state#18] +Arguments: [List(ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#21, s_state#18, 0), List(ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#21, null, 1), List(ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, null, null, 3)], [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#23, s_state#24, spark_grouping_id#25] + +(31) HashAggregate [codegen id : 5] +Input [7]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#23, s_state#24, spark_grouping_id#25] +Keys [3]: [i_item_id#23, s_state#24, spark_grouping_id#25] +Functions [4]: [partial_avg(cast(ss_quantity#5 as bigint)), partial_avg(UnscaledValue(ss_list_price#6)), partial_avg(UnscaledValue(ss_coupon_amt#8)), partial_avg(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [8]: [sum#26, count#27, sum#28, count#29, sum#30, count#31, sum#32, count#33] +Results [11]: [i_item_id#23, s_state#24, spark_grouping_id#25, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] + +(32) Exchange +Input [11]: [i_item_id#23, s_state#24, spark_grouping_id#25, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] +Arguments: hashpartitioning(i_item_id#23, s_state#24, spark_grouping_id#25, 5), true, [id=#42] + +(33) HashAggregate [codegen id : 6] +Input [11]: [i_item_id#23, s_state#24, spark_grouping_id#25, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] +Keys [3]: [i_item_id#23, s_state#24, spark_grouping_id#25] +Functions [4]: [avg(cast(ss_quantity#5 as bigint)), avg(UnscaledValue(ss_list_price#6)), avg(UnscaledValue(ss_coupon_amt#8)), avg(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [4]: [avg(cast(ss_quantity#5 as bigint))#43, avg(UnscaledValue(ss_list_price#6))#44, avg(UnscaledValue(ss_coupon_amt#8))#45, avg(UnscaledValue(ss_sales_price#7))#46] +Results [7]: [i_item_id#23, s_state#24, cast((shiftright(spark_grouping_id#25, 0) & 1) as tinyint) AS g_state#47, avg(cast(ss_quantity#5 as bigint))#43 AS agg1#48, cast((avg(UnscaledValue(ss_list_price#6))#44 / 100.0) as decimal(11,6)) AS agg2#49, cast((avg(UnscaledValue(ss_coupon_amt#8))#45 / 100.0) as decimal(11,6)) AS agg3#50, cast((avg(UnscaledValue(ss_sales_price#7))#46 / 100.0) as decimal(11,6)) AS agg4#51] + +(34) TakeOrderedAndProject +Input [7]: [i_item_id#23, s_state#24, g_state#47, agg1#48, agg2#49, agg3#50, agg4#51] +Arguments: 100, [i_item_id#23 ASC NULLS FIRST, s_state#24 ASC NULLS FIRST], [i_item_id#23, s_state#24, g_state#47, agg1#48, agg2#49, agg3#50, agg4#51] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/simplified.txt index e3a2d7ca1..204094d44 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q27/simplified.txt @@ -1,43 +1,50 @@ -TakeOrderedAndProject [agg1,agg2,agg3,agg4,g_state,i_item_id,s_state] - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,i_item_id,s_state,spark_grouping_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,g_state,sum,sum,sum,sum] +TakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] + WholeStageCodegen (6) + HashAggregate [i_item_id,s_state,spark_grouping_id,sum,count,sum,count,sum,count,sum,count] [avg(cast(ss_quantity as bigint)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),g_state,agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] InputAdapter Exchange [i_item_id,s_state,spark_grouping_id] #1 - WholeStageCodegen - HashAggregate [count,count,count,count,count,count,count,count,i_item_id,s_state,spark_grouping_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] - Expand [i_item_id,s_state,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] - Project [i_item_id,s_state,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [s_state,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] - Project [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_cdemo_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + WholeStageCodegen (5) + HashAggregate [i_item_id,s_state,spark_grouping_id,ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Expand [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id,s_state] + Project [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id,s_state] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Filter [ss_cdemo_sk,ss_sold_date_sk,ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (1) Project [cd_demo_sk] - Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] - Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [s_state,s_store_sk] - Filter [s_state,s_store_sk] - Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] + WholeStageCodegen (3) + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_state] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/explain.txt index 698feb11f..9788040bb 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/explain.txt @@ -1,66 +1,437 @@ == Physical Plan == -CollectLimit 100 -+- BroadcastNestedLoopJoin BuildRight, Inner - :- BroadcastNestedLoopJoin BuildRight, Inner - : :- BroadcastNestedLoopJoin BuildRight, Inner - : : :- BroadcastNestedLoopJoin BuildRight, Inner - : : : :- BroadcastNestedLoopJoin BuildRight, Inner - : : : : :- *(3) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_list_price#1)), count(ss_list_price#1), count(distinct ss_list_price#1)]) - : : : : : +- Exchange SinglePartition - : : : : : +- *(2) HashAggregate(keys=[], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1), partial_count(distinct ss_list_price#1)]) - : : : : : +- *(2) HashAggregate(keys=[ss_list_price#1], functions=[merge_avg(UnscaledValue(ss_list_price#1)), merge_count(ss_list_price#1)]) - : : : : : +- Exchange hashpartitioning(ss_list_price#1, 5) - : : : : : +- *(1) HashAggregate(keys=[ss_list_price#1], functions=[partial_avg(UnscaledValue(ss_list_price#1)), partial_count(ss_list_price#1)]) - : : : : : +- *(1) Project [ss_list_price#1] - : : : : : +- *(1) Filter (((isnotnull(ss_quantity#2) && (ss_quantity#2 >= 0)) && (ss_quantity#2 <= 5)) && ((((ss_list_price#1 >= 8.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 18.00)) || ((ss_coupon_amt#3 >= 459.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 1459.00))) || ((ss_wholesale_cost#4 >= 57.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 77.00)))) - : : : : : +- *(1) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5)], ReadSchema: struct= 6)) && (ss_quantity#2 <= 10)) && ((((ss_list_price#1 >= 90.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 100.00)) || ((ss_coupon_amt#3 >= 2323.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 3323.00))) || ((ss_wholesale_cost#4 >= 31.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 51.00)))) - : : : : +- *(4) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10)], ReadSchema: struct= 11)) && (ss_quantity#2 <= 15)) && ((((ss_list_price#1 >= 142.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 152.00)) || ((ss_coupon_amt#3 >= 12214.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 13214.00))) || ((ss_wholesale_cost#4 >= 79.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 99.00)))) - : : : +- *(7) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15)], ReadSchema: struct= 16)) && (ss_quantity#2 <= 20)) && ((((ss_list_price#1 >= 135.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 145.00)) || ((ss_coupon_amt#3 >= 6071.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 7071.00))) || ((ss_wholesale_cost#4 >= 38.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 58.00)))) - : : +- *(10) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct= 21)) && (ss_quantity#2 <= 25)) && ((((ss_list_price#1 >= 122.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 132.00)) || ((ss_coupon_amt#3 >= 836.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 1836.00))) || ((ss_wholesale_cost#4 >= 17.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 37.00)))) - : +- *(13) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25)], ReadSchema: struct= 26)) && (ss_quantity#2 <= 30)) && ((((ss_list_price#1 >= 154.00) && (cast(ss_list_price#1 as decimal(12,2)) <= 164.00)) || ((ss_coupon_amt#3 >= 7326.00) && (cast(ss_coupon_amt#3 as decimal(12,2)) <= 8326.00))) || ((ss_wholesale_cost#4 >= 7.00) && (cast(ss_wholesale_cost#4 as decimal(12,2)) <= 27.00)))) - +- *(16) FileScan parquet default.store_sales[ss_quantity#2,ss_wholesale_cost#4,ss_list_price#1,ss_coupon_amt#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30)], ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(3) Filter [codegen id : 1] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 0)) AND (ss_quantity#1 <= 5)) AND ((((ss_list_price#3 >= 8.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 18.00)) OR ((ss_coupon_amt#4 >= 459.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 1459.00))) OR ((ss_wholesale_cost#2 >= 57.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 77.00)))) + +(4) Project [codegen id : 1] +Output [1]: [ss_list_price#3] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(5) HashAggregate [codegen id : 1] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#5, count(ss_list_price#3)#6] +Results [4]: [ss_list_price#3, sum#7, count#8, count#9] + +(6) Exchange +Input [4]: [ss_list_price#3, sum#7, count#8, count#9] +Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#10] + +(7) HashAggregate [codegen id : 2] +Input [4]: [ss_list_price#3, sum#7, count#8, count#9] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#5, count(ss_list_price#3)#6] +Results [4]: [ss_list_price#3, sum#7, count#8, count#9] + +(8) HashAggregate [codegen id : 2] +Input [4]: [ss_list_price#3, sum#7, count#8, count#9] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#5, count(ss_list_price#3)#6, count(ss_list_price#3)#11] +Results [4]: [sum#7, count#8, count#9, count#12] + +(9) Exchange +Input [4]: [sum#7, count#8, count#9, count#12] +Arguments: SinglePartition, true, [id=#13] + +(10) HashAggregate [codegen id : 3] +Input [4]: [sum#7, count#8, count#9, count#12] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#5, count(ss_list_price#3)#6, count(ss_list_price#3)#11] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#5 / 100.0) as decimal(11,6)) AS B1_LP#14, count(ss_list_price#3)#6 AS B1_CNT#15, count(ss_list_price#3)#11 AS B1_CNTD#16] + +(11) Scan parquet default.store_sales +Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 4] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(13) Filter [codegen id : 4] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 6)) AND (ss_quantity#1 <= 10)) AND ((((ss_list_price#3 >= 90.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 100.00)) OR ((ss_coupon_amt#4 >= 2323.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 3323.00))) OR ((ss_wholesale_cost#2 >= 31.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 51.00)))) + +(14) Project [codegen id : 4] +Output [1]: [ss_list_price#3] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(15) HashAggregate [codegen id : 4] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#17, count(ss_list_price#3)#18] +Results [4]: [ss_list_price#3, sum#19, count#20, count#21] + +(16) Exchange +Input [4]: [ss_list_price#3, sum#19, count#20, count#21] +Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#22] + +(17) HashAggregate [codegen id : 5] +Input [4]: [ss_list_price#3, sum#19, count#20, count#21] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#17, count(ss_list_price#3)#18] +Results [4]: [ss_list_price#3, sum#19, count#20, count#21] + +(18) HashAggregate [codegen id : 5] +Input [4]: [ss_list_price#3, sum#19, count#20, count#21] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#17, count(ss_list_price#3)#18, count(ss_list_price#3)#23] +Results [4]: [sum#19, count#20, count#21, count#24] + +(19) Exchange +Input [4]: [sum#19, count#20, count#21, count#24] +Arguments: SinglePartition, true, [id=#25] + +(20) HashAggregate [codegen id : 6] +Input [4]: [sum#19, count#20, count#21, count#24] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#17, count(ss_list_price#3)#18, count(ss_list_price#3)#23] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#17 / 100.0) as decimal(11,6)) AS B2_LP#26, count(ss_list_price#3)#18 AS B2_CNT#27, count(ss_list_price#3)#23 AS B2_CNTD#28] + +(21) BroadcastExchange +Input [3]: [B2_LP#26, B2_CNT#27, B2_CNTD#28] +Arguments: IdentityBroadcastMode, [id=#29] + +(22) BroadcastNestedLoopJoin +Join condition: None + +(23) Scan parquet default.store_sales +Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 7] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(25) Filter [codegen id : 7] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 11)) AND (ss_quantity#1 <= 15)) AND ((((ss_list_price#3 >= 142.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 152.00)) OR ((ss_coupon_amt#4 >= 12214.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 13214.00))) OR ((ss_wholesale_cost#2 >= 79.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 99.00)))) + +(26) Project [codegen id : 7] +Output [1]: [ss_list_price#3] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(27) HashAggregate [codegen id : 7] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#30, count(ss_list_price#3)#31] +Results [4]: [ss_list_price#3, sum#32, count#33, count#34] + +(28) Exchange +Input [4]: [ss_list_price#3, sum#32, count#33, count#34] +Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#35] + +(29) HashAggregate [codegen id : 8] +Input [4]: [ss_list_price#3, sum#32, count#33, count#34] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#30, count(ss_list_price#3)#31] +Results [4]: [ss_list_price#3, sum#32, count#33, count#34] + +(30) HashAggregate [codegen id : 8] +Input [4]: [ss_list_price#3, sum#32, count#33, count#34] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#30, count(ss_list_price#3)#31, count(ss_list_price#3)#36] +Results [4]: [sum#32, count#33, count#34, count#37] + +(31) Exchange +Input [4]: [sum#32, count#33, count#34, count#37] +Arguments: SinglePartition, true, [id=#38] + +(32) HashAggregate [codegen id : 9] +Input [4]: [sum#32, count#33, count#34, count#37] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#30, count(ss_list_price#3)#31, count(ss_list_price#3)#36] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#30 / 100.0) as decimal(11,6)) AS B3_LP#39, count(ss_list_price#3)#31 AS B3_CNT#40, count(ss_list_price#3)#36 AS B3_CNTD#41] + +(33) BroadcastExchange +Input [3]: [B3_LP#39, B3_CNT#40, B3_CNTD#41] +Arguments: IdentityBroadcastMode, [id=#42] + +(34) BroadcastNestedLoopJoin +Join condition: None + +(35) Scan parquet default.store_sales +Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 10] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(37) Filter [codegen id : 10] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 16)) AND (ss_quantity#1 <= 20)) AND ((((ss_list_price#3 >= 135.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 145.00)) OR ((ss_coupon_amt#4 >= 6071.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 7071.00))) OR ((ss_wholesale_cost#2 >= 38.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 58.00)))) + +(38) Project [codegen id : 10] +Output [1]: [ss_list_price#3] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(39) HashAggregate [codegen id : 10] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#43, count(ss_list_price#3)#44] +Results [4]: [ss_list_price#3, sum#45, count#46, count#47] + +(40) Exchange +Input [4]: [ss_list_price#3, sum#45, count#46, count#47] +Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#48] + +(41) HashAggregate [codegen id : 11] +Input [4]: [ss_list_price#3, sum#45, count#46, count#47] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#43, count(ss_list_price#3)#44] +Results [4]: [ss_list_price#3, sum#45, count#46, count#47] + +(42) HashAggregate [codegen id : 11] +Input [4]: [ss_list_price#3, sum#45, count#46, count#47] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#43, count(ss_list_price#3)#44, count(ss_list_price#3)#49] +Results [4]: [sum#45, count#46, count#47, count#50] + +(43) Exchange +Input [4]: [sum#45, count#46, count#47, count#50] +Arguments: SinglePartition, true, [id=#51] + +(44) HashAggregate [codegen id : 12] +Input [4]: [sum#45, count#46, count#47, count#50] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#43, count(ss_list_price#3)#44, count(ss_list_price#3)#49] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#43 / 100.0) as decimal(11,6)) AS B4_LP#52, count(ss_list_price#3)#44 AS B4_CNT#53, count(ss_list_price#3)#49 AS B4_CNTD#54] + +(45) BroadcastExchange +Input [3]: [B4_LP#52, B4_CNT#53, B4_CNTD#54] +Arguments: IdentityBroadcastMode, [id=#55] + +(46) BroadcastNestedLoopJoin +Join condition: None + +(47) Scan parquet default.store_sales +Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 13] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(49) Filter [codegen id : 13] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 21)) AND (ss_quantity#1 <= 25)) AND ((((ss_list_price#3 >= 122.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 132.00)) OR ((ss_coupon_amt#4 >= 836.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 1836.00))) OR ((ss_wholesale_cost#2 >= 17.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 37.00)))) + +(50) Project [codegen id : 13] +Output [1]: [ss_list_price#3] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(51) HashAggregate [codegen id : 13] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#56, count(ss_list_price#3)#57] +Results [4]: [ss_list_price#3, sum#58, count#59, count#60] + +(52) Exchange +Input [4]: [ss_list_price#3, sum#58, count#59, count#60] +Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#61] + +(53) HashAggregate [codegen id : 14] +Input [4]: [ss_list_price#3, sum#58, count#59, count#60] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#56, count(ss_list_price#3)#57] +Results [4]: [ss_list_price#3, sum#58, count#59, count#60] + +(54) HashAggregate [codegen id : 14] +Input [4]: [ss_list_price#3, sum#58, count#59, count#60] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#56, count(ss_list_price#3)#57, count(ss_list_price#3)#62] +Results [4]: [sum#58, count#59, count#60, count#63] + +(55) Exchange +Input [4]: [sum#58, count#59, count#60, count#63] +Arguments: SinglePartition, true, [id=#64] + +(56) HashAggregate [codegen id : 15] +Input [4]: [sum#58, count#59, count#60, count#63] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#56, count(ss_list_price#3)#57, count(ss_list_price#3)#62] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#56 / 100.0) as decimal(11,6)) AS B5_LP#65, count(ss_list_price#3)#57 AS B5_CNT#66, count(ss_list_price#3)#62 AS B5_CNTD#67] + +(57) BroadcastExchange +Input [3]: [B5_LP#65, B5_CNT#66, B5_CNTD#67] +Arguments: IdentityBroadcastMode, [id=#68] + +(58) BroadcastNestedLoopJoin +Join condition: None + +(59) Scan parquet default.store_sales +Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30)] +ReadSchema: struct + +(60) ColumnarToRow [codegen id : 16] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(61) Filter [codegen id : 16] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 26)) AND (ss_quantity#1 <= 30)) AND ((((ss_list_price#3 >= 154.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 164.00)) OR ((ss_coupon_amt#4 >= 7326.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 8326.00))) OR ((ss_wholesale_cost#2 >= 7.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 27.00)))) + +(62) Project [codegen id : 16] +Output [1]: [ss_list_price#3] +Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] + +(63) HashAggregate [codegen id : 16] +Input [1]: [ss_list_price#3] +Keys [1]: [ss_list_price#3] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#3)), partial_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#69, count(ss_list_price#3)#70] +Results [4]: [ss_list_price#3, sum#71, count#72, count#73] + +(64) Exchange +Input [4]: [ss_list_price#3, sum#71, count#72, count#73] +Arguments: hashpartitioning(ss_list_price#3, 5), true, [id=#74] + +(65) HashAggregate [codegen id : 17] +Input [4]: [ss_list_price#3, sum#71, count#72, count#73] +Keys [1]: [ss_list_price#3] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#3))#69, count(ss_list_price#3)#70] +Results [4]: [ss_list_price#3, sum#71, count#72, count#73] + +(66) HashAggregate [codegen id : 17] +Input [4]: [ss_list_price#3, sum#71, count#72, count#73] +Keys: [] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#69, count(ss_list_price#3)#70, count(ss_list_price#3)#75] +Results [4]: [sum#71, count#72, count#73, count#76] + +(67) Exchange +Input [4]: [sum#71, count#72, count#73, count#76] +Arguments: SinglePartition, true, [id=#77] + +(68) HashAggregate [codegen id : 18] +Input [4]: [sum#71, count#72, count#73, count#76] +Keys: [] +Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#69, count(ss_list_price#3)#70, count(ss_list_price#3)#75] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#69 / 100.0) as decimal(11,6)) AS B6_LP#78, count(ss_list_price#3)#70 AS B6_CNT#79, count(ss_list_price#3)#75 AS B6_CNTD#80] + +(69) BroadcastExchange +Input [3]: [B6_LP#78, B6_CNT#79, B6_CNTD#80] +Arguments: IdentityBroadcastMode, [id=#81] + +(70) BroadcastNestedLoopJoin +Join condition: None + +(71) CollectLimit +Input [18]: [B1_LP#14, B1_CNT#15, B1_CNTD#16, B2_LP#26, B2_CNT#27, B2_CNTD#28, B3_LP#39, B3_CNT#40, B3_CNTD#41, B4_LP#52, B4_CNT#53, B4_CNTD#54, B5_LP#65, B5_CNT#66, B5_CNTD#67, B6_LP#78, B6_CNT#79, B6_CNTD#80] +Arguments: 100 + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/simplified.txt index 2b46940ec..d896002b0 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q28/simplified.txt @@ -4,92 +4,104 @@ CollectLimit BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] [B1_CNT,B1_CNTD,B1_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] + WholeStageCodegen (3) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B1_LP,B1_CNT,B1_CNTD,sum,count,count,count] InputAdapter Exchange #1 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count(ss_list_price),count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + WholeStageCodegen (2) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter Exchange [ss_list_price] #2 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),ss_list_price,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + WholeStageCodegen (1) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] - Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] - Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] BroadcastExchange #3 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] [B2_CNT,B2_CNTD,B2_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] + WholeStageCodegen (6) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B2_LP,B2_CNT,B2_CNTD,sum,count,count,count] InputAdapter Exchange #4 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count(ss_list_price),count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + WholeStageCodegen (5) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter Exchange [ss_list_price] #5 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),ss_list_price,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + WholeStageCodegen (4) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] - Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] - Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] BroadcastExchange #6 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] [B3_CNT,B3_CNTD,B3_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] + WholeStageCodegen (9) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B3_LP,B3_CNT,B3_CNTD,sum,count,count,count] InputAdapter Exchange #7 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count(ss_list_price),count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + WholeStageCodegen (8) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter Exchange [ss_list_price] #8 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),ss_list_price,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + WholeStageCodegen (7) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] - Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] - Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] BroadcastExchange #9 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] [B4_CNT,B4_CNTD,B4_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] + WholeStageCodegen (12) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B4_LP,B4_CNT,B4_CNTD,sum,count,count,count] InputAdapter Exchange #10 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count(ss_list_price),count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + WholeStageCodegen (11) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter Exchange [ss_list_price] #11 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),ss_list_price,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + WholeStageCodegen (10) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] - Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] - Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] BroadcastExchange #12 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] [B5_CNT,B5_CNTD,B5_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] + WholeStageCodegen (15) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B5_LP,B5_CNT,B5_CNTD,sum,count,count,count] InputAdapter Exchange #13 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count(ss_list_price),count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + WholeStageCodegen (14) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter Exchange [ss_list_price] #14 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),ss_list_price,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + WholeStageCodegen (13) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] - Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] - Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] BroadcastExchange #15 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] [B6_CNT,B6_CNTD,B6_LP,avg(UnscaledValue(ss_list_price)),count,count,count,count(ss_list_price),count(ss_list_price),sum] + WholeStageCodegen (18) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B6_LP,B6_CNT,B6_CNTD,sum,count,count,count] InputAdapter Exchange #16 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count(ss_list_price),count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count,count,count(ss_list_price),count(ss_list_price),sum,sum] - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),ss_list_price,sum,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + WholeStageCodegen (17) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter Exchange [ss_list_price] #17 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_list_price)),count,count,count(ss_list_price),ss_list_price,sum] [avg(UnscaledValue(ss_list_price)),count,count,count,count,count(ss_list_price),sum,sum] + WholeStageCodegen (16) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] - Filter [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] - Scan parquet default.store_sales [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] [ss_coupon_amt,ss_list_price,ss_quantity,ss_wholesale_cost] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/explain.txt index 1db3bf363..d1d99897d 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/explain.txt @@ -1,50 +1,292 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST,s_store_id#3 ASC NULLS FIRST,s_store_name#4 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,s_store_id#3,s_store_name#4,store_sales_quantity#5,store_returns_quantity#6,catalog_sales_quantity#7]) -+- *(9) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4], functions=[sum(cast(ss_quantity#8 as bigint)), sum(cast(sr_return_quantity#9 as bigint)), sum(cast(cs_quantity#10 as bigint))]) - +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4, 5) - +- *(8) HashAggregate(keys=[i_item_id#1, i_item_desc#2, s_store_id#3, s_store_name#4], functions=[partial_sum(cast(ss_quantity#8 as bigint)), partial_sum(cast(sr_return_quantity#9 as bigint)), partial_sum(cast(cs_quantity#10 as bigint))]) - +- *(8) Project [ss_quantity#8, sr_return_quantity#9, cs_quantity#10, s_store_id#3, s_store_name#4, i_item_id#1, i_item_desc#2] - +- *(8) BroadcastHashJoin [ss_item_sk#11], [i_item_sk#12], Inner, BuildRight - :- *(8) Project [ss_item_sk#11, ss_quantity#8, sr_return_quantity#9, cs_quantity#10, s_store_id#3, s_store_name#4] - : +- *(8) BroadcastHashJoin [ss_store_sk#13], [s_store_sk#14], Inner, BuildRight - : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_quantity#8, sr_return_quantity#9, cs_quantity#10] - : : +- *(8) BroadcastHashJoin [cs_sold_date_sk#15], [d_date_sk#16], Inner, BuildRight - : : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_quantity#8, sr_return_quantity#9, cs_sold_date_sk#15, cs_quantity#10] - : : : +- *(8) BroadcastHashJoin [sr_returned_date_sk#17], [cast(d_date_sk#18 as bigint)], Inner, BuildRight - : : : :- *(8) Project [ss_item_sk#11, ss_store_sk#13, ss_quantity#8, sr_returned_date_sk#17, sr_return_quantity#9, cs_sold_date_sk#15, cs_quantity#10] - : : : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight - : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_store_sk#13, ss_quantity#8, sr_returned_date_sk#17, sr_return_quantity#9, cs_sold_date_sk#15, cs_quantity#10] - : : : : : +- *(8) BroadcastHashJoin [sr_customer_sk#21, sr_item_sk#22], [cast(cs_bill_customer_sk#23 as bigint), cast(cs_item_sk#24 as bigint)], Inner, BuildRight - : : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_store_sk#13, ss_quantity#8, sr_returned_date_sk#17, sr_item_sk#22, sr_customer_sk#21, sr_return_quantity#9] - : : : : : : +- *(8) BroadcastHashJoin [cast(ss_customer_sk#25 as bigint), cast(ss_item_sk#11 as bigint), cast(ss_ticket_number#26 as bigint)], [sr_customer_sk#21, sr_item_sk#22, sr_ticket_number#27], Inner, BuildRight - : : : : : : :- *(8) Project [ss_sold_date_sk#19, ss_item_sk#11, ss_customer_sk#25, ss_store_sk#13, ss_ticket_number#26, ss_quantity#8] - : : : : : : : +- *(8) Filter ((((isnotnull(ss_ticket_number#26) && isnotnull(ss_item_sk#11)) && isnotnull(ss_customer_sk#25)) && isnotnull(ss_sold_date_sk#19)) && isnotnull(ss_store_sk#13)) - : : : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_item_sk#11,ss_customer_sk#25,ss_store_sk#13,ss_ticket_number#26,ss_quantity#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_sold..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(3) Project [d_date_sk#20] - : : : : +- *(3) Filter ((((isnotnull(d_moy#28) && isnotnull(d_year#29)) && (d_moy#28 = 9)) && (d_year#29 = 1999)) && isnotnull(d_date_sk#20)) - : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_year#29,d_moy#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,9), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(4) Project [d_date_sk#18] - : : : +- *(4) Filter (((((isnotnull(d_moy#30) && isnotnull(d_year#31)) && (d_moy#30 >= 9)) && (d_moy#30 <= 12)) && (d_year#31 = 1999)) && isnotnull(d_date_sk#18)) - : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), Equ..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(5) Project [d_date_sk#16] - : : +- *(5) Filter (d_year#32 IN (1999,2000,2001) && isnotnull(d_date_sk#16)) - : : +- *(5) FileScan parquet default.date_dim[d_date_sk#16,d_year#32] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(6) Project [s_store_sk#14, s_store_id#3, s_store_name#4] - : +- *(6) Filter isnotnull(s_store_sk#14) - : +- *(6) FileScan parquet default.store[s_store_sk#14,s_store_id#3,s_store_name#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(7) Project [i_item_sk#12, i_item_id#1, i_item_desc#2] - +- *(7) Filter isnotnull(i_item_sk#12) - +- *(7) FileScan parquet default.item[i_item_sk#12,i_item_id#1,i_item_desc#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (52) ++- * HashAggregate (51) + +- Exchange (50) + +- * HashAggregate (49) + +- * Project (48) + +- * BroadcastHashJoin Inner BuildRight (47) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Project (29) + : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : :- * Project (22) + : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : :- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.store_returns (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet default.catalog_sales (10) + : : : : +- BroadcastExchange (20) + : : : : +- * Project (19) + : : : : +- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet default.date_dim (16) + : : : +- BroadcastExchange (27) + : : : +- * Project (26) + : : : +- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet default.date_dim (23) + : : +- BroadcastExchange (34) + : : +- * Project (33) + : : +- * Filter (32) + : : +- * ColumnarToRow (31) + : : +- Scan parquet default.date_dim (30) + : +- BroadcastExchange (40) + : +- * Filter (39) + : +- * ColumnarToRow (38) + : +- Scan parquet default.store (37) + +- BroadcastExchange (46) + +- * Filter (45) + +- * ColumnarToRow (44) + +- Scan parquet default.item (43) + + +(1) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] + +(3) Filter [codegen id : 8] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6] +Condition : ((((isnotnull(ss_customer_sk#3) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_ticket_number#5)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#4)) + +(4) Scan parquet default.store_returns +Output [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] + +(6) Filter [codegen id : 1] +Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] +Condition : (((isnotnull(sr_customer_sk#9) AND isnotnull(sr_item_sk#8)) AND isnotnull(sr_ticket_number#10)) AND isnotnull(sr_returned_date_sk#7)) + +(7) BroadcastExchange +Input [5]: [sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] +Arguments: HashedRelationBroadcastMode(List(input[2, bigint, false], input[1, bigint, false], input[3, bigint, false]),false), [id=#12] + +(8) BroadcastHashJoin [codegen id : 8] +Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] +Right keys [3]: [sr_customer_sk#9, sr_item_sk#8, sr_ticket_number#10] +Join condition: None + +(9) Project [codegen id : 8] +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_return_quantity#11] +Input [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_quantity#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11] + +(10) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] + +(12) Filter [codegen id : 2] +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] +Condition : ((isnotnull(cs_bill_customer_sk#14) AND isnotnull(cs_item_sk#15)) AND isnotnull(cs_sold_date_sk#13)) + +(13) BroadcastExchange +Input [4]: [cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint), cast(input[2, int, false] as bigint)),false), [id=#17] + +(14) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [sr_customer_sk#9, sr_item_sk#8] +Right keys [2]: [cast(cs_bill_customer_sk#14 as bigint), cast(cs_item_sk#15 as bigint)] +Join condition: None + +(15) Project [codegen id : 8] +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_item_sk#8, sr_customer_sk#9, sr_return_quantity#11, cs_sold_date_sk#13, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16] + +(16) Scan parquet default.date_dim +Output [3]: [d_date_sk#18, d_year#19, d_moy#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,9), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#18, d_year#19, d_moy#20] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#18, d_year#19, d_moy#20] +Condition : ((((isnotnull(d_moy#20) AND isnotnull(d_year#19)) AND (d_moy#20 = 9)) AND (d_year#19 = 1999)) AND isnotnull(d_date_sk#18)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#18] +Input [3]: [d_date_sk#18, d_year#19, d_moy#20] + +(20) BroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(21) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#18] +Join condition: None + +(22) Project [codegen id : 8] +Output [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16, d_date_sk#18] + +(23) Scan parquet default.date_dim +Output [3]: [d_date_sk#22, d_year#23, d_moy#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] + +(25) Filter [codegen id : 4] +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] +Condition : (((((isnotnull(d_moy#24) AND isnotnull(d_year#23)) AND (d_moy#24 >= 9)) AND (d_moy#24 <= 12)) AND (d_year#23 = 1999)) AND isnotnull(d_date_sk#22)) + +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#22] +Input [3]: [d_date_sk#22, d_year#23, d_moy#24] + +(27) BroadcastExchange +Input [1]: [d_date_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] + +(28) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [sr_returned_date_sk#7] +Right keys [1]: [cast(d_date_sk#22 as bigint)] +Join condition: None + +(29) Project [codegen id : 8] +Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16] +Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_returned_date_sk#7, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16, d_date_sk#22] + +(30) Scan parquet default.date_dim +Output [2]: [d_date_sk#26, d_year#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#26, d_year#27] + +(32) Filter [codegen id : 5] +Input [2]: [d_date_sk#26, d_year#27] +Condition : (d_year#27 IN (1999,2000,2001) AND isnotnull(d_date_sk#26)) + +(33) Project [codegen id : 5] +Output [1]: [d_date_sk#26] +Input [2]: [d_date_sk#26, d_year#27] + +(34) BroadcastExchange +Input [1]: [d_date_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] + +(35) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#13] +Right keys [1]: [d_date_sk#26] +Join condition: None + +(36) Project [codegen id : 8] +Output [5]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_quantity#16] +Input [7]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_sold_date_sk#13, cs_quantity#16, d_date_sk#26] + +(37) Scan parquet default.store +Output [3]: [s_store_sk#29, s_store_id#30, s_store_name#31] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 6] +Input [3]: [s_store_sk#29, s_store_id#30, s_store_name#31] + +(39) Filter [codegen id : 6] +Input [3]: [s_store_sk#29, s_store_id#30, s_store_name#31] +Condition : isnotnull(s_store_sk#29) + +(40) BroadcastExchange +Input [3]: [s_store_sk#29, s_store_id#30, s_store_name#31] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#32] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#29] +Join condition: None + +(42) Project [codegen id : 8] +Output [6]: [ss_item_sk#2, ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_store_id#30, s_store_name#31] +Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_store_sk#29, s_store_id#30, s_store_name#31] + +(43) Scan parquet default.item +Output [3]: [i_item_sk#33, i_item_id#34, i_item_desc#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 7] +Input [3]: [i_item_sk#33, i_item_id#34, i_item_desc#35] + +(45) Filter [codegen id : 7] +Input [3]: [i_item_sk#33, i_item_id#34, i_item_desc#35] +Condition : isnotnull(i_item_sk#33) + +(46) BroadcastExchange +Input [3]: [i_item_sk#33, i_item_id#34, i_item_desc#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#36] + +(47) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#33] +Join condition: None + +(48) Project [codegen id : 8] +Output [7]: [ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_store_id#30, s_store_name#31, i_item_id#34, i_item_desc#35] +Input [9]: [ss_item_sk#2, ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_store_id#30, s_store_name#31, i_item_sk#33, i_item_id#34, i_item_desc#35] + +(49) HashAggregate [codegen id : 8] +Input [7]: [ss_quantity#6, sr_return_quantity#11, cs_quantity#16, s_store_id#30, s_store_name#31, i_item_id#34, i_item_desc#35] +Keys [4]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31] +Functions [3]: [partial_sum(cast(ss_quantity#6 as bigint)), partial_sum(cast(sr_return_quantity#11 as bigint)), partial_sum(cast(cs_quantity#16 as bigint))] +Aggregate Attributes [3]: [sum#37, sum#38, sum#39] +Results [7]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, sum#40, sum#41, sum#42] + +(50) Exchange +Input [7]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, sum#40, sum#41, sum#42] +Arguments: hashpartitioning(i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, 5), true, [id=#43] + +(51) HashAggregate [codegen id : 9] +Input [7]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, sum#40, sum#41, sum#42] +Keys [4]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31] +Functions [3]: [sum(cast(ss_quantity#6 as bigint)), sum(cast(sr_return_quantity#11 as bigint)), sum(cast(cs_quantity#16 as bigint))] +Aggregate Attributes [3]: [sum(cast(ss_quantity#6 as bigint))#44, sum(cast(sr_return_quantity#11 as bigint))#45, sum(cast(cs_quantity#16 as bigint))#46] +Results [7]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, sum(cast(ss_quantity#6 as bigint))#44 AS store_sales_quantity#47, sum(cast(sr_return_quantity#11 as bigint))#45 AS store_returns_quantity#48, sum(cast(cs_quantity#16 as bigint))#46 AS catalog_sales_quantity#49] + +(52) TakeOrderedAndProject +Input [7]: [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, store_sales_quantity#47, store_returns_quantity#48, catalog_sales_quantity#49] +Arguments: 100, [i_item_id#34 ASC NULLS FIRST, i_item_desc#35 ASC NULLS FIRST, s_store_id#30 ASC NULLS FIRST, s_store_name#31 ASC NULLS FIRST], [i_item_id#34, i_item_desc#35, s_store_id#30, s_store_name#31, store_sales_quantity#47, store_returns_quantity#48, catalog_sales_quantity#49] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/simplified.txt index f8c72710f..26e4e3a55 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q29/simplified.txt @@ -1,66 +1,77 @@ -TakeOrderedAndProject [catalog_sales_quantity,i_item_desc,i_item_id,s_store_id,s_store_name,store_returns_quantity,store_sales_quantity] - WholeStageCodegen - HashAggregate [i_item_desc,i_item_id,s_store_id,s_store_name,sum,sum,sum,sum(cast(cs_quantity as bigint)),sum(cast(sr_return_quantity as bigint)),sum(cast(ss_quantity as bigint))] [catalog_sales_quantity,store_returns_quantity,store_sales_quantity,sum,sum,sum,sum(cast(cs_quantity as bigint)),sum(cast(sr_return_quantity as bigint)),sum(cast(ss_quantity as bigint))] +TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales_quantity,store_returns_quantity,catalog_sales_quantity] + WholeStageCodegen (9) + HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,sum,sum,sum] [sum(cast(ss_quantity as bigint)),sum(cast(sr_return_quantity as bigint)),sum(cast(cs_quantity as bigint)),store_sales_quantity,store_returns_quantity,catalog_sales_quantity,sum,sum,sum] InputAdapter - Exchange [i_item_desc,i_item_id,s_store_id,s_store_name] #1 - WholeStageCodegen - HashAggregate [cs_quantity,i_item_desc,i_item_id,s_store_id,s_store_name,sr_return_quantity,ss_quantity,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Project [cs_quantity,i_item_desc,i_item_id,s_store_id,s_store_name,sr_return_quantity,ss_quantity] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [cs_quantity,s_store_id,s_store_name,sr_return_quantity,ss_item_sk,ss_quantity] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [cs_quantity,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] + Exchange [i_item_id,i_item_desc,s_store_id,s_store_name] #1 + WholeStageCodegen (8) + HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,ss_quantity,sr_return_quantity,cs_quantity] [sum,sum,sum,sum,sum,sum] + Project [ss_quantity,sr_return_quantity,cs_quantity,s_store_id,s_store_name,i_item_id,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,sr_return_quantity,cs_quantity,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_quantity] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,ss_item_sk,ss_quantity,ss_store_sk] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] - Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [cs_quantity,cs_sold_date_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [cs_bill_customer_sk,cs_item_sk,sr_customer_sk,sr_item_sk] - Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] - Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Project [ss_item_sk,ss_store_sk,ss_quantity,sr_return_quantity,cs_sold_date_sk,cs_quantity] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,sr_returned_date_sk,sr_return_quantity,cs_sold_date_sk,cs_quantity] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_quantity,sr_returned_date_sk,sr_return_quantity,cs_sold_date_sk,cs_quantity] + BroadcastHashJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_quantity,sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_return_quantity] + BroadcastHashJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] + Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] - Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] [sr_customer_sk,sr_item_sk,sr_return_quantity,sr_returned_date_sk,sr_ticket_number] + WholeStageCodegen (1) + Filter [sr_customer_sk,sr_item_sk,sr_ticket_number,sr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sold_date_sk] + WholeStageCodegen (2) + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (3) Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (4) Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (5) Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #7 - WholeStageCodegen - Project [s_store_id,s_store_name,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_id,s_store_name,s_store_sk] [s_store_id,s_store_name,s_store_sk] + WholeStageCodegen (6) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id,s_store_name] InputAdapter BroadcastExchange #8 - WholeStageCodegen - Project [i_item_desc,i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_desc,i_item_id,i_item_sk] [i_item_desc,i_item_id,i_item_sk] + WholeStageCodegen (7) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/explain.txt index 7aaaa5fe6..bec06d146 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/explain.txt @@ -1,20 +1,122 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[d_year#1 ASC NULLS FIRST,sum_agg#2 DESC NULLS LAST,brand_id#3 ASC NULLS FIRST], output=[d_year#1,brand_id#3,brand#4,sum_agg#2]) -+- *(4) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[sum(UnscaledValue(ss_ext_sales_price#7))]) - +- Exchange hashpartitioning(d_year#1, i_brand#5, i_brand_id#6, 5) - +- *(3) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#7))]) - +- *(3) Project [d_year#1, ss_ext_sales_price#7, i_brand_id#6, i_brand#5] - +- *(3) BroadcastHashJoin [ss_item_sk#8], [i_item_sk#9], Inner, BuildRight - :- *(3) Project [d_year#1, ss_item_sk#8, ss_ext_sales_price#7] - : +- *(3) BroadcastHashJoin [d_date_sk#10], [ss_sold_date_sk#11], Inner, BuildRight - : :- *(3) Project [d_date_sk#10, d_year#1] - : : +- *(3) Filter ((isnotnull(d_moy#12) && (d_moy#12 = 11)) && isnotnull(d_date_sk#10)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#10,d_year#1,d_moy#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [ss_sold_date_sk#11, ss_item_sk#8, ss_ext_sales_price#7] - : +- *(1) Filter (isnotnull(ss_sold_date_sk#11) && isnotnull(ss_item_sk#8)) - : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#8,ss_ext_sales_price#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(2) Project [i_item_sk#9, i_brand_id#6, i_brand#5] - +- *(2) Filter ((isnotnull(i_manufact_id#13) && (i_manufact_id#13 = 128)) && isnotnull(i_item_sk#9)) - +- *(2) FileScan parquet default.item[i_item_sk#9,i_brand_id#6,i_brand#5,i_manufact_id#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,128), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.item (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((isnotnull(d_moy#3) AND (d_moy#3 = 11)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Condition : (isnotnull(ss_sold_date_sk#4) AND isnotnull(ss_item_sk#5)) + +(8) BroadcastExchange +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6] +Input [5]: [d_date_sk#1, d_year#2, ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,128), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] +Condition : ((isnotnull(i_manufact_id#11) AND (i_manufact_id#11 = 128)) AND isnotnull(i_item_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] + +(15) BroadcastExchange +Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#2, ss_ext_sales_price#6, i_brand_id#9, i_brand#10] +Input [6]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6, i_item_sk#8, i_brand_id#9, i_brand#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#2, ss_ext_sales_price#6, i_brand_id#9, i_brand#10] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum#13] +Results [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] + +(19) Exchange +Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Arguments: hashpartitioning(d_year#2, i_brand#10, i_brand_id#9, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#16] +Results [4]: [d_year#2, i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#16,17,2) AS sum_agg#19] + +(21) TakeOrderedAndProject +Input [4]: [d_year#2, brand_id#17, brand#18, sum_agg#19] +Arguments: 100, [d_year#2 ASC NULLS FIRST, sum_agg#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#2, brand_id#17, brand#18, sum_agg#19] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/simplified.txt index 23253bbd2..ed3a06904 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q3/simplified.txt @@ -1,26 +1,31 @@ -TakeOrderedAndProject [brand,brand_id,d_year,sum_agg] - WholeStageCodegen - HashAggregate [d_year,i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,brand_id,sum,sum(UnscaledValue(ss_ext_sales_price)),sum_agg] +TakeOrderedAndProject [d_year,sum_agg,brand_id,brand] + WholeStageCodegen (4) + HashAggregate [d_year,i_brand,i_brand_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,sum_agg,sum] InputAdapter Exchange [d_year,i_brand,i_brand_id] #1 - WholeStageCodegen - HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [d_year,i_brand,i_brand_id,ss_ext_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [d_year,ss_ext_sales_price,ss_item_sk] + WholeStageCodegen (3) + HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [d_year,ss_item_sk,ss_ext_sales_price] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] Project [d_date_sk,d_year] - Filter [d_date_sk,d_moy] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + WholeStageCodegen (1) + Filter [ss_sold_date_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [i_brand,i_brand_id,i_item_sk] - Filter [i_item_sk,i_manufact_id] - Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manufact_id] [i_brand,i_brand_id,i_item_sk,i_manufact_id] + WholeStageCodegen (2) + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manufact_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/explain.txt index 039bb9953..fffcc5ca3 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/explain.txt @@ -1,52 +1,303 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST,c_salutation#2 ASC NULLS FIRST,c_first_name#3 ASC NULLS FIRST,c_last_name#4 ASC NULLS FIRST,c_preferred_cust_flag#5 ASC NULLS FIRST,c_birth_day#6 ASC NULLS FIRST,c_birth_month#7 ASC NULLS FIRST,c_birth_year#8 ASC NULLS FIRST,c_birth_country#9 ASC NULLS FIRST,c_login#10 ASC NULLS FIRST,c_email_address#11 ASC NULLS FIRST,c_last_review_date#12 ASC NULLS FIRST,ctr_total_return#13 ASC NULLS FIRST], output=[c_customer_id#1,c_salutation#2,c_first_name#3,c_last_name#4,c_preferred_cust_flag#5,c_birth_day#6,c_birth_month#7,c_birth_year#8,c_birth_country#9,c_login#10,c_email_address#11,c_last_review_date#12,ctr_total_return#13]) -+- *(11) Project [c_customer_id#1, c_salutation#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_day#6, c_birth_month#7, c_birth_year#8, c_birth_country#9, c_login#10, c_email_address#11, c_last_review_date#12, ctr_total_return#13] - +- *(11) BroadcastHashJoin [c_current_addr_sk#14], [ca_address_sk#15], Inner, BuildRight - :- *(11) Project [ctr_total_return#13, c_customer_id#1, c_current_addr_sk#14, c_salutation#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_day#6, c_birth_month#7, c_birth_year#8, c_birth_country#9, c_login#10, c_email_address#11, c_last_review_date#12] - : +- *(11) BroadcastHashJoin [ctr_customer_sk#16], [cast(c_customer_sk#17 as bigint)], Inner, BuildRight - : :- *(11) Project [ctr_customer_sk#16, ctr_total_return#13] - : : +- *(11) BroadcastHashJoin [ctr_state#18], [ctr_state#18#19], Inner, BuildRight, (cast(ctr_total_return#13 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#20) - : : :- *(11) Filter isnotnull(ctr_total_return#13) - : : : +- *(11) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[sum(UnscaledValue(wr_return_amt#23))]) - : : : +- Exchange hashpartitioning(wr_returning_customer_sk#21, ca_state#22, 5) - : : : +- *(3) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[partial_sum(UnscaledValue(wr_return_amt#23))]) - : : : +- *(3) Project [wr_returning_customer_sk#21, wr_return_amt#23, ca_state#22] - : : : +- *(3) BroadcastHashJoin [wr_returning_addr_sk#24], [cast(ca_address_sk#15 as bigint)], Inner, BuildRight - : : : :- *(3) Project [wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] - : : : : +- *(3) BroadcastHashJoin [wr_returned_date_sk#25], [cast(d_date_sk#26 as bigint)], Inner, BuildRight - : : : : :- *(3) Project [wr_returned_date_sk#25, wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] - : : : : : +- *(3) Filter ((isnotnull(wr_returned_date_sk#25) && isnotnull(wr_returning_addr_sk#24)) && isnotnull(wr_returning_customer_sk#21)) - : : : : : +- *(3) FileScan parquet default.web_returns[wr_returned_date_sk#25,wr_returning_customer_sk#21,wr_returning_addr_sk#24,wr_return_amt#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk), IsNotNull(wr_returning_customer..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [ca_address_sk#15, ca_state#22] - : : : +- *(2) Filter (isnotnull(ca_address_sk#15) && isnotnull(ca_state#22)) - : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) - : : +- *(8) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#20) - : : +- *(8) HashAggregate(keys=[ctr_state#18], functions=[avg(ctr_total_return#13)]) - : : +- Exchange hashpartitioning(ctr_state#18, 5) - : : +- *(7) HashAggregate(keys=[ctr_state#18], functions=[partial_avg(ctr_total_return#13)]) - : : +- *(7) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[sum(UnscaledValue(wr_return_amt#23))]) - : : +- Exchange hashpartitioning(wr_returning_customer_sk#21, ca_state#22, 5) - : : +- *(6) HashAggregate(keys=[wr_returning_customer_sk#21, ca_state#22], functions=[partial_sum(UnscaledValue(wr_return_amt#23))]) - : : +- *(6) Project [wr_returning_customer_sk#21, wr_return_amt#23, ca_state#22] - : : +- *(6) BroadcastHashJoin [wr_returning_addr_sk#24], [cast(ca_address_sk#15 as bigint)], Inner, BuildRight - : : :- *(6) Project [wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] - : : : +- *(6) BroadcastHashJoin [wr_returned_date_sk#25], [cast(d_date_sk#26 as bigint)], Inner, BuildRight - : : : :- *(6) Project [wr_returned_date_sk#25, wr_returning_customer_sk#21, wr_returning_addr_sk#24, wr_return_amt#23] - : : : : +- *(6) Filter (isnotnull(wr_returned_date_sk#25) && isnotnull(wr_returning_addr_sk#24)) - : : : : +- *(6) FileScan parquet default.web_returns[wr_returned_date_sk#25,wr_returning_customer_sk#21,wr_returning_addr_sk#24,wr_return_amt#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (53) ++- * Project (52) + +- * BroadcastHashJoin Inner BuildRight (51) + :- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Project (39) + : : +- * BroadcastHashJoin Inner BuildRight (38) + : : :- * Filter (20) + : : : +- * HashAggregate (19) + : : : +- Exchange (18) + : : : +- * HashAggregate (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.web_returns (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.customer_address (11) + : : +- BroadcastExchange (37) + : : +- * Filter (36) + : : +- * HashAggregate (35) + : : +- Exchange (34) + : : +- * HashAggregate (33) + : : +- * HashAggregate (32) + : : +- Exchange (31) + : : +- * HashAggregate (30) + : : +- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Project (26) + : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : :- * Filter (23) + : : : : +- * ColumnarToRow (22) + : : : : +- Scan parquet default.web_returns (21) + : : : +- ReusedExchange (24) + : : +- ReusedExchange (27) + : +- BroadcastExchange (43) + : +- * Filter (42) + : +- * ColumnarToRow (41) + : +- Scan parquet default.customer (40) + +- BroadcastExchange (50) + +- * Project (49) + +- * Filter (48) + +- * ColumnarToRow (47) + +- Scan parquet default.customer_address (46) + + +(1) Scan parquet default.web_returns +Output [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk), IsNotNull(wr_returning_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] + +(3) Filter [codegen id : 3] +Input [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Condition : ((isnotnull(wr_returned_date_sk#1) AND isnotnull(wr_returning_addr_sk#3)) AND isnotnull(wr_returning_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2002)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_year#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [wr_returned_date_sk#1] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Input [5]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4, d_date_sk#5] + +(11) Scan parquet default.customer_address +Output [2]: [ca_address_sk#8, ca_state#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_state#9] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_state#9] +Condition : (isnotnull(ca_address_sk#8) AND isnotnull(ca_state#9)) + +(14) BroadcastExchange +Input [2]: [ca_address_sk#8, ca_state#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [wr_returning_addr_sk#3] +Right keys [1]: [cast(ca_address_sk#8 as bigint)] +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [wr_returning_customer_sk#2, wr_return_amt#4, ca_state#9] +Input [5]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4, ca_address_sk#8, ca_state#9] + +(17) HashAggregate [codegen id : 3] +Input [3]: [wr_returning_customer_sk#2, wr_return_amt#4, ca_state#9] +Keys [2]: [wr_returning_customer_sk#2, ca_state#9] +Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#4))] +Aggregate Attributes [1]: [sum#11] +Results [3]: [wr_returning_customer_sk#2, ca_state#9, sum#12] + +(18) Exchange +Input [3]: [wr_returning_customer_sk#2, ca_state#9, sum#12] +Arguments: hashpartitioning(wr_returning_customer_sk#2, ca_state#9, 5), true, [id=#13] + +(19) HashAggregate [codegen id : 11] +Input [3]: [wr_returning_customer_sk#2, ca_state#9, sum#12] +Keys [2]: [wr_returning_customer_sk#2, ca_state#9] +Functions [1]: [sum(UnscaledValue(wr_return_amt#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#4))#14] +Results [3]: [wr_returning_customer_sk#2 AS ctr_customer_sk#15, ca_state#9 AS ctr_state#16, MakeDecimal(sum(UnscaledValue(wr_return_amt#4))#14,17,2) AS ctr_total_return#17] + +(20) Filter [codegen id : 11] +Input [3]: [ctr_customer_sk#15, ctr_state#16, ctr_total_return#17] +Condition : isnotnull(ctr_total_return#17) + +(21) Scan parquet default.web_returns +Output [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_returning_addr_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 6] +Input [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] + +(23) Filter [codegen id : 6] +Input [4]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Condition : (isnotnull(wr_returned_date_sk#1) AND isnotnull(wr_returning_addr_sk#3)) + +(24) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(25) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [wr_returned_date_sk#1] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(26) Project [codegen id : 6] +Output [3]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4] +Input [5]: [wr_returned_date_sk#1, wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4, d_date_sk#5] + +(27) ReusedExchange [Reuses operator id: 14] +Output [2]: [ca_address_sk#8, ca_state#9] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [wr_returning_addr_sk#3] +Right keys [1]: [cast(ca_address_sk#8 as bigint)] +Join condition: None + +(29) Project [codegen id : 6] +Output [3]: [wr_returning_customer_sk#2, wr_return_amt#4, ca_state#9] +Input [5]: [wr_returning_customer_sk#2, wr_returning_addr_sk#3, wr_return_amt#4, ca_address_sk#8, ca_state#9] + +(30) HashAggregate [codegen id : 6] +Input [3]: [wr_returning_customer_sk#2, wr_return_amt#4, ca_state#9] +Keys [2]: [wr_returning_customer_sk#2, ca_state#9] +Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#4))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [wr_returning_customer_sk#2, ca_state#9, sum#19] + +(31) Exchange +Input [3]: [wr_returning_customer_sk#2, ca_state#9, sum#19] +Arguments: hashpartitioning(wr_returning_customer_sk#2, ca_state#9, 5), true, [id=#20] + +(32) HashAggregate [codegen id : 7] +Input [3]: [wr_returning_customer_sk#2, ca_state#9, sum#19] +Keys [2]: [wr_returning_customer_sk#2, ca_state#9] +Functions [1]: [sum(UnscaledValue(wr_return_amt#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#4))#21] +Results [2]: [ca_state#9 AS ctr_state#16, MakeDecimal(sum(UnscaledValue(wr_return_amt#4))#21,17,2) AS ctr_total_return#17] + +(33) HashAggregate [codegen id : 7] +Input [2]: [ctr_state#16, ctr_total_return#17] +Keys [1]: [ctr_state#16] +Functions [1]: [partial_avg(ctr_total_return#17)] +Aggregate Attributes [2]: [sum#22, count#23] +Results [3]: [ctr_state#16, sum#24, count#25] + +(34) Exchange +Input [3]: [ctr_state#16, sum#24, count#25] +Arguments: hashpartitioning(ctr_state#16, 5), true, [id=#26] + +(35) HashAggregate [codegen id : 8] +Input [3]: [ctr_state#16, sum#24, count#25] +Keys [1]: [ctr_state#16] +Functions [1]: [avg(ctr_total_return#17)] +Aggregate Attributes [1]: [avg(ctr_total_return#17)#27] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#17)#27) * 1.200000), DecimalType(24,7), true) AS (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16 AS ctr_state#16#29] + +(36) Filter [codegen id : 8] +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16#29] +Condition : isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28) + +(37) BroadcastExchange +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16#29] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#30] + +(38) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ctr_state#16] +Right keys [1]: [ctr_state#16#29] +Join condition: (cast(ctr_total_return#17 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28) + +(39) Project [codegen id : 11] +Output [2]: [ctr_customer_sk#15, ctr_total_return#17] +Input [5]: [ctr_customer_sk#15, ctr_state#16, ctr_total_return#17, (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16#29] + +(40) Scan parquet default.customer +Output [14]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 9] +Input [14]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44] + +(42) Filter [codegen id : 9] +Input [14]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44] +Condition : (isnotnull(c_customer_sk#31) AND isnotnull(c_current_addr_sk#33)) + +(43) BroadcastExchange +Input [14]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#45] + +(44) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ctr_customer_sk#15] +Right keys [1]: [cast(c_customer_sk#31 as bigint)] +Join condition: None + +(45) Project [codegen id : 11] +Output [14]: [ctr_total_return#17, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44] +Input [16]: [ctr_customer_sk#15, ctr_total_return#17, c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44] + +(46) Scan parquet default.customer_address +Output [2]: [ca_address_sk#8, ca_state#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 10] +Input [2]: [ca_address_sk#8, ca_state#9] + +(48) Filter [codegen id : 10] +Input [2]: [ca_address_sk#8, ca_state#9] +Condition : ((isnotnull(ca_state#9) AND (ca_state#9 = GA)) AND isnotnull(ca_address_sk#8)) + +(49) Project [codegen id : 10] +Output [1]: [ca_address_sk#8] +Input [2]: [ca_address_sk#8, ca_state#9] + +(50) BroadcastExchange +Input [1]: [ca_address_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#46] + +(51) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_current_addr_sk#33] +Right keys [1]: [ca_address_sk#8] +Join condition: None + +(52) Project [codegen id : 11] +Output [13]: [c_customer_id#32, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44, ctr_total_return#17] +Input [15]: [ctr_total_return#17, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44, ca_address_sk#8] + +(53) TakeOrderedAndProject +Input [13]: [c_customer_id#32, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44, ctr_total_return#17] +Arguments: 100, [c_customer_id#32 ASC NULLS FIRST, c_salutation#34 ASC NULLS FIRST, c_first_name#35 ASC NULLS FIRST, c_last_name#36 ASC NULLS FIRST, c_preferred_cust_flag#37 ASC NULLS FIRST, c_birth_day#38 ASC NULLS FIRST, c_birth_month#39 ASC NULLS FIRST, c_birth_year#40 ASC NULLS FIRST, c_birth_country#41 ASC NULLS FIRST, c_login#42 ASC NULLS FIRST, c_email_address#43 ASC NULLS FIRST, c_last_review_date#44 ASC NULLS FIRST, ctr_total_return#17 ASC NULLS FIRST], [c_customer_id#32, c_salutation#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_day#38, c_birth_month#39, c_birth_year#40, c_birth_country#41, c_login#42, c_email_address#43, c_last_review_date#44, ctr_total_return#17] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/simplified.txt index a17fc2aea..f9ea8ad58 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q30/simplified.txt @@ -1,70 +1,78 @@ -TakeOrderedAndProject [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_customer_id,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation,ctr_total_return] - WholeStageCodegen - Project [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_customer_id,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation,ctr_total_return] +TakeOrderedAndProject [c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date,ctr_total_return] + WholeStageCodegen (11) + Project [c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date,ctr_total_return] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation,ctr_total_return] - BroadcastHashJoin [c_customer_sk,ctr_customer_sk] + Project [ctr_total_return,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] + BroadcastHashJoin [ctr_customer_sk,c_customer_sk] Project [ctr_customer_sk,ctr_total_return] - BroadcastHashJoin [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_state,ctr_state,ctr_total_return] + BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] Filter [ctr_total_return] - HashAggregate [ca_state,sum,sum(UnscaledValue(wr_return_amt)),wr_returning_customer_sk] [ctr_customer_sk,ctr_state,ctr_total_return,sum,sum(UnscaledValue(wr_return_amt))] + HashAggregate [wr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(wr_return_amt)),ctr_customer_sk,ctr_state,ctr_total_return,sum] InputAdapter - Exchange [ca_state,wr_returning_customer_sk] #1 - WholeStageCodegen - HashAggregate [ca_state,sum,sum,wr_return_amt,wr_returning_customer_sk] [sum,sum] - Project [ca_state,wr_return_amt,wr_returning_customer_sk] - BroadcastHashJoin [ca_address_sk,wr_returning_addr_sk] - Project [wr_return_amt,wr_returning_addr_sk,wr_returning_customer_sk] - BroadcastHashJoin [d_date_sk,wr_returned_date_sk] - Project [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] - Filter [wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] - Scan parquet default.web_returns [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] + Exchange [wr_returning_customer_sk,ca_state] #1 + WholeStageCodegen (3) + HashAggregate [wr_returning_customer_sk,ca_state,wr_return_amt] [sum,sum] + Project [wr_returning_customer_sk,wr_return_amt,ca_state] + BroadcastHashJoin [wr_returning_addr_sk,ca_address_sk] + Project [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Filter [wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [ca_address_sk,ca_state] - Filter [ca_address_sk,ca_state] - Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + WholeStageCodegen (2) + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (8) Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] - HashAggregate [avg(ctr_total_return),count,ctr_state,sum] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),avg(ctr_total_return),count,ctr_state,sum] + HashAggregate [ctr_state,sum,count] [avg(ctr_total_return),(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_state,sum,count] InputAdapter Exchange [ctr_state] #5 - WholeStageCodegen - HashAggregate [count,count,ctr_state,ctr_total_return,sum,sum] [count,count,sum,sum] - HashAggregate [ca_state,sum,sum(UnscaledValue(wr_return_amt)),wr_returning_customer_sk] [ctr_state,ctr_total_return,sum,sum(UnscaledValue(wr_return_amt))] + WholeStageCodegen (7) + HashAggregate [ctr_state,ctr_total_return] [sum,count,sum,count] + HashAggregate [wr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(wr_return_amt)),ctr_state,ctr_total_return,sum] InputAdapter - Exchange [ca_state,wr_returning_customer_sk] #6 - WholeStageCodegen - HashAggregate [ca_state,sum,sum,wr_return_amt,wr_returning_customer_sk] [sum,sum] - Project [ca_state,wr_return_amt,wr_returning_customer_sk] - BroadcastHashJoin [ca_address_sk,wr_returning_addr_sk] - Project [wr_return_amt,wr_returning_addr_sk,wr_returning_customer_sk] - BroadcastHashJoin [d_date_sk,wr_returned_date_sk] - Project [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] - Filter [wr_returned_date_sk,wr_returning_addr_sk] - Scan parquet default.web_returns [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] [wr_return_amt,wr_returned_date_sk,wr_returning_addr_sk,wr_returning_customer_sk] + Exchange [wr_returning_customer_sk,ca_state] #6 + WholeStageCodegen (6) + HashAggregate [wr_returning_customer_sk,ca_state,wr_return_amt] [sum,sum] + Project [wr_returning_customer_sk,wr_return_amt,ca_state] + BroadcastHashJoin [wr_returning_addr_sk,ca_address_sk] + Project [wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Filter [wr_returned_date_sk,wr_returning_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_returned_date_sk,wr_returning_customer_sk,wr_returning_addr_sk,wr_return_amt] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #2 + ReusedExchange [d_date_sk] #2 InputAdapter - ReusedExchange [ca_address_sk,ca_state] [ca_address_sk,ca_state] #3 + ReusedExchange [ca_address_sk,ca_state] #3 InputAdapter BroadcastExchange #7 - WholeStageCodegen - Project [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation] - Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation] [c_birth_country,c_birth_day,c_birth_month,c_birth_year,c_current_addr_sk,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_last_review_date,c_login,c_preferred_cust_flag,c_salutation] + WholeStageCodegen (9) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address,c_last_review_date] InputAdapter BroadcastExchange #8 - WholeStageCodegen + WholeStageCodegen (10) Project [ca_address_sk] - Filter [ca_address_sk,ca_state] - Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/explain.txt index 7c12adfde..0ed0929fa 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/explain.txt @@ -1,100 +1,563 @@ == Physical Plan == -*(25) Sort [ca_county#1 ASC NULLS FIRST], true, 0 -+- Exchange rangepartitioning(ca_county#1 ASC NULLS FIRST, 5) - +- *(24) Project [ca_county#1, d_year#2, CheckOverflow((promote_precision(web_sales#3) / promote_precision(web_sales#4)), DecimalType(37,20)) AS web_q1_q2_increase#5, CheckOverflow((promote_precision(store_sales#6) / promote_precision(store_sales#7)), DecimalType(37,20)) AS store_q1_q2_increase#8, CheckOverflow((promote_precision(web_sales#9) / promote_precision(web_sales#3)), DecimalType(37,20)) AS web_q2_q3_increase#10, CheckOverflow((promote_precision(store_sales#11) / promote_precision(store_sales#6)), DecimalType(37,20)) AS store_q2_q3_increase#12] - +- *(24) BroadcastHashJoin [ca_county#13], [ca_county#14], Inner, BuildRight, (CASE WHEN (web_sales#3 > 0.00) THEN CheckOverflow((promote_precision(web_sales#9) / promote_precision(web_sales#3)), DecimalType(37,20)) ELSE null END > CASE WHEN (store_sales#6 > 0.00) THEN CheckOverflow((promote_precision(store_sales#11) / promote_precision(store_sales#6)), DecimalType(37,20)) ELSE null END) - :- *(24) Project [ca_county#1, d_year#2, store_sales#7, store_sales#6, store_sales#11, ca_county#13, web_sales#4, web_sales#3] - : +- *(24) BroadcastHashJoin [ca_county#13], [ca_county#15], Inner, BuildRight, (CASE WHEN (web_sales#4 > 0.00) THEN CheckOverflow((promote_precision(web_sales#3) / promote_precision(web_sales#4)), DecimalType(37,20)) ELSE null END > CASE WHEN (store_sales#7 > 0.00) THEN CheckOverflow((promote_precision(store_sales#6) / promote_precision(store_sales#7)), DecimalType(37,20)) ELSE null END) - : :- *(24) BroadcastHashJoin [ca_county#1], [ca_county#13], Inner, BuildRight - : : :- *(24) Project [ca_county#1, d_year#2, store_sales#7, store_sales#6, store_sales#11] - : : : +- *(24) BroadcastHashJoin [ca_county#16], [ca_county#17], Inner, BuildRight - : : : :- *(24) BroadcastHashJoin [ca_county#1], [ca_county#16], Inner, BuildRight - : : : : :- *(24) HashAggregate(keys=[ca_county#1, d_qoy#18, d_year#2], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) - : : : : : +- Exchange hashpartitioning(ca_county#1, d_qoy#18, d_year#2, 5) - : : : : : +- *(3) HashAggregate(keys=[ca_county#1, d_qoy#18, d_year#2], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#19))]) - : : : : : +- *(3) Project [ss_ext_sales_price#19, d_year#2, d_qoy#18, ca_county#1] - : : : : : +- *(3) BroadcastHashJoin [ss_addr_sk#20], [ca_address_sk#21], Inner, BuildRight - : : : : : :- *(3) Project [ss_addr_sk#20, ss_ext_sales_price#19, d_year#2, d_qoy#18] - : : : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#23], Inner, BuildRight - : : : : : : :- *(3) Project [ss_sold_date_sk#22, ss_addr_sk#20, ss_ext_sales_price#19] - : : : : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#22) && isnotnull(ss_addr_sk#20)) - : : : : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct - : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : +- *(1) Project [d_date_sk#23, d_year#2, d_qoy#18] - : : : : : : +- *(1) Filter ((((isnotnull(d_qoy#18) && isnotnull(d_year#2)) && (d_qoy#18 = 1)) && (d_year#2 = 2000)) && isnotnull(d_date_sk#23)) - : : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#23,d_year#2,d_qoy#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,1), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(2) Project [ca_address_sk#21, ca_county#1] - : : : : : +- *(2) Filter (isnotnull(ca_address_sk#21) && isnotnull(ca_county#1)) - : : : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#21,ca_county#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : : : : +- *(7) HashAggregate(keys=[ca_county#16, d_qoy#24, d_year#25], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) - : : : : +- Exchange hashpartitioning(ca_county#16, d_qoy#24, d_year#25, 5) - : : : : +- *(6) HashAggregate(keys=[ca_county#16, d_qoy#24, d_year#25], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#19))]) - : : : : +- *(6) Project [ss_ext_sales_price#19, d_year#25, d_qoy#24, ca_county#16] - : : : : +- *(6) BroadcastHashJoin [ss_addr_sk#20], [ca_address_sk#26], Inner, BuildRight - : : : : :- *(6) Project [ss_addr_sk#20, ss_ext_sales_price#19, d_year#25, d_qoy#24] - : : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#27], Inner, BuildRight - : : : : : :- *(6) Project [ss_sold_date_sk#22, ss_addr_sk#20, ss_ext_sales_price#19] - : : : : : : +- *(6) Filter (isnotnull(ss_sold_date_sk#22) && isnotnull(ss_addr_sk#20)) - : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(4) Project [d_date_sk#27, d_year#25, d_qoy#24] - : : : : : +- *(4) Filter ((((isnotnull(d_qoy#24) && isnotnull(d_year#25)) && (d_qoy#24 = 2)) && (d_year#25 = 2000)) && isnotnull(d_date_sk#27)) - : : : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#27,d_year#25,d_qoy#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - : : : : +- ReusedExchange [ca_address_sk#26, ca_county#16], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : : : +- *(11) HashAggregate(keys=[ca_county#17, d_qoy#28, d_year#29], functions=[sum(UnscaledValue(ss_ext_sales_price#19))]) - : : : +- Exchange hashpartitioning(ca_county#17, d_qoy#28, d_year#29, 5) - : : : +- *(10) HashAggregate(keys=[ca_county#17, d_qoy#28, d_year#29], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#19))]) - : : : +- *(10) Project [ss_ext_sales_price#19, d_year#29, d_qoy#28, ca_county#17] - : : : +- *(10) BroadcastHashJoin [ss_addr_sk#20], [ca_address_sk#30], Inner, BuildRight - : : : :- *(10) Project [ss_addr_sk#20, ss_ext_sales_price#19, d_year#29, d_qoy#28] - : : : : +- *(10) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#31], Inner, BuildRight - : : : : :- *(10) Project [ss_sold_date_sk#22, ss_addr_sk#20, ss_ext_sales_price#19] - : : : : : +- *(10) Filter (isnotnull(ss_sold_date_sk#22) && isnotnull(ss_addr_sk#20)) - : : : : : +- *(10) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_addr_sk#20,ss_ext_sales_price#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(8) Project [d_date_sk#31, d_year#29, d_qoy#28] - : : : : +- *(8) Filter ((((isnotnull(d_qoy#28) && isnotnull(d_year#29)) && (d_qoy#28 = 3)) && (d_year#29 = 2000)) && isnotnull(d_date_sk#31)) - : : : : +- *(8) FileScan parquet default.date_dim[d_date_sk#31,d_year#29,d_qoy#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - : : : +- ReusedExchange [ca_address_sk#30, ca_county#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : : +- *(15) HashAggregate(keys=[ca_county#13, d_qoy#32, d_year#33], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) - : : +- Exchange hashpartitioning(ca_county#13, d_qoy#32, d_year#33, 5) - : : +- *(14) HashAggregate(keys=[ca_county#13, d_qoy#32, d_year#33], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#34))]) - : : +- *(14) Project [ws_ext_sales_price#34, d_year#33, d_qoy#32, ca_county#13] - : : +- *(14) BroadcastHashJoin [ws_bill_addr_sk#35], [ca_address_sk#36], Inner, BuildRight - : : :- *(14) Project [ws_bill_addr_sk#35, ws_ext_sales_price#34, d_year#33, d_qoy#32] - : : : +- *(14) BroadcastHashJoin [ws_sold_date_sk#37], [d_date_sk#38], Inner, BuildRight - : : : :- *(14) Project [ws_sold_date_sk#37, ws_bill_addr_sk#35, ws_ext_sales_price#34] - : : : : +- *(14) Filter (isnotnull(ws_sold_date_sk#37) && isnotnull(ws_bill_addr_sk#35)) - : : : : +- *(14) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#38, d_year#33, d_qoy#32], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- ReusedExchange [ca_address_sk#36, ca_county#13], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : +- *(19) HashAggregate(keys=[ca_county#15, d_qoy#39, d_year#40], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) - : +- Exchange hashpartitioning(ca_county#15, d_qoy#39, d_year#40, 5) - : +- *(18) HashAggregate(keys=[ca_county#15, d_qoy#39, d_year#40], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#34))]) - : +- *(18) Project [ws_ext_sales_price#34, d_year#40, d_qoy#39, ca_county#15] - : +- *(18) BroadcastHashJoin [ws_bill_addr_sk#35], [ca_address_sk#41], Inner, BuildRight - : :- *(18) Project [ws_bill_addr_sk#35, ws_ext_sales_price#34, d_year#40, d_qoy#39] - : : +- *(18) BroadcastHashJoin [ws_sold_date_sk#37], [d_date_sk#42], Inner, BuildRight - : : :- *(18) Project [ws_sold_date_sk#37, ws_bill_addr_sk#35, ws_ext_sales_price#34] - : : : +- *(18) Filter (isnotnull(ws_sold_date_sk#37) && isnotnull(ws_bill_addr_sk#35)) - : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#42, d_year#40, d_qoy#39], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [ca_address_sk#41, ca_county#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - +- *(23) HashAggregate(keys=[ca_county#14, d_qoy#43, d_year#44], functions=[sum(UnscaledValue(ws_ext_sales_price#34))]) - +- Exchange hashpartitioning(ca_county#14, d_qoy#43, d_year#44, 5) - +- *(22) HashAggregate(keys=[ca_county#14, d_qoy#43, d_year#44], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#34))]) - +- *(22) Project [ws_ext_sales_price#34, d_year#44, d_qoy#43, ca_county#14] - +- *(22) BroadcastHashJoin [ws_bill_addr_sk#35], [ca_address_sk#45], Inner, BuildRight - :- *(22) Project [ws_bill_addr_sk#35, ws_ext_sales_price#34, d_year#44, d_qoy#43] - : +- *(22) BroadcastHashJoin [ws_sold_date_sk#37], [d_date_sk#46], Inner, BuildRight - : :- *(22) Project [ws_sold_date_sk#37, ws_bill_addr_sk#35, ws_ext_sales_price#34] - : : +- *(22) Filter (isnotnull(ws_sold_date_sk#37) && isnotnull(ws_bill_addr_sk#35)) - : : +- *(22) FileScan parquet default.web_sales[ws_sold_date_sk#37,ws_bill_addr_sk#35,ws_ext_sales_price#34] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#46, d_year#44, d_qoy#43], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [ca_address_sk#45, ca_county#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +* Sort (99) ++- Exchange (98) + +- * Project (97) + +- * BroadcastHashJoin Inner BuildRight (96) + :- * Project (82) + : +- * BroadcastHashJoin Inner BuildRight (81) + : :- * BroadcastHashJoin Inner BuildRight (67) + : : :- * Project (53) + : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : :- * BroadcastHashJoin Inner BuildRight (35) + : : : : :- * HashAggregate (18) + : : : : : +- Exchange (17) + : : : : : +- * HashAggregate (16) + : : : : : +- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.date_dim (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet default.customer_address (10) + : : : : +- BroadcastExchange (34) + : : : : +- * HashAggregate (33) + : : : : +- Exchange (32) + : : : : +- * HashAggregate (31) + : : : : +- * Project (30) + : : : : +- * BroadcastHashJoin Inner BuildRight (29) + : : : : :- * Project (27) + : : : : : +- * BroadcastHashJoin Inner BuildRight (26) + : : : : : :- * Filter (21) + : : : : : : +- * ColumnarToRow (20) + : : : : : : +- Scan parquet default.store_sales (19) + : : : : : +- BroadcastExchange (25) + : : : : : +- * Filter (24) + : : : : : +- * ColumnarToRow (23) + : : : : : +- Scan parquet default.date_dim (22) + : : : : +- ReusedExchange (28) + : : : +- BroadcastExchange (51) + : : : +- * HashAggregate (50) + : : : +- Exchange (49) + : : : +- * HashAggregate (48) + : : : +- * Project (47) + : : : +- * BroadcastHashJoin Inner BuildRight (46) + : : : :- * Project (44) + : : : : +- * BroadcastHashJoin Inner BuildRight (43) + : : : : :- * Filter (38) + : : : : : +- * ColumnarToRow (37) + : : : : : +- Scan parquet default.store_sales (36) + : : : : +- BroadcastExchange (42) + : : : : +- * Filter (41) + : : : : +- * ColumnarToRow (40) + : : : : +- Scan parquet default.date_dim (39) + : : : +- ReusedExchange (45) + : : +- BroadcastExchange (66) + : : +- * HashAggregate (65) + : : +- Exchange (64) + : : +- * HashAggregate (63) + : : +- * Project (62) + : : +- * BroadcastHashJoin Inner BuildRight (61) + : : :- * Project (59) + : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : :- * Filter (56) + : : : : +- * ColumnarToRow (55) + : : : : +- Scan parquet default.web_sales (54) + : : : +- ReusedExchange (57) + : : +- ReusedExchange (60) + : +- BroadcastExchange (80) + : +- * HashAggregate (79) + : +- Exchange (78) + : +- * HashAggregate (77) + : +- * Project (76) + : +- * BroadcastHashJoin Inner BuildRight (75) + : :- * Project (73) + : : +- * BroadcastHashJoin Inner BuildRight (72) + : : :- * Filter (70) + : : : +- * ColumnarToRow (69) + : : : +- Scan parquet default.web_sales (68) + : : +- ReusedExchange (71) + : +- ReusedExchange (74) + +- BroadcastExchange (95) + +- * HashAggregate (94) + +- Exchange (93) + +- * HashAggregate (92) + +- * Project (91) + +- * BroadcastHashJoin Inner BuildRight (90) + :- * Project (88) + : +- * BroadcastHashJoin Inner BuildRight (87) + : :- * Filter (85) + : : +- * ColumnarToRow (84) + : : +- Scan parquet default.web_sales (83) + : +- ReusedExchange (86) + +- ReusedExchange (89) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] + +(3) Filter [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,1), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 1)) AND (d_year#5 = 2000)) AND isnotnull(d_date_sk#4)) + +(7) BroadcastExchange +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(9) Project [codegen id : 3] +Output [4]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#5, d_qoy#6] +Input [6]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, d_date_sk#4, d_year#5, d_qoy#6] + +(10) Scan parquet default.customer_address +Output [2]: [ca_address_sk#8, ca_county#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_county#9] + +(12) Filter [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_county#9] +Condition : (isnotnull(ca_address_sk#8) AND isnotnull(ca_county#9)) + +(13) BroadcastExchange +Input [2]: [ca_address_sk#8, ca_county#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_addr_sk#2] +Right keys [1]: [ca_address_sk#8] +Join condition: None + +(15) Project [codegen id : 3] +Output [4]: [ss_ext_sales_price#3, d_year#5, d_qoy#6, ca_county#9] +Input [6]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#5, d_qoy#6, ca_address_sk#8, ca_county#9] + +(16) HashAggregate [codegen id : 3] +Input [4]: [ss_ext_sales_price#3, d_year#5, d_qoy#6, ca_county#9] +Keys [3]: [ca_county#9, d_qoy#6, d_year#5] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#11] +Results [4]: [ca_county#9, d_qoy#6, d_year#5, sum#12] + +(17) Exchange +Input [4]: [ca_county#9, d_qoy#6, d_year#5, sum#12] +Arguments: hashpartitioning(ca_county#9, d_qoy#6, d_year#5, 5), true, [id=#13] + +(18) HashAggregate [codegen id : 24] +Input [4]: [ca_county#9, d_qoy#6, d_year#5, sum#12] +Keys [3]: [ca_county#9, d_qoy#6, d_year#5] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#14] +Results [3]: [ca_county#9, d_year#5, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#14,17,2) AS store_sales#15] + +(19) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 6] +Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] + +(21) Filter [codegen id : 6] +Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#2)) + +(22) Scan parquet default.date_dim +Output [3]: [d_date_sk#16, d_year#17, d_qoy#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#16, d_year#17, d_qoy#18] + +(24) Filter [codegen id : 4] +Input [3]: [d_date_sk#16, d_year#17, d_qoy#18] +Condition : ((((isnotnull(d_qoy#18) AND isnotnull(d_year#17)) AND (d_qoy#18 = 2)) AND (d_year#17 = 2000)) AND isnotnull(d_date_sk#16)) + +(25) BroadcastExchange +Input [3]: [d_date_sk#16, d_year#17, d_qoy#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#16] +Join condition: None + +(27) Project [codegen id : 6] +Output [4]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#17, d_qoy#18] +Input [6]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, d_date_sk#16, d_year#17, d_qoy#18] + +(28) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#20, ca_county#21] + +(29) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_addr_sk#2] +Right keys [1]: [ca_address_sk#20] +Join condition: None + +(30) Project [codegen id : 6] +Output [4]: [ss_ext_sales_price#3, d_year#17, d_qoy#18, ca_county#21] +Input [6]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#17, d_qoy#18, ca_address_sk#20, ca_county#21] + +(31) HashAggregate [codegen id : 6] +Input [4]: [ss_ext_sales_price#3, d_year#17, d_qoy#18, ca_county#21] +Keys [3]: [ca_county#21, d_qoy#18, d_year#17] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#22] +Results [4]: [ca_county#21, d_qoy#18, d_year#17, sum#23] + +(32) Exchange +Input [4]: [ca_county#21, d_qoy#18, d_year#17, sum#23] +Arguments: hashpartitioning(ca_county#21, d_qoy#18, d_year#17, 5), true, [id=#24] + +(33) HashAggregate [codegen id : 7] +Input [4]: [ca_county#21, d_qoy#18, d_year#17, sum#23] +Keys [3]: [ca_county#21, d_qoy#18, d_year#17] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#25] +Results [2]: [ca_county#21, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#25,17,2) AS store_sales#26] + +(34) BroadcastExchange +Input [2]: [ca_county#21, store_sales#26] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#27] + +(35) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#9] +Right keys [1]: [ca_county#21] +Join condition: None + +(36) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 10] +Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] + +(38) Filter [codegen id : 10] +Input [3]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#2)) + +(39) Scan parquet default.date_dim +Output [3]: [d_date_sk#28, d_year#29, d_qoy#30] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 8] +Input [3]: [d_date_sk#28, d_year#29, d_qoy#30] + +(41) Filter [codegen id : 8] +Input [3]: [d_date_sk#28, d_year#29, d_qoy#30] +Condition : ((((isnotnull(d_qoy#30) AND isnotnull(d_year#29)) AND (d_qoy#30 = 3)) AND (d_year#29 = 2000)) AND isnotnull(d_date_sk#28)) + +(42) BroadcastExchange +Input [3]: [d_date_sk#28, d_year#29, d_qoy#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] + +(43) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#28] +Join condition: None + +(44) Project [codegen id : 10] +Output [4]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#29, d_qoy#30] +Input [6]: [ss_sold_date_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, d_date_sk#28, d_year#29, d_qoy#30] + +(45) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#32, ca_county#33] + +(46) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_addr_sk#2] +Right keys [1]: [ca_address_sk#32] +Join condition: None + +(47) Project [codegen id : 10] +Output [4]: [ss_ext_sales_price#3, d_year#29, d_qoy#30, ca_county#33] +Input [6]: [ss_addr_sk#2, ss_ext_sales_price#3, d_year#29, d_qoy#30, ca_address_sk#32, ca_county#33] + +(48) HashAggregate [codegen id : 10] +Input [4]: [ss_ext_sales_price#3, d_year#29, d_qoy#30, ca_county#33] +Keys [3]: [ca_county#33, d_qoy#30, d_year#29] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#34] +Results [4]: [ca_county#33, d_qoy#30, d_year#29, sum#35] + +(49) Exchange +Input [4]: [ca_county#33, d_qoy#30, d_year#29, sum#35] +Arguments: hashpartitioning(ca_county#33, d_qoy#30, d_year#29, 5), true, [id=#36] + +(50) HashAggregate [codegen id : 11] +Input [4]: [ca_county#33, d_qoy#30, d_year#29, sum#35] +Keys [3]: [ca_county#33, d_qoy#30, d_year#29] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#37] +Results [2]: [ca_county#33, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#37,17,2) AS store_sales#38] + +(51) BroadcastExchange +Input [2]: [ca_county#33, store_sales#38] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#39] + +(52) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#21] +Right keys [1]: [ca_county#33] +Join condition: None + +(53) Project [codegen id : 24] +Output [5]: [ca_county#9, d_year#5, store_sales#15, store_sales#26, store_sales#38] +Input [7]: [ca_county#9, d_year#5, store_sales#15, ca_county#21, store_sales#26, ca_county#33, store_sales#38] + +(54) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 14] +Input [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] + +(56) Filter [codegen id : 14] +Input [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] +Condition : (isnotnull(ws_sold_date_sk#40) AND isnotnull(ws_bill_addr_sk#41)) + +(57) ReusedExchange [Reuses operator id: 7] +Output [3]: [d_date_sk#43, d_year#44, d_qoy#45] + +(58) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#40] +Right keys [1]: [d_date_sk#43] +Join condition: None + +(59) Project [codegen id : 14] +Output [4]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#44, d_qoy#45] +Input [6]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42, d_date_sk#43, d_year#44, d_qoy#45] + +(60) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#46, ca_county#47] + +(61) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_bill_addr_sk#41] +Right keys [1]: [ca_address_sk#46] +Join condition: None + +(62) Project [codegen id : 14] +Output [4]: [ws_ext_sales_price#42, d_year#44, d_qoy#45, ca_county#47] +Input [6]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#44, d_qoy#45, ca_address_sk#46, ca_county#47] + +(63) HashAggregate [codegen id : 14] +Input [4]: [ws_ext_sales_price#42, d_year#44, d_qoy#45, ca_county#47] +Keys [3]: [ca_county#47, d_qoy#45, d_year#44] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#42))] +Aggregate Attributes [1]: [sum#48] +Results [4]: [ca_county#47, d_qoy#45, d_year#44, sum#49] + +(64) Exchange +Input [4]: [ca_county#47, d_qoy#45, d_year#44, sum#49] +Arguments: hashpartitioning(ca_county#47, d_qoy#45, d_year#44, 5), true, [id=#50] + +(65) HashAggregate [codegen id : 15] +Input [4]: [ca_county#47, d_qoy#45, d_year#44, sum#49] +Keys [3]: [ca_county#47, d_qoy#45, d_year#44] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#42))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#42))#51] +Results [2]: [ca_county#47, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#42))#51,17,2) AS web_sales#52] + +(66) BroadcastExchange +Input [2]: [ca_county#47, web_sales#52] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#53] + +(67) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#9] +Right keys [1]: [ca_county#47] +Join condition: None + +(68) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)] +ReadSchema: struct + +(69) ColumnarToRow [codegen id : 18] +Input [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] + +(70) Filter [codegen id : 18] +Input [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] +Condition : (isnotnull(ws_sold_date_sk#40) AND isnotnull(ws_bill_addr_sk#41)) + +(71) ReusedExchange [Reuses operator id: 25] +Output [3]: [d_date_sk#54, d_year#55, d_qoy#56] + +(72) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_sold_date_sk#40] +Right keys [1]: [d_date_sk#54] +Join condition: None + +(73) Project [codegen id : 18] +Output [4]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#55, d_qoy#56] +Input [6]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42, d_date_sk#54, d_year#55, d_qoy#56] + +(74) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#57, ca_county#58] + +(75) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_bill_addr_sk#41] +Right keys [1]: [ca_address_sk#57] +Join condition: None + +(76) Project [codegen id : 18] +Output [4]: [ws_ext_sales_price#42, d_year#55, d_qoy#56, ca_county#58] +Input [6]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#55, d_qoy#56, ca_address_sk#57, ca_county#58] + +(77) HashAggregate [codegen id : 18] +Input [4]: [ws_ext_sales_price#42, d_year#55, d_qoy#56, ca_county#58] +Keys [3]: [ca_county#58, d_qoy#56, d_year#55] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#42))] +Aggregate Attributes [1]: [sum#59] +Results [4]: [ca_county#58, d_qoy#56, d_year#55, sum#60] + +(78) Exchange +Input [4]: [ca_county#58, d_qoy#56, d_year#55, sum#60] +Arguments: hashpartitioning(ca_county#58, d_qoy#56, d_year#55, 5), true, [id=#61] + +(79) HashAggregate [codegen id : 19] +Input [4]: [ca_county#58, d_qoy#56, d_year#55, sum#60] +Keys [3]: [ca_county#58, d_qoy#56, d_year#55] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#42))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#42))#62] +Results [2]: [ca_county#58, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#42))#62,17,2) AS web_sales#63] + +(80) BroadcastExchange +Input [2]: [ca_county#58, web_sales#63] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#64] + +(81) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#47] +Right keys [1]: [ca_county#58] +Join condition: (CASE WHEN (web_sales#52 > 0.00) THEN CheckOverflow((promote_precision(web_sales#63) / promote_precision(web_sales#52)), DecimalType(37,20), true) ELSE null END > CASE WHEN (store_sales#15 > 0.00) THEN CheckOverflow((promote_precision(store_sales#26) / promote_precision(store_sales#15)), DecimalType(37,20), true) ELSE null END) + +(82) Project [codegen id : 24] +Output [8]: [ca_county#9, d_year#5, store_sales#15, store_sales#26, store_sales#38, ca_county#47, web_sales#52, web_sales#63] +Input [9]: [ca_county#9, d_year#5, store_sales#15, store_sales#26, store_sales#38, ca_county#47, web_sales#52, ca_county#58, web_sales#63] + +(83) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk)] +ReadSchema: struct + +(84) ColumnarToRow [codegen id : 22] +Input [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] + +(85) Filter [codegen id : 22] +Input [3]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42] +Condition : (isnotnull(ws_sold_date_sk#40) AND isnotnull(ws_bill_addr_sk#41)) + +(86) ReusedExchange [Reuses operator id: 42] +Output [3]: [d_date_sk#65, d_year#66, d_qoy#67] + +(87) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_sold_date_sk#40] +Right keys [1]: [d_date_sk#65] +Join condition: None + +(88) Project [codegen id : 22] +Output [4]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#66, d_qoy#67] +Input [6]: [ws_sold_date_sk#40, ws_bill_addr_sk#41, ws_ext_sales_price#42, d_date_sk#65, d_year#66, d_qoy#67] + +(89) ReusedExchange [Reuses operator id: 13] +Output [2]: [ca_address_sk#68, ca_county#69] + +(90) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_bill_addr_sk#41] +Right keys [1]: [ca_address_sk#68] +Join condition: None + +(91) Project [codegen id : 22] +Output [4]: [ws_ext_sales_price#42, d_year#66, d_qoy#67, ca_county#69] +Input [6]: [ws_bill_addr_sk#41, ws_ext_sales_price#42, d_year#66, d_qoy#67, ca_address_sk#68, ca_county#69] + +(92) HashAggregate [codegen id : 22] +Input [4]: [ws_ext_sales_price#42, d_year#66, d_qoy#67, ca_county#69] +Keys [3]: [ca_county#69, d_qoy#67, d_year#66] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#42))] +Aggregate Attributes [1]: [sum#70] +Results [4]: [ca_county#69, d_qoy#67, d_year#66, sum#71] + +(93) Exchange +Input [4]: [ca_county#69, d_qoy#67, d_year#66, sum#71] +Arguments: hashpartitioning(ca_county#69, d_qoy#67, d_year#66, 5), true, [id=#72] + +(94) HashAggregate [codegen id : 23] +Input [4]: [ca_county#69, d_qoy#67, d_year#66, sum#71] +Keys [3]: [ca_county#69, d_qoy#67, d_year#66] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#42))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#42))#73] +Results [2]: [ca_county#69, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#42))#73,17,2) AS web_sales#74] + +(95) BroadcastExchange +Input [2]: [ca_county#69, web_sales#74] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#75] + +(96) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ca_county#47] +Right keys [1]: [ca_county#69] +Join condition: (CASE WHEN (web_sales#63 > 0.00) THEN CheckOverflow((promote_precision(web_sales#74) / promote_precision(web_sales#63)), DecimalType(37,20), true) ELSE null END > CASE WHEN (store_sales#26 > 0.00) THEN CheckOverflow((promote_precision(store_sales#38) / promote_precision(store_sales#26)), DecimalType(37,20), true) ELSE null END) + +(97) Project [codegen id : 24] +Output [6]: [ca_county#9, d_year#5, CheckOverflow((promote_precision(web_sales#63) / promote_precision(web_sales#52)), DecimalType(37,20), true) AS web_q1_q2_increase#76, CheckOverflow((promote_precision(store_sales#26) / promote_precision(store_sales#15)), DecimalType(37,20), true) AS store_q1_q2_increase#77, CheckOverflow((promote_precision(web_sales#74) / promote_precision(web_sales#63)), DecimalType(37,20), true) AS web_q2_q3_increase#78, CheckOverflow((promote_precision(store_sales#38) / promote_precision(store_sales#26)), DecimalType(37,20), true) AS store_q2_q3_increase#79] +Input [10]: [ca_county#9, d_year#5, store_sales#15, store_sales#26, store_sales#38, ca_county#47, web_sales#52, web_sales#63, ca_county#69, web_sales#74] + +(98) Exchange +Input [6]: [ca_county#9, d_year#5, web_q1_q2_increase#76, store_q1_q2_increase#77, web_q2_q3_increase#78, store_q2_q3_increase#79] +Arguments: rangepartitioning(ca_county#9 ASC NULLS FIRST, 5), true, [id=#80] + +(99) Sort [codegen id : 25] +Input [6]: [ca_county#9, d_year#5, web_q1_q2_increase#76, store_q1_q2_increase#77, web_q2_q3_increase#78, store_q2_q3_increase#79] +Arguments: [ca_county#9 ASC NULLS FIRST], true, 0 + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/simplified.txt index bf41c1709..5a5d61f9d 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q31/simplified.txt @@ -1,140 +1,150 @@ -WholeStageCodegen +WholeStageCodegen (25) Sort [ca_county] InputAdapter Exchange [ca_county] #1 - WholeStageCodegen - Project [ca_county,d_year,store_sales,store_sales,store_sales,web_sales,web_sales,web_sales] - BroadcastHashJoin [ca_county,ca_county,store_sales,store_sales,web_sales,web_sales] - Project [ca_county,ca_county,d_year,store_sales,store_sales,store_sales,web_sales,web_sales] - BroadcastHashJoin [ca_county,ca_county,store_sales,store_sales,web_sales,web_sales] + WholeStageCodegen (24) + Project [ca_county,d_year,web_sales,web_sales,store_sales,store_sales,web_sales,store_sales] + BroadcastHashJoin [ca_county,ca_county,web_sales,web_sales,store_sales,store_sales] + Project [ca_county,d_year,store_sales,store_sales,store_sales,ca_county,web_sales,web_sales] + BroadcastHashJoin [ca_county,ca_county,web_sales,web_sales,store_sales,store_sales] BroadcastHashJoin [ca_county,ca_county] Project [ca_county,d_year,store_sales,store_sales,store_sales] BroadcastHashJoin [ca_county,ca_county] BroadcastHashJoin [ca_county,ca_county] - HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ss_ext_sales_price))] [store_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] InputAdapter Exchange [ca_county,d_qoy,d_year] #2 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price,sum,sum] [sum,sum] - Project [ca_county,d_qoy,d_year,ss_ext_sales_price] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [d_qoy,d_year,ss_addr_sk,ss_ext_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] - Filter [ss_addr_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + WholeStageCodegen (3) + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk,d_qoy,d_year] - Filter [d_date_sk,d_qoy,d_year] - Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] + WholeStageCodegen (1) + Filter [d_qoy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [ca_address_sk,ca_county] - Filter [ca_address_sk,ca_county] - Scan parquet default.customer_address [ca_address_sk,ca_county] [ca_address_sk,ca_county] + WholeStageCodegen (2) + Filter [ca_address_sk,ca_county] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county] InputAdapter BroadcastExchange #5 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ss_ext_sales_price))] [store_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] + WholeStageCodegen (7) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] InputAdapter Exchange [ca_county,d_qoy,d_year] #6 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price,sum,sum] [sum,sum] - Project [ca_county,d_qoy,d_year,ss_ext_sales_price] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [d_qoy,d_year,ss_addr_sk,ss_ext_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] - Filter [ss_addr_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + WholeStageCodegen (6) + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #7 - WholeStageCodegen - Project [d_date_sk,d_qoy,d_year] - Filter [d_date_sk,d_qoy,d_year] - Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] + WholeStageCodegen (4) + Filter [d_qoy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] InputAdapter - ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 + ReusedExchange [ca_address_sk,ca_county] #4 InputAdapter BroadcastExchange #8 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ss_ext_sales_price))] [store_sales,sum,sum(UnscaledValue(ss_ext_sales_price))] + WholeStageCodegen (11) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] InputAdapter Exchange [ca_county,d_qoy,d_year] #9 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price,sum,sum] [sum,sum] - Project [ca_county,d_qoy,d_year,ss_ext_sales_price] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [d_qoy,d_year,ss_addr_sk,ss_ext_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] - Filter [ss_addr_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_sold_date_sk] + WholeStageCodegen (10) + HashAggregate [ca_county,d_qoy,d_year,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_addr_sk,ss_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_addr_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #10 - WholeStageCodegen - Project [d_date_sk,d_qoy,d_year] - Filter [d_date_sk,d_qoy,d_year] - Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] + WholeStageCodegen (8) + Filter [d_qoy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] InputAdapter - ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 + ReusedExchange [ca_address_sk,ca_county] #4 InputAdapter BroadcastExchange #11 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),web_sales] + WholeStageCodegen (15) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] InputAdapter Exchange [ca_county,d_qoy,d_year] #12 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,sum,sum,ws_ext_sales_price] [sum,sum] - Project [ca_county,d_qoy,d_year,ws_ext_sales_price] - BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] - Project [d_qoy,d_year,ws_bill_addr_sk,ws_ext_sales_price] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] - Filter [ws_bill_addr_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + WholeStageCodegen (14) + HashAggregate [ca_county,d_qoy,d_year,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_bill_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] InputAdapter - ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #3 + ReusedExchange [d_date_sk,d_year,d_qoy] #3 InputAdapter - ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 + ReusedExchange [ca_address_sk,ca_county] #4 InputAdapter BroadcastExchange #13 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),web_sales] + WholeStageCodegen (19) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] InputAdapter Exchange [ca_county,d_qoy,d_year] #14 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,sum,sum,ws_ext_sales_price] [sum,sum] - Project [ca_county,d_qoy,d_year,ws_ext_sales_price] - BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] - Project [d_qoy,d_year,ws_bill_addr_sk,ws_ext_sales_price] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] - Filter [ws_bill_addr_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + WholeStageCodegen (18) + HashAggregate [ca_county,d_qoy,d_year,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_bill_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] InputAdapter - ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #7 + ReusedExchange [d_date_sk,d_year,d_qoy] #7 InputAdapter - ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 + ReusedExchange [ca_address_sk,ca_county] #4 InputAdapter BroadcastExchange #15 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),web_sales] + WholeStageCodegen (23) + HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] InputAdapter Exchange [ca_county,d_qoy,d_year] #16 - WholeStageCodegen - HashAggregate [ca_county,d_qoy,d_year,sum,sum,ws_ext_sales_price] [sum,sum] - Project [ca_county,d_qoy,d_year,ws_ext_sales_price] - BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] - Project [d_qoy,d_year,ws_bill_addr_sk,ws_ext_sales_price] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] - Filter [ws_bill_addr_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_sold_date_sk] + WholeStageCodegen (22) + HashAggregate [ca_county,d_qoy,d_year,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,d_year,d_qoy,ca_county] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_bill_addr_sk,ws_ext_sales_price,d_year,d_qoy] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_bill_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] InputAdapter - ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #10 + ReusedExchange [d_date_sk,d_year,d_qoy] #10 InputAdapter - ReusedExchange [ca_address_sk,ca_county] [ca_address_sk,ca_county] #4 + ReusedExchange [ca_address_sk,ca_county] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/explain.txt index 99470fde0..27f93fd7a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/explain.txt @@ -1,30 +1,175 @@ == Physical Plan == -CollectLimit 100 -+- *(6) Project [1 AS excess discount amount #1] - +- *(6) BroadcastHashJoin [cs_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight - :- *(6) Project [cs_sold_date_sk#2] - : +- *(6) BroadcastHashJoin [i_item_sk#4], [cs_item_sk#5#6], Inner, BuildRight, (cast(cs_ext_discount_amt#7 as decimal(14,7)) > (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#8) - : :- *(6) Project [cs_sold_date_sk#2, cs_ext_discount_amt#7, i_item_sk#4] - : : +- *(6) BroadcastHashJoin [cs_item_sk#5], [i_item_sk#4], Inner, BuildRight - : : :- *(6) Project [cs_sold_date_sk#2, cs_item_sk#5, cs_ext_discount_amt#7] - : : : +- *(6) Filter ((isnotnull(cs_item_sk#5) && isnotnull(cs_ext_discount_amt#7)) && isnotnull(cs_sold_date_sk#2)) - : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#2,cs_item_sk#5,cs_ext_discount_amt#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_ext_discount_amt), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [i_item_sk#4] - : : +- *(1) Filter ((isnotnull(i_manufact_id#9) && (i_manufact_id#9 = 977)) && isnotnull(i_item_sk#4)) - : : +- *(1) FileScan parquet default.item[i_item_sk#4,i_manufact_id#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,977), IsNotNull(i_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : +- *(4) Filter isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#8) - : +- *(4) HashAggregate(keys=[cs_item_sk#5], functions=[avg(UnscaledValue(cs_ext_discount_amt#7))]) - : +- Exchange hashpartitioning(cs_item_sk#5, 5) - : +- *(3) HashAggregate(keys=[cs_item_sk#5], functions=[partial_avg(UnscaledValue(cs_ext_discount_amt#7))]) - : +- *(3) Project [cs_item_sk#5, cs_ext_discount_amt#7] - : +- *(3) BroadcastHashJoin [cs_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight - : :- *(3) Project [cs_sold_date_sk#2, cs_item_sk#5, cs_ext_discount_amt#7] - : : +- *(3) Filter (isnotnull(cs_sold_date_sk#2) && isnotnull(cs_item_sk#5)) - : : +- *(3) FileScan parquet default.catalog_sales[cs_sold_date_sk#2,cs_item_sk#5,cs_ext_discount_amt#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#3] - : +- *(2) Filter (((isnotnull(d_date#10) && (cast(d_date#10 as string) >= 2000-01-27)) && (d_date#10 <= 11073)) && isnotnull(d_date_sk#3)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#3,d_date#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)], ReadSchema: struct - +- ReusedExchange [d_date_sk#3], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +CollectLimit (31) ++- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (27) + : +- * BroadcastHashJoin Inner BuildRight (26) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.catalog_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.item (4) + : +- BroadcastExchange (25) + : +- * Filter (24) + : +- * HashAggregate (23) + : +- Exchange (22) + : +- * HashAggregate (21) + : +- * Project (20) + : +- * BroadcastHashJoin Inner BuildRight (19) + : :- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.catalog_sales (11) + : +- BroadcastExchange (18) + : +- * Project (17) + : +- * Filter (16) + : +- * ColumnarToRow (15) + : +- Scan parquet default.date_dim (14) + +- ReusedExchange (28) + + +(1) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_ext_discount_amt), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3] + +(3) Filter [codegen id : 6] +Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3] +Condition : ((isnotnull(cs_item_sk#2) AND isnotnull(cs_ext_discount_amt#3)) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [2]: [i_item_sk#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,977), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#4, i_manufact_id#5] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#4, i_manufact_id#5] +Condition : ((isnotnull(i_manufact_id#5) AND (i_manufact_id#5 = 977)) AND isnotnull(i_item_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [i_item_sk#4] +Input [2]: [i_item_sk#4, i_manufact_id#5] + +(8) BroadcastExchange +Input [1]: [i_item_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(10) Project [codegen id : 6] +Output [3]: [cs_sold_date_sk#1, cs_ext_discount_amt#3, i_item_sk#4] +Input [4]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3, i_item_sk#4] + +(11) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 3] +Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3] + +(13) Filter [codegen id : 3] +Input [3]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3] +Condition : (isnotnull(cs_sold_date_sk#1) AND isnotnull(cs_item_sk#2)) + +(14) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_date#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#7, d_date#8] + +(16) Filter [codegen id : 2] +Input [2]: [d_date_sk#7, d_date#8] +Condition : (((isnotnull(d_date#8) AND (d_date#8 >= 10983)) AND (d_date#8 <= 11073)) AND isnotnull(d_date_sk#7)) + +(17) Project [codegen id : 2] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#8] + +(18) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(19) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(20) Project [codegen id : 3] +Output [2]: [cs_item_sk#2, cs_ext_discount_amt#3] +Input [4]: [cs_sold_date_sk#1, cs_item_sk#2, cs_ext_discount_amt#3, d_date_sk#7] + +(21) HashAggregate [codegen id : 3] +Input [2]: [cs_item_sk#2, cs_ext_discount_amt#3] +Keys [1]: [cs_item_sk#2] +Functions [1]: [partial_avg(UnscaledValue(cs_ext_discount_amt#3))] +Aggregate Attributes [2]: [sum#10, count#11] +Results [3]: [cs_item_sk#2, sum#12, count#13] + +(22) Exchange +Input [3]: [cs_item_sk#2, sum#12, count#13] +Arguments: hashpartitioning(cs_item_sk#2, 5), true, [id=#14] + +(23) HashAggregate [codegen id : 4] +Input [3]: [cs_item_sk#2, sum#12, count#13] +Keys [1]: [cs_item_sk#2] +Functions [1]: [avg(UnscaledValue(cs_ext_discount_amt#3))] +Aggregate Attributes [1]: [avg(UnscaledValue(cs_ext_discount_amt#3))#15] +Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(cs_ext_discount_amt#3))#15 / 100.0) as decimal(11,6)))), DecimalType(14,7), true) AS (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16, cs_item_sk#2 AS cs_item_sk#2#17] + +(24) Filter [codegen id : 4] +Input [2]: [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16, cs_item_sk#2#17] +Condition : isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16) + +(25) BroadcastExchange +Input [2]: [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16, cs_item_sk#2#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#18] + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_item_sk#4] +Right keys [1]: [cs_item_sk#2#17] +Join condition: (cast(cs_ext_discount_amt#3 as decimal(14,7)) > (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16) + +(27) Project [codegen id : 6] +Output [1]: [cs_sold_date_sk#1] +Input [5]: [cs_sold_date_sk#1, cs_ext_discount_amt#3, i_item_sk#4, (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))#16, cs_item_sk#2#17] + +(28) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#7] + +(29) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(30) Project [codegen id : 6] +Output [1]: [1 AS excess discount amount #19] +Input [2]: [cs_sold_date_sk#1, d_date_sk#7] + +(31) CollectLimit +Input [1]: [excess discount amount #19] +Arguments: 100 + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/simplified.txt index aeba4db83..7e4b826cc 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q32/simplified.txt @@ -1,39 +1,45 @@ CollectLimit - WholeStageCodegen + WholeStageCodegen (6) Project BroadcastHashJoin [cs_sold_date_sk,d_date_sk] Project [cs_sold_date_sk] - BroadcastHashJoin [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),cs_ext_discount_amt,cs_item_sk,i_item_sk] - Project [cs_ext_discount_amt,cs_sold_date_sk,i_item_sk] + BroadcastHashJoin [i_item_sk,cs_item_sk,cs_ext_discount_amt,(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))] + Project [cs_sold_date_sk,cs_ext_discount_amt,i_item_sk] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] - Filter [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] + Filter [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] InputAdapter BroadcastExchange #1 - WholeStageCodegen + WholeStageCodegen (1) Project [i_item_sk] - Filter [i_item_sk,i_manufact_id] - Scan parquet default.item [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] + Filter [i_manufact_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_manufact_id] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (4) Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6)))] - HashAggregate [avg(UnscaledValue(cs_ext_discount_amt)),count,cs_item_sk,sum] [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),avg(UnscaledValue(cs_ext_discount_amt)),count,cs_item_sk,sum] + HashAggregate [cs_item_sk,sum,count] [avg(UnscaledValue(cs_ext_discount_amt)),(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(cs_ext_discount_amt) AS DECIMAL(11,6))),cs_item_sk,sum,count] InputAdapter Exchange [cs_item_sk] #3 - WholeStageCodegen - HashAggregate [count,count,cs_ext_discount_amt,cs_item_sk,sum,sum] [count,count,sum,sum] - Project [cs_ext_discount_amt,cs_item_sk] + WholeStageCodegen (3) + HashAggregate [cs_item_sk,cs_ext_discount_amt] [sum,count,sum,count] + Project [cs_item_sk,cs_ext_discount_amt] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] - Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] [cs_ext_discount_amt,cs_item_sk,cs_sold_date_sk] + Filter [cs_sold_date_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_discount_amt] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #4 + ReusedExchange [d_date_sk] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/explain.txt index 63e591a7c..8d1558a01 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/explain.txt @@ -1,65 +1,378 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output=[i_manufact_id#2,total_sales#1]) -+- *(20) HashAggregate(keys=[i_manufact_id#2], functions=[sum(total_sales#3)]) - +- Exchange hashpartitioning(i_manufact_id#2, 5) - +- *(19) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(total_sales#3)]) - +- Union - :- *(6) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- Exchange hashpartitioning(i_manufact_id#2, 5) - : +- *(5) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- *(5) Project [ss_ext_sales_price#4, i_manufact_id#2] - : +- *(5) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight - : :- *(5) Project [ss_item_sk#5, ss_ext_sales_price#4] - : : +- *(5) BroadcastHashJoin [ss_addr_sk#7], [ca_address_sk#8], Inner, BuildRight - : : :- *(5) Project [ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] - : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight - : : : :- *(5) Project [ss_sold_date_sk#9, ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] - : : : : +- *(5) Filter ((isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#7)) && isnotnull(ss_item_sk#5)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#10] - : : : +- *(1) Filter ((((isnotnull(d_year#11) && isnotnull(d_moy#12)) && (d_year#11 = 1998)) && (d_moy#12 = 5)) && isnotnull(d_date_sk#10)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,5), IsNotNull(d_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [ca_address_sk#8] - : : +- *(2) Filter ((isnotnull(ca_gmt_offset#13) && (ca_gmt_offset#13 = -5.00)) && isnotnull(ca_address_sk#8)) - : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) BroadcastHashJoin [i_manufact_id#2], [i_manufact_id#2#14], LeftSemi, BuildRight - : :- *(4) Project [i_item_sk#6, i_manufact_id#2] - : : +- *(4) Filter isnotnull(i_item_sk#6) - : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_manufact_id#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [i_manufact_id#2 AS i_manufact_id#2#14] - : +- *(3) Filter (isnotnull(i_category#15) && (i_category#15 = Electronics)) - : +- *(3) FileScan parquet default.item[i_category#15,i_manufact_id#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Electronics)], ReadSchema: struct - :- *(12) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) - : +- Exchange hashpartitioning(i_manufact_id#2, 5) - : +- *(11) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) - : +- *(11) Project [cs_ext_sales_price#16, i_manufact_id#2] - : +- *(11) BroadcastHashJoin [cs_item_sk#17], [i_item_sk#6], Inner, BuildRight - : :- *(11) Project [cs_item_sk#17, cs_ext_sales_price#16] - : : +- *(11) BroadcastHashJoin [cs_bill_addr_sk#18], [ca_address_sk#8], Inner, BuildRight - : : :- *(11) Project [cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] - : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#19], [d_date_sk#10], Inner, BuildRight - : : : :- *(11) Project [cs_sold_date_sk#19, cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] - : : : : +- *(11) Filter ((isnotnull(cs_sold_date_sk#19) && isnotnull(cs_bill_addr_sk#18)) && isnotnull(cs_item_sk#17)) - : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [i_item_sk#6, i_manufact_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(18) HashAggregate(keys=[i_manufact_id#2], functions=[sum(UnscaledValue(ws_ext_sales_price#20))]) - +- Exchange hashpartitioning(i_manufact_id#2, 5) - +- *(17) HashAggregate(keys=[i_manufact_id#2], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#20))]) - +- *(17) Project [ws_ext_sales_price#20, i_manufact_id#2] - +- *(17) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#6], Inner, BuildRight - :- *(17) Project [ws_item_sk#21, ws_ext_sales_price#20] - : +- *(17) BroadcastHashJoin [ws_bill_addr_sk#22], [ca_address_sk#8], Inner, BuildRight - : :- *(17) Project [ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] - : : +- *(17) BroadcastHashJoin [ws_sold_date_sk#23], [d_date_sk#10], Inner, BuildRight - : : :- *(17) Project [ws_sold_date_sk#23, ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] - : : : +- *(17) Filter ((isnotnull(ws_sold_date_sk#23) && isnotnull(ws_bill_addr_sk#22)) && isnotnull(ws_item_sk#21)) - : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [i_item_sk#6, i_manufact_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +TakeOrderedAndProject (67) ++- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- Union (63) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.customer_address (11) + : +- BroadcastExchange (27) + : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : :- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.item (18) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet default.item (21) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.catalog_sales (33) + : : : +- ReusedExchange (36) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- * HashAggregate (62) + +- Exchange (61) + +- * HashAggregate (60) + +- * Project (59) + +- * BroadcastHashJoin Inner BuildRight (58) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * Project (53) + : : +- * BroadcastHashJoin Inner BuildRight (52) + : : :- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet default.web_sales (48) + : : +- ReusedExchange (51) + : +- ReusedExchange (54) + +- ReusedExchange (57) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] + +(3) Filter [codegen id : 5] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,5), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 5)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 5] +Output [3]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, d_date_sk#5] + +(11) Scan parquet default.customer_address +Output [2]: [ca_address_sk#9, ca_gmt_offset#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] +Condition : ((isnotnull(ca_gmt_offset#10) AND (ca_gmt_offset#10 = -5.00)) AND isnotnull(ca_address_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [ca_address_sk#9] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(15) BroadcastExchange +Input [1]: [ca_address_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(17) Project [codegen id : 5] +Output [2]: [ss_item_sk#2, ss_ext_sales_price#4] +Input [4]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, ca_address_sk#9] + +(18) Scan parquet default.item +Output [2]: [i_item_sk#12, i_manufact_id#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#12, i_manufact_id#13] + +(20) Filter [codegen id : 4] +Input [2]: [i_item_sk#12, i_manufact_id#13] +Condition : isnotnull(i_item_sk#12) + +(21) Scan parquet default.item +Output [2]: [i_category#14, i_manufact_id#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Electronics)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 3] +Input [2]: [i_category#14, i_manufact_id#13] + +(23) Filter [codegen id : 3] +Input [2]: [i_category#14, i_manufact_id#13] +Condition : (isnotnull(i_category#14) AND (i_category#14 = Electronics)) + +(24) Project [codegen id : 3] +Output [1]: [i_manufact_id#13 AS i_manufact_id#13#15] +Input [2]: [i_category#14, i_manufact_id#13] + +(25) BroadcastExchange +Input [1]: [i_manufact_id#13#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(26) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_manufact_id#13] +Right keys [1]: [i_manufact_id#13#15] +Join condition: None + +(27) BroadcastExchange +Input [2]: [i_item_sk#12, i_manufact_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(29) Project [codegen id : 5] +Output [2]: [ss_ext_sales_price#4, i_manufact_id#13] +Input [4]: [ss_item_sk#2, ss_ext_sales_price#4, i_item_sk#12, i_manufact_id#13] + +(30) HashAggregate [codegen id : 5] +Input [2]: [ss_ext_sales_price#4, i_manufact_id#13] +Keys [1]: [i_manufact_id#13] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [1]: [sum#18] +Results [2]: [i_manufact_id#13, sum#19] + +(31) Exchange +Input [2]: [i_manufact_id#13, sum#19] +Arguments: hashpartitioning(i_manufact_id#13, 5), true, [id=#20] + +(32) HashAggregate [codegen id : 6] +Input [2]: [i_manufact_id#13, sum#19] +Keys [1]: [i_manufact_id#13] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#4))#21] +Results [2]: [i_manufact_id#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#21,17,2) AS total_sales#22] + +(33) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] + +(35) Filter [codegen id : 11] +Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Condition : ((isnotnull(cs_sold_date_sk#23) AND isnotnull(cs_bill_addr_sk#24)) AND isnotnull(cs_item_sk#25)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#23] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Input [5]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, d_date_sk#5] + +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_addr_sk#24] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(41) Project [codegen id : 11] +Output [2]: [cs_item_sk#25, cs_ext_sales_price#26] +Input [4]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, ca_address_sk#9] + +(42) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#12, i_manufact_id#13] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#25] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(44) Project [codegen id : 11] +Output [2]: [cs_ext_sales_price#26, i_manufact_id#13] +Input [4]: [cs_item_sk#25, cs_ext_sales_price#26, i_item_sk#12, i_manufact_id#13] + +(45) HashAggregate [codegen id : 11] +Input [2]: [cs_ext_sales_price#26, i_manufact_id#13] +Keys [1]: [i_manufact_id#13] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#26))] +Aggregate Attributes [1]: [sum#27] +Results [2]: [i_manufact_id#13, sum#28] + +(46) Exchange +Input [2]: [i_manufact_id#13, sum#28] +Arguments: hashpartitioning(i_manufact_id#13, 5), true, [id=#29] + +(47) HashAggregate [codegen id : 12] +Input [2]: [i_manufact_id#13, sum#28] +Keys [1]: [i_manufact_id#13] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#26))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#26))#30] +Results [2]: [i_manufact_id#13, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#26))#30,17,2) AS total_sales#31] + +(48) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 17] +Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] + +(50) Filter [codegen id : 17] +Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Condition : ((isnotnull(ws_sold_date_sk#32) AND isnotnull(ws_bill_addr_sk#34)) AND isnotnull(ws_item_sk#33)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(52) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(53) Project [codegen id : 17] +Output [3]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Input [5]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, d_date_sk#5] + +(54) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] + +(55) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_bill_addr_sk#34] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(56) Project [codegen id : 17] +Output [2]: [ws_item_sk#33, ws_ext_sales_price#35] +Input [4]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, ca_address_sk#9] + +(57) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#12, i_manufact_id#13] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#33] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(59) Project [codegen id : 17] +Output [2]: [ws_ext_sales_price#35, i_manufact_id#13] +Input [4]: [ws_item_sk#33, ws_ext_sales_price#35, i_item_sk#12, i_manufact_id#13] + +(60) HashAggregate [codegen id : 17] +Input [2]: [ws_ext_sales_price#35, i_manufact_id#13] +Keys [1]: [i_manufact_id#13] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#35))] +Aggregate Attributes [1]: [sum#36] +Results [2]: [i_manufact_id#13, sum#37] + +(61) Exchange +Input [2]: [i_manufact_id#13, sum#37] +Arguments: hashpartitioning(i_manufact_id#13, 5), true, [id=#38] + +(62) HashAggregate [codegen id : 18] +Input [2]: [i_manufact_id#13, sum#37] +Keys [1]: [i_manufact_id#13] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#35))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#35))#39] +Results [2]: [i_manufact_id#13, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#35))#39,17,2) AS total_sales#40] + +(63) Union + +(64) HashAggregate [codegen id : 19] +Input [2]: [i_manufact_id#13, total_sales#22] +Keys [1]: [i_manufact_id#13] +Functions [1]: [partial_sum(total_sales#22)] +Aggregate Attributes [2]: [sum#41, isEmpty#42] +Results [3]: [i_manufact_id#13, sum#43, isEmpty#44] + +(65) Exchange +Input [3]: [i_manufact_id#13, sum#43, isEmpty#44] +Arguments: hashpartitioning(i_manufact_id#13, 5), true, [id=#45] + +(66) HashAggregate [codegen id : 20] +Input [3]: [i_manufact_id#13, sum#43, isEmpty#44] +Keys [1]: [i_manufact_id#13] +Functions [1]: [sum(total_sales#22)] +Aggregate Attributes [1]: [sum(total_sales#22)#46] +Results [2]: [i_manufact_id#13, sum(total_sales#22)#46 AS total_sales#47] + +(67) TakeOrderedAndProject +Input [2]: [i_manufact_id#13, total_sales#47] +Arguments: 100, [total_sales#47 ASC NULLS FIRST], [i_manufact_id#13, total_sales#47] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/simplified.txt index 28a14a1ea..14787f0bb 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q33/simplified.txt @@ -1,91 +1,101 @@ -TakeOrderedAndProject [i_manufact_id,total_sales] - WholeStageCodegen - HashAggregate [i_manufact_id,sum,sum(total_sales)] [sum,sum(total_sales),total_sales] +TakeOrderedAndProject [total_sales,i_manufact_id] + WholeStageCodegen (20) + HashAggregate [i_manufact_id,sum,isEmpty] [sum(total_sales),total_sales,sum,isEmpty] InputAdapter Exchange [i_manufact_id] #1 - WholeStageCodegen - HashAggregate [i_manufact_id,sum,sum,total_sales] [sum,sum] + WholeStageCodegen (19) + HashAggregate [i_manufact_id,total_sales] [sum,isEmpty,sum,isEmpty] InputAdapter Union - WholeStageCodegen - HashAggregate [i_manufact_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_ext_sales_price)),total_sales] + WholeStageCodegen (6) + HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] InputAdapter Exchange [i_manufact_id] #2 - WholeStageCodegen - HashAggregate [i_manufact_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [i_manufact_id,ss_ext_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - Filter [ss_addr_sk,ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + WholeStageCodegen (5) + HashAggregate [i_manufact_id,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_addr_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (2) Project [ca_address_sk] - Filter [ca_address_sk,ca_gmt_offset] - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] + Filter [ca_gmt_offset,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (4) BroadcastHashJoin [i_manufact_id,i_manufact_id] - Project [i_item_sk,i_manufact_id] - Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_manufact_id] InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (3) Project [i_manufact_id] Filter [i_category] - Scan parquet default.item [i_category,i_manufact_id] [i_category,i_manufact_id] - WholeStageCodegen - HashAggregate [i_manufact_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum,sum(UnscaledValue(cs_ext_sales_price)),total_sales] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_manufact_id] + WholeStageCodegen (12) + HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] InputAdapter Exchange [i_manufact_id] #7 - WholeStageCodegen - HashAggregate [cs_ext_sales_price,i_manufact_id,sum,sum] [sum,sum] + WholeStageCodegen (11) + HashAggregate [i_manufact_id,cs_ext_sales_price] [sum,sum] Project [cs_ext_sales_price,i_manufact_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_ext_sales_price,cs_item_sk] - BroadcastHashJoin [ca_address_sk,cs_bill_addr_sk] - Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk] + Project [cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] + Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] - Filter [cs_bill_addr_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + Filter [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 + ReusedExchange [d_date_sk] #3 InputAdapter - ReusedExchange [ca_address_sk] [ca_address_sk] #4 + ReusedExchange [ca_address_sk] #4 InputAdapter - ReusedExchange [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] #5 - WholeStageCodegen - HashAggregate [i_manufact_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),total_sales] + ReusedExchange [i_item_sk,i_manufact_id] #5 + WholeStageCodegen (18) + HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] InputAdapter Exchange [i_manufact_id] #8 - WholeStageCodegen - HashAggregate [i_manufact_id,sum,sum,ws_ext_sales_price] [sum,sum] - Project [i_manufact_id,ws_ext_sales_price] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_ext_sales_price,ws_item_sk] - BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] - Filter [ws_bill_addr_sk,ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + WholeStageCodegen (17) + HashAggregate [i_manufact_id,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,i_manufact_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_bill_addr_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 + ReusedExchange [d_date_sk] #3 InputAdapter - ReusedExchange [ca_address_sk] [ca_address_sk] #4 + ReusedExchange [ca_address_sk] #4 InputAdapter - ReusedExchange [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] #5 + ReusedExchange [i_item_sk,i_manufact_id] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/explain.txt index 3dccb55ac..18f465cae 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/explain.txt @@ -1,34 +1,203 @@ == Physical Plan == -*(7) Sort [c_last_name#1 ASC NULLS FIRST, c_first_name#2 ASC NULLS FIRST, c_salutation#3 ASC NULLS FIRST, c_preferred_cust_flag#4 DESC NULLS LAST], true, 0 -+- Exchange rangepartitioning(c_last_name#1 ASC NULLS FIRST, c_first_name#2 ASC NULLS FIRST, c_salutation#3 ASC NULLS FIRST, c_preferred_cust_flag#4 DESC NULLS LAST, 5) - +- *(6) Project [c_last_name#1, c_first_name#2, c_salutation#3, c_preferred_cust_flag#4, ss_ticket_number#5, cnt#6] - +- *(6) BroadcastHashJoin [ss_customer_sk#7], [c_customer_sk#8], Inner, BuildRight - :- *(6) Filter ((cnt#6 >= 15) && (cnt#6 <= 20)) - : +- *(6) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#7], functions=[count(1)]) - : +- Exchange hashpartitioning(ss_ticket_number#5, ss_customer_sk#7, 5) - : +- *(4) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#7], functions=[partial_count(1)]) - : +- *(4) Project [ss_customer_sk#7, ss_ticket_number#5] - : +- *(4) BroadcastHashJoin [ss_hdemo_sk#9], [hd_demo_sk#10], Inner, BuildRight - : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_ticket_number#5] - : : +- *(4) BroadcastHashJoin [ss_store_sk#11], [s_store_sk#12], Inner, BuildRight - : : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#5] - : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight - : : : :- *(4) Project [ss_sold_date_sk#13, ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#5] - : : : : +- *(4) Filter (((isnotnull(ss_sold_date_sk#13) && isnotnull(ss_store_sk#11)) && isnotnull(ss_hdemo_sk#9)) && isnotnull(ss_customer_sk#7)) - : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_customer_sk#7,ss_hdemo_sk#9,ss_store_sk#11,ss_ticket_number#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#14] - : : : +- *(1) Filter (((((d_dom#15 >= 1) && (d_dom#15 <= 3)) || ((d_dom#15 >= 25) && (d_dom#15 <= 28))) && d_year#16 IN (1999,2000,2001)) && isnotnull(d_date_sk#14)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_dom#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),Le..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [s_store_sk#12] - : : +- *(2) Filter ((isnotnull(s_county#17) && (s_county#17 = Williamson County)) && isnotnull(s_store_sk#12)) - : : +- *(2) FileScan parquet default.store[s_store_sk#12,s_county#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [hd_demo_sk#10] - : +- *(3) Filter ((((isnotnull(hd_vehicle_count#18) && ((hd_buy_potential#19 = >10000) || (hd_buy_potential#19 = unknown))) && (hd_vehicle_count#18 > 0)) && (CASE WHEN (hd_vehicle_count#18 > 0) THEN (cast(hd_dep_count#20 as double) / cast(hd_vehicle_count#18 as double)) ELSE null END > 1.2)) && isnotnull(hd_demo_sk#10)) - : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#10,hd_buy_potential#19,hd_dep_count#20,hd_vehicle_count#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknow..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(5) Project [c_customer_sk#8, c_salutation#3, c_first_name#2, c_last_name#1, c_preferred_cust_flag#4] - +- *(5) Filter isnotnull(c_customer_sk#8) - +- *(5) FileScan parquet default.customer[c_customer_sk#8,c_salutation#3,c_first_name#2,c_last_name#1,c_preferred_cust_flag#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : (((((d_dom#8 >= 1) AND (d_dom#8 <= 3)) OR ((d_dom#8 >= 25) AND (d_dom#8 <= 28))) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, d_date_sk#6] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#10, s_county#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] +Condition : ((isnotnull(s_county#11) AND (s_county#11 = Williamson County)) AND isnotnull(s_store_sk#10)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#10] +Input [2]: [s_store_sk#10, s_county#11] + +(15) BroadcastExchange +Input [1]: [s_store_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#10] +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5] +Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, s_store_sk#10] + +(18) Scan parquet default.household_demographics +Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(20) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#13] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#13] +Join condition: None + +(24) Project [codegen id : 4] +Output [2]: [ss_customer_sk#2, ss_ticket_number#5] +Input [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5, hd_demo_sk#13] + +(25) HashAggregate [codegen id : 4] +Input [2]: [ss_customer_sk#2, ss_ticket_number#5] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#18] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] + +(26) Exchange +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] + +(27) HashAggregate [codegen id : 6] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#21] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count(1)#21 AS cnt#22] + +(28) Filter [codegen id : 6] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Condition : ((cnt#22 >= 15) AND (cnt#22 <= 20)) + +(29) Scan parquet default.customer +Output [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] + +(31) Filter [codegen id : 5] +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Condition : isnotnull(c_customer_sk#23) + +(32) BroadcastExchange +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#23] +Join condition: None + +(34) Project [codegen id : 6] +Output [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] + +(35) Exchange +Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, 5), true, [id=#29] + +(36) Sort [codegen id : 7] +Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Arguments: [c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST], true, 0 + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/simplified.txt index 89b0a6f50..5af07f1d4 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q34/simplified.txt @@ -1,46 +1,54 @@ -WholeStageCodegen - Sort [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] +WholeStageCodegen (7) + Sort [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag] InputAdapter - Exchange [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] #1 - WholeStageCodegen - Project [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation,cnt,ss_ticket_number] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Exchange [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag] #1 + WholeStageCodegen (6) + Project [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] Filter [cnt] - HashAggregate [count,count(1),ss_customer_sk,ss_ticket_number] [cnt,count,count(1)] + HashAggregate [ss_ticket_number,ss_customer_sk,count] [count(1),cnt,count] InputAdapter - Exchange [ss_customer_sk,ss_ticket_number] #2 - WholeStageCodegen - HashAggregate [count,count,ss_customer_sk,ss_ticket_number] [count,count] + Exchange [ss_ticket_number,ss_customer_sk] #2 + WholeStageCodegen (4) + HashAggregate [ss_ticket_number,ss_customer_sk] [count,count] Project [ss_customer_sk,ss_ticket_number] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] - BroadcastHashJoin [s_store_sk,ss_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk,ss_hdemo_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_dom,d_year] - Scan parquet default.date_dim [d_date_sk,d_dom,d_year] [d_date_sk,d_dom,d_year] + Filter [d_dom,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_dom] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (2) Project [s_store_sk] Filter [s_county,s_store_sk] - Scan parquet default.store [s_county,s_store_sk] [s_county,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_county] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] - Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] InputAdapter BroadcastExchange #6 - WholeStageCodegen - Project [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] - Filter [c_customer_sk] - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q35/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q35/explain.txt index 9ebc25f20..5370b6872 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q35/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q35/explain.txt @@ -1,49 +1,274 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[ca_state#1 ASC NULLS FIRST,cd_gender#2 ASC NULLS FIRST,cd_marital_status#3 ASC NULLS FIRST,aggOrder#4 ASC NULLS FIRST,cd_dep_employed_count#5 ASC NULLS FIRST,cd_dep_college_count#6 ASC NULLS FIRST], output=[ca_state#1,cd_gender#2,cd_marital_status#3,cnt1#7,min(cd_dep_count)#8,max(cd_dep_count)#9,avg(cd_dep_count)#10,cd_dep_employed_count#5,cnt2#11,min(cd_dep_employed_count)#12,max(cd_dep_employed_count)#13,avg(cd_dep_employed_count)#14,cd_dep_college_count#6,cnt3#15,min(cd_dep_college_count)#16,max(cd_dep_college_count)#17,avg(cd_dep_college_count)#18]) -+- *(10) HashAggregate(keys=[ca_state#1, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6], functions=[count(1), min(cd_dep_count#19), max(cd_dep_count#19), avg(cast(cd_dep_count#19 as bigint)), min(cd_dep_employed_count#5), max(cd_dep_employed_count#5), avg(cast(cd_dep_employed_count#5 as bigint)), min(cd_dep_college_count#6), max(cd_dep_college_count#6), avg(cast(cd_dep_college_count#6 as bigint))]) - +- Exchange hashpartitioning(ca_state#1, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6, 5) - +- *(9) HashAggregate(keys=[ca_state#1, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6], functions=[partial_count(1), partial_min(cd_dep_count#19), partial_max(cd_dep_count#19), partial_avg(cast(cd_dep_count#19 as bigint)), partial_min(cd_dep_employed_count#5), partial_max(cd_dep_employed_count#5), partial_avg(cast(cd_dep_employed_count#5 as bigint)), partial_min(cd_dep_college_count#6), partial_max(cd_dep_college_count#6), partial_avg(cast(cd_dep_college_count#6 as bigint))]) - +- *(9) Project [ca_state#1, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6] - +- *(9) BroadcastHashJoin [c_current_cdemo_sk#20], [cd_demo_sk#21], Inner, BuildRight - :- *(9) Project [c_current_cdemo_sk#20, ca_state#1] - : +- *(9) BroadcastHashJoin [c_current_addr_sk#22], [ca_address_sk#23], Inner, BuildRight - : :- *(9) Project [c_current_cdemo_sk#20, c_current_addr_sk#22] - : : +- *(9) Filter (exists#24 || exists#25) - : : +- *(9) BroadcastHashJoin [c_customer_sk#26], [cs_ship_customer_sk#27], ExistenceJoin(exists#25), BuildRight - : : :- *(9) BroadcastHashJoin [c_customer_sk#26], [ws_bill_customer_sk#28], ExistenceJoin(exists#24), BuildRight - : : : :- *(9) BroadcastHashJoin [c_customer_sk#26], [ss_customer_sk#29], LeftSemi, BuildRight - : : : : :- *(9) Project [c_customer_sk#26, c_current_cdemo_sk#20, c_current_addr_sk#22] - : : : : : +- *(9) Filter (isnotnull(c_current_addr_sk#22) && isnotnull(c_current_cdemo_sk#20)) - : : : : : +- *(9) FileScan parquet default.customer[c_customer_sk#26,c_current_cdemo_sk#20,c_current_addr_sk#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [ss_customer_sk#29] - : : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#30], [d_date_sk#31], Inner, BuildRight - : : : : :- *(2) Project [ss_sold_date_sk#30, ss_customer_sk#29] - : : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#30) - : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#30,ss_customer_sk#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(1) Project [d_date_sk#31] - : : : : +- *(1) Filter ((((isnotnull(d_year#32) && isnotnull(d_qoy#33)) && (d_year#32 = 2002)) && (d_qoy#33 < 4)) && isnotnull(d_date_sk#31)) - : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#31,d_year#32,d_qoy#33] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(4) Project [ws_bill_customer_sk#28] - : : : +- *(4) BroadcastHashJoin [ws_sold_date_sk#34], [d_date_sk#31], Inner, BuildRight - : : : :- *(4) Project [ws_sold_date_sk#34, ws_bill_customer_sk#28] - : : : : +- *(4) Filter isnotnull(ws_sold_date_sk#34) - : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#34,ws_bill_customer_sk#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#31], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(6) Project [cs_ship_customer_sk#27] - : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#35], [d_date_sk#31], Inner, BuildRight - : : :- *(6) Project [cs_sold_date_sk#35, cs_ship_customer_sk#27] - : : : +- *(6) Filter isnotnull(cs_sold_date_sk#35) - : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#35,cs_ship_customer_sk#27] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#31], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(7) Project [ca_address_sk#23, ca_state#1] - : +- *(7) Filter isnotnull(ca_address_sk#23) - : +- *(7) FileScan parquet default.customer_address[ca_address_sk#23,ca_state#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(8) Project [cd_demo_sk#21, cd_gender#2, cd_marital_status#3, cd_dep_count#19, cd_dep_employed_count#5, cd_dep_college_count#6] - +- *(8) Filter isnotnull(cd_demo_sk#21) - +- *(8) FileScan parquet default.customer_demographics[cd_demo_sk#21,cd_gender#2,cd_marital_status#3,cd_dep_count#19,cd_dep_employed_count#5,cd_dep_college_count#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] +Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) + +(4) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#6, ss_customer_sk#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_sold_date_sk#6, ss_customer_sk#7] + +(6) Filter [codegen id : 2] +Input [2]: [ss_sold_date_sk#6, ss_customer_sk#7] +Condition : isnotnull(ss_sold_date_sk#6) + +(7) Scan parquet default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(9) Filter [codegen id : 1] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Condition : ((((isnotnull(d_year#9) AND isnotnull(d_qoy#10)) AND (d_year#9 = 2002)) AND (d_qoy#10 < 4)) AND isnotnull(d_date_sk#8)) + +(10) Project [codegen id : 1] +Output [1]: [d_date_sk#8] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(11) BroadcastExchange +Input [1]: [d_date_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(12) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(13) Project [codegen id : 2] +Output [1]: [ss_customer_sk#7] +Input [3]: [ss_sold_date_sk#6, ss_customer_sk#7, d_date_sk#8] + +(14) BroadcastExchange +Input [1]: [ss_customer_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(15) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#7] +Join condition: None + +(16) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 4] +Input [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] + +(18) Filter [codegen id : 4] +Input [2]: [ws_sold_date_sk#13, ws_bill_customer_sk#14] +Condition : isnotnull(ws_sold_date_sk#13) + +(19) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#8] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#13] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(21) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#14] +Input [3]: [ws_sold_date_sk#13, ws_bill_customer_sk#14, d_date_sk#8] + +(22) BroadcastExchange +Input [1]: [ws_bill_customer_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(23) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ws_bill_customer_sk#14] +Join condition: None + +(24) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 6] +Input [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] + +(26) Filter [codegen id : 6] +Input [2]: [cs_sold_date_sk#16, cs_ship_customer_sk#17] +Condition : isnotnull(cs_sold_date_sk#16) + +(27) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#8] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(29) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#17] +Input [3]: [cs_sold_date_sk#16, cs_ship_customer_sk#17, d_date_sk#8] + +(30) BroadcastExchange +Input [1]: [cs_ship_customer_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(31) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [cs_ship_customer_sk#17] +Join condition: None + +(32) Filter [codegen id : 9] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] +Condition : (exists#2 OR exists#1) + +(33) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] +Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] + +(34) Scan parquet default.customer_address +Output [2]: [ca_address_sk#19, ca_state#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#19, ca_state#20] + +(36) Filter [codegen id : 7] +Input [2]: [ca_address_sk#19, ca_state#20] +Condition : isnotnull(ca_address_sk#19) + +(37) BroadcastExchange +Input [2]: [ca_address_sk#19, ca_state#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] + +(38) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#5] +Right keys [1]: [ca_address_sk#19] +Join condition: None + +(39) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#4, ca_state#20] +Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#19, ca_state#20] + +(40) Scan parquet default.customer_demographics +Output [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(42) Filter [codegen id : 8] +Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Condition : isnotnull(cd_demo_sk#22) + +(43) BroadcastExchange +Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] + +(44) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#22] +Join condition: None + +(45) Project [codegen id : 9] +Output [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Input [8]: [c_current_cdemo_sk#4, ca_state#20, cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] + +(46) HashAggregate [codegen id : 9] +Input [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Keys [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [10]: [partial_count(1), partial_min(cd_dep_count#25), partial_max(cd_dep_count#25), partial_avg(cast(cd_dep_count#25 as bigint)), partial_min(cd_dep_employed_count#26), partial_max(cd_dep_employed_count#26), partial_avg(cast(cd_dep_employed_count#26 as bigint)), partial_min(cd_dep_college_count#27), partial_max(cd_dep_college_count#27), partial_avg(cast(cd_dep_college_count#27 as bigint))] +Aggregate Attributes [13]: [count#29, min#30, max#31, sum#32, count#33, min#34, max#35, sum#36, count#37, min#38, max#39, sum#40, count#41] +Results [19]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50, min#51, max#52, sum#53, count#54] + +(47) Exchange +Input [19]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50, min#51, max#52, sum#53, count#54] +Arguments: hashpartitioning(ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, 5), true, [id=#55] + +(48) HashAggregate [codegen id : 10] +Input [19]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50, min#51, max#52, sum#53, count#54] +Keys [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Functions [10]: [count(1), min(cd_dep_count#25), max(cd_dep_count#25), avg(cast(cd_dep_count#25 as bigint)), min(cd_dep_employed_count#26), max(cd_dep_employed_count#26), avg(cast(cd_dep_employed_count#26 as bigint)), min(cd_dep_college_count#27), max(cd_dep_college_count#27), avg(cast(cd_dep_college_count#27 as bigint))] +Aggregate Attributes [10]: [count(1)#56, min(cd_dep_count#25)#57, max(cd_dep_count#25)#58, avg(cast(cd_dep_count#25 as bigint))#59, min(cd_dep_employed_count#26)#60, max(cd_dep_employed_count#26)#61, avg(cast(cd_dep_employed_count#26 as bigint))#62, min(cd_dep_college_count#27)#63, max(cd_dep_college_count#27)#64, avg(cast(cd_dep_college_count#27 as bigint))#65] +Results [18]: [ca_state#20, cd_gender#23, cd_marital_status#24, count(1)#56 AS cnt1#66, min(cd_dep_count#25)#57 AS min(cd_dep_count)#67, max(cd_dep_count#25)#58 AS max(cd_dep_count)#68, avg(cast(cd_dep_count#25 as bigint))#59 AS avg(cd_dep_count)#69, cd_dep_employed_count#26, count(1)#56 AS cnt2#70, min(cd_dep_employed_count#26)#60 AS min(cd_dep_employed_count)#71, max(cd_dep_employed_count#26)#61 AS max(cd_dep_employed_count)#72, avg(cast(cd_dep_employed_count#26 as bigint))#62 AS avg(cd_dep_employed_count)#73, cd_dep_college_count#27, count(1)#56 AS cnt3#74, min(cd_dep_college_count#27)#63 AS min(cd_dep_college_count)#75, max(cd_dep_college_count#27)#64 AS max(cd_dep_college_count)#76, avg(cast(cd_dep_college_count#27 as bigint))#65 AS avg(cd_dep_college_count)#77, cd_dep_count#25 AS aggOrder#78] + +(49) TakeOrderedAndProject +Input [18]: [ca_state#20, cd_gender#23, cd_marital_status#24, cnt1#66, min(cd_dep_count)#67, max(cd_dep_count)#68, avg(cd_dep_count)#69, cd_dep_employed_count#26, cnt2#70, min(cd_dep_employed_count)#71, max(cd_dep_employed_count)#72, avg(cd_dep_employed_count)#73, cd_dep_college_count#27, cnt3#74, min(cd_dep_college_count)#75, max(cd_dep_college_count)#76, avg(cd_dep_college_count)#77, aggOrder#78] +Arguments: 100, [ca_state#20 ASC NULLS FIRST, cd_gender#23 ASC NULLS FIRST, cd_marital_status#24 ASC NULLS FIRST, aggOrder#78 ASC NULLS FIRST, cd_dep_employed_count#26 ASC NULLS FIRST, cd_dep_college_count#27 ASC NULLS FIRST], [ca_state#20, cd_gender#23, cd_marital_status#24, cnt1#66, min(cd_dep_count)#67, max(cd_dep_count)#68, avg(cd_dep_count)#69, cd_dep_employed_count#26, cnt2#70, min(cd_dep_employed_count)#71, max(cd_dep_employed_count)#72, avg(cd_dep_employed_count)#73, cd_dep_college_count#27, cnt3#74, min(cd_dep_college_count)#75, max(cd_dep_college_count)#76, avg(cd_dep_college_count)#77] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q35/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q35/simplified.txt index e07194d69..9f27114f7 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q35/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q35/simplified.txt @@ -1,65 +1,73 @@ -TakeOrderedAndProject [aggOrder,avg(cd_dep_college_count),avg(cd_dep_count),avg(cd_dep_employed_count),ca_state,cd_dep_college_count,cd_dep_employed_count,cd_gender,cd_marital_status,cnt1,cnt2,cnt3,max(cd_dep_college_count),max(cd_dep_count),max(cd_dep_employed_count),min(cd_dep_college_count),min(cd_dep_count),min(cd_dep_employed_count)] - WholeStageCodegen - HashAggregate [avg(cast(cd_dep_college_count as bigint)),avg(cast(cd_dep_count as bigint)),avg(cast(cd_dep_employed_count as bigint)),ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status,count,count,count,count,count(1),max,max,max,max(cd_dep_college_count),max(cd_dep_count),max(cd_dep_employed_count),min,min,min,min(cd_dep_college_count),min(cd_dep_count),min(cd_dep_employed_count),sum,sum,sum] [aggOrder,avg(cast(cd_dep_college_count as bigint)),avg(cast(cd_dep_count as bigint)),avg(cast(cd_dep_employed_count as bigint)),avg(cd_dep_college_count),avg(cd_dep_count),avg(cd_dep_employed_count),cnt1,cnt2,cnt3,count,count,count,count,count(1),max,max,max,max(cd_dep_college_count),max(cd_dep_college_count),max(cd_dep_count),max(cd_dep_count),max(cd_dep_employed_count),max(cd_dep_employed_count),min,min,min,min(cd_dep_college_count),min(cd_dep_college_count),min(cd_dep_count),min(cd_dep_count),min(cd_dep_employed_count),min(cd_dep_employed_count),sum,sum,sum] +TakeOrderedAndProject [ca_state,cd_gender,cd_marital_status,aggOrder,cd_dep_employed_count,cd_dep_college_count,cnt1,min(cd_dep_count),max(cd_dep_count),avg(cd_dep_count),cnt2,min(cd_dep_employed_count),max(cd_dep_employed_count),avg(cd_dep_employed_count),cnt3,min(cd_dep_college_count),max(cd_dep_college_count),avg(cd_dep_college_count)] + WholeStageCodegen (10) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count,count,min,max,sum,count,min,max,sum,count,min,max,sum,count] [count(1),min(cd_dep_count),max(cd_dep_count),avg(cast(cd_dep_count as bigint)),min(cd_dep_employed_count),max(cd_dep_employed_count),avg(cast(cd_dep_employed_count as bigint)),min(cd_dep_college_count),max(cd_dep_college_count),avg(cast(cd_dep_college_count as bigint)),cnt1,min(cd_dep_count),max(cd_dep_count),avg(cd_dep_count),cnt2,min(cd_dep_employed_count),max(cd_dep_employed_count),avg(cd_dep_employed_count),cnt3,min(cd_dep_college_count),max(cd_dep_college_count),avg(cd_dep_college_count),aggOrder,count,min,max,sum,count,min,max,sum,count,min,max,sum,count] InputAdapter - Exchange [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] #1 - WholeStageCodegen - HashAggregate [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status,count,count,count,count,count,count,count,count,max,max,max,max,max,max,min,min,min,min,min,min,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,count,count,max,max,max,max,max,max,min,min,min,min,min,min,sum,sum,sum,sum,sum,sum] - Project [ca_state,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] + Exchange [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] #1 + WholeStageCodegen (9) + HashAggregate [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] [count,min,max,sum,count,min,max,sum,count,min,max,sum,count,count,min,max,sum,count,min,max,sum,count,min,max,sum,count] + Project [ca_state,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] Project [c_current_cdemo_sk,ca_state] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,c_current_cdemo_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] Filter [exists,exists] BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] - Filter [c_current_addr_sk,c_current_cdemo_sk] - Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (2) Project [ss_customer_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_customer_sk,ss_sold_date_sk] - Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_qoy,d_year] - Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] + Filter [d_year,d_qoy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (4) Project [ws_bill_customer_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_customer_sk,ws_sold_date_sk] - Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_sold_date_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 + ReusedExchange [d_date_sk] #3 InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (6) Project [cs_ship_customer_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_ship_customer_sk,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] [cs_ship_customer_sk,cs_sold_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_customer_sk] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 + ReusedExchange [d_date_sk] #3 InputAdapter BroadcastExchange #6 - WholeStageCodegen - Project [ca_address_sk,ca_state] - Filter [ca_address_sk] - Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + WholeStageCodegen (7) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] InputAdapter BroadcastExchange #7 - WholeStageCodegen - Project [cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] - Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] [cd_demo_sk,cd_dep_college_count,cd_dep_count,cd_dep_employed_count,cd_gender,cd_marital_status] + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_dep_count,cd_dep_employed_count,cd_dep_college_count] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/explain.txt index b39d44f48..73174b735 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/explain.txt @@ -1,31 +1,180 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WHEN (cast(lochierarchy#1 as int) = 0) THEN i_category#2 END ASC NULLS FIRST,rank_within_parent#3 ASC NULLS FIRST], output=[gross_margin#4,i_category#2,i_class#5,lochierarchy#1,rank_within_parent#3]) -+- *(7) Project [gross_margin#4, i_category#2, i_class#5, lochierarchy#1, rank_within_parent#3] - +- Window [rank(_w3#6) windowspecdefinition(_w1#7, _w2#8, _w3#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#3], [_w1#7, _w2#8], [_w3#6 ASC NULLS FIRST] - +- *(6) Sort [_w1#7 ASC NULLS FIRST, _w2#8 ASC NULLS FIRST, _w3#6 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(_w1#7, _w2#8, 5) - +- *(5) HashAggregate(keys=[i_category#2, i_class#5, spark_grouping_id#9], functions=[sum(UnscaledValue(ss_net_profit#10)), sum(UnscaledValue(ss_ext_sales_price#11))]) - +- Exchange hashpartitioning(i_category#2, i_class#5, spark_grouping_id#9, 5) - +- *(4) HashAggregate(keys=[i_category#2, i_class#5, spark_grouping_id#9], functions=[partial_sum(UnscaledValue(ss_net_profit#10)), partial_sum(UnscaledValue(ss_ext_sales_price#11))]) - +- *(4) Expand [List(ss_ext_sales_price#11, ss_net_profit#10, i_category#12, i_class#13, 0), List(ss_ext_sales_price#11, ss_net_profit#10, i_category#12, null, 1), List(ss_ext_sales_price#11, ss_net_profit#10, null, null, 3)], [ss_ext_sales_price#11, ss_net_profit#10, i_category#2, i_class#5, spark_grouping_id#9] - +- *(4) Project [ss_ext_sales_price#11, ss_net_profit#10, i_category#14 AS i_category#12, i_class#15 AS i_class#13] - +- *(4) BroadcastHashJoin [ss_store_sk#16], [s_store_sk#17], Inner, BuildRight - :- *(4) Project [ss_store_sk#16, ss_ext_sales_price#11, ss_net_profit#10, i_class#15, i_category#14] - : +- *(4) BroadcastHashJoin [ss_item_sk#18], [i_item_sk#19], Inner, BuildRight - : :- *(4) Project [ss_item_sk#18, ss_store_sk#16, ss_ext_sales_price#11, ss_net_profit#10] - : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight - : : :- *(4) Project [ss_sold_date_sk#20, ss_item_sk#18, ss_store_sk#16, ss_ext_sales_price#11, ss_net_profit#10] - : : : +- *(4) Filter ((isnotnull(ss_sold_date_sk#20) && isnotnull(ss_item_sk#18)) && isnotnull(ss_store_sk#16)) - : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#20,ss_item_sk#18,ss_store_sk#16,ss_ext_sales_price#11,ss_net_profit#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [i_item_sk#19, i_class#15, i_category#14] - : +- *(2) Filter isnotnull(i_item_sk#19) - : +- *(2) FileScan parquet default.item[i_item_sk#19,i_class#15,i_category#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [s_store_sk#17] - +- *(3) Filter ((isnotnull(s_state#23) && (s_state#23 = TN)) && isnotnull(s_store_sk#17)) - +- *(3) FileScan parquet default.store[s_store_sk#17,s_state#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (32) ++- * Project (31) + +- Window (30) + +- * Sort (29) + +- Exchange (28) + +- * HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * Expand (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.item (11) + +- BroadcastExchange (21) + +- * Project (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet default.store (17) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#6, d_year#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#6, d_year#7] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#6, d_year#7] +Condition : ((isnotnull(d_year#7) AND (d_year#7 = 2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [2]: [d_date_sk#6, d_year#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, d_date_sk#6] + +(11) Scan parquet default.item +Output [3]: [i_item_sk#9, i_class#10, i_category#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#9, i_class#10, i_category#11] + +(13) Filter [codegen id : 2] +Input [3]: [i_item_sk#9, i_class#10, i_category#11] +Condition : isnotnull(i_item_sk#9) + +(14) BroadcastExchange +Input [3]: [i_item_sk#9, i_class#10, i_category#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#9] +Join condition: None + +(16) Project [codegen id : 4] +Output [5]: [ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, i_class#10, i_category#11] +Input [7]: [ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, i_item_sk#9, i_class#10, i_category#11] + +(17) Scan parquet default.store +Output [2]: [s_store_sk#13, s_state#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#13, s_state#14] + +(19) Filter [codegen id : 3] +Input [2]: [s_store_sk#13, s_state#14] +Condition : ((isnotnull(s_state#14) AND (s_state#14 = TN)) AND isnotnull(s_store_sk#13)) + +(20) Project [codegen id : 3] +Output [1]: [s_store_sk#13] +Input [2]: [s_store_sk#13, s_state#14] + +(21) BroadcastExchange +Input [1]: [s_store_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#13] +Join condition: None + +(23) Project [codegen id : 4] +Output [4]: [ss_ext_sales_price#4, ss_net_profit#5, i_category#11, i_class#10] +Input [6]: [ss_store_sk#3, ss_ext_sales_price#4, ss_net_profit#5, i_class#10, i_category#11, s_store_sk#13] + +(24) Expand [codegen id : 4] +Input [4]: [ss_ext_sales_price#4, ss_net_profit#5, i_category#11, i_class#10] +Arguments: [List(ss_ext_sales_price#4, ss_net_profit#5, i_category#11, i_class#10, 0), List(ss_ext_sales_price#4, ss_net_profit#5, i_category#11, null, 1), List(ss_ext_sales_price#4, ss_net_profit#5, null, null, 3)], [ss_ext_sales_price#4, ss_net_profit#5, i_category#16, i_class#17, spark_grouping_id#18] + +(25) HashAggregate [codegen id : 4] +Input [5]: [ss_ext_sales_price#4, ss_net_profit#5, i_category#16, i_class#17, spark_grouping_id#18] +Keys [3]: [i_category#16, i_class#17, spark_grouping_id#18] +Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#5)), partial_sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [2]: [sum#19, sum#20] +Results [5]: [i_category#16, i_class#17, spark_grouping_id#18, sum#21, sum#22] + +(26) Exchange +Input [5]: [i_category#16, i_class#17, spark_grouping_id#18, sum#21, sum#22] +Arguments: hashpartitioning(i_category#16, i_class#17, spark_grouping_id#18, 5), true, [id=#23] + +(27) HashAggregate [codegen id : 5] +Input [5]: [i_category#16, i_class#17, spark_grouping_id#18, sum#21, sum#22] +Keys [3]: [i_category#16, i_class#17, spark_grouping_id#18] +Functions [2]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#5))#24, sum(UnscaledValue(ss_ext_sales_price#4))#25] +Results [7]: [CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#24,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#25,17,2))), DecimalType(37,20), true) AS gross_margin#26, i_category#16, i_class#17, (cast((shiftright(spark_grouping_id#18, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#18, 0) & 1) as tinyint)) AS lochierarchy#27, (cast((shiftright(spark_grouping_id#18, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#18, 0) & 1) as tinyint)) AS _w1#28, CASE WHEN (cast(cast((shiftright(spark_grouping_id#18, 0) & 1) as tinyint) as int) = 0) THEN i_category#16 END AS _w2#29, CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#24,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#25,17,2))), DecimalType(37,20), true) AS _w3#30] + +(28) Exchange +Input [7]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, _w1#28, _w2#29, _w3#30] +Arguments: hashpartitioning(_w1#28, _w2#29, 5), true, [id=#31] + +(29) Sort [codegen id : 6] +Input [7]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, _w1#28, _w2#29, _w3#30] +Arguments: [_w1#28 ASC NULLS FIRST, _w2#29 ASC NULLS FIRST, _w3#30 ASC NULLS FIRST], false, 0 + +(30) Window +Input [7]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, _w1#28, _w2#29, _w3#30] +Arguments: [rank(_w3#30) windowspecdefinition(_w1#28, _w2#29, _w3#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#32], [_w1#28, _w2#29], [_w3#30 ASC NULLS FIRST] + +(31) Project [codegen id : 7] +Output [5]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, rank_within_parent#32] +Input [8]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, _w1#28, _w2#29, _w3#30, rank_within_parent#32] + +(32) TakeOrderedAndProject +Input [5]: [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, rank_within_parent#32] +Arguments: 100, [lochierarchy#27 DESC NULLS LAST, CASE WHEN (cast(lochierarchy#27 as int) = 0) THEN i_category#16 END ASC NULLS FIRST, rank_within_parent#32 ASC NULLS FIRST], [gross_margin#26, i_category#16, i_class#17, lochierarchy#27, rank_within_parent#32] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/simplified.txt index 8d87de2ff..d4a081452 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q36/simplified.txt @@ -1,43 +1,49 @@ -TakeOrderedAndProject [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] - WholeStageCodegen +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,gross_margin,i_class] + WholeStageCodegen (7) Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] InputAdapter - Window [_w1,_w2,_w3] - WholeStageCodegen + Window [_w3,_w1,_w2] + WholeStageCodegen (6) Sort [_w1,_w2,_w3] InputAdapter Exchange [_w1,_w2] #1 - WholeStageCodegen - HashAggregate [i_category,i_class,spark_grouping_id,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] [_w1,_w2,_w3,gross_margin,lochierarchy,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] + WholeStageCodegen (5) + HashAggregate [i_category,i_class,spark_grouping_id,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),gross_margin,lochierarchy,_w1,_w2,_w3,sum,sum] InputAdapter Exchange [i_category,i_class,spark_grouping_id] #2 - WholeStageCodegen - HashAggregate [i_category,i_class,spark_grouping_id,ss_ext_sales_price,ss_net_profit,sum,sum,sum,sum] [sum,sum,sum,sum] - Expand [i_category,i_class,ss_ext_sales_price,ss_net_profit] - Project [i_category,i_class,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [i_category,i_class,ss_ext_sales_price,ss_net_profit,ss_store_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk] + WholeStageCodegen (4) + HashAggregate [i_category,i_class,spark_grouping_id,ss_net_profit,ss_ext_sales_price] [sum,sum,sum,sum] + Expand [ss_ext_sales_price,ss_net_profit,i_category,i_class] + Project [ss_ext_sales_price,ss_net_profit,i_category,i_class] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [i_category,i_class,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_category,i_class,i_item_sk] [i_category,i_class,i_item_sk] + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_class,i_category] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (3) Project [s_store_sk] Filter [s_state,s_store_sk] - Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_state] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/explain.txt index f0f1b6aeb..896d6c571 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/explain.txt @@ -1,26 +1,160 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,i_current_price#3]) -+- *(5) HashAggregate(keys=[i_item_id#1, i_item_desc#2, i_current_price#3], functions=[]) - +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, i_current_price#3, 5) - +- *(4) HashAggregate(keys=[i_item_id#1, i_item_desc#2, i_current_price#3], functions=[]) - +- *(4) Project [i_item_id#1, i_item_desc#2, i_current_price#3] - +- *(4) BroadcastHashJoin [i_item_sk#4], [cs_item_sk#5], Inner, BuildRight - :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3] - : +- *(4) BroadcastHashJoin [inv_date_sk#6], [d_date_sk#7], Inner, BuildRight - : :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3, inv_date_sk#6] - : : +- *(4) BroadcastHashJoin [i_item_sk#4], [inv_item_sk#8], Inner, BuildRight - : : :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3] - : : : +- *(4) Filter ((((isnotnull(i_current_price#3) && (i_current_price#3 >= 68.00)) && (cast(i_current_price#3 as decimal(12,2)) <= 98.00)) && i_manufact_id#9 IN (677,940,694,808)) && isnotnull(i_item_sk#4)) - : : : +- *(4) FileScan parquet default.item[i_item_sk#4,i_item_id#1,i_item_desc#2,i_current_price#3,i_manufact_id#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,68.00), In(i_manufact_id, [677,94..., ReadSchema: struct= 100)) && (inv_quantity_on_hand#10 <= 500)) && isnotnull(inv_item_sk#8)) && isnotnull(inv_date_sk#6)) - : : +- *(1) FileScan parquet default.inventory[inv_date_sk#6,inv_item_sk#8,inv_quantity_on_hand#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(i..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#7] - : +- *(2) Filter (((isnotnull(d_date#11) && (d_date#11 >= 10988)) && (d_date#11 <= 11048)) && isnotnull(d_date_sk#7)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_date#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-01), LessThanOrEqual(d_date,2000-04-01), Is..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [cs_item_sk#5] - +- *(3) Filter isnotnull(cs_item_sk#5) - +- *(3) FileScan parquet default.catalog_sales[cs_item_sk#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (28) ++- * HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * Project (24) + +- * BroadcastHashJoin Inner BuildRight (23) + :- * Project (18) + : +- * BroadcastHashJoin Inner BuildRight (17) + : :- * Project (11) + : : +- * BroadcastHashJoin Inner BuildRight (10) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- BroadcastExchange (9) + : : +- * Project (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet default.inventory (5) + : +- BroadcastExchange (16) + : +- * Project (15) + : +- * Filter (14) + : +- * ColumnarToRow (13) + : +- Scan parquet default.date_dim (12) + +- BroadcastExchange (22) + +- * Filter (21) + +- * ColumnarToRow (20) + +- Scan parquet default.catalog_sales (19) + + +(1) Scan parquet default.item +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,68.00), In(i_manufact_id, [677,940,694,808]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(3) Filter [codegen id : 4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Condition : ((((isnotnull(i_current_price#4) AND (i_current_price#4 >= 68.00)) AND (cast(i_current_price#4 as decimal(12,2)) <= 98.00)) AND i_manufact_id#5 IN (677,940,694,808)) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 4] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(5) Scan parquet default.inventory +Output [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/inventory] +PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(inv_quantity_on_hand,500), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] + +(7) Filter [codegen id : 1] +Input [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] +Condition : ((((isnotnull(inv_quantity_on_hand#8) AND (inv_quantity_on_hand#8 >= 100)) AND (inv_quantity_on_hand#8 <= 500)) AND isnotnull(inv_item_sk#7)) AND isnotnull(inv_date_sk#6)) + +(8) Project [codegen id : 1] +Output [2]: [inv_date_sk#6, inv_item_sk#7] +Input [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] + +(9) BroadcastExchange +Input [2]: [inv_date_sk#6, inv_item_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#9] + +(10) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [inv_item_sk#7] +Join condition: None + +(11) Project [codegen id : 4] +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#6] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#6, inv_item_sk#7] + +(12) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-01), LessThanOrEqual(d_date,2000-04-01), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] + +(14) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 10988)) AND (d_date#11 <= 11048)) AND isnotnull(d_date_sk#10)) + +(15) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#11] + +(16) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(17) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#6] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(18) Project [codegen id : 4] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#6, d_date_sk#10] + +(19) Scan parquet default.catalog_sales +Output [1]: [cs_item_sk#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 3] +Input [1]: [cs_item_sk#13] + +(21) Filter [codegen id : 3] +Input [1]: [cs_item_sk#13] +Condition : isnotnull(cs_item_sk#13) + +(22) BroadcastExchange +Input [1]: [cs_item_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [cs_item_sk#13] +Join condition: None + +(24) Project [codegen id : 4] +Output [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, cs_item_sk#13] + +(25) HashAggregate [codegen id : 4] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(26) Exchange +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), true, [id=#15] + +(27) HashAggregate [codegen id : 5] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(28) TakeOrderedAndProject +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_current_price#4] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/simplified.txt index 23745be9e..6d3216fff 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q37/simplified.txt @@ -1,34 +1,41 @@ -TakeOrderedAndProject [i_current_price,i_item_desc,i_item_id] - WholeStageCodegen - HashAggregate [i_current_price,i_item_desc,i_item_id] +TakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] + WholeStageCodegen (5) + HashAggregate [i_item_id,i_item_desc,i_current_price] InputAdapter - Exchange [i_current_price,i_item_desc,i_item_id] #1 - WholeStageCodegen - HashAggregate [i_current_price,i_item_desc,i_item_id] - Project [i_current_price,i_item_desc,i_item_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [i_current_price,i_item_desc,i_item_id,i_item_sk] - BroadcastHashJoin [d_date_sk,inv_date_sk] - Project [i_current_price,i_item_desc,i_item_id,i_item_sk,inv_date_sk] + Exchange [i_item_id,i_item_desc,i_current_price] #1 + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_current_price] + Project [i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [i_item_sk,cs_item_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk] BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [i_current_price,i_item_desc,i_item_id,i_item_sk] - Filter [i_current_price,i_item_sk,i_manufact_id] - Scan parquet default.item [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + Filter [i_current_price,i_manufact_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (1) Project [inv_date_sk,inv_item_sk] - Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + Filter [inv_quantity_on_hand,inv_item_sk,inv_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [cs_item_sk] - Filter [cs_item_sk] - Scan parquet default.catalog_sales [cs_item_sk] [cs_item_sk] + WholeStageCodegen (3) + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/explain.txt index a49754fdf..74454cf32 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/explain.txt @@ -1,55 +1,323 @@ == Physical Plan == -CollectLimit 100 -+- *(13) HashAggregate(keys=[], functions=[count(1)]) - +- Exchange SinglePartition - +- *(12) HashAggregate(keys=[], functions=[partial_count(1)]) - +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#4, ), coalesce(c_first_name#5, ), coalesce(d_date#6, 0)], LeftSemi, BuildRight, (((c_last_name#1 <=> c_last_name#4) && (c_first_name#2 <=> c_first_name#5)) && (d_date#3 <=> d_date#6)) - :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - : +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - : +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#7, ), coalesce(c_first_name#8, ), coalesce(d_date#9, 0)], LeftSemi, BuildRight, (((c_last_name#1 <=> c_last_name#7) && (c_first_name#2 <=> c_first_name#8)) && (d_date#3 <=> d_date#9)) - : :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - : : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, d_date#3, 5) - : : +- *(3) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - : : +- *(3) Project [c_last_name#1, c_first_name#2, d_date#3] - : : +- *(3) BroadcastHashJoin [ss_customer_sk#10], [c_customer_sk#11], Inner, BuildRight - : : :- *(3) Project [ss_customer_sk#10, d_date#3] - : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight - : : : :- *(3) Project [ss_sold_date_sk#12, ss_customer_sk#10] - : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#12) && isnotnull(ss_customer_sk#10)) - : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_customer_sk#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#13, d_date#3] - : : : +- *(1) Filter (((isnotnull(d_month_seq#14) && (d_month_seq#14 >= 1200)) && (d_month_seq#14 <= 1211)) && isnotnull(d_date_sk#13)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#13,d_date#3,d_month_seq#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [c_customer_sk#11, c_first_name#2, c_last_name#1] - : : +- *(2) Filter isnotnull(c_customer_sk#11) - : : +- *(2) FileScan parquet default.customer[c_customer_sk#11,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) - : +- *(7) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) - : +- Exchange hashpartitioning(c_last_name#7, c_first_name#8, d_date#9, 5) - : +- *(6) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) - : +- *(6) Project [c_last_name#7, c_first_name#8, d_date#9] - : +- *(6) BroadcastHashJoin [cs_bill_customer_sk#15], [c_customer_sk#16], Inner, BuildRight - : :- *(6) Project [cs_bill_customer_sk#15, d_date#9] - : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight - : : :- *(6) Project [cs_sold_date_sk#17, cs_bill_customer_sk#15] - : : : +- *(6) Filter (isnotnull(cs_sold_date_sk#17) && isnotnull(cs_bill_customer_sk#15)) - : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#17,cs_bill_customer_sk#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#18, d_date#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [c_customer_sk#16, c_first_name#8, c_last_name#7], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) - +- *(11) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) - +- Exchange hashpartitioning(c_last_name#4, c_first_name#5, d_date#6, 5) - +- *(10) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) - +- *(10) Project [c_last_name#4, c_first_name#5, d_date#6] - +- *(10) BroadcastHashJoin [ws_bill_customer_sk#19], [c_customer_sk#20], Inner, BuildRight - :- *(10) Project [ws_bill_customer_sk#19, d_date#6] - : +- *(10) BroadcastHashJoin [ws_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight - : :- *(10) Project [ws_sold_date_sk#21, ws_bill_customer_sk#19] - : : +- *(10) Filter (isnotnull(ws_sold_date_sk#21) && isnotnull(ws_bill_customer_sk#19)) - : : +- *(10) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#22, d_date#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [c_customer_sk#20, c_first_name#5, c_last_name#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +* HashAggregate (54) ++- Exchange (53) + +- * HashAggregate (52) + +- * HashAggregate (51) + +- * HashAggregate (50) + +- * HashAggregate (49) + +- * HashAggregate (48) + +- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * BroadcastHashJoin LeftSemi BuildRight (44) + :- * BroadcastHashJoin LeftSemi BuildRight (30) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.customer (11) + : +- BroadcastExchange (29) + : +- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.catalog_sales (17) + : : +- ReusedExchange (20) + : +- ReusedExchange (23) + +- BroadcastExchange (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.web_sales (31) + : +- ReusedExchange (34) + +- ReusedExchange (37) + + +(1) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#1, ss_customer_sk#2] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 11] +Input [2]: [ss_sold_date_sk#1, ss_customer_sk#2] + +(3) Filter [codegen id : 11] +Input [2]: [ss_sold_date_sk#1, ss_customer_sk#2] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#3)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#3, d_date#4] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(8) BroadcastExchange +Input [2]: [d_date_sk#3, d_date#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#3] +Join condition: None + +(10) Project [codegen id : 11] +Output [2]: [ss_customer_sk#2, d_date#4] +Input [4]: [ss_sold_date_sk#1, ss_customer_sk#2, d_date_sk#3, d_date#4] + +(11) Scan parquet default.customer +Output [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] + +(13) Filter [codegen id : 2] +Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] +Condition : isnotnull(c_customer_sk#7) + +(14) BroadcastExchange +Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#7] +Join condition: None + +(16) Project [codegen id : 11] +Output [3]: [d_date#4, c_first_name#8, c_last_name#9] +Input [5]: [ss_customer_sk#2, d_date#4, c_customer_sk#7, c_first_name#8, c_last_name#9] + +(17) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#11, cs_bill_customer_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 5] +Input [2]: [cs_sold_date_sk#11, cs_bill_customer_sk#12] + +(19) Filter [codegen id : 5] +Input [2]: [cs_sold_date_sk#11, cs_bill_customer_sk#12] +Condition : (isnotnull(cs_sold_date_sk#11) AND isnotnull(cs_bill_customer_sk#12)) + +(20) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#13, d_date#14] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#11] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(22) Project [codegen id : 5] +Output [2]: [cs_bill_customer_sk#12, d_date#14] +Input [4]: [cs_sold_date_sk#11, cs_bill_customer_sk#12, d_date_sk#13, d_date#14] + +(23) ReusedExchange [Reuses operator id: 14] +Output [3]: [c_customer_sk#15, c_first_name#16, c_last_name#17] + +(24) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_bill_customer_sk#12] +Right keys [1]: [c_customer_sk#15] +Join condition: None + +(25) Project [codegen id : 5] +Output [3]: [c_last_name#17, c_first_name#16, d_date#14] +Input [5]: [cs_bill_customer_sk#12, d_date#14, c_customer_sk#15, c_first_name#16, c_last_name#17] + +(26) HashAggregate [codegen id : 5] +Input [3]: [c_last_name#17, c_first_name#16, d_date#14] +Keys [3]: [c_last_name#17, c_first_name#16, d_date#14] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#17, c_first_name#16, d_date#14] + +(27) Exchange +Input [3]: [c_last_name#17, c_first_name#16, d_date#14] +Arguments: hashpartitioning(c_last_name#17, c_first_name#16, d_date#14, 5), true, [id=#18] + +(28) HashAggregate [codegen id : 6] +Input [3]: [c_last_name#17, c_first_name#16, d_date#14] +Keys [3]: [c_last_name#17, c_first_name#16, d_date#14] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#17, c_first_name#16, d_date#14] + +(29) BroadcastExchange +Input [3]: [c_last_name#17, c_first_name#16, d_date#14] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 0), isnull(input[2, date, true])),false), [id=#19] + +(30) BroadcastHashJoin [codegen id : 11] +Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#4, 0), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#17, ), isnull(c_last_name#17), coalesce(c_first_name#16, ), isnull(c_first_name#16), coalesce(d_date#14, 0), isnull(d_date#14)] +Join condition: None + +(31) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#20, ws_bill_customer_sk#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 9] +Input [2]: [ws_sold_date_sk#20, ws_bill_customer_sk#21] + +(33) Filter [codegen id : 9] +Input [2]: [ws_sold_date_sk#20, ws_bill_customer_sk#21] +Condition : (isnotnull(ws_sold_date_sk#20) AND isnotnull(ws_bill_customer_sk#21)) + +(34) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#22, d_date#23] + +(35) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#22] +Join condition: None + +(36) Project [codegen id : 9] +Output [2]: [ws_bill_customer_sk#21, d_date#23] +Input [4]: [ws_sold_date_sk#20, ws_bill_customer_sk#21, d_date_sk#22, d_date#23] + +(37) ReusedExchange [Reuses operator id: 14] +Output [3]: [c_customer_sk#24, c_first_name#25, c_last_name#26] + +(38) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_bill_customer_sk#21] +Right keys [1]: [c_customer_sk#24] +Join condition: None + +(39) Project [codegen id : 9] +Output [3]: [c_last_name#26, c_first_name#25, d_date#23] +Input [5]: [ws_bill_customer_sk#21, d_date#23, c_customer_sk#24, c_first_name#25, c_last_name#26] + +(40) HashAggregate [codegen id : 9] +Input [3]: [c_last_name#26, c_first_name#25, d_date#23] +Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#26, c_first_name#25, d_date#23] + +(41) Exchange +Input [3]: [c_last_name#26, c_first_name#25, d_date#23] +Arguments: hashpartitioning(c_last_name#26, c_first_name#25, d_date#23, 5), true, [id=#27] + +(42) HashAggregate [codegen id : 10] +Input [3]: [c_last_name#26, c_first_name#25, d_date#23] +Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#26, c_first_name#25, d_date#23] + +(43) BroadcastExchange +Input [3]: [c_last_name#26, c_first_name#25, d_date#23] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 0), isnull(input[2, date, true])),false), [id=#28] + +(44) BroadcastHashJoin [codegen id : 11] +Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#4, 0), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#26, ), isnull(c_last_name#26), coalesce(c_first_name#25, ), isnull(c_first_name#25), coalesce(d_date#23, 0), isnull(d_date#23)] +Join condition: None + +(45) HashAggregate [codegen id : 11] +Input [3]: [d_date#4, c_first_name#8, c_last_name#9] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(46) Exchange +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Arguments: hashpartitioning(c_last_name#9, c_first_name#8, d_date#4, 5), true, [id=#29] + +(47) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(48) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(49) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(50) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(51) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results: [] + +(52) HashAggregate [codegen id : 12] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#30] +Results [1]: [count#31] + +(53) Exchange +Input [1]: [count#31] +Arguments: SinglePartition, true, [id=#32] + +(54) HashAggregate [codegen id : 13] +Input [1]: [count#31] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#33] +Results [1]: [count(1)#33 AS count(1)#34] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/simplified.txt index c7e30522f..a5b57a4ac 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q38/simplified.txt @@ -1,75 +1,80 @@ -CollectLimit - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),count(1)] - InputAdapter - Exchange #1 - WholeStageCodegen - HashAggregate [count,count] [count,count] - HashAggregate [c_first_name,c_last_name,d_date] - HashAggregate [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] - HashAggregate [c_first_name,c_last_name,d_date] - HashAggregate [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] - HashAggregate [c_first_name,c_last_name,d_date] - InputAdapter - Exchange [c_first_name,c_last_name,d_date] #2 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - Project [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [d_date,ss_customer_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_customer_sk,ss_sold_date_sk] - Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date,d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] [d_date,d_date_sk,d_month_seq] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [c_customer_sk,c_first_name,c_last_name] - Filter [c_customer_sk] - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - InputAdapter - Exchange [c_first_name,c_last_name,d_date] #6 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - Project [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_bill_customer_sk,cs_sold_date_sk] - Filter [cs_bill_customer_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] [cs_bill_customer_sk,cs_sold_date_sk] - InputAdapter - ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #3 +WholeStageCodegen (13) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (12) + HashAggregate [count,count] + HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (11) + HashAggregate [c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + Project [d_date,c_first_name,c_last_name] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_customer_sk] + ColumnarToRow InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - InputAdapter - Exchange [c_first_name,c_last_name,d_date] #8 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - Project [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [d_date,ws_bill_customer_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_customer_sk,ws_sold_date_sk] - Filter [ws_bill_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_sold_date_sk] - InputAdapter - ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #3 + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #6 + WholeStageCodegen (5) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk,cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,d_date] InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/explain.txt index 496999a76..b2cc849c6 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/explain.txt @@ -1,51 +1,292 @@ == Physical Plan == -*(11) Sort [w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST], true, 0 -+- Exchange rangepartitioning(w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST, 5) - +- *(10) BroadcastHashJoin [i_item_sk#2, w_warehouse_sk#1], [i_item_sk#9, w_warehouse_sk#10], Inner, BuildRight - :- *(10) Project [w_warehouse_sk#1, i_item_sk#2, d_moy#3, mean#4, CASE WHEN (mean#4 = 0.0) THEN null ELSE (stdev#11 / mean#4) END AS cov#5] - : +- *(10) Filter (CASE WHEN (mean#4 = 0.0) THEN 0.0 ELSE (stdev#11 / mean#4) END > 1.0) - : +- *(10) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) - : +- Exchange hashpartitioning(w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3, 5) - : +- *(4) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) - : +- *(4) Project [inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12, d_moy#3] - : +- *(4) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#15], Inner, BuildRight - : :- *(4) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12] - : : +- *(4) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#1], Inner, BuildRight - : : :- *(4) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#2] - : : : +- *(4) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#2], Inner, BuildRight - : : : :- *(4) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] - : : : : +- *(4) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) - : : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [i_item_sk#2] - : : : +- *(1) Filter isnotnull(i_item_sk#2) - : : : +- *(1) FileScan parquet default.item[i_item_sk#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [w_warehouse_sk#1, w_warehouse_name#12] - : : +- *(2) Filter isnotnull(w_warehouse_sk#1) - : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#1,w_warehouse_name#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [d_date_sk#15, d_moy#3] - : +- *(3) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#3)) && (d_year#18 = 2001)) && (d_moy#3 = 1)) && isnotnull(d_date_sk#15)) - : +- *(3) FileScan parquet default.date_dim[d_date_sk#15,d_year#18,d_moy#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) - +- *(9) Project [w_warehouse_sk#10, i_item_sk#9, d_moy#6, mean#7, CASE WHEN (mean#7 = 0.0) THEN null ELSE (stdev#19 / mean#7) END AS cov#8] - +- *(9) Filter (CASE WHEN (mean#7 = 0.0) THEN 0.0 ELSE (stdev#19 / mean#7) END > 1.0) - +- *(9) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) - +- Exchange hashpartitioning(w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6, 5) - +- *(8) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) - +- *(8) Project [inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20, d_moy#6] - +- *(8) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#21], Inner, BuildRight - :- *(8) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20] - : +- *(8) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#10], Inner, BuildRight - : :- *(8) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#9] - : : +- *(8) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#9], Inner, BuildRight - : : :- *(8) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] - : : : +- *(8) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) - : : : +- *(8) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct - : : +- ReusedExchange [i_item_sk#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [w_warehouse_sk#10, w_warehouse_name#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(7) Project [d_date_sk#21, d_moy#6] - +- *(7) Filter ((((isnotnull(d_year#22) && isnotnull(d_moy#6)) && (d_year#22 = 2001)) && (d_moy#6 = 2)) && isnotnull(d_date_sk#21)) - +- *(7) FileScan parquet default.date_dim[d_date_sk#21,d_year#22,d_moy#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file +* Sort (52) ++- Exchange (51) + +- * BroadcastHashJoin Inner BuildRight (50) + :- * Project (27) + : +- * Filter (26) + : +- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.inventory (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.item (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.warehouse (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.date_dim (16) + +- BroadcastExchange (49) + +- * Project (48) + +- * Filter (47) + +- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.inventory (28) + : : +- ReusedExchange (31) + : +- ReusedExchange (34) + +- BroadcastExchange (41) + +- * Project (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet default.date_dim (37) + + +(1) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/inventory] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_item_sk#2) AND isnotnull(inv_warehouse_sk#3)) AND isnotnull(inv_date_sk#1)) + +(4) Scan parquet default.item +Output [1]: [i_item_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [i_item_sk#5] + +(6) Filter [codegen id : 1] +Input [1]: [i_item_sk#5] +Condition : isnotnull(i_item_sk#5) + +(7) BroadcastExchange +Input [1]: [i_item_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(9) Project [codegen id : 4] +Output [4]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5] +Input [5]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5] + +(10) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#7, w_warehouse_name#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] + +(12) Filter [codegen id : 2] +Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] +Condition : isnotnull(w_warehouse_sk#7) + +(13) BroadcastExchange +Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#7] +Join condition: None + +(15) Project [codegen id : 4] +Output [5]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8] +Input [6]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8] + +(16) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] +Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#12)) AND (d_year#11 = 2001)) AND (d_moy#12 = 1)) AND isnotnull(d_date_sk#10)) + +(19) Project [codegen id : 3] +Output [2]: [d_date_sk#10, d_moy#12] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(20) BroadcastExchange +Input [2]: [d_date_sk#10, d_moy#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(22) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8, d_moy#12] +Input [7]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8, d_date_sk#10, d_moy#12] + +(23) HashAggregate [codegen id : 4] +Input [5]: [inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8, d_moy#12] +Keys [4]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#4 as double)), partial_avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [5]: [n#14, avg#15, m2#16, sum#17, count#18] +Results [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, n#19, avg#20, m2#21, sum#22, count#23] + +(24) Exchange +Input [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, n#19, avg#20, m2#21, sum#22, count#23] +Arguments: hashpartitioning(w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, 5), true, [id=#24] + +(25) HashAggregate [codegen id : 10] +Input [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, n#19, avg#20, m2#21, sum#22, count#23] +Keys [4]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double)), avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double))#25, avg(cast(inv_quantity_on_hand#4 as bigint))#26] +Results [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, stddev_samp(cast(inv_quantity_on_hand#4 as double))#25 AS stdev#27, avg(cast(inv_quantity_on_hand#4 as bigint))#26 AS mean#28] + +(26) Filter [codegen id : 10] +Input [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, stdev#27, mean#28] +Condition : (CASE WHEN (mean#28 = 0.0) THEN 0.0 ELSE (stdev#27 / mean#28) END > 1.0) + +(27) Project [codegen id : 10] +Output [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, mean#28, CASE WHEN (mean#28 = 0.0) THEN null ELSE (stdev#27 / mean#28) END AS cov#29] +Input [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, stdev#27, mean#28] + +(28) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/inventory] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 8] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(30) Filter [codegen id : 8] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_item_sk#2) AND isnotnull(inv_warehouse_sk#3)) AND isnotnull(inv_date_sk#1)) + +(31) ReusedExchange [Reuses operator id: 7] +Output [1]: [i_item_sk#30] + +(32) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#30] +Join condition: None + +(33) Project [codegen id : 8] +Output [4]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#30] +Input [5]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#30] + +(34) ReusedExchange [Reuses operator id: 13] +Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] + +(35) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#31] +Join condition: None + +(36) Project [codegen id : 8] +Output [5]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] +Input [6]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] + +(37) Scan parquet default.date_dim +Output [3]: [d_date_sk#33, d_year#34, d_moy#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 7] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] + +(39) Filter [codegen id : 7] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] +Condition : ((((isnotnull(d_year#34) AND isnotnull(d_moy#35)) AND (d_year#34 = 2001)) AND (d_moy#35 = 2)) AND isnotnull(d_date_sk#33)) + +(40) Project [codegen id : 7] +Output [2]: [d_date_sk#33, d_moy#35] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] + +(41) BroadcastExchange +Input [2]: [d_date_sk#33, d_moy#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#36] + +(42) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#33] +Join condition: None + +(43) Project [codegen id : 8] +Output [5]: [inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] +Input [7]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_date_sk#33, d_moy#35] + +(44) HashAggregate [codegen id : 8] +Input [5]: [inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#4 as double)), partial_avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [5]: [n#37, avg#38, m2#39, sum#40, count#41] +Results [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#42, avg#43, m2#44, sum#45, count#46] + +(45) Exchange +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#42, avg#43, m2#44, sum#45, count#46] +Arguments: hashpartitioning(w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, 5), true, [id=#47] + +(46) HashAggregate [codegen id : 9] +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#42, avg#43, m2#44, sum#45, count#46] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double)), avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double))#48, avg(cast(inv_quantity_on_hand#4 as bigint))#49] +Results [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stddev_samp(cast(inv_quantity_on_hand#4 as double))#48 AS stdev#50, avg(cast(inv_quantity_on_hand#4 as bigint))#49 AS mean#51] + +(47) Filter [codegen id : 9] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#50, mean#51] +Condition : (CASE WHEN (mean#51 = 0.0) THEN 0.0 ELSE (stdev#50 / mean#51) END > 1.0) + +(48) Project [codegen id : 9] +Output [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, CASE WHEN (mean#51 = 0.0) THEN null ELSE (stdev#50 / mean#51) END AS cov#52] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#50, mean#51] + +(49) BroadcastExchange +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, cov#52] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [id=#53] + +(50) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [i_item_sk#5, w_warehouse_sk#7] +Right keys [2]: [i_item_sk#30, w_warehouse_sk#31] +Join condition: None + +(51) Exchange +Input [10]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, mean#28, cov#29, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, cov#52] +Arguments: rangepartitioning(w_warehouse_sk#7 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, mean#28 ASC NULLS FIRST, cov#29 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#51 ASC NULLS FIRST, cov#52 ASC NULLS FIRST, 5), true, [id=#54] + +(52) Sort [codegen id : 11] +Input [10]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, mean#28, cov#29, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, cov#52] +Arguments: [w_warehouse_sk#7 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, mean#28 ASC NULLS FIRST, cov#29 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#51 ASC NULLS FIRST, cov#52 ASC NULLS FIRST], true, 0 + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/simplified.txt index d80f4a4ee..f4e23c837 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39a/simplified.txt @@ -1,69 +1,77 @@ -WholeStageCodegen - Sort [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] +WholeStageCodegen (11) + Sort [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] InputAdapter - Exchange [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] #1 - WholeStageCodegen - BroadcastHashJoin [i_item_sk,i_item_sk,w_warehouse_sk,w_warehouse_sk] - Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] + Exchange [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] #1 + WholeStageCodegen (10) + BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] + Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] Filter [mean,stdev] - HashAggregate [avg,avg(cast(inv_quantity_on_hand as bigint)),count,d_moy,i_item_sk,m2,n,stddev_samp(cast(inv_quantity_on_hand as double)),sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(cast(inv_quantity_on_hand as bigint)),stdev,mean,n,avg,m2,sum,count] InputAdapter - Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #2 - WholeStageCodegen - HashAggregate [avg,avg,count,count,d_moy,i_item_sk,inv_quantity_on_hand,m2,m2,n,n,sum,sum,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] - Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] - BroadcastHashJoin [d_date_sk,inv_date_sk] - Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] + Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_date_sk,inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] - BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_sk] [i_item_sk] + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [w_warehouse_name,w_warehouse_sk] - Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] + WholeStageCodegen (2) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (3) Project [d_date_sk,d_moy] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #6 - WholeStageCodegen - Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] + WholeStageCodegen (9) + Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] Filter [mean,stdev] - HashAggregate [avg,avg(cast(inv_quantity_on_hand as bigint)),count,d_moy,i_item_sk,m2,n,stddev_samp(cast(inv_quantity_on_hand as double)),sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(cast(inv_quantity_on_hand as bigint)),stdev,mean,n,avg,m2,sum,count] InputAdapter - Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #7 - WholeStageCodegen - HashAggregate [avg,avg,count,count,d_moy,i_item_sk,inv_quantity_on_hand,m2,m2,n,n,sum,sum,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] - Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] - BroadcastHashJoin [d_date_sk,inv_date_sk] - Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 + WholeStageCodegen (8) + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] + Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_date_sk,inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] - BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] InputAdapter - ReusedExchange [i_item_sk] [i_item_sk] #3 + ReusedExchange [i_item_sk] #3 InputAdapter - ReusedExchange [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] #4 + ReusedExchange [w_warehouse_sk,w_warehouse_name] #4 InputAdapter BroadcastExchange #8 - WholeStageCodegen + WholeStageCodegen (7) Project [d_date_sk,d_moy] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/explain.txt index 89e93bc62..92c2d5ed4 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/explain.txt @@ -1,51 +1,292 @@ == Physical Plan == -*(11) Sort [w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST], true, 0 -+- Exchange rangepartitioning(w_warehouse_sk#1 ASC NULLS FIRST, i_item_sk#2 ASC NULLS FIRST, d_moy#3 ASC NULLS FIRST, mean#4 ASC NULLS FIRST, cov#5 ASC NULLS FIRST, d_moy#6 ASC NULLS FIRST, mean#7 ASC NULLS FIRST, cov#8 ASC NULLS FIRST, 5) - +- *(10) BroadcastHashJoin [i_item_sk#2, w_warehouse_sk#1], [i_item_sk#9, w_warehouse_sk#10], Inner, BuildRight - :- *(10) Project [w_warehouse_sk#1, i_item_sk#2, d_moy#3, mean#4, CASE WHEN (mean#4 = 0.0) THEN null ELSE (stdev#11 / mean#4) END AS cov#5] - : +- *(10) Filter ((CASE WHEN (mean#4 = 0.0) THEN 0.0 ELSE (stdev#11 / mean#4) END > 1.0) && (CASE WHEN (mean#4 = 0.0) THEN null ELSE (stdev#11 / mean#4) END > 1.5)) - : +- *(10) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) - : +- Exchange hashpartitioning(w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3, 5) - : +- *(4) HashAggregate(keys=[w_warehouse_name#12, w_warehouse_sk#1, i_item_sk#2, d_moy#3], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) - : +- *(4) Project [inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12, d_moy#3] - : +- *(4) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#15], Inner, BuildRight - : :- *(4) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#2, w_warehouse_sk#1, w_warehouse_name#12] - : : +- *(4) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#1], Inner, BuildRight - : : :- *(4) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#2] - : : : +- *(4) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#2], Inner, BuildRight - : : : :- *(4) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] - : : : : +- *(4) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) - : : : : +- *(4) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [i_item_sk#2] - : : : +- *(1) Filter isnotnull(i_item_sk#2) - : : : +- *(1) FileScan parquet default.item[i_item_sk#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [w_warehouse_sk#1, w_warehouse_name#12] - : : +- *(2) Filter isnotnull(w_warehouse_sk#1) - : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#1,w_warehouse_name#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [d_date_sk#15, d_moy#3] - : +- *(3) Filter ((((isnotnull(d_year#18) && isnotnull(d_moy#3)) && (d_year#18 = 2001)) && (d_moy#3 = 1)) && isnotnull(d_date_sk#15)) - : +- *(3) FileScan parquet default.date_dim[d_date_sk#15,d_year#18,d_moy#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) - +- *(9) Project [w_warehouse_sk#10, i_item_sk#9, d_moy#6, mean#7, CASE WHEN (mean#7 = 0.0) THEN null ELSE (stdev#19 / mean#7) END AS cov#8] - +- *(9) Filter (CASE WHEN (mean#7 = 0.0) THEN 0.0 ELSE (stdev#19 / mean#7) END > 1.0) - +- *(9) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[stddev_samp(cast(inv_quantity_on_hand#13 as double)), avg(cast(inv_quantity_on_hand#13 as bigint))]) - +- Exchange hashpartitioning(w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6, 5) - +- *(8) HashAggregate(keys=[w_warehouse_name#20, w_warehouse_sk#10, i_item_sk#9, d_moy#6], functions=[partial_stddev_samp(cast(inv_quantity_on_hand#13 as double)), partial_avg(cast(inv_quantity_on_hand#13 as bigint))]) - +- *(8) Project [inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20, d_moy#6] - +- *(8) BroadcastHashJoin [inv_date_sk#14], [d_date_sk#21], Inner, BuildRight - :- *(8) Project [inv_date_sk#14, inv_quantity_on_hand#13, i_item_sk#9, w_warehouse_sk#10, w_warehouse_name#20] - : +- *(8) BroadcastHashJoin [inv_warehouse_sk#16], [w_warehouse_sk#10], Inner, BuildRight - : :- *(8) Project [inv_date_sk#14, inv_warehouse_sk#16, inv_quantity_on_hand#13, i_item_sk#9] - : : +- *(8) BroadcastHashJoin [inv_item_sk#17], [i_item_sk#9], Inner, BuildRight - : : :- *(8) Project [inv_date_sk#14, inv_item_sk#17, inv_warehouse_sk#16, inv_quantity_on_hand#13] - : : : +- *(8) Filter ((isnotnull(inv_item_sk#17) && isnotnull(inv_warehouse_sk#16)) && isnotnull(inv_date_sk#14)) - : : : +- *(8) FileScan parquet default.inventory[inv_date_sk#14,inv_item_sk#17,inv_warehouse_sk#16,inv_quantity_on_hand#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)], ReadSchema: struct - : : +- ReusedExchange [i_item_sk#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [w_warehouse_sk#10, w_warehouse_name#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(7) Project [d_date_sk#21, d_moy#6] - +- *(7) Filter ((((isnotnull(d_year#22) && isnotnull(d_moy#6)) && (d_year#22 = 2001)) && (d_moy#6 = 2)) && isnotnull(d_date_sk#21)) - +- *(7) FileScan parquet default.date_dim[d_date_sk#21,d_year#22,d_moy#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file +* Sort (52) ++- Exchange (51) + +- * BroadcastHashJoin Inner BuildRight (50) + :- * Project (27) + : +- * Filter (26) + : +- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.inventory (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.item (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.warehouse (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.date_dim (16) + +- BroadcastExchange (49) + +- * Project (48) + +- * Filter (47) + +- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.inventory (28) + : : +- ReusedExchange (31) + : +- ReusedExchange (34) + +- BroadcastExchange (41) + +- * Project (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet default.date_dim (37) + + +(1) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/inventory] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(3) Filter [codegen id : 4] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_item_sk#2) AND isnotnull(inv_warehouse_sk#3)) AND isnotnull(inv_date_sk#1)) + +(4) Scan parquet default.item +Output [1]: [i_item_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [i_item_sk#5] + +(6) Filter [codegen id : 1] +Input [1]: [i_item_sk#5] +Condition : isnotnull(i_item_sk#5) + +(7) BroadcastExchange +Input [1]: [i_item_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(9) Project [codegen id : 4] +Output [4]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5] +Input [5]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5] + +(10) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#7, w_warehouse_name#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] + +(12) Filter [codegen id : 2] +Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] +Condition : isnotnull(w_warehouse_sk#7) + +(13) BroadcastExchange +Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#7] +Join condition: None + +(15) Project [codegen id : 4] +Output [5]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8] +Input [6]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8] + +(16) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] +Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#12)) AND (d_year#11 = 2001)) AND (d_moy#12 = 1)) AND isnotnull(d_date_sk#10)) + +(19) Project [codegen id : 3] +Output [2]: [d_date_sk#10, d_moy#12] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(20) BroadcastExchange +Input [2]: [d_date_sk#10, d_moy#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(22) Project [codegen id : 4] +Output [5]: [inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8, d_moy#12] +Input [7]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8, d_date_sk#10, d_moy#12] + +(23) HashAggregate [codegen id : 4] +Input [5]: [inv_quantity_on_hand#4, i_item_sk#5, w_warehouse_sk#7, w_warehouse_name#8, d_moy#12] +Keys [4]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#4 as double)), partial_avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [5]: [n#14, avg#15, m2#16, sum#17, count#18] +Results [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, n#19, avg#20, m2#21, sum#22, count#23] + +(24) Exchange +Input [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, n#19, avg#20, m2#21, sum#22, count#23] +Arguments: hashpartitioning(w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, 5), true, [id=#24] + +(25) HashAggregate [codegen id : 10] +Input [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12, n#19, avg#20, m2#21, sum#22, count#23] +Keys [4]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#5, d_moy#12] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double)), avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double))#25, avg(cast(inv_quantity_on_hand#4 as bigint))#26] +Results [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, stddev_samp(cast(inv_quantity_on_hand#4 as double))#25 AS stdev#27, avg(cast(inv_quantity_on_hand#4 as bigint))#26 AS mean#28] + +(26) Filter [codegen id : 10] +Input [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, stdev#27, mean#28] +Condition : ((CASE WHEN (mean#28 = 0.0) THEN 0.0 ELSE (stdev#27 / mean#28) END > 1.0) AND (CASE WHEN (mean#28 = 0.0) THEN null ELSE (stdev#27 / mean#28) END > 1.5)) + +(27) Project [codegen id : 10] +Output [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, mean#28, CASE WHEN (mean#28 = 0.0) THEN null ELSE (stdev#27 / mean#28) END AS cov#29] +Input [5]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, stdev#27, mean#28] + +(28) Scan parquet default.inventory +Output [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/inventory] +PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 8] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] + +(30) Filter [codegen id : 8] +Input [4]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4] +Condition : ((isnotnull(inv_item_sk#2) AND isnotnull(inv_warehouse_sk#3)) AND isnotnull(inv_date_sk#1)) + +(31) ReusedExchange [Reuses operator id: 7] +Output [1]: [i_item_sk#30] + +(32) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_item_sk#2] +Right keys [1]: [i_item_sk#30] +Join condition: None + +(33) Project [codegen id : 8] +Output [4]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#30] +Input [5]: [inv_date_sk#1, inv_item_sk#2, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#30] + +(34) ReusedExchange [Reuses operator id: 13] +Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] + +(35) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_warehouse_sk#3] +Right keys [1]: [w_warehouse_sk#31] +Join condition: None + +(36) Project [codegen id : 8] +Output [5]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] +Input [6]: [inv_date_sk#1, inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32] + +(37) Scan parquet default.date_dim +Output [3]: [d_date_sk#33, d_year#34, d_moy#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 7] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] + +(39) Filter [codegen id : 7] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] +Condition : ((((isnotnull(d_year#34) AND isnotnull(d_moy#35)) AND (d_year#34 = 2001)) AND (d_moy#35 = 2)) AND isnotnull(d_date_sk#33)) + +(40) Project [codegen id : 7] +Output [2]: [d_date_sk#33, d_moy#35] +Input [3]: [d_date_sk#33, d_year#34, d_moy#35] + +(41) BroadcastExchange +Input [2]: [d_date_sk#33, d_moy#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#36] + +(42) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [inv_date_sk#1] +Right keys [1]: [d_date_sk#33] +Join condition: None + +(43) Project [codegen id : 8] +Output [5]: [inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] +Input [7]: [inv_date_sk#1, inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_date_sk#33, d_moy#35] + +(44) HashAggregate [codegen id : 8] +Input [5]: [inv_quantity_on_hand#4, i_item_sk#30, w_warehouse_sk#31, w_warehouse_name#32, d_moy#35] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#4 as double)), partial_avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [5]: [n#37, avg#38, m2#39, sum#40, count#41] +Results [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#42, avg#43, m2#44, sum#45, count#46] + +(45) Exchange +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#42, avg#43, m2#44, sum#45, count#46] +Arguments: hashpartitioning(w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, 5), true, [id=#47] + +(46) HashAggregate [codegen id : 9] +Input [9]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35, n#42, avg#43, m2#44, sum#45, count#46] +Keys [4]: [w_warehouse_name#32, w_warehouse_sk#31, i_item_sk#30, d_moy#35] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double)), avg(cast(inv_quantity_on_hand#4 as bigint))] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#4 as double))#48, avg(cast(inv_quantity_on_hand#4 as bigint))#49] +Results [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stddev_samp(cast(inv_quantity_on_hand#4 as double))#48 AS stdev#50, avg(cast(inv_quantity_on_hand#4 as bigint))#49 AS mean#51] + +(47) Filter [codegen id : 9] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#50, mean#51] +Condition : (CASE WHEN (mean#51 = 0.0) THEN 0.0 ELSE (stdev#50 / mean#51) END > 1.0) + +(48) Project [codegen id : 9] +Output [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, CASE WHEN (mean#51 = 0.0) THEN null ELSE (stdev#50 / mean#51) END AS cov#52] +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, stdev#50, mean#51] + +(49) BroadcastExchange +Input [5]: [w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, cov#52] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [id=#53] + +(50) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [i_item_sk#5, w_warehouse_sk#7] +Right keys [2]: [i_item_sk#30, w_warehouse_sk#31] +Join condition: None + +(51) Exchange +Input [10]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, mean#28, cov#29, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, cov#52] +Arguments: rangepartitioning(w_warehouse_sk#7 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, mean#28 ASC NULLS FIRST, cov#29 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#51 ASC NULLS FIRST, cov#52 ASC NULLS FIRST, 5), true, [id=#54] + +(52) Sort [codegen id : 11] +Input [10]: [w_warehouse_sk#7, i_item_sk#5, d_moy#12, mean#28, cov#29, w_warehouse_sk#31, i_item_sk#30, d_moy#35, mean#51, cov#52] +Arguments: [w_warehouse_sk#7 ASC NULLS FIRST, i_item_sk#5 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, mean#28 ASC NULLS FIRST, cov#29 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#51 ASC NULLS FIRST, cov#52 ASC NULLS FIRST], true, 0 + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/simplified.txt index d80f4a4ee..f4e23c837 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q39b/simplified.txt @@ -1,69 +1,77 @@ -WholeStageCodegen - Sort [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] +WholeStageCodegen (11) + Sort [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] InputAdapter - Exchange [cov,cov,d_moy,d_moy,i_item_sk,mean,mean,w_warehouse_sk] #1 - WholeStageCodegen - BroadcastHashJoin [i_item_sk,i_item_sk,w_warehouse_sk,w_warehouse_sk] - Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] + Exchange [w_warehouse_sk,i_item_sk,d_moy,mean,cov,d_moy,mean,cov] #1 + WholeStageCodegen (10) + BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] + Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] Filter [mean,stdev] - HashAggregate [avg,avg(cast(inv_quantity_on_hand as bigint)),count,d_moy,i_item_sk,m2,n,stddev_samp(cast(inv_quantity_on_hand as double)),sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(cast(inv_quantity_on_hand as bigint)),stdev,mean,n,avg,m2,sum,count] InputAdapter - Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #2 - WholeStageCodegen - HashAggregate [avg,avg,count,count,d_moy,i_item_sk,inv_quantity_on_hand,m2,m2,n,n,sum,sum,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] - Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] - BroadcastHashJoin [d_date_sk,inv_date_sk] - Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] + Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_date_sk,inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] - BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_sk] [i_item_sk] + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [w_warehouse_name,w_warehouse_sk] - Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] + WholeStageCodegen (2) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (3) Project [d_date_sk,d_moy] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #6 - WholeStageCodegen - Project [d_moy,i_item_sk,mean,stdev,w_warehouse_sk] + WholeStageCodegen (9) + Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] Filter [mean,stdev] - HashAggregate [avg,avg(cast(inv_quantity_on_hand as bigint)),count,d_moy,i_item_sk,m2,n,stddev_samp(cast(inv_quantity_on_hand as double)),sum,w_warehouse_name,w_warehouse_sk] [avg,avg(cast(inv_quantity_on_hand as bigint)),count,m2,mean,n,stddev_samp(cast(inv_quantity_on_hand as double)),stdev,sum] + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(cast(inv_quantity_on_hand as bigint)),stdev,mean,n,avg,m2,sum,count] InputAdapter - Exchange [d_moy,i_item_sk,w_warehouse_name,w_warehouse_sk] #7 - WholeStageCodegen - HashAggregate [avg,avg,count,count,d_moy,i_item_sk,inv_quantity_on_hand,m2,m2,n,n,sum,sum,w_warehouse_name,w_warehouse_sk] [avg,avg,count,count,m2,m2,n,n,sum,sum] - Project [d_moy,i_item_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] - BroadcastHashJoin [d_date_sk,inv_date_sk] - Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,w_warehouse_name,w_warehouse_sk] + Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 + WholeStageCodegen (8) + HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,inv_quantity_on_hand] [n,avg,m2,sum,count,n,avg,m2,sum,count] + Project [inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name,d_moy] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [inv_date_sk,inv_quantity_on_hand,i_item_sk,w_warehouse_sk,w_warehouse_name] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [i_item_sk,inv_date_sk,inv_quantity_on_hand,inv_warehouse_sk] - BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - Filter [inv_date_sk,inv_item_sk,inv_warehouse_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + Project [inv_date_sk,inv_warehouse_sk,inv_quantity_on_hand,i_item_sk] + BroadcastHashJoin [inv_item_sk,i_item_sk] + Filter [inv_item_sk,inv_warehouse_sk,inv_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] InputAdapter - ReusedExchange [i_item_sk] [i_item_sk] #3 + ReusedExchange [i_item_sk] #3 InputAdapter - ReusedExchange [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] #4 + ReusedExchange [w_warehouse_sk,w_warehouse_name] #4 InputAdapter BroadcastExchange #8 - WholeStageCodegen + WholeStageCodegen (7) Project [d_date_sk,d_moy] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/explain.txt index 098474466..79a7abdcf 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/explain.txt @@ -1,124 +1,606 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer_first_name#2 ASC NULLS FIRST,customer_last_name#3 ASC NULLS FIRST,customer_preferred_cust_flag#4 ASC NULLS FIRST,customer_birth_country#5 ASC NULLS FIRST,customer_login#6 ASC NULLS FIRST,customer_email_address#7 ASC NULLS FIRST], output=[customer_id#1,customer_first_name#2,customer_last_name#3,customer_preferred_cust_flag#4,customer_birth_country#5,customer_login#6,customer_email_address#7]) -+- *(25) Project [customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7] - +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#9], Inner, BuildRight, (CASE WHEN (year_total#10 > 0.000000) THEN CheckOverflow((promote_precision(year_total#11) / promote_precision(year_total#10)), DecimalType(38,14)) ELSE null END > CASE WHEN (year_total#12 > 0.000000) THEN CheckOverflow((promote_precision(year_total#13) / promote_precision(year_total#12)), DecimalType(38,14)) ELSE null END) - :- *(25) Project [customer_id#8, customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7, year_total#10, year_total#11, year_total#12] - : +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#14], Inner, BuildRight - : :- *(25) Project [customer_id#8, customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7, year_total#10, year_total#11] - : : +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#15], Inner, BuildRight, (CASE WHEN (year_total#10 > 0.000000) THEN CheckOverflow((promote_precision(year_total#11) / promote_precision(year_total#10)), DecimalType(38,14)) ELSE null END > CASE WHEN (year_total#16 > 0.000000) THEN CheckOverflow((promote_precision(year_total#17) / promote_precision(year_total#16)), DecimalType(38,14)) ELSE null END) - : : :- *(25) Project [customer_id#8, year_total#16, customer_id#1, customer_first_name#2, customer_last_name#3, customer_preferred_cust_flag#4, customer_birth_country#5, customer_login#6, customer_email_address#7, year_total#17, year_total#10] - : : : +- *(25) BroadcastHashJoin [customer_id#8], [customer_id#18], Inner, BuildRight - : : : :- *(25) BroadcastHashJoin [customer_id#8], [customer_id#1], Inner, BuildRight - : : : : :- Union - : : : : : :- *(4) Filter (isnotnull(year_total#16) && (year_total#16 > 0.000000)) - : : : : : : +- *(4) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) - : : : : : : +- *(3) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : : : : +- *(3) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27, d_year#26] - : : : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight - : : : : : : :- *(3) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_sold_date_sk#31, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27] - : : : : : : : +- *(3) BroadcastHashJoin [c_customer_sk#33], [ss_customer_sk#34], Inner, BuildRight - : : : : : : : :- *(3) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] - : : : : : : : : +- *(3) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) - : : : : : : : : +- *(3) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - : : : : : :- LocalTableScan , [customer_id#35, year_total#36] - : : : : : +- LocalTableScan , [customer_id#37, year_total#38] - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : : : : +- Union - : : : : :- *(8) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) - : : : : : +- *(7) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#29 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#30 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : : : +- *(7) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27, d_year#26] - : : : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight - : : : : : :- *(7) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ss_sold_date_sk#31, ss_ext_discount_amt#29, ss_ext_sales_price#30, ss_ext_wholesale_cost#28, ss_ext_list_price#27] - : : : : : : +- *(7) BroadcastHashJoin [c_customer_sk#33], [ss_customer_sk#34], Inner, BuildRight - : : : : : : :- *(7) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] - : : : : : : : +- *(7) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) - : : : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - : : : : :- LocalTableScan , [customer_id#35, customer_first_name#39, customer_last_name#40, customer_preferred_cust_flag#41, customer_birth_country#42, customer_login#43, customer_email_address#44, year_total#36] - : : : : +- LocalTableScan , [customer_id#37, customer_first_name#45, customer_last_name#46, customer_preferred_cust_flag#47, customer_birth_country#48, customer_login#49, customer_email_address#50, year_total#38] - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : : : +- Union - : : : :- LocalTableScan , [customer_id#18, year_total#10] - : : : :- *(12) Filter (isnotnull(year_total#36) && (year_total#36 > 0.000000)) - : : : : +- *(12) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) - : : : : +- *(11) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : : +- *(11) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51, d_year#26] - : : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#55], [d_date_sk#32], Inner, BuildRight - : : : : :- *(11) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_sold_date_sk#55, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51] - : : : : : +- *(11) BroadcastHashJoin [c_customer_sk#33], [cs_bill_customer_sk#56], Inner, BuildRight - : : : : : :- *(11) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] - : : : : : : +- *(11) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) - : : : : : : +- *(11) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#37, year_total#38] - : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : : +- Union - : : :- LocalTableScan , [customer_id#15, year_total#11] - : : :- *(16) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) - : : : +- *(15) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#52 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#53 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#54 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : : : +- *(15) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51, d_year#26] - : : : +- *(15) BroadcastHashJoin [cs_sold_date_sk#55], [d_date_sk#32], Inner, BuildRight - : : : :- *(15) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, cs_sold_date_sk#55, cs_ext_discount_amt#53, cs_ext_sales_price#54, cs_ext_wholesale_cost#52, cs_ext_list_price#51] - : : : : +- *(15) BroadcastHashJoin [c_customer_sk#33], [cs_bill_customer_sk#56], Inner, BuildRight - : : : : :- *(15) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] - : : : : : +- *(15) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) - : : : : : +- *(15) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#37, year_total#38] - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : +- Union - : :- LocalTableScan , [customer_id#14, year_total#12] - : :- LocalTableScan , [customer_id#35, year_total#36] - : +- *(20) Filter (isnotnull(year_total#38) && (year_total#38 > 0.000000)) - : +- *(20) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) - : +- *(19) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - : +- *(19) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57, d_year#26] - : +- *(19) BroadcastHashJoin [ws_sold_date_sk#61], [d_date_sk#32], Inner, BuildRight - : :- *(19) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_sold_date_sk#61, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57] - : : +- *(19) BroadcastHashJoin [c_customer_sk#33], [ws_bill_customer_sk#62], Inner, BuildRight - : : :- *(19) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] - : : : +- *(19) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) - : : : +- *(19) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct, [customer_id#9, year_total#13] - :- LocalTableScan , [customer_id#35, year_total#36] - +- *(24) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - +- Exchange hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26, 5) - +- *(23) HashAggregate(keys=[c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, d_year#26], functions=[partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#57 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#58 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#59 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#60 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))]) - +- *(23) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57, d_year#26] - +- *(23) BroadcastHashJoin [ws_sold_date_sk#61], [d_date_sk#32], Inner, BuildRight - :- *(23) Project [c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25, ws_sold_date_sk#61, ws_ext_discount_amt#59, ws_ext_sales_price#60, ws_ext_wholesale_cost#58, ws_ext_list_price#57] - : +- *(23) BroadcastHashJoin [c_customer_sk#33], [ws_bill_customer_sk#62], Inner, BuildRight - : :- *(23) Project [c_customer_sk#33, c_customer_id#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22, c_birth_country#23, c_login#24, c_email_address#25] - : : +- *(23) Filter (isnotnull(c_customer_sk#33) && isnotnull(c_customer_id#19)) - : : +- *(23) FileScan parquet default.customer[c_customer_sk#33,c_customer_id#19,c_first_name#20,c_last_name#21,c_preferred_cust_flag#22,c_birth_country#23,c_login#24,c_email_address#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(3) Filter [codegen id : 3] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(4) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] + +(6) Filter [codegen id : 1] +Input [6]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] +Condition : (isnotnull(ss_customer_sk#10) AND isnotnull(ss_sold_date_sk#9)) + +(7) BroadcastExchange +Input [6]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#15] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#10] +Join condition: None + +(9) Project [codegen id : 3] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] +Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#16, d_year#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#16, d_year#17] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#16, d_year#17] +Condition : ((isnotnull(d_year#17) AND (d_year#17 = 2001)) AND isnotnull(d_date_sk#16)) + +(13) BroadcastExchange +Input [2]: [d_date_sk#16, d_year#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#9] +Right keys [1]: [d_date_sk#16] +Join condition: None + +(15) Project [codegen id : 3] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14, d_year#17] +Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14, d_date_sk#16, d_year#17] + +(16) HashAggregate [codegen id : 3] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14, d_year#17] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#19, isEmpty#20] +Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#21, isEmpty#22] + +(17) Exchange +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#21, isEmpty#22] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), true, [id=#23] + +(18) HashAggregate [codegen id : 24] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#21, isEmpty#22] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#24] +Results [2]: [c_customer_id#2 AS customer_id#25, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#24 AS year_total#26] + +(19) Filter [codegen id : 24] +Input [2]: [customer_id#25, year_total#26] +Condition : (isnotnull(year_total#26) AND (year_total#26 > 0.000000)) + +(20) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(22) Filter [codegen id : 6] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(23) ReusedExchange [Reuses operator id: 7] +Output [6]: [ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#10] +Join condition: None + +(25) Project [codegen id : 6] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] +Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_customer_sk#10, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14] + +(26) Scan parquet default.date_dim +Output [2]: [d_date_sk#16, d_year#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#16, d_year#17] + +(28) Filter [codegen id : 5] +Input [2]: [d_date_sk#16, d_year#17] +Condition : ((isnotnull(d_year#17) AND (d_year#17 = 2002)) AND isnotnull(d_date_sk#16)) + +(29) BroadcastExchange +Input [2]: [d_date_sk#16, d_year#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#9] +Right keys [1]: [d_date_sk#16] +Join condition: None + +(31) Project [codegen id : 6] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14, d_year#17] +Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_sold_date_sk#9, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14, d_date_sk#16, d_year#17] + +(32) HashAggregate [codegen id : 6] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_ext_list_price#14, d_year#17] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#28, isEmpty#29] +Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#30, isEmpty#31] + +(33) Exchange +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#30, isEmpty#31] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), true, [id=#32] + +(34) HashAggregate [codegen id : 7] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#30, isEmpty#31] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#33] +Results [8]: [c_customer_id#2 AS customer_id#34, c_first_name#3 AS customer_first_name#35, c_last_name#4 AS customer_last_name#36, c_preferred_cust_flag#5 AS customer_preferred_cust_flag#37, c_birth_country#6 AS customer_birth_country#38, c_login#7 AS customer_login#39, c_email_address#8 AS customer_email_address#40, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#14 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#13 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#11 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#12 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#33 AS year_total#41] + +(35) BroadcastExchange +Input [8]: [customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#41] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#42] + +(36) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#25] +Right keys [1]: [customer_id#34] +Join condition: None + +(37) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 10] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(39) Filter [codegen id : 10] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(40) Scan parquet default.catalog_sales +Output [6]: [cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [6]: [cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] + +(42) Filter [codegen id : 8] +Input [6]: [cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] +Condition : (isnotnull(cs_bill_customer_sk#44) AND isnotnull(cs_sold_date_sk#43)) + +(43) BroadcastExchange +Input [6]: [cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#49] + +(44) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [cs_bill_customer_sk#44] +Join condition: None + +(45) Project [codegen id : 10] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_sold_date_sk#43, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] +Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] + +(46) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#16, d_year#17] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#43] +Right keys [1]: [d_date_sk#16] +Join condition: None + +(48) Project [codegen id : 10] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48, d_year#17] +Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_sold_date_sk#43, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48, d_date_sk#16, d_year#17] + +(49) HashAggregate [codegen id : 10] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48, d_year#17] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#50, isEmpty#51] +Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#52, isEmpty#53] + +(50) Exchange +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#52, isEmpty#53] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), true, [id=#54] + +(51) HashAggregate [codegen id : 11] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#52, isEmpty#53] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#55] +Results [2]: [c_customer_id#2 AS customer_id#56, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#55 AS year_total#57] + +(52) Filter [codegen id : 11] +Input [2]: [customer_id#56, year_total#57] +Condition : (isnotnull(year_total#57) AND (year_total#57 > 0.000000)) + +(53) Project [codegen id : 11] +Output [2]: [customer_id#56 AS customer_id#58, year_total#57 AS year_total#59] +Input [2]: [customer_id#56, year_total#57] + +(54) BroadcastExchange +Input [2]: [customer_id#58, year_total#59] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#60] + +(55) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#25] +Right keys [1]: [customer_id#58] +Join condition: None + +(56) Project [codegen id : 24] +Output [11]: [customer_id#25, year_total#26, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#41, year_total#59] +Input [12]: [customer_id#25, year_total#26, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#41, customer_id#58, year_total#59] + +(57) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(58) ColumnarToRow [codegen id : 14] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(59) Filter [codegen id : 14] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(60) ReusedExchange [Reuses operator id: 43] +Output [6]: [cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] + +(61) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [cs_bill_customer_sk#44] +Join condition: None + +(62) Project [codegen id : 14] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_sold_date_sk#43, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] +Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_sold_date_sk#43, cs_bill_customer_sk#44, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48] + +(63) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#16, d_year#17] + +(64) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [cs_sold_date_sk#43] +Right keys [1]: [d_date_sk#16] +Join condition: None + +(65) Project [codegen id : 14] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48, d_year#17] +Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_sold_date_sk#43, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48, d_date_sk#16, d_year#17] + +(66) HashAggregate [codegen id : 14] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, cs_ext_discount_amt#45, cs_ext_sales_price#46, cs_ext_wholesale_cost#47, cs_ext_list_price#48, d_year#17] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#61, isEmpty#62] +Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#63, isEmpty#64] + +(67) Exchange +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#63, isEmpty#64] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), true, [id=#65] + +(68) HashAggregate [codegen id : 15] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#63, isEmpty#64] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#66] +Results [2]: [c_customer_id#2 AS customer_id#67, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#48 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#47 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#45 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#46 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#66 AS year_total#68] + +(69) BroadcastExchange +Input [2]: [customer_id#67, year_total#68] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#69] + +(70) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#25] +Right keys [1]: [customer_id#67] +Join condition: (CASE WHEN (year_total#59 > 0.000000) THEN CheckOverflow((promote_precision(year_total#68) / promote_precision(year_total#59)), DecimalType(38,14), true) ELSE null END > CASE WHEN (year_total#26 > 0.000000) THEN CheckOverflow((promote_precision(year_total#41) / promote_precision(year_total#26)), DecimalType(38,14), true) ELSE null END) + +(71) Project [codegen id : 24] +Output [10]: [customer_id#25, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#59, year_total#68] +Input [13]: [customer_id#25, year_total#26, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#41, year_total#59, customer_id#67, year_total#68] + +(72) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(73) ColumnarToRow [codegen id : 18] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(74) Filter [codegen id : 18] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(75) Scan parquet default.web_sales +Output [6]: [ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 16] +Input [6]: [ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] + +(77) Filter [codegen id : 16] +Input [6]: [ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] +Condition : (isnotnull(ws_bill_customer_sk#71) AND isnotnull(ws_sold_date_sk#70)) + +(78) BroadcastExchange +Input [6]: [ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#76] + +(79) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#71] +Join condition: None + +(80) Project [codegen id : 18] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#70, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] +Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] + +(81) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#16, d_year#17] + +(82) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_sold_date_sk#70] +Right keys [1]: [d_date_sk#16] +Join condition: None + +(83) Project [codegen id : 18] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75, d_year#17] +Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#70, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75, d_date_sk#16, d_year#17] + +(84) HashAggregate [codegen id : 18] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75, d_year#17] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#77, isEmpty#78] +Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#79, isEmpty#80] + +(85) Exchange +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#79, isEmpty#80] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), true, [id=#81] + +(86) HashAggregate [codegen id : 19] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#79, isEmpty#80] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#82] +Results [2]: [c_customer_id#2 AS customer_id#83, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#82 AS year_total#84] + +(87) Filter [codegen id : 19] +Input [2]: [customer_id#83, year_total#84] +Condition : (isnotnull(year_total#84) AND (year_total#84 > 0.000000)) + +(88) Project [codegen id : 19] +Output [2]: [customer_id#83 AS customer_id#85, year_total#84 AS year_total#86] +Input [2]: [customer_id#83, year_total#84] + +(89) BroadcastExchange +Input [2]: [customer_id#85, year_total#86] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#87] + +(90) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#25] +Right keys [1]: [customer_id#85] +Join condition: None + +(91) Project [codegen id : 24] +Output [11]: [customer_id#25, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#59, year_total#68, year_total#86] +Input [12]: [customer_id#25, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#59, year_total#68, customer_id#85, year_total#86] + +(92) Scan parquet default.customer +Output [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(93) ColumnarToRow [codegen id : 22] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] + +(94) Filter [codegen id : 22] +Input [8]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(95) ReusedExchange [Reuses operator id: 78] +Output [6]: [ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] + +(96) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#71] +Join condition: None + +(97) Project [codegen id : 22] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#70, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] +Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#70, ws_bill_customer_sk#71, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75] + +(98) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#16, d_year#17] + +(99) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_sold_date_sk#70] +Right keys [1]: [d_date_sk#16] +Join condition: None + +(100) Project [codegen id : 22] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75, d_year#17] +Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_sold_date_sk#70, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75, d_date_sk#16, d_year#17] + +(101) HashAggregate [codegen id : 22] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ws_ext_discount_amt#72, ws_ext_sales_price#73, ws_ext_wholesale_cost#74, ws_ext_list_price#75, d_year#17] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#88, isEmpty#89] +Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#90, isEmpty#91] + +(102) Exchange +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#90, isEmpty#91] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), true, [id=#92] + +(103) HashAggregate [codegen id : 23] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#90, isEmpty#91] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#93] +Results [2]: [c_customer_id#2 AS customer_id#94, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#75 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#74 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#72 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#73 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#93 AS year_total#95] + +(104) BroadcastExchange +Input [2]: [customer_id#94, year_total#95] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#96] + +(105) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#25] +Right keys [1]: [customer_id#94] +Join condition: (CASE WHEN (year_total#59 > 0.000000) THEN CheckOverflow((promote_precision(year_total#68) / promote_precision(year_total#59)), DecimalType(38,14), true) ELSE null END > CASE WHEN (year_total#86 > 0.000000) THEN CheckOverflow((promote_precision(year_total#95) / promote_precision(year_total#86)), DecimalType(38,14), true) ELSE null END) + +(106) Project [codegen id : 24] +Output [7]: [customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40] +Input [13]: [customer_id#25, customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40, year_total#59, year_total#68, year_total#86, customer_id#94, year_total#95] + +(107) TakeOrderedAndProject +Input [7]: [customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40] +Arguments: 100, [customer_id#34 ASC NULLS FIRST, customer_first_name#35 ASC NULLS FIRST, customer_last_name#36 ASC NULLS FIRST, customer_preferred_cust_flag#37 ASC NULLS FIRST, customer_birth_country#38 ASC NULLS FIRST, customer_login#39 ASC NULLS FIRST, customer_email_address#40 ASC NULLS FIRST], [customer_id#34, customer_first_name#35, customer_last_name#36, customer_preferred_cust_flag#37, customer_birth_country#38, customer_login#39, customer_email_address#40] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/simplified.txt index af3c3b475..017b34451 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q4/simplified.txt @@ -1,165 +1,158 @@ -TakeOrderedAndProject [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag] - WholeStageCodegen - Project [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag] +TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address] + WholeStageCodegen (24) + Project [customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address] BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] - Project [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total,year_total,year_total] + Project [customer_id,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total,year_total] BroadcastHashJoin [customer_id,customer_id] - Project [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total,year_total] + Project [customer_id,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total] BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] - Project [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total,year_total,year_total] + Project [customer_id,year_total,customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,year_total] BroadcastHashJoin [customer_id,customer_id] BroadcastHashJoin [customer_id,customer_id] - InputAdapter - Union - WholeStageCodegen - Filter [year_total] - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] [customer_id,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),year_total] - InputAdapter - Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #1 - WholeStageCodegen - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,sum,sum] [sum,sum] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] - Filter [c_customer_id,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),customer_id,year_total,sum,isEmpty] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #1 + WholeStageCodegen (3) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ss_ext_list_price,ss_ext_wholesale_cost,ss_ext_discount_amt,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_sold_date_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] - Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk,d_year] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - LocalTableScan [customer_id,year_total] [customer_id,year_total] - LocalTableScan [customer_id,year_total] [customer_id,year_total] + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #4 - Union - WholeStageCodegen - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),year_total] - InputAdapter - Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #5 - WholeStageCodegen - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,sum,sum] [sum,sum] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] - Filter [c_customer_id,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] - InputAdapter - ReusedExchange [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] [ss_customer_sk,ss_ext_discount_amt,ss_ext_list_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_sold_date_sk] #2 - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [d_date_sk,d_year] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - LocalTableScan [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total] [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total] - LocalTableScan [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total] [customer_birth_country,customer_email_address,customer_first_name,customer_id,customer_last_name,customer_login,customer_preferred_cust_flag,year_total] + WholeStageCodegen (7) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),customer_id,customer_first_name,customer_last_name,customer_preferred_cust_flag,customer_birth_country,customer_login,customer_email_address,year_total,sum,isEmpty] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #5 + WholeStageCodegen (6) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ss_ext_list_price,ss_ext_wholesale_cost,ss_ext_discount_amt,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ss_sold_date_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [ss_sold_date_sk,ss_customer_sk,ss_ext_discount_amt,ss_ext_sales_price,ss_ext_wholesale_cost,ss_ext_list_price] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #7 - Union - LocalTableScan [customer_id,year_total] [customer_id,year_total] - WholeStageCodegen + WholeStageCodegen (11) + Project [customer_id,year_total] Filter [year_total] - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] [customer_id,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),customer_id,year_total,sum,isEmpty] InputAdapter - Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #8 - WholeStageCodegen - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,d_year,sum,sum] [sum,sum] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,d_year] + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #8 + WholeStageCodegen (10) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,cs_ext_list_price,cs_ext_wholesale_cost,cs_ext_discount_amt,cs_ext_sales_price] [sum,isEmpty,sum,isEmpty] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,d_year] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_sold_date_sk,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price] BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] - Filter [c_customer_id,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] InputAdapter BroadcastExchange #9 - WholeStageCodegen - Project [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] - Filter [cs_bill_customer_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] + WholeStageCodegen (8) + Filter [cs_bill_customer_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price] InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 - LocalTableScan [customer_id,year_total] [customer_id,year_total] + ReusedExchange [d_date_sk,d_year] #3 InputAdapter BroadcastExchange #10 - Union - LocalTableScan [customer_id,year_total] [customer_id,year_total] - WholeStageCodegen - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] [customer_id,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),year_total] - InputAdapter - Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #11 - WholeStageCodegen - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,d_year,sum,sum] [sum,sum] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,d_year] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] - BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] - Filter [c_customer_id,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] - InputAdapter - ReusedExchange [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] [cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_list_price,cs_ext_sales_price,cs_ext_wholesale_cost,cs_sold_date_sk] #9 - InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #6 - LocalTableScan [customer_id,year_total] [customer_id,year_total] + WholeStageCodegen (15) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),customer_id,year_total,sum,isEmpty] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #11 + WholeStageCodegen (14) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,cs_ext_list_price,cs_ext_wholesale_cost,cs_ext_discount_amt,cs_ext_sales_price] [sum,isEmpty,sum,isEmpty] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,cs_sold_date_sk,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price] + BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [cs_sold_date_sk,cs_bill_customer_sk,cs_ext_discount_amt,cs_ext_sales_price,cs_ext_wholesale_cost,cs_ext_list_price] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 InputAdapter BroadcastExchange #12 - Union - LocalTableScan [customer_id,year_total] [customer_id,year_total] - LocalTableScan [customer_id,year_total] [customer_id,year_total] - WholeStageCodegen + WholeStageCodegen (19) + Project [customer_id,year_total] Filter [year_total] - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] [customer_id,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),customer_id,year_total,sum,isEmpty] InputAdapter - Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #13 - WholeStageCodegen - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost] [sum,sum] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #13 + WholeStageCodegen (18) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ws_ext_list_price,ws_ext_wholesale_cost,ws_ext_discount_amt,ws_ext_sales_price] [sum,isEmpty,sum,isEmpty] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_sold_date_sk,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] - Filter [c_customer_id,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] InputAdapter BroadcastExchange #14 - WholeStageCodegen - Project [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] - Filter [ws_bill_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] + WholeStageCodegen (16) + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price] InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 + ReusedExchange [d_date_sk,d_year] #3 InputAdapter BroadcastExchange #15 - Union - LocalTableScan [customer_id,year_total] [customer_id,year_total] - LocalTableScan [customer_id,year_total] [customer_id,year_total] - WholeStageCodegen - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] [customer_id,sum,sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6))),year_total] - InputAdapter - Exchange [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year] #16 - WholeStageCodegen - HashAggregate [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,sum,sum,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost] [sum,sum] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,d_year,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [c_birth_country,c_customer_id,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] - Filter [c_customer_id,c_customer_sk] - Scan parquet default.customer [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] [c_birth_country,c_customer_id,c_customer_sk,c_email_address,c_first_name,c_last_name,c_login,c_preferred_cust_flag] - InputAdapter - ReusedExchange [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] [ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_list_price,ws_ext_sales_price,ws_ext_wholesale_cost,ws_sold_date_sk] #14 - InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #6 + WholeStageCodegen (23) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,sum,isEmpty] [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true)),customer_id,year_total,sum,isEmpty] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year] #16 + WholeStageCodegen (22) + HashAggregate [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,d_year,ws_ext_list_price,ws_ext_wholesale_cost,ws_ext_discount_amt,ws_ext_sales_price] [sum,isEmpty,sum,isEmpty] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address,ws_sold_date_sk,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_country,c_login,c_email_address] + InputAdapter + ReusedExchange [ws_sold_date_sk,ws_bill_customer_sk,ws_ext_discount_amt,ws_ext_sales_price,ws_ext_wholesale_cost,ws_ext_list_price] #14 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/explain.txt index 04080cd89..210469223 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/explain.txt @@ -1,32 +1,183 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[w_state#1 ASC NULLS FIRST,i_item_id#2 ASC NULLS FIRST], output=[w_state#1,i_item_id#2,sales_before#3,sales_after#4]) -+- *(6) HashAggregate(keys=[w_state#1, i_item_id#2], functions=[sum(CASE WHEN (d_date#5 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), sum(CASE WHEN (d_date#5 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)]) - +- Exchange hashpartitioning(w_state#1, i_item_id#2, 5) - +- *(5) HashAggregate(keys=[w_state#1, i_item_id#2], functions=[partial_sum(CASE WHEN (d_date#5 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#5 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#7 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)]) - +- *(5) Project [cs_sales_price#6, cr_refunded_cash#7, w_state#1, i_item_id#2, d_date#5] - +- *(5) BroadcastHashJoin [cs_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight - :- *(5) Project [cs_sold_date_sk#8, cs_sales_price#6, cr_refunded_cash#7, w_state#1, i_item_id#2] - : +- *(5) BroadcastHashJoin [cs_item_sk#10], [i_item_sk#11], Inner, BuildRight - : :- *(5) Project [cs_sold_date_sk#8, cs_item_sk#10, cs_sales_price#6, cr_refunded_cash#7, w_state#1] - : : +- *(5) BroadcastHashJoin [cs_warehouse_sk#12], [w_warehouse_sk#13], Inner, BuildRight - : : :- *(5) Project [cs_sold_date_sk#8, cs_warehouse_sk#12, cs_item_sk#10, cs_sales_price#6, cr_refunded_cash#7] - : : : +- *(5) BroadcastHashJoin [cs_order_number#14, cs_item_sk#10], [cr_order_number#15, cr_item_sk#16], LeftOuter, BuildRight - : : : :- *(5) Project [cs_sold_date_sk#8, cs_warehouse_sk#12, cs_item_sk#10, cs_order_number#14, cs_sales_price#6] - : : : : +- *(5) Filter ((isnotnull(cs_warehouse_sk#12) && isnotnull(cs_item_sk#10)) && isnotnull(cs_sold_date_sk#8)) - : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#8,cs_warehouse_sk#12,cs_item_sk#10,cs_order_number#14,cs_sales_price#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [w_warehouse_sk#13, w_state#1] - : : +- *(2) Filter isnotnull(w_warehouse_sk#13) - : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#13,w_state#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [i_item_sk#11, i_item_id#2] - : +- *(3) Filter (((isnotnull(i_current_price#17) && (i_current_price#17 >= 0.99)) && (i_current_price#17 <= 1.49)) && isnotnull(i_item_sk#11)) - : +- *(3) FileScan parquet default.item[i_item_sk#11,i_item_id#2,i_current_price#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(4) Project [d_date_sk#9, d_date#5] - +- *(4) Filter (((isnotnull(d_date#5) && (d_date#5 >= 10997)) && (d_date#5 <= 11057)) && isnotnull(d_date_sk#9)) - +- *(4) FileScan parquet default.date_dim[d_date_sk#9,d_date#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), Is..., ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Project (28) + +- * BroadcastHashJoin Inner BuildRight (27) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin LeftOuter BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.catalog_returns (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.warehouse (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.item (16) + +- BroadcastExchange (26) + +- * Filter (25) + +- * ColumnarToRow (24) + +- Scan parquet default.date_dim (23) + + +(1) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_order_number#4, cs_sales_price#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [5]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_order_number#4, cs_sales_price#5] + +(3) Filter [codegen id : 5] +Input [5]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_order_number#4, cs_sales_price#5] +Condition : ((isnotnull(cs_warehouse_sk#2) AND isnotnull(cs_item_sk#3)) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.catalog_returns +Output [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] + +(6) Filter [codegen id : 1] +Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Condition : (isnotnull(cr_order_number#7) AND isnotnull(cr_item_sk#6)) + +(7) BroadcastExchange +Input [3]: [cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#9] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [2]: [cs_order_number#4, cs_item_sk#3] +Right keys [2]: [cr_order_number#7, cr_item_sk#6] +Join condition: None + +(9) Project [codegen id : 5] +Output [5]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_sales_price#5, cr_refunded_cash#8] +Input [8]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_order_number#4, cs_sales_price#5, cr_item_sk#6, cr_order_number#7, cr_refunded_cash#8] + +(10) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#10, w_state#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [w_warehouse_sk#10, w_state#11] + +(12) Filter [codegen id : 2] +Input [2]: [w_warehouse_sk#10, w_state#11] +Condition : isnotnull(w_warehouse_sk#10) + +(13) BroadcastExchange +Input [2]: [w_warehouse_sk#10, w_state#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_warehouse_sk#2] +Right keys [1]: [w_warehouse_sk#10] +Join condition: None + +(15) Project [codegen id : 5] +Output [5]: [cs_sold_date_sk#1, cs_item_sk#3, cs_sales_price#5, cr_refunded_cash#8, w_state#11] +Input [7]: [cs_sold_date_sk#1, cs_warehouse_sk#2, cs_item_sk#3, cs_sales_price#5, cr_refunded_cash#8, w_warehouse_sk#10, w_state#11] + +(16) Scan parquet default.item +Output [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_price,1.49), IsNotNull(i_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] + +(18) Filter [codegen id : 3] +Input [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] +Condition : (((isnotnull(i_current_price#15) AND (i_current_price#15 >= 0.99)) AND (i_current_price#15 <= 1.49)) AND isnotnull(i_item_sk#13)) + +(19) Project [codegen id : 3] +Output [2]: [i_item_sk#13, i_item_id#14] +Input [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] + +(20) BroadcastExchange +Input [2]: [i_item_sk#13, i_item_id#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_item_sk#3] +Right keys [1]: [i_item_sk#13] +Join condition: None + +(22) Project [codegen id : 5] +Output [5]: [cs_sold_date_sk#1, cs_sales_price#5, cr_refunded_cash#8, w_state#11, i_item_id#14] +Input [7]: [cs_sold_date_sk#1, cs_item_sk#3, cs_sales_price#5, cr_refunded_cash#8, w_state#11, i_item_sk#13, i_item_id#14] + +(23) Scan parquet default.date_dim +Output [2]: [d_date_sk#17, d_date#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#17, d_date#18] + +(25) Filter [codegen id : 4] +Input [2]: [d_date_sk#17, d_date#18] +Condition : (((isnotnull(d_date#18) AND (d_date#18 >= 10997)) AND (d_date#18 <= 11057)) AND isnotnull(d_date_sk#17)) + +(26) BroadcastExchange +Input [2]: [d_date_sk#17, d_date#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(27) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(28) Project [codegen id : 5] +Output [5]: [cs_sales_price#5, cr_refunded_cash#8, w_state#11, i_item_id#14, d_date#18] +Input [7]: [cs_sold_date_sk#1, cs_sales_price#5, cr_refunded_cash#8, w_state#11, i_item_id#14, d_date_sk#17, d_date#18] + +(29) HashAggregate [codegen id : 5] +Input [5]: [cs_sales_price#5, cr_refunded_cash#8, w_state#11, i_item_id#14, d_date#18] +Keys [2]: [w_state#11, i_item_id#14] +Functions [2]: [partial_sum(CASE WHEN (d_date#18 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#18 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)] +Aggregate Attributes [4]: [sum#20, isEmpty#21, sum#22, isEmpty#23] +Results [6]: [w_state#11, i_item_id#14, sum#24, isEmpty#25, sum#26, isEmpty#27] + +(30) Exchange +Input [6]: [w_state#11, i_item_id#14, sum#24, isEmpty#25, sum#26, isEmpty#27] +Arguments: hashpartitioning(w_state#11, i_item_id#14, 5), true, [id=#28] + +(31) HashAggregate [codegen id : 6] +Input [6]: [w_state#11, i_item_id#14, sum#24, isEmpty#25, sum#26, isEmpty#27] +Keys [2]: [w_state#11, i_item_id#14] +Functions [2]: [sum(CASE WHEN (d_date#18 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END), sum(CASE WHEN (d_date#18 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#18 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#29, sum(CASE WHEN (d_date#18 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#30] +Results [4]: [w_state#11, i_item_id#14, sum(CASE WHEN (d_date#18 < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#29 AS sales_before#31, sum(CASE WHEN (d_date#18 >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price#5 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#8 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END)#30 AS sales_after#32] + +(32) TakeOrderedAndProject +Input [4]: [w_state#11, i_item_id#14, sales_before#31, sales_after#32] +Arguments: 100, [w_state#11 ASC NULLS FIRST, i_item_id#14 ASC NULLS FIRST], [w_state#11, i_item_id#14, sales_before#31, sales_after#32] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/simplified.txt index fcc311d0d..b1e0f1f17 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q40/simplified.txt @@ -1,42 +1,48 @@ -TakeOrderedAndProject [i_item_id,sales_after,sales_before,w_state] - WholeStageCodegen - HashAggregate [i_item_id,sum,sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),w_state] [sales_after,sales_before,sum,sum,sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END),sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] +TakeOrderedAndProject [w_state,i_item_id,sales_before,sales_after] + WholeStageCodegen (6) + HashAggregate [w_state,i_item_id,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_date < 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END),sum(CASE WHEN (d_date >= 11027) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true) ELSE 0.00 END),sales_before,sales_after,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [i_item_id,w_state] #1 - WholeStageCodegen - HashAggregate [cr_refunded_cash,cs_sales_price,d_date,i_item_id,sum,sum,sum,sum,w_state] [sum,sum,sum,sum] - Project [cr_refunded_cash,cs_sales_price,d_date,i_item_id,w_state] + Exchange [w_state,i_item_id] #1 + WholeStageCodegen (5) + HashAggregate [w_state,i_item_id,d_date,cs_sales_price,cr_refunded_cash] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Project [cs_sales_price,cr_refunded_cash,w_state,i_item_id,d_date] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cr_refunded_cash,cs_sales_price,cs_sold_date_sk,i_item_id,w_state] + Project [cs_sold_date_sk,cs_sales_price,cr_refunded_cash,w_state,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cr_refunded_cash,cs_item_sk,cs_sales_price,cs_sold_date_sk,w_state] + Project [cs_sold_date_sk,cs_item_sk,cs_sales_price,cr_refunded_cash,w_state] BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] - Project [cr_refunded_cash,cs_item_sk,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] - BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] - Project [cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] - Filter [cs_item_sk,cs_sold_date_sk,cs_warehouse_sk] - Scan parquet default.catalog_sales [cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] [cs_item_sk,cs_order_number,cs_sales_price,cs_sold_date_sk,cs_warehouse_sk] + Project [cs_sold_date_sk,cs_warehouse_sk,cs_item_sk,cs_sales_price,cr_refunded_cash] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Filter [cs_warehouse_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_warehouse_sk,cs_item_sk,cs_order_number,cs_sales_price] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [cr_item_sk,cr_order_number,cr_refunded_cash] - Filter [cr_item_sk,cr_order_number] - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash] [cr_item_sk,cr_order_number,cr_refunded_cash] + WholeStageCodegen (1) + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [w_state,w_warehouse_sk] - Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_state,w_warehouse_sk] [w_state,w_warehouse_sk] + WholeStageCodegen (2) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_sk,w_state] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [i_item_id,i_item_sk] + WholeStageCodegen (3) + Project [i_item_sk,i_item_id] Filter [i_current_price,i_item_sk] - Scan parquet default.item [i_current_price,i_item_id,i_item_sk] [i_current_price,i_item_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id,i_current_price] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [d_date,d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + WholeStageCodegen (4) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/explain.txt index 146160760..13d73e61e 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/explain.txt @@ -1,19 +1,120 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_product_name#1 ASC NULLS FIRST], output=[i_product_name#1]) -+- *(4) HashAggregate(keys=[i_product_name#1], functions=[]) - +- Exchange hashpartitioning(i_product_name#1, 5) - +- *(3) HashAggregate(keys=[i_product_name#1], functions=[]) - +- *(3) Project [i_product_name#1] - +- *(3) BroadcastHashJoin [i_manufact#2], [i_manufact#2#3], Inner, BuildRight - :- *(3) Project [i_manufact#2, i_product_name#1] - : +- *(3) Filter (((isnotnull(i_manufact_id#4) && (i_manufact_id#4 >= 738)) && (i_manufact_id#4 <= 778)) && isnotnull(i_manufact#2)) - : +- *(3) FileScan parquet default.item[i_manufact_id#4,i_manufact#2,i_product_name#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), GreaterThanOrEqual(i_manufact_id,738), LessThanOrEqual(i_manufact_id,7..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - +- *(2) Project [i_manufact#2#3] - +- *(2) Filter (if (isnull(alwaysTrue#5)) 0 else item_cnt#6 > 0) - +- *(2) HashAggregate(keys=[i_manufact#2], functions=[count(1)]) - +- Exchange hashpartitioning(i_manufact#2, 5) - +- *(1) HashAggregate(keys=[i_manufact#2], functions=[partial_count(1)]) - +- *(1) Project [i_manufact#2] - +- *(1) Filter (((((i_category#7 = Women) && (((((i_color#8 = powder) || (i_color#8 = khaki)) && ((i_units#9 = Ounce) || (i_units#9 = Oz))) && ((i_size#10 = medium) || (i_size#10 = extra large))) || ((((i_color#8 = brown) || (i_color#8 = honeydew)) && ((i_units#9 = Bunch) || (i_units#9 = Ton))) && ((i_size#10 = N/A) || (i_size#10 = small))))) || ((i_category#7 = Men) && (((((i_color#8 = floral) || (i_color#8 = deep)) && ((i_units#9 = N/A) || (i_units#9 = Dozen))) && ((i_size#10 = petite) || (i_size#10 = large))) || ((((i_color#8 = light) || (i_color#8 = cornflower)) && ((i_units#9 = Box) || (i_units#9 = Pound))) && ((i_size#10 = medium) || (i_size#10 = extra large)))))) || (((i_category#7 = Women) && (((((i_color#8 = midnight) || (i_color#8 = snow)) && ((i_units#9 = Pallet) || (i_units#9 = Gross))) && ((i_size#10 = medium) || (i_size#10 = extra large))) || ((((i_color#8 = cyan) || (i_color#8 = papaya)) && ((i_units#9 = Cup) || (i_units#9 = Dram))) && ((i_size#10 = N/A) || (i_size#10 = small))))) || ((i_category#7 = Men) && (((((i_color#8 = orange) || (i_color#8 = frosted)) && ((i_units#9 = Each) || (i_units#9 = Tbl))) && ((i_size#10 = petite) || (i_size#10 = large))) || ((((i_color#8 = forest) || (i_color#8 = ghost)) && ((i_units#9 = Lb) || (i_units#9 = Bundle))) && ((i_size#10 = medium) || (i_size#10 = extra large))))))) && isnotnull(i_manufact#2)) - +- *(1) FileScan parquet default.item[i_category#7,i_manufact#2,i_size#10,i_color#8,i_units#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [Or(Or(And(EqualTo(i_category,Women),Or(And(And(Or(EqualTo(i_color,powder),EqualTo(i_color,khaki)..., ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (20) ++- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (4) + : +- * Filter (3) + : +- * ColumnarToRow (2) + : +- Scan parquet default.item (1) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * HashAggregate (11) + +- Exchange (10) + +- * HashAggregate (9) + +- * Project (8) + +- * Filter (7) + +- * ColumnarToRow (6) + +- Scan parquet default.item (5) + + +(1) Scan parquet default.item +Output [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manufact_id), GreaterThanOrEqual(i_manufact_id,738), LessThanOrEqual(i_manufact_id,778), IsNotNull(i_manufact)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] + +(3) Filter [codegen id : 3] +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] +Condition : (((isnotnull(i_manufact_id#1) AND (i_manufact_id#1 >= 738)) AND (i_manufact_id#1 <= 778)) AND isnotnull(i_manufact#2)) + +(4) Project [codegen id : 3] +Output [2]: [i_manufact#2, i_product_name#3] +Input [3]: [i_manufact_id#1, i_manufact#2, i_product_name#3] + +(5) Scan parquet default.item +Output [5]: [i_category#4, i_manufact#2, i_size#5, i_color#6, i_units#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [Or(Or(And(EqualTo(i_category,Women),Or(And(And(Or(EqualTo(i_color,powder),EqualTo(i_color,khaki)),Or(EqualTo(i_units,Ounce),EqualTo(i_units,Oz))),Or(EqualTo(i_size,medium),EqualTo(i_size,extra large))),And(And(Or(EqualTo(i_color,brown),EqualTo(i_color,honeydew)),Or(EqualTo(i_units,Bunch),EqualTo(i_units,Ton))),Or(EqualTo(i_size,N/A),EqualTo(i_size,small))))),And(EqualTo(i_category,Men),Or(And(And(Or(EqualTo(i_color,floral),EqualTo(i_color,deep)),Or(EqualTo(i_units,N/A),EqualTo(i_units,Dozen))),Or(EqualTo(i_size,petite),EqualTo(i_size,large))),And(And(Or(EqualTo(i_color,light),EqualTo(i_color,cornflower)),Or(EqualTo(i_units,Box),EqualTo(i_units,Pound))),Or(EqualTo(i_size,medium),EqualTo(i_size,extra large)))))),Or(And(EqualTo(i_category,Women),Or(And(And(Or(EqualTo(i_color,midnight),EqualTo(i_color,snow)),Or(EqualTo(i_units,Pallet),EqualTo(i_units,Gross))),Or(EqualTo(i_size,medium),EqualTo(i_size,extra large))),And(And(Or(EqualTo(i_color,cyan),EqualTo(i_color,papaya)),Or(EqualTo(i_units,Cup),EqualTo(i_units,Dram))),Or(EqualTo(i_size,N/A),EqualTo(i_size,small))))),And(EqualTo(i_category,Men),Or(And(And(Or(EqualTo(i_color,orange),EqualTo(i_color,frosted)),Or(EqualTo(i_units,Each),EqualTo(i_units,Tbl))),Or(EqualTo(i_size,petite),EqualTo(i_size,large))),And(And(Or(EqualTo(i_color,forest),EqualTo(i_color,ghost)),Or(EqualTo(i_units,Lb),EqualTo(i_units,Bundle))),Or(EqualTo(i_size,medium),EqualTo(i_size,extra large))))))), IsNotNull(i_manufact)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [5]: [i_category#4, i_manufact#2, i_size#5, i_color#6, i_units#7] + +(7) Filter [codegen id : 1] +Input [5]: [i_category#4, i_manufact#2, i_size#5, i_color#6, i_units#7] +Condition : (((((i_category#4 = Women) AND (((((i_color#6 = powder) OR (i_color#6 = khaki)) AND ((i_units#7 = Ounce) OR (i_units#7 = Oz))) AND ((i_size#5 = medium) OR (i_size#5 = extra large))) OR ((((i_color#6 = brown) OR (i_color#6 = honeydew)) AND ((i_units#7 = Bunch) OR (i_units#7 = Ton))) AND ((i_size#5 = N/A) OR (i_size#5 = small))))) OR ((i_category#4 = Men) AND (((((i_color#6 = floral) OR (i_color#6 = deep)) AND ((i_units#7 = N/A) OR (i_units#7 = Dozen))) AND ((i_size#5 = petite) OR (i_size#5 = large))) OR ((((i_color#6 = light) OR (i_color#6 = cornflower)) AND ((i_units#7 = Box) OR (i_units#7 = Pound))) AND ((i_size#5 = medium) OR (i_size#5 = extra large)))))) OR (((i_category#4 = Women) AND (((((i_color#6 = midnight) OR (i_color#6 = snow)) AND ((i_units#7 = Pallet) OR (i_units#7 = Gross))) AND ((i_size#5 = medium) OR (i_size#5 = extra large))) OR ((((i_color#6 = cyan) OR (i_color#6 = papaya)) AND ((i_units#7 = Cup) OR (i_units#7 = Dram))) AND ((i_size#5 = N/A) OR (i_size#5 = small))))) OR ((i_category#4 = Men) AND (((((i_color#6 = orange) OR (i_color#6 = frosted)) AND ((i_units#7 = Each) OR (i_units#7 = Tbl))) AND ((i_size#5 = petite) OR (i_size#5 = large))) OR ((((i_color#6 = forest) OR (i_color#6 = ghost)) AND ((i_units#7 = Lb) OR (i_units#7 = Bundle))) AND ((i_size#5 = medium) OR (i_size#5 = extra large))))))) AND isnotnull(i_manufact#2)) + +(8) Project [codegen id : 1] +Output [1]: [i_manufact#2] +Input [5]: [i_category#4, i_manufact#2, i_size#5, i_color#6, i_units#7] + +(9) HashAggregate [codegen id : 1] +Input [1]: [i_manufact#2] +Keys [1]: [i_manufact#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#8] +Results [2]: [i_manufact#2, count#9] + +(10) Exchange +Input [2]: [i_manufact#2, count#9] +Arguments: hashpartitioning(i_manufact#2, 5), true, [id=#10] + +(11) HashAggregate [codegen id : 2] +Input [2]: [i_manufact#2, count#9] +Keys [1]: [i_manufact#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#11] +Results [2]: [count(1)#11 AS item_cnt#12, i_manufact#2 AS i_manufact#2#13] + +(12) Filter [codegen id : 2] +Input [2]: [item_cnt#12, i_manufact#2#13] +Condition : (item_cnt#12 > 0) + +(13) Project [codegen id : 2] +Output [1]: [i_manufact#2#13] +Input [2]: [item_cnt#12, i_manufact#2#13] + +(14) BroadcastExchange +Input [1]: [i_manufact#2#13] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#14] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [i_manufact#2] +Right keys [1]: [i_manufact#2#13] +Join condition: None + +(16) Project [codegen id : 3] +Output [1]: [i_product_name#3] +Input [3]: [i_manufact#2, i_product_name#3, i_manufact#2#13] + +(17) HashAggregate [codegen id : 3] +Input [1]: [i_product_name#3] +Keys [1]: [i_product_name#3] +Functions: [] +Aggregate Attributes: [] +Results [1]: [i_product_name#3] + +(18) Exchange +Input [1]: [i_product_name#3] +Arguments: hashpartitioning(i_product_name#3, 5), true, [id=#15] + +(19) HashAggregate [codegen id : 4] +Input [1]: [i_product_name#3] +Keys [1]: [i_product_name#3] +Functions: [] +Aggregate Attributes: [] +Results [1]: [i_product_name#3] + +(20) TakeOrderedAndProject +Input [1]: [i_product_name#3] +Arguments: 100, [i_product_name#3 ASC NULLS FIRST], [i_product_name#3] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/simplified.txt index 7196f719a..2d14d75ca 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q41/simplified.txt @@ -1,25 +1,29 @@ TakeOrderedAndProject [i_product_name] - WholeStageCodegen + WholeStageCodegen (4) HashAggregate [i_product_name] InputAdapter Exchange [i_product_name] #1 - WholeStageCodegen + WholeStageCodegen (3) HashAggregate [i_product_name] Project [i_product_name] BroadcastHashJoin [i_manufact,i_manufact] Project [i_manufact,i_product_name] - Filter [i_manufact,i_manufact_id] - Scan parquet default.item [i_manufact,i_manufact_id,i_product_name] [i_manufact,i_manufact_id,i_product_name] + Filter [i_manufact_id,i_manufact] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_manufact_id,i_manufact,i_product_name] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (2) Project [i_manufact] - Filter [alwaysTrue,item_cnt] - HashAggregate [count,count(1),i_manufact] [alwaysTrue,count,count(1),i_manufact,item_cnt] + Filter [item_cnt] + HashAggregate [i_manufact,count] [count(1),item_cnt,i_manufact,count] InputAdapter Exchange [i_manufact] #3 - WholeStageCodegen - HashAggregate [count,count,i_manufact] [count,count] + WholeStageCodegen (1) + HashAggregate [i_manufact] [count,count] Project [i_manufact] - Filter [i_category,i_color,i_manufact,i_size,i_units] - Scan parquet default.item [i_category,i_color,i_manufact,i_size,i_units] [i_category,i_color,i_manufact,i_size,i_units] + Filter [i_category,i_color,i_units,i_size,i_manufact] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_manufact,i_size,i_color,i_units] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/explain.txt index 932860551..f7732f3c8 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/explain.txt @@ -1,20 +1,122 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[sum(ss_ext_sales_price)#1 DESC NULLS LAST,d_year#2 ASC NULLS FIRST,i_category_id#3 ASC NULLS FIRST,i_category#4 ASC NULLS FIRST], output=[d_year#2,i_category_id#3,i_category#4,sum(ss_ext_sales_price)#1]) -+- *(4) HashAggregate(keys=[d_year#2, i_category_id#3, i_category#4], functions=[sum(UnscaledValue(ss_ext_sales_price#5))]) - +- Exchange hashpartitioning(d_year#2, i_category_id#3, i_category#4, 5) - +- *(3) HashAggregate(keys=[d_year#2, i_category_id#3, i_category#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#5))]) - +- *(3) Project [d_year#2, ss_ext_sales_price#5, i_category_id#3, i_category#4] - +- *(3) BroadcastHashJoin [ss_item_sk#6], [i_item_sk#7], Inner, BuildRight - :- *(3) Project [d_year#2, ss_item_sk#6, ss_ext_sales_price#5] - : +- *(3) BroadcastHashJoin [d_date_sk#8], [ss_sold_date_sk#9], Inner, BuildRight - : :- *(3) Project [d_date_sk#8, d_year#2] - : : +- *(3) Filter ((((isnotnull(d_moy#10) && isnotnull(d_year#2)) && (d_moy#10 = 11)) && (d_year#2 = 2000)) && isnotnull(d_date_sk#8)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_year#2,d_moy#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [ss_sold_date_sk#9, ss_item_sk#6, ss_ext_sales_price#5] - : +- *(1) Filter (isnotnull(ss_sold_date_sk#9) && isnotnull(ss_item_sk#6)) - : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#6,ss_ext_sales_price#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(2) Project [i_item_sk#7, i_category_id#3, i_category#4] - +- *(2) Filter ((isnotnull(i_manager_id#11) && (i_manager_id#11 = 1)) && isnotnull(i_item_sk#7)) - +- *(2) FileScan parquet default.item[i_item_sk#7,i_category_id#3,i_category#4,i_manager_id#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.item (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Condition : (isnotnull(ss_sold_date_sk#4) AND isnotnull(ss_item_sk#5)) + +(8) BroadcastExchange +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6] +Input [5]: [d_date_sk#1, d_year#2, ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 1)) AND isnotnull(i_item_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#8, i_category_id#9, i_category#10] +Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] + +(15) BroadcastExchange +Input [3]: [i_item_sk#8, i_category_id#9, i_category#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#2, ss_ext_sales_price#6, i_category_id#9, i_category#10] +Input [6]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6, i_item_sk#8, i_category_id#9, i_category#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#2, ss_ext_sales_price#6, i_category_id#9, i_category#10] +Keys [3]: [d_year#2, i_category_id#9, i_category#10] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum#13] +Results [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] + +(19) Exchange +Input [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] +Arguments: hashpartitioning(d_year#2, i_category_id#9, i_category#10, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] +Keys [3]: [d_year#2, i_category_id#9, i_category#10] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#16] +Results [4]: [d_year#2, i_category_id#9, i_category#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#16,17,2) AS sum(ss_ext_sales_price)#17] + +(21) TakeOrderedAndProject +Input [4]: [d_year#2, i_category_id#9, i_category#10, sum(ss_ext_sales_price)#17] +Arguments: 100, [sum(ss_ext_sales_price)#17 DESC NULLS LAST, d_year#2 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_category#10 ASC NULLS FIRST], [d_year#2, i_category_id#9, i_category#10, sum(ss_ext_sales_price)#17] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/simplified.txt index 67de156ac..d9bb6de20 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q42/simplified.txt @@ -1,26 +1,31 @@ -TakeOrderedAndProject [d_year,i_category,i_category_id,sum(ss_ext_sales_price)] - WholeStageCodegen - HashAggregate [d_year,i_category,i_category_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price)] +TakeOrderedAndProject [sum(ss_ext_sales_price),d_year,i_category_id,i_category] + WholeStageCodegen (4) + HashAggregate [d_year,i_category_id,i_category,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(ss_ext_sales_price),sum] InputAdapter - Exchange [d_year,i_category,i_category_id] #1 - WholeStageCodegen - HashAggregate [d_year,i_category,i_category_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [d_year,i_category,i_category_id,ss_ext_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [d_year,ss_ext_sales_price,ss_item_sk] + Exchange [d_year,i_category_id,i_category] #1 + WholeStageCodegen (3) + HashAggregate [d_year,i_category_id,i_category,ss_ext_sales_price] [sum,sum] + Project [d_year,ss_ext_sales_price,i_category_id,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [d_year,ss_item_sk,ss_ext_sales_price] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] Project [d_date_sk,d_year] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + WholeStageCodegen (1) + Filter [ss_sold_date_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [i_category,i_category_id,i_item_sk] - Filter [i_item_sk,i_manager_id] - Scan parquet default.item [i_category,i_category_id,i_item_sk,i_manager_id] [i_category,i_category_id,i_item_sk,i_manager_id] + WholeStageCodegen (2) + Project [i_item_sk,i_category_id,i_category] + Filter [i_manager_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_category_id,i_category,i_manager_id] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/explain.txt index c5ec4b687..8f3ef7fee 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/explain.txt @@ -1,20 +1,122 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,s_store_id#2 ASC NULLS FIRST,sun_sales#3 ASC NULLS FIRST,mon_sales#4 ASC NULLS FIRST,tue_sales#5 ASC NULLS FIRST,wed_sales#6 ASC NULLS FIRST,thu_sales#7 ASC NULLS FIRST,fri_sales#8 ASC NULLS FIRST,sat_sales#9 ASC NULLS FIRST], output=[s_store_name#1,s_store_id#2,sun_sales#3,mon_sales#4,tue_sales#5,wed_sales#6,thu_sales#7,fri_sales#8,sat_sales#9]) -+- *(4) HashAggregate(keys=[s_store_name#1, s_store_id#2], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#10 = Sunday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Monday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Tuesday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Wednesday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Thursday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Friday) THEN ss_sales_price#11 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#10 = Saturday) THEN ss_sales_price#11 ELSE null END))]) - +- Exchange hashpartitioning(s_store_name#1, s_store_id#2, 5) - +- *(3) HashAggregate(keys=[s_store_name#1, s_store_id#2], functions=[partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Sunday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Monday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Tuesday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Wednesday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Thursday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Friday) THEN ss_sales_price#11 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#10 = Saturday) THEN ss_sales_price#11 ELSE null END))]) - +- *(3) Project [d_day_name#10, ss_sales_price#11, s_store_id#2, s_store_name#1] - +- *(3) BroadcastHashJoin [ss_store_sk#12], [s_store_sk#13], Inner, BuildRight - :- *(3) Project [d_day_name#10, ss_store_sk#12, ss_sales_price#11] - : +- *(3) BroadcastHashJoin [d_date_sk#14], [ss_sold_date_sk#15], Inner, BuildRight - : :- *(3) Project [d_date_sk#14, d_day_name#10] - : : +- *(3) Filter ((isnotnull(d_year#16) && (d_year#16 = 2000)) && isnotnull(d_date_sk#14)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_day_name#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [ss_sold_date_sk#15, ss_store_sk#12, ss_sales_price#11] - : +- *(1) Filter (isnotnull(ss_sold_date_sk#15) && isnotnull(ss_store_sk#12)) - : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#15,ss_store_sk#12,ss_sales_price#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(2) Project [s_store_sk#13, s_store_id#2, s_store_name#1] - +- *(2) Filter ((isnotnull(s_gmt_offset#17) && (s_gmt_offset#17 = -5.00)) && isnotnull(s_store_sk#13)) - +- *(2) FileScan parquet default.store[s_store_sk#13,s_store_id#2,s_store_name#1,s_gmt_offset#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.store (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] +Condition : ((isnotnull(d_year#2) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_day_name#3] +Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] + +(5) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] +Condition : (isnotnull(ss_sold_date_sk#4) AND isnotnull(ss_store_sk#5)) + +(8) BroadcastExchange +Input [3]: [ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_day_name#3, ss_store_sk#5, ss_sales_price#6] +Input [5]: [d_date_sk#1, d_day_name#3, ss_sold_date_sk#4, ss_store_sk#5, ss_sales_price#6] + +(11) Scan parquet default.store +Output [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] + +(13) Filter [codegen id : 2] +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] +Condition : ((isnotnull(s_gmt_offset#11) AND (s_gmt_offset#11 = -5.00)) AND isnotnull(s_store_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] + +(15) BroadcastExchange +Input [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_day_name#3, ss_sales_price#6, s_store_id#9, s_store_name#10] +Input [6]: [d_day_name#3, ss_store_sk#5, ss_sales_price#6, s_store_sk#8, s_store_id#9, s_store_name#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_day_name#3, ss_sales_price#6, s_store_id#9, s_store_name#10] +Keys [2]: [s_store_name#10, s_store_id#9] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#6 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#6 ELSE null END))] +Aggregate Attributes [7]: [sum#13, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19] +Results [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] + +(19) Exchange +Input [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Arguments: hashpartitioning(s_store_name#10, s_store_id#9, 5), true, [id=#27] + +(20) HashAggregate [codegen id : 4] +Input [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Keys [2]: [s_store_name#10, s_store_id#9] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#6 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#6 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#6 ELSE null END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#6 ELSE null END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#6 ELSE null END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 ELSE null END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#6 ELSE null END))#32, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#6 ELSE null END))#33, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#6 ELSE null END))#34] +Results [9]: [s_store_name#10, s_store_id#9, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday) THEN ss_sales_price#6 ELSE null END))#28,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday) THEN ss_sales_price#6 ELSE null END))#29,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday) THEN ss_sales_price#6 ELSE null END))#30,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 ELSE null END))#31,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday) THEN ss_sales_price#6 ELSE null END))#32,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday) THEN ss_sales_price#6 ELSE null END))#33,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday) THEN ss_sales_price#6 ELSE null END))#34,17,2) AS sat_sales#41] + +(21) TakeOrderedAndProject +Input [9]: [s_store_name#10, s_store_id#9, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41] +Arguments: 100, [s_store_name#10 ASC NULLS FIRST, s_store_id#9 ASC NULLS FIRST, sun_sales#35 ASC NULLS FIRST, mon_sales#36 ASC NULLS FIRST, tue_sales#37 ASC NULLS FIRST, wed_sales#38 ASC NULLS FIRST, thu_sales#39 ASC NULLS FIRST, fri_sales#40 ASC NULLS FIRST, sat_sales#41 ASC NULLS FIRST], [s_store_name#10, s_store_id#9, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/simplified.txt index 136a277e1..1694f3aab 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q43/simplified.txt @@ -1,26 +1,31 @@ -TakeOrderedAndProject [fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] - WholeStageCodegen - HashAggregate [s_store_id,s_store_name,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] +TakeOrderedAndProject [s_store_name,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + WholeStageCodegen (4) + HashAggregate [s_store_name,s_store_id,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] InputAdapter - Exchange [s_store_id,s_store_name] #1 - WholeStageCodegen - HashAggregate [d_day_name,s_store_id,s_store_name,ss_sales_price,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [d_day_name,s_store_id,s_store_name,ss_sales_price] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [d_day_name,ss_sales_price,ss_store_sk] + Exchange [s_store_name,s_store_id] #1 + WholeStageCodegen (3) + HashAggregate [s_store_name,s_store_id,d_day_name,ss_sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [d_day_name,ss_sales_price,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [d_day_name,ss_store_sk,ss_sales_price] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] Project [d_date_sk,d_day_name] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_day_name,d_year] [d_date_sk,d_day_name,d_year] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_day_name] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_sales_price,ss_sold_date_sk,ss_store_sk] + WholeStageCodegen (1) + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_sales_price] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [s_store_id,s_store_name,s_store_sk] + WholeStageCodegen (2) + Project [s_store_sk,s_store_id,s_store_name] Filter [s_gmt_offset,s_store_sk] - Scan parquet default.store [s_gmt_offset,s_store_id,s_store_name,s_store_sk] [s_gmt_offset,s_store_id,s_store_name,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id,s_store_name,s_gmt_offset] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt index aebba592c..096bd45f0 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/explain.txt @@ -1,50 +1,248 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[rnk#1 ASC NULLS FIRST], output=[rnk#1,best_performing#2,worst_performing#3]) -+- *(10) Project [rnk#1, i_product_name#4 AS best_performing#2, i_product_name#5 AS worst_performing#3] - +- *(10) BroadcastHashJoin [item_sk#6], [i_item_sk#7], Inner, BuildRight - :- *(10) Project [rnk#1, item_sk#6, i_product_name#4] - : +- *(10) BroadcastHashJoin [item_sk#8], [i_item_sk#9], Inner, BuildRight - : :- *(10) Project [item_sk#8, rnk#1, item_sk#6] - : : +- *(10) BroadcastHashJoin [rnk#1], [rnk#10], Inner, BuildRight - : : :- *(10) Project [item_sk#8, rnk#1] - : : : +- *(10) Filter ((isnotnull(rnk#1) && (rnk#1 < 11)) && isnotnull(item_sk#8)) - : : : +- Window [rank(rank_col#11) windowspecdefinition(rank_col#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#1], [rank_col#11 ASC NULLS FIRST] - : : : +- *(3) Sort [rank_col#11 ASC NULLS FIRST], false, 0 - : : : +- Exchange SinglePartition - : : : +- *(2) Project [item_sk#8, rank_col#11] - : : : +- *(2) Filter (isnotnull(avg(ss_net_profit#12)#13) && (cast(avg(ss_net_profit#12)#13 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery6830)), DecimalType(13,7)))) - : : : : +- Subquery subquery6830 - : : : : +- *(2) HashAggregate(keys=[ss_store_sk#14], functions=[avg(UnscaledValue(ss_net_profit#12))]) - : : : : +- Exchange hashpartitioning(ss_store_sk#14, 5) - : : : : +- *(1) HashAggregate(keys=[ss_store_sk#14], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) - : : : : +- *(1) Project [ss_store_sk#14, ss_net_profit#12] - : : : : +- *(1) Filter ((isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) && isnull(ss_addr_sk#15)) - : : : : +- *(1) FileScan parquet default.store_sales[ss_addr_sk#15,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)], ReadSchema: struct - : : : +- *(2) HashAggregate(keys=[ss_item_sk#16], functions=[avg(UnscaledValue(ss_net_profit#12))]) - : : : +- Exchange hashpartitioning(ss_item_sk#16, 5) - : : : +- *(1) HashAggregate(keys=[ss_item_sk#16], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) - : : : +- *(1) Project [ss_item_sk#16, ss_net_profit#12] - : : : +- *(1) Filter (isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) - : : : +- *(1) FileScan parquet default.store_sales[ss_item_sk#16,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : +- *(7) Project [item_sk#6, rnk#10] - : : +- *(7) Filter ((isnotnull(rnk#10) && (rnk#10 < 11)) && isnotnull(item_sk#6)) - : : +- Window [rank(rank_col#17) windowspecdefinition(rank_col#17 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#10], [rank_col#17 DESC NULLS LAST] - : : +- *(6) Sort [rank_col#17 DESC NULLS LAST], false, 0 - : : +- Exchange SinglePartition - : : +- *(5) Project [item_sk#6, rank_col#17] - : : +- *(5) Filter (isnotnull(avg(ss_net_profit#12)#18) && (cast(avg(ss_net_profit#12)#18 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery subquery6835)), DecimalType(13,7)))) - : : : +- Subquery subquery6835 - : : : +- *(2) HashAggregate(keys=[ss_store_sk#14], functions=[avg(UnscaledValue(ss_net_profit#12))]) - : : : +- Exchange hashpartitioning(ss_store_sk#14, 5) - : : : +- *(1) HashAggregate(keys=[ss_store_sk#14], functions=[partial_avg(UnscaledValue(ss_net_profit#12))]) - : : : +- *(1) Project [ss_store_sk#14, ss_net_profit#12] - : : : +- *(1) Filter ((isnotnull(ss_store_sk#14) && (ss_store_sk#14 = 4)) && isnull(ss_addr_sk#15)) - : : : +- *(1) FileScan parquet default.store_sales[ss_addr_sk#15,ss_store_sk#14,ss_net_profit#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)], ReadSchema: struct - : : +- *(5) HashAggregate(keys=[ss_item_sk#16], functions=[avg(UnscaledValue(ss_net_profit#12))]) - : : +- ReusedExchange [ss_item_sk#16, sum#19, count#20], Exchange hashpartitioning(ss_item_sk#16, 5) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(8) Project [i_item_sk#9, i_product_name#4] - : +- *(8) Filter isnotnull(i_item_sk#9) - : +- *(8) FileScan parquet default.item[i_item_sk#9,i_product_name#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - +- ReusedExchange [i_item_sk#7, i_product_name#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +TakeOrderedAndProject (36) ++- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (32) + : +- * BroadcastHashJoin Inner BuildRight (31) + : :- * Project (26) + : : +- * BroadcastHashJoin Inner BuildRight (25) + : : :- * Project (14) + : : : +- * Filter (13) + : : : +- Window (12) + : : : +- * Sort (11) + : : : +- Exchange (10) + : : : +- * Project (9) + : : : +- * Filter (8) + : : : +- * HashAggregate (7) + : : : +- Exchange (6) + : : : +- * HashAggregate (5) + : : : +- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (24) + : : +- * Project (23) + : : +- * Filter (22) + : : +- Window (21) + : : +- * Sort (20) + : : +- Exchange (19) + : : +- * Project (18) + : : +- * Filter (17) + : : +- * HashAggregate (16) + : : +- ReusedExchange (15) + : +- BroadcastExchange (30) + : +- * Filter (29) + : +- * ColumnarToRow (28) + : +- Scan parquet default.item (27) + +- ReusedExchange (33) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3] + +(3) Filter [codegen id : 1] +Input [3]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3] +Condition : (isnotnull(ss_store_sk#2) AND (ss_store_sk#2 = 4)) + +(4) Project [codegen id : 1] +Output [2]: [ss_item_sk#1, ss_net_profit#3] +Input [3]: [ss_item_sk#1, ss_store_sk#2, ss_net_profit#3] + +(5) HashAggregate [codegen id : 1] +Input [2]: [ss_item_sk#1, ss_net_profit#3] +Keys [1]: [ss_item_sk#1] +Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum#4, count#5] +Results [3]: [ss_item_sk#1, sum#6, count#7] + +(6) Exchange +Input [3]: [ss_item_sk#1, sum#6, count#7] +Arguments: hashpartitioning(ss_item_sk#1, 5), true, [id=#8] + +(7) HashAggregate [codegen id : 2] +Input [3]: [ss_item_sk#1, sum#6, count#7] +Keys [1]: [ss_item_sk#1] +Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#9] +Results [3]: [ss_item_sk#1 AS item_sk#10, cast((avg(UnscaledValue(ss_net_profit#3))#9 / 100.0) as decimal(11,6)) AS rank_col#11, cast((avg(UnscaledValue(ss_net_profit#3))#9 / 100.0) as decimal(11,6)) AS avg(ss_net_profit#3)#12] + +(8) Filter [codegen id : 2] +Input [3]: [item_sk#10, rank_col#11, avg(ss_net_profit#3)#12] +Condition : (isnotnull(avg(ss_net_profit#3)#12) AND (cast(avg(ss_net_profit#3)#12 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery scalar-subquery#13, [id=#14])), DecimalType(13,7), true))) + +(9) Project [codegen id : 2] +Output [2]: [item_sk#10, rank_col#11] +Input [3]: [item_sk#10, rank_col#11, avg(ss_net_profit#3)#12] + +(10) Exchange +Input [2]: [item_sk#10, rank_col#11] +Arguments: SinglePartition, true, [id=#15] + +(11) Sort [codegen id : 3] +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank_col#11 ASC NULLS FIRST], false, 0 + +(12) Window +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank(rank_col#11) windowspecdefinition(rank_col#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#16], [rank_col#11 ASC NULLS FIRST] + +(13) Filter [codegen id : 10] +Input [3]: [item_sk#10, rank_col#11, rnk#16] +Condition : ((isnotnull(rnk#16) AND (rnk#16 < 11)) AND isnotnull(item_sk#10)) + +(14) Project [codegen id : 10] +Output [2]: [item_sk#10, rnk#16] +Input [3]: [item_sk#10, rank_col#11, rnk#16] + +(15) ReusedExchange [Reuses operator id: 6] +Output [3]: [ss_item_sk#1, sum#17, count#18] + +(16) HashAggregate [codegen id : 5] +Input [3]: [ss_item_sk#1, sum#17, count#18] +Keys [1]: [ss_item_sk#1] +Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#19] +Results [3]: [ss_item_sk#1 AS item_sk#20, cast((avg(UnscaledValue(ss_net_profit#3))#19 / 100.0) as decimal(11,6)) AS rank_col#21, cast((avg(UnscaledValue(ss_net_profit#3))#19 / 100.0) as decimal(11,6)) AS avg(ss_net_profit#3)#22] + +(17) Filter [codegen id : 5] +Input [3]: [item_sk#20, rank_col#21, avg(ss_net_profit#3)#22] +Condition : (isnotnull(avg(ss_net_profit#3)#22) AND (cast(avg(ss_net_profit#3)#22 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(ReusedSubquery Subquery scalar-subquery#13, [id=#14])), DecimalType(13,7), true))) + +(18) Project [codegen id : 5] +Output [2]: [item_sk#20, rank_col#21] +Input [3]: [item_sk#20, rank_col#21, avg(ss_net_profit#3)#22] + +(19) Exchange +Input [2]: [item_sk#20, rank_col#21] +Arguments: SinglePartition, true, [id=#23] + +(20) Sort [codegen id : 6] +Input [2]: [item_sk#20, rank_col#21] +Arguments: [rank_col#21 DESC NULLS LAST], false, 0 + +(21) Window +Input [2]: [item_sk#20, rank_col#21] +Arguments: [rank(rank_col#21) windowspecdefinition(rank_col#21 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#24], [rank_col#21 DESC NULLS LAST] + +(22) Filter [codegen id : 7] +Input [3]: [item_sk#20, rank_col#21, rnk#24] +Condition : ((isnotnull(rnk#24) AND (rnk#24 < 11)) AND isnotnull(item_sk#20)) + +(23) Project [codegen id : 7] +Output [2]: [item_sk#20, rnk#24] +Input [3]: [item_sk#20, rank_col#21, rnk#24] + +(24) BroadcastExchange +Input [2]: [item_sk#20, rnk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#25] + +(25) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [rnk#16] +Right keys [1]: [rnk#24] +Join condition: None + +(26) Project [codegen id : 10] +Output [3]: [item_sk#10, rnk#16, item_sk#20] +Input [4]: [item_sk#10, rnk#16, item_sk#20, rnk#24] + +(27) Scan parquet default.item +Output [2]: [i_item_sk#26, i_product_name#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 8] +Input [2]: [i_item_sk#26, i_product_name#27] + +(29) Filter [codegen id : 8] +Input [2]: [i_item_sk#26, i_product_name#27] +Condition : isnotnull(i_item_sk#26) + +(30) BroadcastExchange +Input [2]: [i_item_sk#26, i_product_name#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] + +(31) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [item_sk#10] +Right keys [1]: [i_item_sk#26] +Join condition: None + +(32) Project [codegen id : 10] +Output [3]: [rnk#16, item_sk#20, i_product_name#27] +Input [5]: [item_sk#10, rnk#16, item_sk#20, i_item_sk#26, i_product_name#27] + +(33) ReusedExchange [Reuses operator id: 30] +Output [2]: [i_item_sk#29, i_product_name#30] + +(34) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [item_sk#20] +Right keys [1]: [i_item_sk#29] +Join condition: None + +(35) Project [codegen id : 10] +Output [3]: [rnk#16, i_product_name#27 AS best_performing#31, i_product_name#30 AS worst_performing#32] +Input [5]: [rnk#16, item_sk#20, i_product_name#27, i_item_sk#29, i_product_name#30] + +(36) TakeOrderedAndProject +Input [3]: [rnk#16, best_performing#31, worst_performing#32] +Arguments: 100, [rnk#16 ASC NULLS FIRST], [rnk#16, best_performing#31, worst_performing#32] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 8 Hosting Expression = Subquery scalar-subquery#13, [id=#14] +* HashAggregate (43) ++- Exchange (42) + +- * HashAggregate (41) + +- * Project (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet default.store_sales (37) + + +(37) Scan parquet default.store_sales +Output [3]: [ss_addr_sk#33, ss_store_sk#2, ss_net_profit#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 1] +Input [3]: [ss_addr_sk#33, ss_store_sk#2, ss_net_profit#3] + +(39) Filter [codegen id : 1] +Input [3]: [ss_addr_sk#33, ss_store_sk#2, ss_net_profit#3] +Condition : ((isnotnull(ss_store_sk#2) AND (ss_store_sk#2 = 4)) AND isnull(ss_addr_sk#33)) + +(40) Project [codegen id : 1] +Output [2]: [ss_store_sk#2, ss_net_profit#3] +Input [3]: [ss_addr_sk#33, ss_store_sk#2, ss_net_profit#3] + +(41) HashAggregate [codegen id : 1] +Input [2]: [ss_store_sk#2, ss_net_profit#3] +Keys [1]: [ss_store_sk#2] +Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [2]: [sum#34, count#35] +Results [3]: [ss_store_sk#2, sum#36, count#37] + +(42) Exchange +Input [3]: [ss_store_sk#2, sum#36, count#37] +Arguments: hashpartitioning(ss_store_sk#2, 5), true, [id=#38] + +(43) HashAggregate [codegen id : 2] +Input [3]: [ss_store_sk#2, sum#36, count#37] +Keys [1]: [ss_store_sk#2] +Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#39] +Results [1]: [cast((avg(UnscaledValue(ss_net_profit#3))#39 / 100.0) as decimal(11,6)) AS rank_col#40] + +Subquery:2 Hosting operator id = 17 Hosting Expression = ReusedSubquery Subquery scalar-subquery#13, [id=#14] + + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/simplified.txt index 139fd1f7b..f2106ad7a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q44/simplified.txt @@ -1,72 +1,68 @@ -TakeOrderedAndProject [best_performing,rnk,worst_performing] - WholeStageCodegen - Project [i_product_name,i_product_name,rnk] - BroadcastHashJoin [i_item_sk,item_sk] - Project [i_product_name,item_sk,rnk] - BroadcastHashJoin [i_item_sk,item_sk] - Project [item_sk,item_sk,rnk] +TakeOrderedAndProject [rnk,best_performing,worst_performing] + WholeStageCodegen (10) + Project [rnk,i_product_name,i_product_name] + BroadcastHashJoin [item_sk,i_item_sk] + Project [rnk,item_sk,i_product_name] + BroadcastHashJoin [item_sk,i_item_sk] + Project [item_sk,rnk,item_sk] BroadcastHashJoin [rnk,rnk] Project [item_sk,rnk] - Filter [item_sk,rnk] + Filter [rnk,item_sk] InputAdapter Window [rank_col] - WholeStageCodegen + WholeStageCodegen (3) Sort [rank_col] InputAdapter Exchange #1 - WholeStageCodegen + WholeStageCodegen (2) Project [item_sk,rank_col] Filter [avg(ss_net_profit)] Subquery #1 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_net_profit)),count,ss_store_sk,sum] [avg(UnscaledValue(ss_net_profit)),count,rank_col,sum] + WholeStageCodegen (2) + HashAggregate [ss_store_sk,sum,count] [avg(UnscaledValue(ss_net_profit)),rank_col,sum,count] InputAdapter Exchange [ss_store_sk] #3 - WholeStageCodegen - HashAggregate [count,count,ss_net_profit,ss_store_sk,sum,sum] [count,count,sum,sum] - Project [ss_net_profit,ss_store_sk] - Filter [ss_addr_sk,ss_store_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_net_profit,ss_store_sk] [ss_addr_sk,ss_net_profit,ss_store_sk] - HashAggregate [avg(UnscaledValue(ss_net_profit)),count,ss_item_sk,sum] [avg(UnscaledValue(ss_net_profit)),avg(ss_net_profit),count,item_sk,rank_col,sum] + WholeStageCodegen (1) + HashAggregate [ss_store_sk,ss_net_profit] [sum,count,sum,count] + Project [ss_store_sk,ss_net_profit] + Filter [ss_store_sk,ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_addr_sk,ss_store_sk,ss_net_profit] + HashAggregate [ss_item_sk,sum,count] [avg(UnscaledValue(ss_net_profit)),item_sk,rank_col,avg(ss_net_profit),sum,count] InputAdapter Exchange [ss_item_sk] #2 - WholeStageCodegen - HashAggregate [count,count,ss_item_sk,ss_net_profit,sum,sum] [count,count,sum,sum] + WholeStageCodegen (1) + HashAggregate [ss_item_sk,ss_net_profit] [sum,count,sum,count] Project [ss_item_sk,ss_net_profit] Filter [ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_net_profit,ss_store_sk] [ss_item_sk,ss_net_profit,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_store_sk,ss_net_profit] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (7) Project [item_sk,rnk] - Filter [item_sk,rnk] + Filter [rnk,item_sk] InputAdapter Window [rank_col] - WholeStageCodegen + WholeStageCodegen (6) Sort [rank_col] InputAdapter Exchange #5 - WholeStageCodegen + WholeStageCodegen (5) Project [item_sk,rank_col] Filter [avg(ss_net_profit)] - Subquery #2 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_net_profit)),count,ss_store_sk,sum] [avg(UnscaledValue(ss_net_profit)),count,rank_col,sum] - InputAdapter - Exchange [ss_store_sk] #6 - WholeStageCodegen - HashAggregate [count,count,ss_net_profit,ss_store_sk,sum,sum] [count,count,sum,sum] - Project [ss_net_profit,ss_store_sk] - Filter [ss_addr_sk,ss_store_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_net_profit,ss_store_sk] [ss_addr_sk,ss_net_profit,ss_store_sk] - HashAggregate [avg(UnscaledValue(ss_net_profit)),count,ss_item_sk,sum] [avg(UnscaledValue(ss_net_profit)),avg(ss_net_profit),count,item_sk,rank_col,sum] + ReusedSubquery [rank_col] #1 + HashAggregate [ss_item_sk,sum,count] [avg(UnscaledValue(ss_net_profit)),item_sk,rank_col,avg(ss_net_profit),sum,count] InputAdapter - ReusedExchange [count,ss_item_sk,sum] [count,ss_item_sk,sum] #2 + ReusedExchange [ss_item_sk,sum,count] #2 InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [i_item_sk,i_product_name] - Filter [i_item_sk] - Scan parquet default.item [i_item_sk,i_product_name] [i_item_sk,i_product_name] + BroadcastExchange #6 + WholeStageCodegen (8) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_product_name] InputAdapter - ReusedExchange [i_item_sk,i_product_name] [i_item_sk,i_product_name] #7 + ReusedExchange [i_item_sk,i_product_name] #6 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/explain.txt index 87aa5f5b6..f556e9b0d 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/explain.txt @@ -1,39 +1,226 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[ca_zip#1 ASC NULLS FIRST,ca_city#2 ASC NULLS FIRST], output=[ca_zip#1,ca_city#2,sum(ws_sales_price)#3]) -+- *(7) HashAggregate(keys=[ca_zip#1, ca_city#2], functions=[sum(UnscaledValue(ws_sales_price#4))]) - +- Exchange hashpartitioning(ca_zip#1, ca_city#2, 5) - +- *(6) HashAggregate(keys=[ca_zip#1, ca_city#2], functions=[partial_sum(UnscaledValue(ws_sales_price#4))]) - +- *(6) Project [ws_sales_price#4, ca_city#2, ca_zip#1] - +- *(6) Filter (substring(ca_zip#1, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) || exists#5) - +- *(6) BroadcastHashJoin [i_item_id#6], [i_item_id#6#7], ExistenceJoin(exists#5), BuildRight - :- *(6) Project [ws_sales_price#4, ca_city#2, ca_zip#1, i_item_id#6] - : +- *(6) BroadcastHashJoin [ws_item_sk#8], [i_item_sk#9], Inner, BuildRight - : :- *(6) Project [ws_item_sk#8, ws_sales_price#4, ca_city#2, ca_zip#1] - : : +- *(6) BroadcastHashJoin [ws_sold_date_sk#10], [d_date_sk#11], Inner, BuildRight - : : :- *(6) Project [ws_sold_date_sk#10, ws_item_sk#8, ws_sales_price#4, ca_city#2, ca_zip#1] - : : : +- *(6) BroadcastHashJoin [c_current_addr_sk#12], [ca_address_sk#13], Inner, BuildRight - : : : :- *(6) Project [ws_sold_date_sk#10, ws_item_sk#8, ws_sales_price#4, c_current_addr_sk#12] - : : : : +- *(6) BroadcastHashJoin [ws_bill_customer_sk#14], [c_customer_sk#15], Inner, BuildRight - : : : : :- *(6) Project [ws_sold_date_sk#10, ws_item_sk#8, ws_bill_customer_sk#14, ws_sales_price#4] - : : : : : +- *(6) Filter ((isnotnull(ws_bill_customer_sk#14) && isnotnull(ws_sold_date_sk#10)) && isnotnull(ws_item_sk#8)) - : : : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#10,ws_item_sk#8,ws_bill_customer_sk#14,ws_sales_price#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(1) Project [c_customer_sk#15, c_current_addr_sk#12] - : : : : +- *(1) Filter (isnotnull(c_customer_sk#15) && isnotnull(c_current_addr_sk#12)) - : : : : +- *(1) FileScan parquet default.customer[c_customer_sk#15,c_current_addr_sk#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [ca_address_sk#13, ca_city#2, ca_zip#1] - : : : +- *(2) Filter isnotnull(ca_address_sk#13) - : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#13,ca_city#2,ca_zip#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [d_date_sk#11] - : : +- *(3) Filter ((((isnotnull(d_qoy#16) && isnotnull(d_year#17)) && (d_qoy#16 = 2)) && (d_year#17 = 2001)) && isnotnull(d_date_sk#11)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#11,d_year#17,d_qoy#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [i_item_sk#9, i_item_id#6] - : +- *(4) Filter isnotnull(i_item_sk#9) - : +- *(4) FileScan parquet default.item[i_item_sk#9,i_item_id#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - +- *(5) Project [i_item_id#6 AS i_item_id#6#7] - +- *(5) Filter i_item_sk#9 IN (2,3,5,7,11,13,17,19,23,29) - +- *(5) FileScan parquet default.item[i_item_sk#9,i_item_id#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [In(i_item_sk, [2,3,5,7,11,13,17,19,23,29])], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (40) ++- * HashAggregate (39) + +- Exchange (38) + +- * HashAggregate (37) + +- * Project (36) + +- * Filter (35) + +- * BroadcastHashJoin ExistenceJoin(exists#1) BuildRight (34) + :- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.web_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.customer (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.customer_address (10) + : : +- BroadcastExchange (20) + : : +- * Project (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet default.date_dim (16) + : +- BroadcastExchange (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.item (23) + +- BroadcastExchange (33) + +- * Project (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet default.item (29) + + +(1) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#2, ws_item_sk#3, ws_bill_customer_sk#4, ws_sales_price#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [4]: [ws_sold_date_sk#2, ws_item_sk#3, ws_bill_customer_sk#4, ws_sales_price#5] + +(3) Filter [codegen id : 6] +Input [4]: [ws_sold_date_sk#2, ws_item_sk#3, ws_bill_customer_sk#4, ws_sales_price#5] +Condition : ((isnotnull(ws_bill_customer_sk#4) AND isnotnull(ws_sold_date_sk#2)) AND isnotnull(ws_item_sk#3)) + +(4) Scan parquet default.customer +Output [2]: [c_customer_sk#6, c_current_addr_sk#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] + +(6) Filter [codegen id : 1] +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] +Condition : (isnotnull(c_customer_sk#6) AND isnotnull(c_current_addr_sk#7)) + +(7) BroadcastExchange +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_bill_customer_sk#4] +Right keys [1]: [c_customer_sk#6] +Join condition: None + +(9) Project [codegen id : 6] +Output [4]: [ws_sold_date_sk#2, ws_item_sk#3, ws_sales_price#5, c_current_addr_sk#7] +Input [6]: [ws_sold_date_sk#2, ws_item_sk#3, ws_bill_customer_sk#4, ws_sales_price#5, c_customer_sk#6, c_current_addr_sk#7] + +(10) Scan parquet default.customer_address +Output [3]: [ca_address_sk#9, ca_city#10, ca_zip#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ca_address_sk#9, ca_city#10, ca_zip#11] + +(12) Filter [codegen id : 2] +Input [3]: [ca_address_sk#9, ca_city#10, ca_zip#11] +Condition : isnotnull(ca_address_sk#9) + +(13) BroadcastExchange +Input [3]: [ca_address_sk#9, ca_city#10, ca_zip#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_current_addr_sk#7] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(15) Project [codegen id : 6] +Output [5]: [ws_sold_date_sk#2, ws_item_sk#3, ws_sales_price#5, ca_city#10, ca_zip#11] +Input [7]: [ws_sold_date_sk#2, ws_item_sk#3, ws_sales_price#5, c_current_addr_sk#7, ca_address_sk#9, ca_city#10, ca_zip#11] + +(16) Scan parquet default.date_dim +Output [3]: [d_date_sk#13, d_year#14, d_qoy#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#13, d_year#14, d_qoy#15] + +(18) Filter [codegen id : 3] +Input [3]: [d_date_sk#13, d_year#14, d_qoy#15] +Condition : ((((isnotnull(d_qoy#15) AND isnotnull(d_year#14)) AND (d_qoy#15 = 2)) AND (d_year#14 = 2001)) AND isnotnull(d_date_sk#13)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#13] +Input [3]: [d_date_sk#13, d_year#14, d_qoy#15] + +(20) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(21) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#2] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(22) Project [codegen id : 6] +Output [4]: [ws_item_sk#3, ws_sales_price#5, ca_city#10, ca_zip#11] +Input [6]: [ws_sold_date_sk#2, ws_item_sk#3, ws_sales_price#5, ca_city#10, ca_zip#11, d_date_sk#13] + +(23) Scan parquet default.item +Output [2]: [i_item_sk#17, i_item_id#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#17, i_item_id#18] + +(25) Filter [codegen id : 4] +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) + +(26) BroadcastExchange +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_item_sk#3] +Right keys [1]: [i_item_sk#17] +Join condition: None + +(28) Project [codegen id : 6] +Output [4]: [ws_sales_price#5, ca_city#10, ca_zip#11, i_item_id#18] +Input [6]: [ws_item_sk#3, ws_sales_price#5, ca_city#10, ca_zip#11, i_item_sk#17, i_item_id#18] + +(29) Scan parquet default.item +Output [2]: [i_item_sk#17, i_item_id#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_item_sk, [2,3,5,7,11,13,17,19,23,29])] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [2]: [i_item_sk#17, i_item_id#18] + +(31) Filter [codegen id : 5] +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : i_item_sk#17 IN (2,3,5,7,11,13,17,19,23,29) + +(32) Project [codegen id : 5] +Output [1]: [i_item_id#18 AS i_item_id#18#20] +Input [2]: [i_item_sk#17, i_item_id#18] + +(33) BroadcastExchange +Input [1]: [i_item_id#18#20] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#21] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_item_id#18] +Right keys [1]: [i_item_id#18#20] +Join condition: None + +(35) Filter [codegen id : 6] +Input [5]: [ws_sales_price#5, ca_city#10, ca_zip#11, i_item_id#18, exists#1] +Condition : (substr(ca_zip#11, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR exists#1) + +(36) Project [codegen id : 6] +Output [3]: [ws_sales_price#5, ca_city#10, ca_zip#11] +Input [5]: [ws_sales_price#5, ca_city#10, ca_zip#11, i_item_id#18, exists#1] + +(37) HashAggregate [codegen id : 6] +Input [3]: [ws_sales_price#5, ca_city#10, ca_zip#11] +Keys [2]: [ca_zip#11, ca_city#10] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#5))] +Aggregate Attributes [1]: [sum#22] +Results [3]: [ca_zip#11, ca_city#10, sum#23] + +(38) Exchange +Input [3]: [ca_zip#11, ca_city#10, sum#23] +Arguments: hashpartitioning(ca_zip#11, ca_city#10, 5), true, [id=#24] + +(39) HashAggregate [codegen id : 7] +Input [3]: [ca_zip#11, ca_city#10, sum#23] +Keys [2]: [ca_zip#11, ca_city#10] +Functions [1]: [sum(UnscaledValue(ws_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#5))#25] +Results [3]: [ca_zip#11, ca_city#10, MakeDecimal(sum(UnscaledValue(ws_sales_price#5))#25,17,2) AS sum(ws_sales_price)#26] + +(40) TakeOrderedAndProject +Input [3]: [ca_zip#11, ca_city#10, sum(ws_sales_price)#26] +Arguments: 100, [ca_zip#11 ASC NULLS FIRST, ca_city#10 ASC NULLS FIRST], [ca_zip#11, ca_city#10, sum(ws_sales_price)#26] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/simplified.txt index 72b6b03ea..7cc474fb5 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q45/simplified.txt @@ -1,51 +1,59 @@ -TakeOrderedAndProject [ca_city,ca_zip,sum(ws_sales_price)] - WholeStageCodegen - HashAggregate [ca_city,ca_zip,sum,sum(UnscaledValue(ws_sales_price))] [sum,sum(UnscaledValue(ws_sales_price)),sum(ws_sales_price)] +TakeOrderedAndProject [ca_zip,ca_city,sum(ws_sales_price)] + WholeStageCodegen (7) + HashAggregate [ca_zip,ca_city,sum] [sum(UnscaledValue(ws_sales_price)),sum(ws_sales_price),sum] InputAdapter - Exchange [ca_city,ca_zip] #1 - WholeStageCodegen - HashAggregate [ca_city,ca_zip,sum,sum,ws_sales_price] [sum,sum] - Project [ca_city,ca_zip,ws_sales_price] + Exchange [ca_zip,ca_city] #1 + WholeStageCodegen (6) + HashAggregate [ca_zip,ca_city,ws_sales_price] [sum,sum] + Project [ws_sales_price,ca_city,ca_zip] Filter [ca_zip,exists] BroadcastHashJoin [i_item_id,i_item_id] - Project [ca_city,ca_zip,i_item_id,ws_sales_price] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ca_city,ca_zip,ws_item_sk,ws_sales_price] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ca_city,ca_zip,ws_item_sk,ws_sales_price,ws_sold_date_sk] + Project [ws_sales_price,ca_city,ca_zip,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_sales_price,ca_city,ca_zip] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_sales_price,ca_city,ca_zip] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [ws_bill_customer_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] - Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] [ws_bill_customer_sk,ws_item_sk,ws_sales_price,ws_sold_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_sales_price,c_current_addr_sk] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Filter [ws_bill_customer_sk,ws_sold_date_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_sales_price] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [c_current_addr_sk,c_customer_sk] - Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] + WholeStageCodegen (1) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [ca_address_sk,ca_city,ca_zip] - Filter [ca_address_sk] - Scan parquet default.customer_address [ca_address_sk,ca_city,ca_zip] [ca_address_sk,ca_city,ca_zip] + WholeStageCodegen (2) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city,ca_zip] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (3) Project [d_date_sk] - Filter [d_date_sk,d_qoy,d_year] - Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] + Filter [d_qoy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id] InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (5) Project [i_item_id] Filter [i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/explain.txt index 05a0c9bb1..a6a3c3c46 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/explain.txt @@ -1,41 +1,241 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_name#2 ASC NULLS FIRST,ca_city#3 ASC NULLS FIRST,bought_city#4 ASC NULLS FIRST,ss_ticket_number#5 ASC NULLS FIRST], output=[c_last_name#1,c_first_name#2,ca_city#3,bought_city#4,ss_ticket_number#5,amt#6,profit#7]) -+- *(8) Project [c_last_name#1, c_first_name#2, ca_city#3, bought_city#4, ss_ticket_number#5, amt#6, profit#7] - +- *(8) BroadcastHashJoin [c_current_addr_sk#8], [ca_address_sk#9], Inner, BuildRight, NOT (ca_city#3 = bought_city#4) - :- *(8) Project [ss_ticket_number#5, bought_city#4, amt#6, profit#7, c_current_addr_sk#8, c_first_name#2, c_last_name#1] - : +- *(8) BroadcastHashJoin [ss_customer_sk#10], [c_customer_sk#11], Inner, BuildRight - : :- *(8) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#10, ss_addr_sk#12, ca_city#3], functions=[sum(UnscaledValue(ss_coupon_amt#13)), sum(UnscaledValue(ss_net_profit#14))]) - : : +- Exchange hashpartitioning(ss_ticket_number#5, ss_customer_sk#10, ss_addr_sk#12, ca_city#3, 5) - : : +- *(5) HashAggregate(keys=[ss_ticket_number#5, ss_customer_sk#10, ss_addr_sk#12, ca_city#3], functions=[partial_sum(UnscaledValue(ss_coupon_amt#13)), partial_sum(UnscaledValue(ss_net_profit#14))]) - : : +- *(5) Project [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14, ca_city#3] - : : +- *(5) BroadcastHashJoin [ss_addr_sk#12], [ca_address_sk#9], Inner, BuildRight - : : :- *(5) Project [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] - : : : +- *(5) BroadcastHashJoin [ss_hdemo_sk#15], [hd_demo_sk#16], Inner, BuildRight - : : : :- *(5) Project [ss_customer_sk#10, ss_hdemo_sk#15, ss_addr_sk#12, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] - : : : : +- *(5) BroadcastHashJoin [ss_store_sk#17], [s_store_sk#18], Inner, BuildRight - : : : : :- *(5) Project [ss_customer_sk#10, ss_hdemo_sk#15, ss_addr_sk#12, ss_store_sk#17, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] - : : : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#19], [d_date_sk#20], Inner, BuildRight - : : : : : :- *(5) Project [ss_sold_date_sk#19, ss_customer_sk#10, ss_hdemo_sk#15, ss_addr_sk#12, ss_store_sk#17, ss_ticket_number#5, ss_coupon_amt#13, ss_net_profit#14] - : : : : : : +- *(5) Filter ((((isnotnull(ss_sold_date_sk#19) && isnotnull(ss_store_sk#17)) && isnotnull(ss_hdemo_sk#15)) && isnotnull(ss_addr_sk#12)) && isnotnull(ss_customer_sk#10)) - : : : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#19,ss_customer_sk#10,ss_hdemo_sk#15,ss_addr_sk#12,ss_store_sk#17,ss_ticket_number#5,ss_coupon_amt#13,ss_net_profit#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [s_store_sk#18] - : : : : +- *(2) Filter (s_city#23 IN (Fairview,Midway) && isnotnull(s_store_sk#18)) - : : : : +- *(2) FileScan parquet default.store[s_store_sk#18,s_city#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [In(s_city, [Fairview,Midway]), IsNotNull(s_store_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(3) Project [hd_demo_sk#16] - : : : +- *(3) Filter (((hd_dep_count#24 = 4) || (hd_vehicle_count#25 = 3)) && isnotnull(hd_demo_sk#16)) - : : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#16,hd_dep_count#24,hd_vehicle_count#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(4) Project [ca_address_sk#9, ca_city#3] - : : +- *(4) Filter (isnotnull(ca_address_sk#9) && isnotnull(ca_city#3)) - : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#9,ca_city#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(6) Project [c_customer_sk#11, c_current_addr_sk#8, c_first_name#2, c_last_name#1] - : +- *(6) Filter (isnotnull(c_customer_sk#11) && isnotnull(c_current_addr_sk#8)) - : +- *(6) FileScan parquet default.customer[c_customer_sk#11,c_current_addr_sk#8,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct - +- ReusedExchange [ca_address_sk#9, ca_city#3], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +TakeOrderedAndProject (43) ++- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * HashAggregate (33) + : : +- Exchange (32) + : : +- * HashAggregate (31) + : : +- * Project (30) + : : +- * BroadcastHashJoin Inner BuildRight (29) + : : :- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.store_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.date_dim (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.store (11) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.household_demographics (18) + : : +- BroadcastExchange (28) + : : +- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet default.customer_address (25) + : +- BroadcastExchange (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet default.customer (34) + +- ReusedExchange (40) + + +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Condition : ((((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_addr_sk#4)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_dow, [6,0]), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : ((d_dow#11 IN (6,0) AND d_year#10 IN (1999,2000,2001)) AND isnotnull(d_date_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(8) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, d_date_sk#9] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#13, s_city#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [In(s_city, [Fairview,Midway]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#13, s_city#14] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#13, s_city#14] +Condition : (s_city#14 IN (Fairview,Midway) AND isnotnull(s_store_sk#13)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#13] +Input [2]: [s_store_sk#13, s_city#14] + +(15) BroadcastExchange +Input [1]: [s_store_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#13] +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_store_sk#13] + +(18) Scan parquet default.household_demographics +Output [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] + +(20) Filter [codegen id : 3] +Input [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] +Condition : (((hd_dep_count#17 = 4) OR (hd_vehicle_count#18 = 3)) AND isnotnull(hd_demo_sk#16)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#16] +Input [3]: [hd_demo_sk#16, hd_dep_count#17, hd_vehicle_count#18] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] + +(23) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#16] +Join condition: None + +(24) Project [codegen id : 5] +Output [5]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, hd_demo_sk#16] + +(25) Scan parquet default.customer_address +Output [2]: [ca_address_sk#20, ca_city#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#20, ca_city#21] + +(27) Filter [codegen id : 4] +Input [2]: [ca_address_sk#20, ca_city#21] +Condition : (isnotnull(ca_address_sk#20) AND isnotnull(ca_city#21)) + +(28) BroadcastExchange +Input [2]: [ca_address_sk#20, ca_city#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#4] +Right keys [1]: [ca_address_sk#20] +Join condition: None + +(30) Project [codegen id : 5] +Output [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, ca_city#21] +Input [7]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, ca_address_sk#20, ca_city#21] + +(31) HashAggregate [codegen id : 5] +Input [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, ca_city#21] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#7)), partial_sum(UnscaledValue(ss_net_profit#8))] +Aggregate Attributes [2]: [sum#23, sum#24] +Results [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21, sum#25, sum#26] + +(32) Exchange +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21, sum#25, sum#26] +Arguments: hashpartitioning(ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21, 5), true, [id=#27] + +(33) HashAggregate [codegen id : 8] +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21, sum#25, sum#26] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#21] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#7)), sum(UnscaledValue(ss_net_profit#8))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#7))#28, sum(UnscaledValue(ss_net_profit#8))#29] +Results [5]: [ss_ticket_number#6, ss_customer_sk#2, ca_city#21 AS bought_city#30, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#7))#28,17,2) AS amt#31, MakeDecimal(sum(UnscaledValue(ss_net_profit#8))#29,17,2) AS profit#32] + +(34) Scan parquet default.customer +Output [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 6] +Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] + +(36) Filter [codegen id : 6] +Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Condition : (isnotnull(c_customer_sk#33) AND isnotnull(c_current_addr_sk#34)) + +(37) BroadcastExchange +Input [4]: [c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#37] + +(38) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#33] +Join condition: None + +(39) Project [codegen id : 8] +Output [7]: [ss_ticket_number#6, bought_city#30, amt#31, profit#32, c_current_addr_sk#34, c_first_name#35, c_last_name#36] +Input [9]: [ss_ticket_number#6, ss_customer_sk#2, bought_city#30, amt#31, profit#32, c_customer_sk#33, c_current_addr_sk#34, c_first_name#35, c_last_name#36] + +(40) ReusedExchange [Reuses operator id: 28] +Output [2]: [ca_address_sk#20, ca_city#21] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [c_current_addr_sk#34] +Right keys [1]: [ca_address_sk#20] +Join condition: NOT (ca_city#21 = bought_city#30) + +(42) Project [codegen id : 8] +Output [7]: [c_last_name#36, c_first_name#35, ca_city#21, bought_city#30, ss_ticket_number#6, amt#31, profit#32] +Input [9]: [ss_ticket_number#6, bought_city#30, amt#31, profit#32, c_current_addr_sk#34, c_first_name#35, c_last_name#36, ca_address_sk#20, ca_city#21] + +(43) TakeOrderedAndProject +Input [7]: [c_last_name#36, c_first_name#35, ca_city#21, bought_city#30, ss_ticket_number#6, amt#31, profit#32] +Arguments: 100, [c_last_name#36 ASC NULLS FIRST, c_first_name#35 ASC NULLS FIRST, ca_city#21 ASC NULLS FIRST, bought_city#30 ASC NULLS FIRST, ss_ticket_number#6 ASC NULLS FIRST], [c_last_name#36, c_first_name#35, ca_city#21, bought_city#30, ss_ticket_number#6, amt#31, profit#32] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/simplified.txt index 213915231..abdc7a3ba 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q46/simplified.txt @@ -1,54 +1,63 @@ -TakeOrderedAndProject [amt,bought_city,c_first_name,c_last_name,ca_city,profit,ss_ticket_number] - WholeStageCodegen - Project [amt,bought_city,c_first_name,c_last_name,ca_city,profit,ss_ticket_number] - BroadcastHashJoin [bought_city,c_current_addr_sk,ca_address_sk,ca_city] - Project [amt,bought_city,c_current_addr_sk,c_first_name,c_last_name,profit,ss_ticket_number] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] [amt,bought_city,profit,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] +TakeOrderedAndProject [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,amt,profit] + WholeStageCodegen (8) + Project [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,amt,profit] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] + Project [ss_ticket_number,bought_city,amt,profit,c_current_addr_sk,c_first_name,c_last_name] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,sum,sum] [sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit)),bought_city,amt,profit,sum,sum] InputAdapter - Exchange [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #1 - WholeStageCodegen - HashAggregate [ca_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number,sum,sum,sum,sum] [sum,sum,sum,sum] - Project [ca_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_ticket_number] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_store_sk,ss_ticket_number] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Filter [ss_addr_sk,ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 + WholeStageCodegen (5) + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,ss_coupon_amt,ss_net_profit] [sum,sum,sum,sum] + Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,ca_city] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk,ss_hdemo_sk,ss_addr_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_dow,d_year] - Scan parquet default.date_dim [d_date_sk,d_dow,d_year] [d_date_sk,d_dow,d_year] + Filter [d_dow,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_dow] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (2) Project [s_store_sk] Filter [s_city,s_store_sk] - Scan parquet default.store [s_city,s_store_sk] [s_city,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_city] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [ca_address_sk,ca_city] - Filter [ca_address_sk,ca_city] - Scan parquet default.customer_address [ca_address_sk,ca_city] [ca_address_sk,ca_city] + WholeStageCodegen (4) + Filter [ca_address_sk,ca_city] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city] InputAdapter BroadcastExchange #6 - WholeStageCodegen - Project [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] - Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] + WholeStageCodegen (6) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] InputAdapter - ReusedExchange [ca_address_sk,ca_city] [ca_address_sk,ca_city] #5 + ReusedExchange [ca_address_sk,ca_city] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/explain.txt index 134ab3c96..a84ba3a03 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/explain.txt @@ -1,51 +1,278 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,s_store_name#3 ASC NULLS FIRST], output=[i_category#4,i_brand#5,s_store_name#3,s_company_name#6,d_year#7,d_moy#8,avg_monthly_sales#2,sum_sales#1,psum#9,nsum#10]) -+- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, avg_monthly_sales#2, sum_sales#1, sum_sales#11 AS psum#9, sum_sales#12 AS nsum#10] - +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, (rn#18 - 1)], Inner, BuildRight - :- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13, sum_sales#11] - : +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, rn#13], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, (rn#23 + 1)], Inner, BuildRight - : :- *(22) Project [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, sum_sales#1, avg_monthly_sales#2, rn#13] - : : +- *(22) Filter (((isnotnull(avg_monthly_sales#2) && (avg_monthly_sales#2 > 0.000000)) && (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000)) && isnotnull(rn#13)) - : : +- Window [avg(_w0#24) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7] - : : +- *(7) Filter (isnotnull(d_year#7) && (d_year#7 = 1999)) - : : +- Window [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#13], [i_category#4, i_brand#5, s_store_name#3, s_company_name#6], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] - : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#6 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, 5) - : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#25))]) - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 5) - : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#25))]) - : : +- *(4) Project [i_brand#5, i_category#4, ss_sales_price#25, d_year#7, d_moy#8, s_store_name#3, s_company_name#6] - : : +- *(4) BroadcastHashJoin [ss_store_sk#26], [s_store_sk#27], Inner, BuildRight - : : :- *(4) Project [i_brand#5, i_category#4, ss_store_sk#26, ss_sales_price#25, d_year#7, d_moy#8] - : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#28], [d_date_sk#29], Inner, BuildRight - : : : :- *(4) Project [i_brand#5, i_category#4, ss_sold_date_sk#28, ss_store_sk#26, ss_sales_price#25] - : : : : +- *(4) BroadcastHashJoin [i_item_sk#30], [ss_item_sk#31], Inner, BuildRight - : : : : :- *(4) Project [i_item_sk#30, i_brand#5, i_category#4] - : : : : : +- *(4) Filter ((isnotnull(i_item_sk#30) && isnotnull(i_brand#5)) && isnotnull(i_category#4)) - : : : : : +- *(4) FileScan parquet default.item[i_item_sk#30,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : : : +- *(1) Project [ss_sold_date_sk#28, ss_item_sk#31, ss_store_sk#26, ss_sales_price#25] - : : : : +- *(1) Filter ((isnotnull(ss_item_sk#31) && isnotnull(ss_sold_date_sk#28)) && isnotnull(ss_store_sk#26)) - : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#28,ss_item_sk#31,ss_store_sk#26,ss_sales_price#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [d_date_sk#29, d_year#7, d_moy#8] - : : : +- *(2) Filter ((((d_year#7 = 1999) || ((d_year#7 = 1998) && (d_moy#8 = 12))) || ((d_year#7 = 2000) && (d_moy#8 = 1))) && isnotnull(d_date_sk#29)) - : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#29,d_year#7,d_moy#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [s_store_sk#27, s_store_name#3, s_company_name#6] - : : +- *(3) Filter ((isnotnull(s_store_sk#27) && isnotnull(s_store_name#3)) && isnotnull(s_company_name#6)) - : : +- *(3) FileScan parquet default.store[s_store_sk#27,s_store_name#3,s_company_name#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] + 1))) - : +- *(14) Project [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, sum_sales#11, rn#23] - : +- *(14) Filter isnotnull(rn#23) - : +- Window [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#23], [i_category#19, i_brand#20, s_store_name#21, s_company_name#22], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] - : +- *(13) Sort [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, s_store_name#21 ASC NULLS FIRST, s_company_name#22 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 - : +- Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 5) - : +- *(12) HashAggregate(keys=[i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33], functions=[sum(UnscaledValue(ss_sales_price#25))]) - : +- ReusedExchange [i_category#19, i_brand#20, s_store_name#21, s_company_name#22, d_year#32, d_moy#33, sum#34], Exchange hashpartitioning(i_category#4, i_brand#5, s_store_name#3, s_company_name#6, d_year#7, d_moy#8, 5) - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] - 1))) - +- *(21) Project [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, sum_sales#12, rn#18] - +- *(21) Filter isnotnull(rn#18) - +- Window [rank(d_year#35, d_moy#36) windowspecdefinition(i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#18], [i_category#14, i_brand#15, s_store_name#16, s_company_name#17], [d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST] - +- *(20) Sort [i_category#14 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#16 ASC NULLS FIRST, s_company_name#17 ASC NULLS FIRST, d_year#35 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST], false, 0 - +- ReusedExchange [i_category#14, i_brand#15, s_store_name#16, s_company_name#17, d_year#35, d_moy#36, sum_sales#12], Exchange hashpartitioning(i_category#19, i_brand#20, s_store_name#21, s_company_name#22, 5) \ No newline at end of file +TakeOrderedAndProject (51) ++- * Project (50) + +- * BroadcastHashJoin Inner BuildRight (49) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Filter (32) + : : +- Window (31) + : : +- * Sort (30) + : : +- Exchange (29) + : : +- * Project (28) + : : +- Window (27) + : : +- * Sort (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- Exchange (23) + : : +- * HashAggregate (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.item (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet default.store (16) + : +- BroadcastExchange (40) + : +- * Project (39) + : +- * Filter (38) + : +- Window (37) + : +- * Sort (36) + : +- Exchange (35) + : +- * HashAggregate (34) + : +- ReusedExchange (33) + +- BroadcastExchange (48) + +- * Project (47) + +- * Filter (46) + +- Window (45) + +- * Sort (44) + +- ReusedExchange (43) + + +(1) Scan parquet default.item +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] + +(3) Filter [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(4) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] + +(6) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] +Condition : ((isnotnull(ss_item_sk#5) AND isnotnull(ss_sold_date_sk#4)) AND isnotnull(ss_store_sk#6)) + +(7) BroadcastExchange +Input [4]: [ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#5] +Join condition: None + +(9) Project [codegen id : 4] +Output [5]: [i_brand#2, i_category#3, ss_sold_date_sk#4, ss_store_sk#6, ss_sales_price#7] +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, ss_sold_date_sk#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7] + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(15) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, ss_store_sk#6, ss_sales_price#7, d_year#10, d_moy#11] +Input [8]: [i_brand#2, i_category#3, ss_sold_date_sk#4, ss_store_sk#6, ss_sales_price#7, d_date_sk#9, d_year#10, d_moy#11] + +(16) Scan parquet default.store +Output [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] + +(18) Filter [codegen id : 3] +Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Condition : ((isnotnull(s_store_sk#13) AND isnotnull(s_store_name#14)) AND isnotnull(s_company_name#15)) + +(19) BroadcastExchange +Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#6] +Right keys [1]: [s_store_sk#13] +Join condition: None + +(21) Project [codegen id : 4] +Output [7]: [i_brand#2, i_category#3, ss_sales_price#7, d_year#10, d_moy#11, s_store_name#14, s_company_name#15] +Input [9]: [i_brand#2, i_category#3, ss_store_sk#6, ss_sales_price#7, d_year#10, d_moy#11, s_store_sk#13, s_store_name#14, s_company_name#15] + +(22) HashAggregate [codegen id : 4] +Input [7]: [i_brand#2, i_category#3, ss_sales_price#7, d_year#10, d_moy#11, s_store_name#14, s_company_name#15] +Keys [6]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [1]: [sum#17] +Results [7]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum#18] + +(23) Exchange +Input [7]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum#18] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, 5), true, [id=#19] + +(24) HashAggregate [codegen id : 5] +Input [7]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum#18] +Keys [6]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11] +Functions [1]: [sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#7))#20] +Results [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#20,17,2) AS _w0#22] + +(25) Exchange +Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, 5), true, [id=#23] + +(26) Sort [codegen id : 6] +Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, _w0#22] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST, s_company_name#15 ASC NULLS FIRST, d_year#10 ASC NULLS FIRST], false, 0 + +(27) Window +Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#24], [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10] + +(28) Project [codegen id : 7] +Output [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24] +Input [9]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, _w0#22, avg_monthly_sales#24] + +(29) Exchange +Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, 5), true, [id=#25] + +(30) Sort [codegen id : 8] +Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST, s_company_name#15 ASC NULLS FIRST, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST], false, 0 + +(31) Window +Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24] +Arguments: [rank(d_year#10, d_moy#11) windowspecdefinition(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#26], [i_category#3, i_brand#2, s_store_name#14, s_company_name#15], [d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST] + +(32) Filter [codegen id : 23] +Input [9]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24, rn#26] +Condition : (((((isnotnull(d_year#10) AND isnotnull(avg_monthly_sales#24)) AND (d_year#10 = 1999)) AND (avg_monthly_sales#24 > 0.000000)) AND (CASE WHEN (avg_monthly_sales#24 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000)) AND isnotnull(rn#26)) + +(33) ReusedExchange [Reuses operator id: 23] +Output [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum#33] + +(34) HashAggregate [codegen id : 13] +Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum#33] +Keys [6]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32] +Functions [1]: [sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#7))#34] +Results [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#34,17,2) AS sum_sales#35] + +(35) Exchange +Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35] +Arguments: hashpartitioning(i_category#27, i_brand#28, s_store_name#29, s_company_name#30, 5), true, [id=#36] + +(36) Sort [codegen id : 14] +Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35] +Arguments: [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, s_store_name#29 ASC NULLS FIRST, s_company_name#30 ASC NULLS FIRST, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST], false, 0 + +(37) Window +Input [7]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35] +Arguments: [rank(d_year#31, d_moy#32) windowspecdefinition(i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#37], [i_category#27, i_brand#28, s_store_name#29, s_company_name#30], [d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST] + +(38) Filter [codegen id : 15] +Input [8]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35, rn#37] +Condition : isnotnull(rn#37) + +(39) Project [codegen id : 15] +Output [6]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, sum_sales#35, rn#37] +Input [8]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, d_year#31, d_moy#32, sum_sales#35, rn#37] + +(40) BroadcastExchange +Input [6]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, sum_sales#35, rn#37] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] + 1)),false), [id=#38] + +(41) BroadcastHashJoin [codegen id : 23] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, rn#26] +Right keys [5]: [i_category#27, i_brand#28, s_store_name#29, s_company_name#30, (rn#37 + 1)] +Join condition: None + +(42) Project [codegen id : 23] +Output [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24, rn#26, sum_sales#35] +Input [15]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24, rn#26, i_category#27, i_brand#28, s_store_name#29, s_company_name#30, sum_sales#35, rn#37] + +(43) ReusedExchange [Reuses operator id: 35] +Output [7]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45] + +(44) Sort [codegen id : 21] +Input [7]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45] +Arguments: [i_category#39 ASC NULLS FIRST, i_brand#40 ASC NULLS FIRST, s_store_name#41 ASC NULLS FIRST, s_company_name#42 ASC NULLS FIRST, d_year#43 ASC NULLS FIRST, d_moy#44 ASC NULLS FIRST], false, 0 + +(45) Window +Input [7]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45] +Arguments: [rank(d_year#43, d_moy#44) windowspecdefinition(i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43 ASC NULLS FIRST, d_moy#44 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#46], [i_category#39, i_brand#40, s_store_name#41, s_company_name#42], [d_year#43 ASC NULLS FIRST, d_moy#44 ASC NULLS FIRST] + +(46) Filter [codegen id : 22] +Input [8]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45, rn#46] +Condition : isnotnull(rn#46) + +(47) Project [codegen id : 22] +Output [6]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, sum_sales#45, rn#46] +Input [8]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, d_year#43, d_moy#44, sum_sales#45, rn#46] + +(48) BroadcastExchange +Input [6]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, sum_sales#45, rn#46] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, true] - 1)),false), [id=#47] + +(49) BroadcastHashJoin [codegen id : 23] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, rn#26] +Right keys [5]: [i_category#39, i_brand#40, s_store_name#41, s_company_name#42, (rn#46 - 1)] +Join condition: None + +(50) Project [codegen id : 23] +Output [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, avg_monthly_sales#24, sum_sales#21, sum_sales#35 AS psum#48, sum_sales#45 AS nsum#49] +Input [16]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, sum_sales#21, avg_monthly_sales#24, rn#26, sum_sales#35, i_category#39, i_brand#40, s_store_name#41, s_company_name#42, sum_sales#45, rn#46] + +(51) TakeOrderedAndProject +Input [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, avg_monthly_sales#24, sum_sales#21, psum#48, nsum#49] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST], [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#10, d_moy#11, avg_monthly_sales#24, sum_sales#21, psum#48, nsum#49] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/simplified.txt index f0ed21cce..66ccfa4a9 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q47/simplified.txt @@ -1,77 +1,84 @@ -TakeOrderedAndProject [avg_monthly_sales,d_moy,d_year,i_brand,i_category,nsum,psum,s_company_name,s_store_name,sum_sales] - WholeStageCodegen - Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales,sum_sales,sum_sales] - BroadcastHashJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] - Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,rn,s_company_name,s_store_name,sum_sales,sum_sales] - BroadcastHashJoin [i_brand,i_brand,i_category,i_category,rn,rn,s_company_name,s_company_name,s_store_name,s_store_name] - Project [avg_monthly_sales,d_moy,d_year,i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] - Filter [avg_monthly_sales,rn,sum_sales] - InputAdapter - Window [_w0,d_year,i_brand,i_category,s_company_name,s_store_name] - WholeStageCodegen - Filter [d_year] - InputAdapter - Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] - WholeStageCodegen - Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] - InputAdapter - Exchange [i_brand,i_category,s_company_name,s_store_name] #1 - WholeStageCodegen - HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] - InputAdapter - Exchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] #2 - WholeStageCodegen - HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price,sum,sum] [sum,sum] - Project [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,ss_sales_price] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [d_moy,d_year,i_brand,i_category,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_brand,i_category,ss_sales_price,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_brand,i_category,i_item_sk] - Filter [i_brand,i_category,i_item_sk] - Scan parquet default.item [i_brand,i_category,i_item_sk] [i_brand,i_category,i_item_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk,d_moy,d_year] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [s_company_name,s_store_name,s_store_sk] - Filter [s_company_name,s_store_name,s_store_sk] - Scan parquet default.store [s_company_name,s_store_name,s_store_sk] [s_company_name,s_store_name,s_store_sk] +TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_brand,s_company_name,d_year,d_moy,psum,nsum] + WholeStageCodegen (23) + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,s_store_name,s_company_name,rn,i_category,i_brand,s_store_name,s_company_name,rn] + Filter [d_year,avg_monthly_sales,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (8) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name] #1 + WholeStageCodegen (7) + Project [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,avg_monthly_sales] + InputAdapter + Window [_w0,i_category,i_brand,s_store_name,s_company_name,d_year] + WholeStageCodegen (6) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name,d_year] #2 + WholeStageCodegen (5) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] #3 + WholeStageCodegen (4) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,ss_sales_price] [sum,sum] + Project [i_brand,i_category,ss_sales_price,d_year,d_moy,s_store_name,s_company_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_brand,i_category,ss_store_sk,ss_sales_price,d_year,d_moy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_brand,i_category,ss_sold_date_sk,ss_store_sk,ss_sales_price] + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_item_sk,i_category,i_brand] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand,i_category] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Filter [s_store_sk,s_store_name,s_company_name] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_name,s_company_name] InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] + BroadcastExchange #7 + WholeStageCodegen (15) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] Filter [rn] InputAdapter - Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] - WholeStageCodegen - Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (14) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] InputAdapter - Exchange [i_brand,i_category,s_company_name,s_store_name] #7 - WholeStageCodegen - HashAggregate [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum,sum(UnscaledValue(ss_sales_price))] [sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + Exchange [i_category,i_brand,s_store_name,s_company_name] #8 + WholeStageCodegen (13) + HashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,sum] InputAdapter - ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum] #2 + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #3 InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [i_brand,i_category,rn,s_company_name,s_store_name,sum_sales] + BroadcastExchange #9 + WholeStageCodegen (22) + Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] Filter [rn] InputAdapter - Window [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] - WholeStageCodegen - Sort [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name] + Window [d_year,d_moy,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (21) + Sort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy] InputAdapter - ReusedExchange [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] [d_moy,d_year,i_brand,i_category,s_company_name,s_store_name,sum_sales] #7 + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #8 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/explain.txt index a7bcb203c..f317bcdf1 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/explain.txt @@ -1,31 +1,183 @@ == Physical Plan == -*(6) HashAggregate(keys=[], functions=[sum(cast(ss_quantity#1 as bigint))]) -+- Exchange SinglePartition - +- *(5) HashAggregate(keys=[], functions=[partial_sum(cast(ss_quantity#1 as bigint))]) - +- *(5) Project [ss_quantity#1] - +- *(5) BroadcastHashJoin [ss_sold_date_sk#2], [d_date_sk#3], Inner, BuildRight - :- *(5) Project [ss_sold_date_sk#2, ss_quantity#1] - : +- *(5) BroadcastHashJoin [ss_addr_sk#4], [ca_address_sk#5], Inner, BuildRight, ((((ca_state#6 IN (CO,OH,TX) && (ss_net_profit#7 >= 0.00)) && (ss_net_profit#7 <= 2000.00)) || ((ca_state#6 IN (OR,MN,KY) && (ss_net_profit#7 >= 150.00)) && (ss_net_profit#7 <= 3000.00))) || ((ca_state#6 IN (VA,CA,MS) && (ss_net_profit#7 >= 50.00)) && (ss_net_profit#7 <= 25000.00))) - : :- *(5) Project [ss_sold_date_sk#2, ss_addr_sk#4, ss_quantity#1, ss_net_profit#7] - : : +- *(5) BroadcastHashJoin [ss_cdemo_sk#8], [cd_demo_sk#9], Inner, BuildRight, ((((((cd_marital_status#10 = M) && (cd_education_status#11 = 4 yr Degree)) && (ss_sales_price#12 >= 100.00)) && (ss_sales_price#12 <= 150.00)) || ((((cd_marital_status#10 = D) && (cd_education_status#11 = 2 yr Degree)) && (ss_sales_price#12 >= 50.00)) && (ss_sales_price#12 <= 100.00))) || ((((cd_marital_status#10 = S) && (cd_education_status#11 = College)) && (ss_sales_price#12 >= 150.00)) && (ss_sales_price#12 <= 200.00))) - : : :- *(5) Project [ss_sold_date_sk#2, ss_cdemo_sk#8, ss_addr_sk#4, ss_quantity#1, ss_sales_price#12, ss_net_profit#7] - : : : +- *(5) BroadcastHashJoin [ss_store_sk#13], [s_store_sk#14], Inner, BuildRight - : : : :- *(5) Project [ss_sold_date_sk#2, ss_cdemo_sk#8, ss_addr_sk#4, ss_store_sk#13, ss_quantity#1, ss_sales_price#12, ss_net_profit#7] - : : : : +- *(5) Filter (((isnotnull(ss_store_sk#13) && isnotnull(ss_cdemo_sk#8)) && isnotnull(ss_addr_sk#4)) && isnotnull(ss_sold_date_sk#2)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#2,ss_cdemo_sk#8,ss_addr_sk#4,ss_store_sk#13,ss_quantity#1,ss_sales_price#12,ss_net_profit#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [cd_demo_sk#9, cd_marital_status#10, cd_education_status#11] - : : +- *(2) Filter isnotnull(cd_demo_sk#9) - : : +- *(2) FileScan parquet default.customer_demographics[cd_demo_sk#9,cd_marital_status#10,cd_education_status#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [ca_address_sk#5, ca_state#6] - : +- *(3) Filter ((isnotnull(ca_country#15) && (ca_country#15 = United States)) && isnotnull(ca_address_sk#5)) - : +- *(3) FileScan parquet default.customer_address[ca_address_sk#5,ca_state#6,ca_country#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(4) Project [d_date_sk#3] - +- *(4) Filter ((isnotnull(d_year#16) && (d_year#16 = 2001)) && isnotnull(d_date_sk#3)) - +- *(4) FileScan parquet default.date_dim[d_date_sk#3,d_year#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file +* HashAggregate (32) ++- Exchange (31) + +- * HashAggregate (30) + +- * Project (29) + +- * BroadcastHashJoin Inner BuildRight (28) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.store (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.customer_demographics (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.customer_address (16) + +- BroadcastExchange (27) + +- * Project (26) + +- * Filter (25) + +- * ColumnarToRow (24) + +- Scan parquet default.date_dim (23) + + +(1) Scan parquet default.store_sales +Output [7]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_net_profit#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk), Or(Or(And(GreaterThanOrEqual(ss_sales_price,100.00),LessThanOrEqual(ss_sales_price,150.00)),And(GreaterThanOrEqual(ss_sales_price,50.00),LessThanOrEqual(ss_sales_price,100.00))),And(GreaterThanOrEqual(ss_sales_price,150.00),LessThanOrEqual(ss_sales_price,200.00))), Or(Or(And(GreaterThanOrEqual(ss_net_profit,0.00),LessThanOrEqual(ss_net_profit,2000.00)),And(GreaterThanOrEqual(ss_net_profit,150.00),LessThanOrEqual(ss_net_profit,3000.00))),And(GreaterThanOrEqual(ss_net_profit,50.00),LessThanOrEqual(ss_net_profit,25000.00)))] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [7]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_net_profit#7] + +(3) Filter [codegen id : 5] +Input [7]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_net_profit#7] +Condition : (((((isnotnull(ss_store_sk#4) AND isnotnull(ss_cdemo_sk#2)) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_sold_date_sk#1)) AND ((((ss_sales_price#6 >= 100.00) AND (ss_sales_price#6 <= 150.00)) OR ((ss_sales_price#6 >= 50.00) AND (ss_sales_price#6 <= 100.00))) OR ((ss_sales_price#6 >= 150.00) AND (ss_sales_price#6 <= 200.00)))) AND ((((ss_net_profit#7 >= 0.00) AND (ss_net_profit#7 <= 2000.00)) OR ((ss_net_profit#7 >= 150.00) AND (ss_net_profit#7 <= 3000.00))) OR ((ss_net_profit#7 >= 50.00) AND (ss_net_profit#7 <= 25000.00)))) + +(4) Scan parquet default.store +Output [1]: [s_store_sk#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [1]: [s_store_sk#8] + +(6) Filter [codegen id : 1] +Input [1]: [s_store_sk#8] +Condition : isnotnull(s_store_sk#8) + +(7) BroadcastExchange +Input [1]: [s_store_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(9) Project [codegen id : 5] +Output [6]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_net_profit#7] +Input [8]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_net_profit#7, s_store_sk#8] + +(10) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,4 yr Degree)),And(EqualTo(cd_marital_status,D),EqualTo(cd_education_status,2 yr Degree))),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College)))] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] + +(12) Filter [codegen id : 2] +Input [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] +Condition : (isnotnull(cd_demo_sk#10) AND ((((cd_marital_status#11 = M) AND (cd_education_status#12 = 4 yr Degree)) OR ((cd_marital_status#11 = D) AND (cd_education_status#12 = 2 yr Degree))) OR ((cd_marital_status#11 = S) AND (cd_education_status#12 = College)))) + +(13) BroadcastExchange +Input [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#10] +Join condition: ((((((cd_marital_status#11 = M) AND (cd_education_status#12 = 4 yr Degree)) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) OR ((((cd_marital_status#11 = D) AND (cd_education_status#12 = 2 yr Degree)) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00))) OR ((((cd_marital_status#11 = S) AND (cd_education_status#12 = College)) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00))) + +(15) Project [codegen id : 5] +Output [4]: [ss_sold_date_sk#1, ss_addr_sk#3, ss_quantity#5, ss_net_profit#7] +Input [9]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_net_profit#7, cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] + +(16) Scan parquet default.customer_address +Output [3]: [ca_address_sk#14, ca_state#15, ca_country#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [CO,OH,TX]),In(ca_state, [OR,MN,KY])),In(ca_state, [VA,CA,MS]))] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [ca_address_sk#14, ca_state#15, ca_country#16] + +(18) Filter [codegen id : 3] +Input [3]: [ca_address_sk#14, ca_state#15, ca_country#16] +Condition : (((isnotnull(ca_country#16) AND (ca_country#16 = United States)) AND isnotnull(ca_address_sk#14)) AND ((ca_state#15 IN (CO,OH,TX) OR ca_state#15 IN (OR,MN,KY)) OR ca_state#15 IN (VA,CA,MS))) + +(19) Project [codegen id : 3] +Output [2]: [ca_address_sk#14, ca_state#15] +Input [3]: [ca_address_sk#14, ca_state#15, ca_country#16] + +(20) BroadcastExchange +Input [2]: [ca_address_sk#14, ca_state#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#14] +Join condition: ((((ca_state#15 IN (CO,OH,TX) AND (ss_net_profit#7 >= 0.00)) AND (ss_net_profit#7 <= 2000.00)) OR ((ca_state#15 IN (OR,MN,KY) AND (ss_net_profit#7 >= 150.00)) AND (ss_net_profit#7 <= 3000.00))) OR ((ca_state#15 IN (VA,CA,MS) AND (ss_net_profit#7 >= 50.00)) AND (ss_net_profit#7 <= 25000.00))) + +(22) Project [codegen id : 5] +Output [2]: [ss_sold_date_sk#1, ss_quantity#5] +Input [6]: [ss_sold_date_sk#1, ss_addr_sk#3, ss_quantity#5, ss_net_profit#7, ca_address_sk#14, ca_state#15] + +(23) Scan parquet default.date_dim +Output [2]: [d_date_sk#18, d_year#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#18, d_year#19] + +(25) Filter [codegen id : 4] +Input [2]: [d_date_sk#18, d_year#19] +Condition : ((isnotnull(d_year#19) AND (d_year#19 = 2001)) AND isnotnull(d_date_sk#18)) + +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#18] +Input [2]: [d_date_sk#18, d_year#19] + +(27) BroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#18] +Join condition: None + +(29) Project [codegen id : 5] +Output [1]: [ss_quantity#5] +Input [3]: [ss_sold_date_sk#1, ss_quantity#5, d_date_sk#18] + +(30) HashAggregate [codegen id : 5] +Input [1]: [ss_quantity#5] +Keys: [] +Functions [1]: [partial_sum(cast(ss_quantity#5 as bigint))] +Aggregate Attributes [1]: [sum#21] +Results [1]: [sum#22] + +(31) Exchange +Input [1]: [sum#22] +Arguments: SinglePartition, true, [id=#23] + +(32) HashAggregate [codegen id : 6] +Input [1]: [sum#22] +Keys: [] +Functions [1]: [sum(cast(ss_quantity#5 as bigint))] +Aggregate Attributes [1]: [sum(cast(ss_quantity#5 as bigint))#24] +Results [1]: [sum(cast(ss_quantity#5 as bigint))#24 AS sum(ss_quantity)#25] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/simplified.txt index b4f78ab5c..710fbdd72 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q48/simplified.txt @@ -1,41 +1,48 @@ -WholeStageCodegen - HashAggregate [sum,sum(cast(ss_quantity as bigint))] [sum,sum(cast(ss_quantity as bigint)),sum(ss_quantity)] +WholeStageCodegen (6) + HashAggregate [sum] [sum(cast(ss_quantity as bigint)),sum(ss_quantity),sum] InputAdapter Exchange #1 - WholeStageCodegen - HashAggregate [ss_quantity,sum,sum] [sum,sum] + WholeStageCodegen (5) + HashAggregate [ss_quantity] [sum,sum] Project [ss_quantity] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_quantity,ss_sold_date_sk] - BroadcastHashJoin [ca_address_sk,ca_state,ss_addr_sk,ss_net_profit] - Project [ss_addr_sk,ss_net_profit,ss_quantity,ss_sold_date_sk] - BroadcastHashJoin [cd_demo_sk,cd_education_status,cd_marital_status,ss_cdemo_sk,ss_sales_price] - Project [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_addr_sk,ss_cdemo_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_addr_sk,ss_cdemo_sk,ss_net_profit,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity] + BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] + Project [ss_sold_date_sk,ss_addr_sk,ss_quantity,ss_net_profit] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk,cd_marital_status,cd_education_status,ss_sales_price] + Project [ss_sold_date_sk,ss_cdemo_sk,ss_addr_sk,ss_quantity,ss_sales_price,ss_net_profit] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_cdemo_sk,ss_addr_sk,ss_sold_date_sk,ss_sales_price,ss_net_profit] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_cdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_net_profit] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_sk] [s_store_sk] + WholeStageCodegen (1) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [cd_demo_sk,cd_education_status,cd_marital_status] - Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] + WholeStageCodegen (2) + Filter [cd_demo_sk,cd_marital_status,cd_education_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (3) Project [ca_address_sk,ca_state] - Filter [ca_address_sk,ca_country] - Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] [ca_address_sk,ca_country,ca_state] + Filter [ca_country,ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (4) Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/explain.txt new file mode 100644 index 000000000..8d10c1641 --- /dev/null +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/explain.txt @@ -0,0 +1,433 @@ +== Physical Plan == +TakeOrderedAndProject (78) ++- * HashAggregate (77) + +- Exchange (76) + +- * HashAggregate (75) + +- Union (74) + :- * Project (27) + : +- * Filter (26) + : +- Window (25) + : +- * Sort (24) + : +- Window (23) + : +- * Sort (22) + : +- Exchange (21) + : +- * HashAggregate (20) + : +- Exchange (19) + : +- * HashAggregate (18) + : +- * Project (17) + : +- * BroadcastHashJoin Inner BuildRight (16) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.web_sales (1) + : : +- BroadcastExchange (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet default.web_returns (5) + : +- BroadcastExchange (15) + : +- * Project (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.date_dim (11) + :- * Project (50) + : +- * Filter (49) + : +- Window (48) + : +- * Sort (47) + : +- Window (46) + : +- * Sort (45) + : +- Exchange (44) + : +- * HashAggregate (43) + : +- Exchange (42) + : +- * HashAggregate (41) + : +- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Project (31) + : : : +- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.catalog_sales (28) + : : +- BroadcastExchange (35) + : : +- * Filter (34) + : : +- * ColumnarToRow (33) + : : +- Scan parquet default.catalog_returns (32) + : +- ReusedExchange (38) + +- * Project (73) + +- * Filter (72) + +- Window (71) + +- * Sort (70) + +- Window (69) + +- * Sort (68) + +- Exchange (67) + +- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- * Project (63) + +- * BroadcastHashJoin Inner BuildRight (62) + :- * Project (60) + : +- * BroadcastHashJoin Inner BuildRight (59) + : :- * Project (54) + : : +- * Filter (53) + : : +- * ColumnarToRow (52) + : : +- Scan parquet default.store_sales (51) + : +- BroadcastExchange (58) + : +- * Filter (57) + : +- * ColumnarToRow (56) + : +- Scan parquet default.store_returns (55) + +- ReusedExchange (61) + + +(1) Scan parquet default.web_sales +Output [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_net_profit), IsNotNull(ws_net_paid), IsNotNull(ws_quantity), GreaterThan(ws_net_profit,1.00), GreaterThan(ws_net_paid,0.00), GreaterThan(ws_quantity,0), IsNotNull(ws_order_number), IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] + +(3) Filter [codegen id : 3] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] +Condition : ((((((((isnotnull(ws_net_profit#6) AND isnotnull(ws_net_paid#5)) AND isnotnull(ws_quantity#4)) AND (ws_net_profit#6 > 1.00)) AND (ws_net_paid#5 > 0.00)) AND (ws_quantity#4 > 0)) AND isnotnull(ws_order_number#3)) AND isnotnull(ws_item_sk#2)) AND isnotnull(ws_sold_date_sk#1)) + +(4) Project [codegen id : 3] +Output [5]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, ws_net_profit#6] + +(5) Scan parquet default.web_returns +Output [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] + +(7) Filter [codegen id : 1] +Input [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Condition : (((isnotnull(wr_return_amt#10) AND (wr_return_amt#10 > 10000.00)) AND isnotnull(wr_order_number#8)) AND isnotnull(wr_item_sk#7)) + +(8) BroadcastExchange +Input [4]: [wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#11] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [2]: [cast(ws_order_number#3 as bigint), cast(ws_item_sk#2 as bigint)] +Right keys [2]: [wr_order_number#8, wr_item_sk#7] +Join condition: None + +(10) Project [codegen id : 3] +Output [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#9, wr_return_amt#10] +Input [9]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#3, ws_quantity#4, ws_net_paid#5, wr_item_sk#7, wr_order_number#8, wr_return_quantity#9, wr_return_amt#10] + +(11) Scan parquet default.date_dim +Output [3]: [d_date_sk#12, d_year#13, d_moy#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] + +(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] +Condition : ((((isnotnull(d_year#13) AND isnotnull(d_moy#14)) AND (d_year#13 = 2001)) AND (d_moy#14 = 12)) AND isnotnull(d_date_sk#12)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#12] +Input [3]: [d_date_sk#12, d_year#13, d_moy#14] + +(15) BroadcastExchange +Input [1]: [d_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#12] +Join condition: None + +(17) Project [codegen id : 3] +Output [5]: [ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#9, wr_return_amt#10] +Input [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#9, wr_return_amt#10, d_date_sk#12] + +(18) HashAggregate [codegen id : 3] +Input [5]: [ws_item_sk#2, ws_quantity#4, ws_net_paid#5, wr_return_quantity#9, wr_return_amt#10] +Keys [1]: [ws_item_sk#2] +Functions [4]: [partial_sum(cast(coalesce(wr_return_quantity#9, 0) as bigint)), partial_sum(cast(coalesce(ws_quantity#4, 0) as bigint)), partial_sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#16, sum#17, sum#18, isEmpty#19, sum#20, isEmpty#21] +Results [7]: [ws_item_sk#2, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27] + +(19) Exchange +Input [7]: [ws_item_sk#2, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27] +Arguments: hashpartitioning(ws_item_sk#2, 5), true, [id=#28] + +(20) HashAggregate [codegen id : 4] +Input [7]: [ws_item_sk#2, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27] +Keys [1]: [ws_item_sk#2] +Functions [4]: [sum(cast(coalesce(wr_return_quantity#9, 0) as bigint)), sum(cast(coalesce(ws_quantity#4, 0) as bigint)), sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(cast(coalesce(wr_return_quantity#9, 0) as bigint))#29, sum(cast(coalesce(ws_quantity#4, 0) as bigint))#30, sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00))#31, sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))#32] +Results [3]: [ws_item_sk#2 AS item#33, CheckOverflow((promote_precision(cast(sum(cast(coalesce(wr_return_quantity#9, 0) as bigint))#29 as decimal(15,4))) / promote_precision(cast(sum(cast(coalesce(ws_quantity#4, 0) as bigint))#30 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#34, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#10 as decimal(12,2)), 0.00))#31 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#5 as decimal(12,2)), 0.00))#32 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#35] + +(21) Exchange +Input [3]: [item#33, return_ratio#34, currency_ratio#35] +Arguments: SinglePartition, true, [id=#36] + +(22) Sort [codegen id : 5] +Input [3]: [item#33, return_ratio#34, currency_ratio#35] +Arguments: [return_ratio#34 ASC NULLS FIRST], false, 0 + +(23) Window +Input [3]: [item#33, return_ratio#34, currency_ratio#35] +Arguments: [rank(return_ratio#34) windowspecdefinition(return_ratio#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#37], [return_ratio#34 ASC NULLS FIRST] + +(24) Sort [codegen id : 6] +Input [4]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37] +Arguments: [currency_ratio#35 ASC NULLS FIRST], false, 0 + +(25) Window +Input [4]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37] +Arguments: [rank(currency_ratio#35) windowspecdefinition(currency_ratio#35 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#38], [currency_ratio#35 ASC NULLS FIRST] + +(26) Filter [codegen id : 7] +Input [5]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37, currency_rank#38] +Condition : ((return_rank#37 <= 10) OR (currency_rank#38 <= 10)) + +(27) Project [codegen id : 7] +Output [5]: [web AS channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Input [5]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37, currency_rank#38] + +(28) Scan parquet default.catalog_sales +Output [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cs_net_profit#45] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_net_profit), IsNotNull(cs_net_paid), IsNotNull(cs_quantity), GreaterThan(cs_net_profit,1.00), GreaterThan(cs_net_paid,0.00), GreaterThan(cs_quantity,0), IsNotNull(cs_order_number), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 10] +Input [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cs_net_profit#45] + +(30) Filter [codegen id : 10] +Input [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cs_net_profit#45] +Condition : ((((((((isnotnull(cs_net_profit#45) AND isnotnull(cs_net_paid#44)) AND isnotnull(cs_quantity#43)) AND (cs_net_profit#45 > 1.00)) AND (cs_net_paid#44 > 0.00)) AND (cs_quantity#43 > 0)) AND isnotnull(cs_order_number#42)) AND isnotnull(cs_item_sk#41)) AND isnotnull(cs_sold_date_sk#40)) + +(31) Project [codegen id : 10] +Output [5]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44] +Input [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cs_net_profit#45] + +(32) Scan parquet default.catalog_returns +Output [4]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(33) ColumnarToRow [codegen id : 8] +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] + +(34) Filter [codegen id : 8] +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] +Condition : (((isnotnull(cr_return_amount#49) AND (cr_return_amount#49 > 10000.00)) AND isnotnull(cr_order_number#47)) AND isnotnull(cr_item_sk#46)) + +(35) BroadcastExchange +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#50] + +(36) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [cs_order_number#42, cs_item_sk#41] +Right keys [2]: [cr_order_number#47, cr_item_sk#46] +Join condition: None + +(37) Project [codegen id : 10] +Output [6]: [cs_sold_date_sk#40, cs_item_sk#41, cs_quantity#43, cs_net_paid#44, cr_return_quantity#48, cr_return_amount#49] +Input [9]: [cs_sold_date_sk#40, cs_item_sk#41, cs_order_number#42, cs_quantity#43, cs_net_paid#44, cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] + +(38) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#12] + +(39) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#40] +Right keys [1]: [d_date_sk#12] +Join condition: None + +(40) Project [codegen id : 10] +Output [5]: [cs_item_sk#41, cs_quantity#43, cs_net_paid#44, cr_return_quantity#48, cr_return_amount#49] +Input [7]: [cs_sold_date_sk#40, cs_item_sk#41, cs_quantity#43, cs_net_paid#44, cr_return_quantity#48, cr_return_amount#49, d_date_sk#12] + +(41) HashAggregate [codegen id : 10] +Input [5]: [cs_item_sk#41, cs_quantity#43, cs_net_paid#44, cr_return_quantity#48, cr_return_amount#49] +Keys [1]: [cs_item_sk#41] +Functions [4]: [partial_sum(cast(coalesce(cr_return_quantity#48, 0) as bigint)), partial_sum(cast(coalesce(cs_quantity#43, 0) as bigint)), partial_sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#44 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#51, sum#52, sum#53, isEmpty#54, sum#55, isEmpty#56] +Results [7]: [cs_item_sk#41, sum#57, sum#58, sum#59, isEmpty#60, sum#61, isEmpty#62] + +(42) Exchange +Input [7]: [cs_item_sk#41, sum#57, sum#58, sum#59, isEmpty#60, sum#61, isEmpty#62] +Arguments: hashpartitioning(cs_item_sk#41, 5), true, [id=#63] + +(43) HashAggregate [codegen id : 11] +Input [7]: [cs_item_sk#41, sum#57, sum#58, sum#59, isEmpty#60, sum#61, isEmpty#62] +Keys [1]: [cs_item_sk#41] +Functions [4]: [sum(cast(coalesce(cr_return_quantity#48, 0) as bigint)), sum(cast(coalesce(cs_quantity#43, 0) as bigint)), sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#44 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(cast(coalesce(cr_return_quantity#48, 0) as bigint))#64, sum(cast(coalesce(cs_quantity#43, 0) as bigint))#65, sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00))#66, sum(coalesce(cast(cs_net_paid#44 as decimal(12,2)), 0.00))#67] +Results [3]: [cs_item_sk#41 AS item#68, CheckOverflow((promote_precision(cast(sum(cast(coalesce(cr_return_quantity#48, 0) as bigint))#64 as decimal(15,4))) / promote_precision(cast(sum(cast(coalesce(cs_quantity#43, 0) as bigint))#65 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#69, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00))#66 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#44 as decimal(12,2)), 0.00))#67 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#70] + +(44) Exchange +Input [3]: [item#68, return_ratio#69, currency_ratio#70] +Arguments: SinglePartition, true, [id=#71] + +(45) Sort [codegen id : 12] +Input [3]: [item#68, return_ratio#69, currency_ratio#70] +Arguments: [return_ratio#69 ASC NULLS FIRST], false, 0 + +(46) Window +Input [3]: [item#68, return_ratio#69, currency_ratio#70] +Arguments: [rank(return_ratio#69) windowspecdefinition(return_ratio#69 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#72], [return_ratio#69 ASC NULLS FIRST] + +(47) Sort [codegen id : 13] +Input [4]: [item#68, return_ratio#69, currency_ratio#70, return_rank#72] +Arguments: [currency_ratio#70 ASC NULLS FIRST], false, 0 + +(48) Window +Input [4]: [item#68, return_ratio#69, currency_ratio#70, return_rank#72] +Arguments: [rank(currency_ratio#70) windowspecdefinition(currency_ratio#70 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#73], [currency_ratio#70 ASC NULLS FIRST] + +(49) Filter [codegen id : 14] +Input [5]: [item#68, return_ratio#69, currency_ratio#70, return_rank#72, currency_rank#73] +Condition : ((return_rank#72 <= 10) OR (currency_rank#73 <= 10)) + +(50) Project [codegen id : 14] +Output [5]: [catalog AS channel#74, item#68, return_ratio#69, return_rank#72, currency_rank#73] +Input [5]: [item#68, return_ratio#69, currency_ratio#70, return_rank#72, currency_rank#73] + +(51) Scan parquet default.store_sales +Output [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_net_paid), IsNotNull(ss_quantity), GreaterThan(ss_net_profit,1.00), GreaterThan(ss_net_paid,0.00), GreaterThan(ss_quantity,0), IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 17] +Input [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80] + +(53) Filter [codegen id : 17] +Input [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80] +Condition : ((((((((isnotnull(ss_net_profit#80) AND isnotnull(ss_net_paid#79)) AND isnotnull(ss_quantity#78)) AND (ss_net_profit#80 > 1.00)) AND (ss_net_paid#79 > 0.00)) AND (ss_quantity#78 > 0)) AND isnotnull(ss_ticket_number#77)) AND isnotnull(ss_item_sk#76)) AND isnotnull(ss_sold_date_sk#75)) + +(54) Project [codegen id : 17] +Output [5]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79] +Input [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80] + +(55) Scan parquet default.store_returns +Output [4]: [sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(56) ColumnarToRow [codegen id : 15] +Input [4]: [sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] + +(57) Filter [codegen id : 15] +Input [4]: [sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] +Condition : (((isnotnull(sr_return_amt#84) AND (sr_return_amt#84 > 10000.00)) AND isnotnull(sr_ticket_number#82)) AND isnotnull(sr_item_sk#81)) + +(58) BroadcastExchange +Input [4]: [sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#85] + +(59) BroadcastHashJoin [codegen id : 17] +Left keys [2]: [cast(ss_ticket_number#77 as bigint), cast(ss_item_sk#76 as bigint)] +Right keys [2]: [sr_ticket_number#82, sr_item_sk#81] +Join condition: None + +(60) Project [codegen id : 17] +Output [6]: [ss_sold_date_sk#75, ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#83, sr_return_amt#84] +Input [9]: [ss_sold_date_sk#75, ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, sr_item_sk#81, sr_ticket_number#82, sr_return_quantity#83, sr_return_amt#84] + +(61) ReusedExchange [Reuses operator id: 15] +Output [1]: [d_date_sk#12] + +(62) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ss_sold_date_sk#75] +Right keys [1]: [d_date_sk#12] +Join condition: None + +(63) Project [codegen id : 17] +Output [5]: [ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#83, sr_return_amt#84] +Input [7]: [ss_sold_date_sk#75, ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#83, sr_return_amt#84, d_date_sk#12] + +(64) HashAggregate [codegen id : 17] +Input [5]: [ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#83, sr_return_amt#84] +Keys [1]: [ss_item_sk#76] +Functions [4]: [partial_sum(cast(coalesce(sr_return_quantity#83, 0) as bigint)), partial_sum(cast(coalesce(ss_quantity#78, 0) as bigint)), partial_sum(coalesce(cast(sr_return_amt#84 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#86, sum#87, sum#88, isEmpty#89, sum#90, isEmpty#91] +Results [7]: [ss_item_sk#76, sum#92, sum#93, sum#94, isEmpty#95, sum#96, isEmpty#97] + +(65) Exchange +Input [7]: [ss_item_sk#76, sum#92, sum#93, sum#94, isEmpty#95, sum#96, isEmpty#97] +Arguments: hashpartitioning(ss_item_sk#76, 5), true, [id=#98] + +(66) HashAggregate [codegen id : 18] +Input [7]: [ss_item_sk#76, sum#92, sum#93, sum#94, isEmpty#95, sum#96, isEmpty#97] +Keys [1]: [ss_item_sk#76] +Functions [4]: [sum(cast(coalesce(sr_return_quantity#83, 0) as bigint)), sum(cast(coalesce(ss_quantity#78, 0) as bigint)), sum(coalesce(cast(sr_return_amt#84 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(cast(coalesce(sr_return_quantity#83, 0) as bigint))#99, sum(cast(coalesce(ss_quantity#78, 0) as bigint))#100, sum(coalesce(cast(sr_return_amt#84 as decimal(12,2)), 0.00))#101, sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))#102] +Results [3]: [ss_item_sk#76 AS item#103, CheckOverflow((promote_precision(cast(sum(cast(coalesce(sr_return_quantity#83, 0) as bigint))#99 as decimal(15,4))) / promote_precision(cast(sum(cast(coalesce(ss_quantity#78, 0) as bigint))#100 as decimal(15,4)))), DecimalType(35,20), true) AS return_ratio#104, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#84 as decimal(12,2)), 0.00))#101 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))#102 as decimal(15,4)))), DecimalType(35,20), true) AS currency_ratio#105] + +(67) Exchange +Input [3]: [item#103, return_ratio#104, currency_ratio#105] +Arguments: SinglePartition, true, [id=#106] + +(68) Sort [codegen id : 19] +Input [3]: [item#103, return_ratio#104, currency_ratio#105] +Arguments: [return_ratio#104 ASC NULLS FIRST], false, 0 + +(69) Window +Input [3]: [item#103, return_ratio#104, currency_ratio#105] +Arguments: [rank(return_ratio#104) windowspecdefinition(return_ratio#104 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#107], [return_ratio#104 ASC NULLS FIRST] + +(70) Sort [codegen id : 20] +Input [4]: [item#103, return_ratio#104, currency_ratio#105, return_rank#107] +Arguments: [currency_ratio#105 ASC NULLS FIRST], false, 0 + +(71) Window +Input [4]: [item#103, return_ratio#104, currency_ratio#105, return_rank#107] +Arguments: [rank(currency_ratio#105) windowspecdefinition(currency_ratio#105 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#108], [currency_ratio#105 ASC NULLS FIRST] + +(72) Filter [codegen id : 21] +Input [5]: [item#103, return_ratio#104, currency_ratio#105, return_rank#107, currency_rank#108] +Condition : ((return_rank#107 <= 10) OR (currency_rank#108 <= 10)) + +(73) Project [codegen id : 21] +Output [5]: [store AS channel#109, item#103, return_ratio#104, return_rank#107, currency_rank#108] +Input [5]: [item#103, return_ratio#104, currency_ratio#105, return_rank#107, currency_rank#108] + +(74) Union + +(75) HashAggregate [codegen id : 22] +Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Keys [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] + +(76) Exchange +Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Arguments: hashpartitioning(channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38, 5), true, [id=#110] + +(77) HashAggregate [codegen id : 23] +Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Keys [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Functions: [] +Aggregate Attributes: [] +Results [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] + +(78) TakeOrderedAndProject +Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Arguments: 100, [channel#39 ASC NULLS FIRST, return_rank#37 ASC NULLS FIRST, currency_rank#38 ASC NULLS FIRST], [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/simplified.txt new file mode 100644 index 000000000..c15f2394e --- /dev/null +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q49/simplified.txt @@ -0,0 +1,126 @@ +TakeOrderedAndProject [channel,return_rank,currency_rank,item,return_ratio] + WholeStageCodegen (23) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Exchange [channel,item,return_ratio,return_rank,currency_rank] #1 + WholeStageCodegen (22) + HashAggregate [channel,item,return_ratio,return_rank,currency_rank] + InputAdapter + Union + WholeStageCodegen (7) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (6) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (5) + Sort [return_ratio] + InputAdapter + Exchange #2 + WholeStageCodegen (4) + HashAggregate [ws_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(cast(coalesce(wr_return_quantity, 0) as bigint)),sum(cast(coalesce(ws_quantity, 0) as bigint)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ws_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [ws_item_sk] #3 + WholeStageCodegen (3) + HashAggregate [ws_item_sk,wr_return_quantity,ws_quantity,wr_return_amt,ws_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] + Project [ws_item_sk,ws_quantity,ws_net_paid,wr_return_quantity,wr_return_amt] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_net_paid,wr_return_quantity,wr_return_amt] + BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_net_paid] + Filter [ws_net_profit,ws_net_paid,ws_quantity,ws_order_number,ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_net_paid,ws_net_profit] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [wr_return_amt,wr_order_number,wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (14) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (13) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (12) + Sort [return_ratio] + InputAdapter + Exchange #6 + WholeStageCodegen (11) + HashAggregate [cs_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(cast(coalesce(cr_return_quantity, 0) as bigint)),sum(cast(coalesce(cs_quantity, 0) as bigint)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(coalesce(cast(cs_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [cs_item_sk] #7 + WholeStageCodegen (10) + HashAggregate [cs_item_sk,cr_return_quantity,cs_quantity,cr_return_amount,cs_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] + Project [cs_item_sk,cs_quantity,cs_net_paid,cr_return_quantity,cr_return_amount] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_net_paid,cr_return_quantity,cr_return_amount] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_net_paid] + Filter [cs_net_profit,cs_net_paid,cs_quantity,cs_order_number,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_net_paid,cs_net_profit] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Filter [cr_return_amount,cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + InputAdapter + ReusedExchange [d_date_sk] #5 + WholeStageCodegen (21) + Project [item,return_ratio,return_rank,currency_rank] + Filter [return_rank,currency_rank] + InputAdapter + Window [currency_ratio] + WholeStageCodegen (20) + Sort [currency_ratio] + InputAdapter + Window [return_ratio] + WholeStageCodegen (19) + Sort [return_ratio] + InputAdapter + Exchange #9 + WholeStageCodegen (18) + HashAggregate [ss_item_sk,sum,sum,sum,isEmpty,sum,isEmpty] [sum(cast(coalesce(sr_return_quantity, 0) as bigint)),sum(cast(coalesce(ss_quantity, 0) as bigint)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(coalesce(cast(ss_net_paid as decimal(12,2)), 0.00)),item,return_ratio,currency_ratio,sum,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [ss_item_sk] #10 + WholeStageCodegen (17) + HashAggregate [ss_item_sk,sr_return_quantity,ss_quantity,sr_return_amt,ss_net_paid] [sum,sum,sum,isEmpty,sum,isEmpty,sum,sum,sum,isEmpty,sum,isEmpty] + Project [ss_item_sk,ss_quantity,ss_net_paid,sr_return_quantity,sr_return_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_net_paid,sr_return_quantity,sr_return_amt] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid] + Filter [ss_net_profit,ss_net_paid,ss_quantity,ss_ticket_number,ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_net_paid,ss_net_profit] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (15) + Filter [sr_return_amt,sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + InputAdapter + ReusedExchange [d_date_sk] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/explain.txt index 73563859c..15f0cda0b 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/explain.txt @@ -1,76 +1,435 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NULLS FIRST], output=[channel#1,id#2,sales#3,returns#4,profit#5]) -+- *(21) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[sum(sales#7), sum(returns#8), sum(profit#9)]) - +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 5) - +- *(20) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[partial_sum(sales#7), partial_sum(returns#8), partial_sum(profit#9)]) - +- *(20) Expand [List(sales#7, returns#8, profit#9, channel#10, id#11, 0), List(sales#7, returns#8, profit#9, channel#10, null, 1), List(sales#7, returns#8, profit#9, null, null, 3)], [sales#7, returns#8, profit#9, channel#1, id#2, spark_grouping_id#6] - +- Union - :- *(6) HashAggregate(keys=[s_store_id#12], functions=[sum(UnscaledValue(sales_price#13)), sum(UnscaledValue(return_amt#14)), sum(UnscaledValue(profit#15)), sum(UnscaledValue(net_loss#16))]) - : +- Exchange hashpartitioning(s_store_id#12, 5) - : +- *(5) HashAggregate(keys=[s_store_id#12], functions=[partial_sum(UnscaledValue(sales_price#13)), partial_sum(UnscaledValue(return_amt#14)), partial_sum(UnscaledValue(profit#15)), partial_sum(UnscaledValue(net_loss#16))]) - : +- *(5) Project [sales_price#13, profit#15, return_amt#14, net_loss#16, s_store_id#12] - : +- *(5) BroadcastHashJoin [store_sk#17], [cast(s_store_sk#18 as bigint)], Inner, BuildRight - : :- *(5) Project [store_sk#17, sales_price#13, profit#15, return_amt#14, net_loss#16] - : : +- *(5) BroadcastHashJoin [date_sk#19], [cast(d_date_sk#20 as bigint)], Inner, BuildRight - : : :- Union - : : : :- *(1) Project [cast(ss_store_sk#21 as bigint) AS store_sk#17, cast(ss_sold_date_sk#22 as bigint) AS date_sk#19, ss_ext_sales_price#23 AS sales_price#13, ss_net_profit#24 AS profit#15, 0.00 AS return_amt#14, 0.00 AS net_loss#16] - : : : : +- *(1) Filter (isnotnull(cast(ss_sold_date_sk#22 as bigint)) && isnotnull(cast(ss_store_sk#21 as bigint))) - : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_store_sk#21,ss_ext_sales_price#23,ss_net_profit#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [], ReadSchema: struct= 11192)) && (d_date#35 <= 11206)) && isnotnull(d_date_sk#20)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#20,d_date#35] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), Is..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [s_store_sk#18, s_store_id#12] - : +- *(4) Filter isnotnull(s_store_sk#18) - : +- *(4) FileScan parquet default.store[s_store_sk#18,s_store_id#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - :- *(12) HashAggregate(keys=[cp_catalog_page_id#36], functions=[sum(UnscaledValue(sales_price#37)), sum(UnscaledValue(return_amt#38)), sum(UnscaledValue(profit#39)), sum(UnscaledValue(net_loss#40))]) - : +- Exchange hashpartitioning(cp_catalog_page_id#36, 5) - : +- *(11) HashAggregate(keys=[cp_catalog_page_id#36], functions=[partial_sum(UnscaledValue(sales_price#37)), partial_sum(UnscaledValue(return_amt#38)), partial_sum(UnscaledValue(profit#39)), partial_sum(UnscaledValue(net_loss#40))]) - : +- *(11) Project [sales_price#37, profit#39, return_amt#38, net_loss#40, cp_catalog_page_id#36] - : +- *(11) BroadcastHashJoin [page_sk#41], [cp_catalog_page_sk#42], Inner, BuildRight - : :- *(11) Project [page_sk#41, sales_price#37, profit#39, return_amt#38, net_loss#40] - : : +- *(11) BroadcastHashJoin [date_sk#43], [d_date_sk#20], Inner, BuildRight - : : :- Union - : : : :- *(7) Project [cs_catalog_page_sk#44 AS page_sk#41, cs_sold_date_sk#45 AS date_sk#43, cs_ext_sales_price#46 AS sales_price#37, cs_net_profit#47 AS profit#39, 0.00 AS return_amt#38, 0.00 AS net_loss#40] - : : : : +- *(7) Filter (isnotnull(cs_sold_date_sk#45) && isnotnull(cs_catalog_page_sk#44)) - : : : : +- *(7) FileScan parquet default.catalog_sales[cs_sold_date_sk#45,cs_catalog_page_sk#44,cs_ext_sales_price#46,cs_net_profit#47] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk)], ReadSchema: struct= 11192)) && (d_date#35 <= 11206)) && isnotnull(d_date_sk#20)) - : : +- *(9) FileScan parquet default.date_dim[d_date_sk#20,d_date#35] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), Is..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(10) Project [cp_catalog_page_sk#42, cp_catalog_page_id#36] - : +- *(10) Filter isnotnull(cp_catalog_page_sk#42) - : +- *(10) FileScan parquet default.catalog_page[cp_catalog_page_sk#42,cp_catalog_page_id#36] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_page], PartitionFilters: [], PushedFilters: [IsNotNull(cp_catalog_page_sk)], ReadSchema: struct - +- *(19) HashAggregate(keys=[web_site_id#58], functions=[sum(UnscaledValue(sales_price#59)), sum(UnscaledValue(return_amt#60)), sum(UnscaledValue(profit#61)), sum(UnscaledValue(net_loss#62))]) - +- Exchange hashpartitioning(web_site_id#58, 5) - +- *(18) HashAggregate(keys=[web_site_id#58], functions=[partial_sum(UnscaledValue(sales_price#59)), partial_sum(UnscaledValue(return_amt#60)), partial_sum(UnscaledValue(profit#61)), partial_sum(UnscaledValue(net_loss#62))]) - +- *(18) Project [sales_price#59, profit#61, return_amt#60, net_loss#62, web_site_id#58] - +- *(18) BroadcastHashJoin [wsr_web_site_sk#63], [web_site_sk#64], Inner, BuildRight - :- *(18) Project [wsr_web_site_sk#63, sales_price#59, profit#61, return_amt#60, net_loss#62] - : +- *(18) BroadcastHashJoin [date_sk#65], [cast(d_date_sk#20 as bigint)], Inner, BuildRight - : :- Union - : : :- *(13) Project [ws_web_site_sk#66 AS wsr_web_site_sk#63, cast(ws_sold_date_sk#67 as bigint) AS date_sk#65, ws_ext_sales_price#68 AS sales_price#59, ws_net_profit#69 AS profit#61, 0.00 AS return_amt#60, 0.00 AS net_loss#62] - : : : +- *(13) Filter (isnotnull(cast(ws_sold_date_sk#67 as bigint)) && isnotnull(ws_web_site_sk#66)) - : : : +- *(13) FileScan parquet default.web_sales[ws_sold_date_sk#67,ws_web_site_sk#66,ws_ext_sales_price#68,ws_net_profit#69] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_web_site_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(17) Project [web_site_sk#64, web_site_id#58] - +- *(17) Filter isnotnull(web_site_sk#64) - +- *(17) FileScan parquet default.web_site[web_site_sk#64,web_site_id#58] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (78) ++- * HashAggregate (77) + +- Exchange (76) + +- * HashAggregate (75) + +- * Expand (74) + +- Union (73) + :- * HashAggregate (25) + : +- Exchange (24) + : +- * HashAggregate (23) + : +- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- Union (9) + : : : :- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- * Project (8) + : : : +- * Filter (7) + : : : +- * ColumnarToRow (6) + : : : +- Scan parquet default.store_returns (5) + : : +- BroadcastExchange (14) + : : +- * Project (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.date_dim (10) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet default.store (17) + :- * HashAggregate (46) + : +- Exchange (45) + : +- * HashAggregate (44) + : +- * Project (43) + : +- * BroadcastHashJoin Inner BuildRight (42) + : :- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- Union (34) + : : : :- * Project (29) + : : : : +- * Filter (28) + : : : : +- * ColumnarToRow (27) + : : : : +- Scan parquet default.catalog_sales (26) + : : : +- * Project (33) + : : : +- * Filter (32) + : : : +- * ColumnarToRow (31) + : : : +- Scan parquet default.catalog_returns (30) + : : +- ReusedExchange (35) + : +- BroadcastExchange (41) + : +- * Filter (40) + : +- * ColumnarToRow (39) + : +- Scan parquet default.catalog_page (38) + +- * HashAggregate (72) + +- Exchange (71) + +- * HashAggregate (70) + +- * Project (69) + +- * BroadcastHashJoin Inner BuildRight (68) + :- * Project (63) + : +- * BroadcastHashJoin Inner BuildRight (62) + : :- Union (60) + : : :- * Project (50) + : : : +- * Filter (49) + : : : +- * ColumnarToRow (48) + : : : +- Scan parquet default.web_sales (47) + : : +- * Project (59) + : : +- * BroadcastHashJoin Inner BuildRight (58) + : : :- * Filter (53) + : : : +- * ColumnarToRow (52) + : : : +- Scan parquet default.web_returns (51) + : : +- BroadcastExchange (57) + : : +- * Filter (56) + : : +- * ColumnarToRow (55) + : : +- Scan parquet default.web_sales (54) + : +- ReusedExchange (61) + +- BroadcastExchange (67) + +- * Filter (66) + +- * ColumnarToRow (65) + +- Scan parquet default.web_site (64) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(3) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Condition : (isnotnull(cast(ss_sold_date_sk#1 as bigint)) AND isnotnull(cast(ss_store_sk#2 as bigint))) + +(4) Project [codegen id : 1] +Output [6]: [cast(ss_store_sk#2 as bigint) AS store_sk#5, cast(ss_sold_date_sk#1 as bigint) AS date_sk#6, ss_ext_sales_price#3 AS sales_price#7, ss_net_profit#4 AS profit#8, 0.00 AS return_amt#9, 0.00 AS net_loss#10] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(5) Scan parquet default.store_returns +Output [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 2] +Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] + +(7) Filter [codegen id : 2] +Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] +Condition : (isnotnull(sr_returned_date_sk#11) AND isnotnull(sr_store_sk#12)) + +(8) Project [codegen id : 2] +Output [6]: [sr_store_sk#12 AS store_sk#15, sr_returned_date_sk#11 AS date_sk#16, 0.00 AS sales_price#17, 0.00 AS profit#18, sr_return_amt#13 AS return_amt#19, sr_net_loss#14 AS net_loss#20] +Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_loss#14] + +(9) Union + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#21, d_date#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#21, d_date#22] + +(12) Filter [codegen id : 3] +Input [2]: [d_date_sk#21, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 11192)) AND (d_date#22 <= 11206)) AND isnotnull(d_date_sk#21)) + +(13) Project [codegen id : 3] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_date#22] + +(14) BroadcastExchange +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] + +(15) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [date_sk#6] +Right keys [1]: [cast(d_date_sk#21 as bigint)] +Join condition: None + +(16) Project [codegen id : 5] +Output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] +Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] + +(17) Scan parquet default.store +Output [2]: [s_store_sk#24, s_store_id#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 4] +Input [2]: [s_store_sk#24, s_store_id#25] + +(19) Filter [codegen id : 4] +Input [2]: [s_store_sk#24, s_store_id#25] +Condition : isnotnull(s_store_sk#24) + +(20) BroadcastExchange +Input [2]: [s_store_sk#24, s_store_id#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [store_sk#5] +Right keys [1]: [cast(s_store_sk#24 as bigint)] +Join condition: None + +(22) Project [codegen id : 5] +Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] +Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#24, s_store_id#25] + +(23) HashAggregate [codegen id : 5] +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] +Keys [1]: [s_store_id#25] +Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum#27, sum#28, sum#29, sum#30] +Results [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] + +(24) Exchange +Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(s_store_id#25, 5), true, [id=#35] + +(25) HashAggregate [codegen id : 6] +Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] +Keys [1]: [s_store_id#25] +Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#36, sum(UnscaledValue(return_amt#9))#37, sum(UnscaledValue(profit#8))#38, sum(UnscaledValue(net_loss#10))#39] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#40, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS RETURNS#41, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#42, store channel AS channel#43, concat(store, s_store_id#25) AS id#44] + +(26) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 7] +Input [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] + +(28) Filter [codegen id : 7] +Input [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] +Condition : (isnotnull(cs_sold_date_sk#45) AND isnotnull(cs_catalog_page_sk#46)) + +(29) Project [codegen id : 7] +Output [6]: [cs_catalog_page_sk#46 AS page_sk#49, cs_sold_date_sk#45 AS date_sk#50, cs_ext_sales_price#47 AS sales_price#51, cs_net_profit#48 AS profit#52, 0.00 AS return_amt#53, 0.00 AS net_loss#54] +Input [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] + +(30) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_catalog_page_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 8] +Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] + +(32) Filter [codegen id : 8] +Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] +Condition : (isnotnull(cr_returned_date_sk#55) AND isnotnull(cr_catalog_page_sk#56)) + +(33) Project [codegen id : 8] +Output [6]: [cr_catalog_page_sk#56 AS page_sk#59, cr_returned_date_sk#55 AS date_sk#60, 0.00 AS sales_price#61, 0.00 AS profit#62, cr_return_amount#57 AS return_amt#63, cr_net_loss#58 AS net_loss#64] +Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, cr_net_loss#58] + +(34) Union + +(35) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#21] + +(36) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [date_sk#50] +Right keys [1]: [d_date_sk#21] +Join condition: None + +(37) Project [codegen id : 11] +Output [5]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54] +Input [7]: [page_sk#49, date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, d_date_sk#21] + +(38) Scan parquet default.catalog_page +Output [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 10] +Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] + +(40) Filter [codegen id : 10] +Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Condition : isnotnull(cp_catalog_page_sk#65) + +(41) BroadcastExchange +Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#67] + +(42) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [page_sk#49] +Right keys [1]: [cp_catalog_page_sk#65] +Join condition: None + +(43) Project [codegen id : 11] +Output [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] +Input [7]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_sk#65, cp_catalog_page_id#66] + +(44) HashAggregate [codegen id : 11] +Input [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] +Keys [1]: [cp_catalog_page_id#66] +Functions [4]: [partial_sum(UnscaledValue(sales_price#51)), partial_sum(UnscaledValue(return_amt#53)), partial_sum(UnscaledValue(profit#52)), partial_sum(UnscaledValue(net_loss#54))] +Aggregate Attributes [4]: [sum#68, sum#69, sum#70, sum#71] +Results [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] + +(45) Exchange +Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] +Arguments: hashpartitioning(cp_catalog_page_id#66, 5), true, [id=#76] + +(46) HashAggregate [codegen id : 12] +Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] +Keys [1]: [cp_catalog_page_id#66] +Functions [4]: [sum(UnscaledValue(sales_price#51)), sum(UnscaledValue(return_amt#53)), sum(UnscaledValue(profit#52)), sum(UnscaledValue(net_loss#54))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#51))#77, sum(UnscaledValue(return_amt#53))#78, sum(UnscaledValue(profit#52))#79, sum(UnscaledValue(net_loss#54))#80] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#51))#77,17,2) AS sales#81, MakeDecimal(sum(UnscaledValue(return_amt#53))#78,17,2) AS RETURNS#82, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#52))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#54))#80,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#83, catalog channel AS channel#84, concat(catalog_page, cp_catalog_page_id#66) AS id#85] + +(47) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 13] +Input [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] + +(49) Filter [codegen id : 13] +Input [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] +Condition : (isnotnull(cast(ws_sold_date_sk#86 as bigint)) AND isnotnull(ws_web_site_sk#87)) + +(50) Project [codegen id : 13] +Output [6]: [ws_web_site_sk#87 AS wsr_web_site_sk#90, cast(ws_sold_date_sk#86 as bigint) AS date_sk#91, ws_ext_sales_price#88 AS sales_price#92, ws_net_profit#89 AS profit#93, 0.00 AS return_amt#94, 0.00 AS net_loss#95] +Input [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88, ws_net_profit#89] + +(51) Scan parquet default.web_returns +Output [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_returned_date_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 15] +Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] + +(53) Filter [codegen id : 15] +Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] +Condition : isnotnull(wr_returned_date_sk#96) + +(54) Scan parquet default.web_sales +Output [3]: [ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 14] +Input [3]: [ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] + +(56) Filter [codegen id : 14] +Input [3]: [ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] +Condition : ((isnotnull(ws_item_sk#101) AND isnotnull(ws_order_number#102)) AND isnotnull(ws_web_site_sk#87)) + +(57) BroadcastExchange +Input [3]: [ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint), cast(input[2, int, false] as bigint)),false), [id=#103] + +(58) BroadcastHashJoin [codegen id : 15] +Left keys [2]: [wr_item_sk#97, wr_order_number#98] +Right keys [2]: [cast(ws_item_sk#101 as bigint), cast(ws_order_number#102 as bigint)] +Join condition: None + +(59) Project [codegen id : 15] +Output [6]: [ws_web_site_sk#87 AS wsr_web_site_sk#104, wr_returned_date_sk#96 AS date_sk#105, 0.00 AS sales_price#106, 0.00 AS profit#107, wr_return_amt#99 AS return_amt#108, wr_net_loss#100 AS net_loss#109] +Input [8]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100, ws_item_sk#101, ws_web_site_sk#87, ws_order_number#102] + +(60) Union + +(61) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#21] + +(62) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [date_sk#91] +Right keys [1]: [cast(d_date_sk#21 as bigint)] +Join condition: None + +(63) Project [codegen id : 18] +Output [5]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95] +Input [7]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, d_date_sk#21] + +(64) Scan parquet default.web_site +Output [2]: [web_site_sk#110, web_site_id#111] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 17] +Input [2]: [web_site_sk#110, web_site_id#111] + +(66) Filter [codegen id : 17] +Input [2]: [web_site_sk#110, web_site_id#111] +Condition : isnotnull(web_site_sk#110) + +(67) BroadcastExchange +Input [2]: [web_site_sk#110, web_site_id#111] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#112] + +(68) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [wsr_web_site_sk#90] +Right keys [1]: [web_site_sk#110] +Join condition: None + +(69) Project [codegen id : 18] +Output [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#111] +Input [7]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#110, web_site_id#111] + +(70) HashAggregate [codegen id : 18] +Input [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#111] +Keys [1]: [web_site_id#111] +Functions [4]: [partial_sum(UnscaledValue(sales_price#92)), partial_sum(UnscaledValue(return_amt#94)), partial_sum(UnscaledValue(profit#93)), partial_sum(UnscaledValue(net_loss#95))] +Aggregate Attributes [4]: [sum#113, sum#114, sum#115, sum#116] +Results [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120] + +(71) Exchange +Input [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120] +Arguments: hashpartitioning(web_site_id#111, 5), true, [id=#121] + +(72) HashAggregate [codegen id : 19] +Input [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120] +Keys [1]: [web_site_id#111] +Functions [4]: [sum(UnscaledValue(sales_price#92)), sum(UnscaledValue(return_amt#94)), sum(UnscaledValue(profit#93)), sum(UnscaledValue(net_loss#95))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#92))#122, sum(UnscaledValue(return_amt#94))#123, sum(UnscaledValue(profit#93))#124, sum(UnscaledValue(net_loss#95))#125] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#92))#122,17,2) AS sales#126, MakeDecimal(sum(UnscaledValue(return_amt#94))#123,17,2) AS RETURNS#127, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#124,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#125,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#128, web channel AS channel#129, concat(web_site, web_site_id#111) AS id#130] + +(73) Union + +(74) Expand [codegen id : 20] +Input [5]: [sales#40, RETURNS#41, profit#42, channel#43, id#44] +Arguments: [List(sales#40, returns#41, profit#42, channel#43, id#44, 0), List(sales#40, returns#41, profit#42, channel#43, null, 1), List(sales#40, returns#41, profit#42, null, null, 3)], [sales#40, returns#41, profit#42, channel#131, id#132, spark_grouping_id#133] + +(75) HashAggregate [codegen id : 20] +Input [6]: [sales#40, returns#41, profit#42, channel#131, id#132, spark_grouping_id#133] +Keys [3]: [channel#131, id#132, spark_grouping_id#133] +Functions [3]: [partial_sum(sales#40), partial_sum(returns#41), partial_sum(profit#42)] +Aggregate Attributes [6]: [sum#134, isEmpty#135, sum#136, isEmpty#137, sum#138, isEmpty#139] +Results [9]: [channel#131, id#132, spark_grouping_id#133, sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] + +(76) Exchange +Input [9]: [channel#131, id#132, spark_grouping_id#133, sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] +Arguments: hashpartitioning(channel#131, id#132, spark_grouping_id#133, 5), true, [id=#146] + +(77) HashAggregate [codegen id : 21] +Input [9]: [channel#131, id#132, spark_grouping_id#133, sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] +Keys [3]: [channel#131, id#132, spark_grouping_id#133] +Functions [3]: [sum(sales#40), sum(returns#41), sum(profit#42)] +Aggregate Attributes [3]: [sum(sales#40)#147, sum(returns#41)#148, sum(profit#42)#149] +Results [5]: [channel#131, id#132, sum(sales#40)#147 AS sales#150, sum(returns#41)#148 AS returns#151, sum(profit#42)#149 AS profit#152] + +(78) TakeOrderedAndProject +Input [5]: [channel#131, id#132, sales#150, returns#151, profit#152] +Arguments: 100, [channel#131 ASC NULLS FIRST, id#132 ASC NULLS FIRST], [channel#131, id#132, sales#150, returns#151, profit#152] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/simplified.txt index 257c8efa6..9b7cc3360 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q5/simplified.txt @@ -1,110 +1,123 @@ -TakeOrderedAndProject [channel,id,profit,returns,sales] - WholeStageCodegen - HashAggregate [channel,id,spark_grouping_id,sum,sum,sum,sum(profit),sum(returns),sum(sales)] [profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (21) + HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [channel,id,spark_grouping_id] #1 - WholeStageCodegen - HashAggregate [channel,id,profit,returns,sales,spark_grouping_id,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Expand [channel,id,profit,returns,sales] + WholeStageCodegen (20) + HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Expand [sales,returns,profit,channel,id] InputAdapter Union - WholeStageCodegen - HashAggregate [s_store_id,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] [RETURNS,channel,id,profit,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] + WholeStageCodegen (6) + HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum] InputAdapter Exchange [s_store_id] #2 - WholeStageCodegen - HashAggregate [net_loss,profit,return_amt,s_store_id,sales_price,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [net_loss,profit,return_amt,s_store_id,sales_price] - BroadcastHashJoin [s_store_sk,store_sk] - Project [net_loss,profit,return_amt,sales_price,store_sk] - BroadcastHashJoin [d_date_sk,date_sk] + WholeStageCodegen (5) + HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,s_store_id] + BroadcastHashJoin [store_sk,s_store_sk] + Project [store_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] InputAdapter Union - WholeStageCodegen - Project [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] + WholeStageCodegen (1) + Project [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] - WholeStageCodegen - Project [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + WholeStageCodegen (2) + Project [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] Filter [sr_returned_date_sk,sr_store_sk] - Scan parquet default.store_returns [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (3) Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [s_store_id,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_id,s_store_sk] [s_store_id,s_store_sk] - WholeStageCodegen - HashAggregate [cp_catalog_page_id,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] [RETURNS,channel,id,profit,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] + WholeStageCodegen (4) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id] + WholeStageCodegen (12) + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum] InputAdapter Exchange [cp_catalog_page_id] #5 - WholeStageCodegen - HashAggregate [cp_catalog_page_id,net_loss,profit,return_amt,sales_price,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [cp_catalog_page_id,net_loss,profit,return_amt,sales_price] - BroadcastHashJoin [cp_catalog_page_sk,page_sk] - Project [net_loss,page_sk,profit,return_amt,sales_price] - BroadcastHashJoin [d_date_sk,date_sk] + WholeStageCodegen (11) + HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] + BroadcastHashJoin [page_sk,cp_catalog_page_sk] + Project [page_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] InputAdapter Union - WholeStageCodegen - Project [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] - Filter [cs_catalog_page_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] - WholeStageCodegen - Project [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] - Filter [cr_catalog_page_sk,cr_returned_date_sk] - Scan parquet default.catalog_returns [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] [cr_catalog_page_sk,cr_net_loss,cr_return_amount,cr_returned_date_sk] + WholeStageCodegen (7) + Project [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] + Filter [cs_sold_date_sk,cs_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit] + WholeStageCodegen (8) + Project [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] + Filter [cr_returned_date_sk,cr_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + ReusedExchange [d_date_sk] #3 InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [cp_catalog_page_id,cp_catalog_page_sk] - Filter [cp_catalog_page_sk] - Scan parquet default.catalog_page [cp_catalog_page_id,cp_catalog_page_sk] [cp_catalog_page_id,cp_catalog_page_sk] - WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price)),web_site_id] [RETURNS,channel,id,profit,sales,sum,sum,sum,sum,sum(UnscaledValue(net_loss)),sum(UnscaledValue(profit)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(sales_price))] + BroadcastExchange #6 + WholeStageCodegen (10) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + WholeStageCodegen (19) + HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum] InputAdapter - Exchange [web_site_id] #8 - WholeStageCodegen - HashAggregate [net_loss,profit,return_amt,sales_price,sum,sum,sum,sum,sum,sum,sum,sum,web_site_id] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [net_loss,profit,return_amt,sales_price,web_site_id] - BroadcastHashJoin [web_site_sk,wsr_web_site_sk] - Project [net_loss,profit,return_amt,sales_price,wsr_web_site_sk] - BroadcastHashJoin [d_date_sk,date_sk] + Exchange [web_site_id] #7 + WholeStageCodegen (18) + HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,web_site_id] + BroadcastHashJoin [wsr_web_site_sk,web_site_sk] + Project [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] InputAdapter Union - WholeStageCodegen - Project [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] + WholeStageCodegen (13) + Project [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] Filter [ws_sold_date_sk,ws_web_site_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_site_sk] - WholeStageCodegen - Project [wr_net_loss,wr_return_amt,wr_returned_date_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit] + WholeStageCodegen (15) + Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Project [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt,wr_returned_date_sk] - Filter [wr_returned_date_sk] - Scan parquet default.web_returns [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt,wr_returned_date_sk] [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt,wr_returned_date_sk] + Filter [wr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_returned_date_sk,wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] InputAdapter - BroadcastExchange #9 - WholeStageCodegen - Project [ws_item_sk,ws_order_number,ws_web_site_sk] - Filter [ws_item_sk,ws_order_number,ws_web_site_sk] - Scan parquet default.web_sales [ws_item_sk,ws_order_number,ws_web_site_sk] [ws_item_sk,ws_order_number,ws_web_site_sk] + BroadcastExchange #8 + WholeStageCodegen (14) + Filter [ws_item_sk,ws_order_number,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 + ReusedExchange [d_date_sk] #3 InputAdapter - BroadcastExchange #10 - WholeStageCodegen - Project [web_site_id,web_site_sk] - Filter [web_site_sk] - Scan parquet default.web_site [web_site_id,web_site_sk] [web_site_id,web_site_sk] + BroadcastExchange #9 + WholeStageCodegen (17) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_site_id] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/explain.txt index 5b8bf2898..e083affa7 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/explain.txt @@ -1,32 +1,183 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,s_company_id#2 ASC NULLS FIRST,s_street_number#3 ASC NULLS FIRST,s_street_name#4 ASC NULLS FIRST,s_street_type#5 ASC NULLS FIRST,s_suite_number#6 ASC NULLS FIRST,s_city#7 ASC NULLS FIRST,s_county#8 ASC NULLS FIRST,s_state#9 ASC NULLS FIRST,s_zip#10 ASC NULLS FIRST], output=[s_store_name#1,s_company_id#2,s_street_number#3,s_street_name#4,s_street_type#5,s_suite_number#6,s_city#7,s_county#8,s_state#9,s_zip#10,30 days #11,31 - 60 days #12,61 - 90 days #13,91 - 120 days #14,>120 days #15]) -+- *(6) HashAggregate(keys=[s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10], functions=[sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 30) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 60) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 90) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))]) - +- Exchange hashpartitioning(s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10, 5) - +- *(5) HashAggregate(keys=[s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10], functions=[partial_sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 30) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 60) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 90) && ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((sr_returned_date_sk#16 - cast(ss_sold_date_sk#17 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))]) - +- *(5) Project [ss_sold_date_sk#17, sr_returned_date_sk#16, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] - +- *(5) BroadcastHashJoin [sr_returned_date_sk#16], [cast(d_date_sk#18 as bigint)], Inner, BuildRight - :- *(5) Project [ss_sold_date_sk#17, sr_returned_date_sk#16, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] - : +- *(5) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#19], Inner, BuildRight - : :- *(5) Project [ss_sold_date_sk#17, sr_returned_date_sk#16, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] - : : +- *(5) BroadcastHashJoin [ss_store_sk#20], [s_store_sk#21], Inner, BuildRight - : : :- *(5) Project [ss_sold_date_sk#17, ss_store_sk#20, sr_returned_date_sk#16] - : : : +- *(5) BroadcastHashJoin [cast(ss_ticket_number#22 as bigint), cast(ss_item_sk#23 as bigint), cast(ss_customer_sk#24 as bigint)], [sr_ticket_number#25, sr_item_sk#26, sr_customer_sk#27], Inner, BuildRight - : : : :- *(5) Project [ss_sold_date_sk#17, ss_item_sk#23, ss_customer_sk#24, ss_store_sk#20, ss_ticket_number#22] - : : : : +- *(5) Filter ((((isnotnull(ss_ticket_number#22) && isnotnull(ss_item_sk#23)) && isnotnull(ss_customer_sk#24)) && isnotnull(ss_store_sk#20)) && isnotnull(ss_sold_date_sk#17)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_item_sk#23,ss_customer_sk#24,ss_store_sk#20,ss_ticket_number#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_stor..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[3, bigint, true], input[1, bigint, true], input[2, bigint, true])) - : : : +- *(1) Project [sr_returned_date_sk#16, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#25] - : : : +- *(1) Filter (((isnotnull(sr_ticket_number#25) && isnotnull(sr_customer_sk#27)) && isnotnull(sr_item_sk#26)) && isnotnull(sr_returned_date_sk#16)) - : : : +- *(1) FileScan parquet default.store_returns[sr_returned_date_sk#16,sr_item_sk#26,sr_customer_sk#27,sr_ticket_number#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_retu..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [s_store_sk#21, s_store_name#1, s_company_id#2, s_street_number#3, s_street_name#4, s_street_type#5, s_suite_number#6, s_city#7, s_county#8, s_state#9, s_zip#10] - : : +- *(2) Filter isnotnull(s_store_sk#21) - : : +- *(2) FileScan parquet default.store[s_store_sk#21,s_store_name#1,s_company_id#2,s_street_number#3,s_street_name#4,s_street_type#5,s_suite_number#6,s_city#7,s_county#8,s_state#9,s_zip#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(4) Project [d_date_sk#18] - +- *(4) Filter ((((isnotnull(d_year#28) && isnotnull(d_moy#29)) && (d_year#28 = 2001)) && (d_moy#29 = 8)) && isnotnull(d_date_sk#18)) - +- *(4) FileScan parquet default.date_dim[d_date_sk#18,d_year#28,d_moy#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Project (28) + +- * BroadcastHashJoin Inner BuildRight (27) + :- * Project (21) + : +- * BroadcastHashJoin Inner BuildRight (20) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.store_returns (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.store (10) + : +- BroadcastExchange (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.date_dim (16) + +- BroadcastExchange (26) + +- * Project (25) + +- * Filter (24) + +- * ColumnarToRow (23) + +- Scan parquet default.date_dim (22) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5] + +(3) Filter [codegen id : 5] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5] +Condition : ((((isnotnull(ss_ticket_number#5) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.store_returns +Output [4]: [sr_returned_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk), IsNotNull(sr_customer_sk), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [sr_returned_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9] + +(6) Filter [codegen id : 1] +Input [4]: [sr_returned_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9] +Condition : (((isnotnull(sr_ticket_number#9) AND isnotnull(sr_item_sk#7)) AND isnotnull(sr_customer_sk#8)) AND isnotnull(sr_returned_date_sk#6)) + +(7) BroadcastExchange +Input [4]: [sr_returned_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9] +Arguments: HashedRelationBroadcastMode(List(input[3, bigint, false], input[1, bigint, false], input[2, bigint, false]),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [3]: [cast(ss_ticket_number#5 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_customer_sk#3 as bigint)] +Right keys [3]: [sr_ticket_number#9, sr_item_sk#7, sr_customer_sk#8] +Join condition: None + +(9) Project [codegen id : 5] +Output [3]: [ss_sold_date_sk#1, ss_store_sk#4, sr_returned_date_sk#6] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, sr_returned_date_sk#6, sr_item_sk#7, sr_customer_sk#8, sr_ticket_number#9] + +(10) Scan parquet default.store +Output [11]: [s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [11]: [s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] + +(12) Filter [codegen id : 2] +Input [11]: [s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Condition : isnotnull(s_store_sk#11) + +(13) BroadcastExchange +Input [11]: [s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#11] +Join condition: None + +(15) Project [codegen id : 5] +Output [12]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Input [14]: [ss_sold_date_sk#1, ss_store_sk#4, sr_returned_date_sk#6, s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] + +(16) Scan parquet default.date_dim +Output [1]: [d_date_sk#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [1]: [d_date_sk#23] + +(18) Filter [codegen id : 3] +Input [1]: [d_date_sk#23] +Condition : isnotnull(d_date_sk#23) + +(19) BroadcastExchange +Input [1]: [d_date_sk#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#24] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#23] +Join condition: None + +(21) Project [codegen id : 5] +Output [12]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Input [13]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, d_date_sk#23] + +(22) Scan parquet default.date_dim +Output [3]: [d_date_sk#25, d_year#26, d_moy#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#25, d_year#26, d_moy#27] + +(24) Filter [codegen id : 4] +Input [3]: [d_date_sk#25, d_year#26, d_moy#27] +Condition : ((((isnotnull(d_year#26) AND isnotnull(d_moy#27)) AND (d_year#26 = 2001)) AND (d_moy#27 = 8)) AND isnotnull(d_date_sk#25)) + +(25) Project [codegen id : 4] +Output [1]: [d_date_sk#25] +Input [3]: [d_date_sk#25, d_year#26, d_moy#27] + +(26) BroadcastExchange +Input [1]: [d_date_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] + +(27) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_returned_date_sk#6] +Right keys [1]: [cast(d_date_sk#25 as bigint)] +Join condition: None + +(28) Project [codegen id : 5] +Output [12]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Input [13]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, d_date_sk#25] + +(29) HashAggregate [codegen id : 5] +Input [12]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Keys [10]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Functions [5]: [partial_sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum#29, sum#30, sum#31, sum#32, sum#33] +Results [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum#34, sum#35, sum#36, sum#37, sum#38] + +(30) Exchange +Input [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum#34, sum#35, sum#36, sum#37, sum#38] +Arguments: hashpartitioning(s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, 5), true, [id=#39] + +(31) HashAggregate [codegen id : 6] +Input [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum#34, sum#35, sum#36, sum#37, sum#38] +Keys [10]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Functions [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#40, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#41, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#42, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#43, sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#44] +Results [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#40 AS 30 days #45, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#41 AS 31 - 60 days #46, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#42 AS 61 - 90 days #47, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#43 AS 91 - 120 days #48, sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#44 AS >120 days #49] + +(32) TakeOrderedAndProject +Input [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, 30 days #45, 31 - 60 days #46, 61 - 90 days #47, 91 - 120 days #48, >120 days #49] +Arguments: 100, [s_store_name#12 ASC NULLS FIRST, s_company_id#13 ASC NULLS FIRST, s_street_number#14 ASC NULLS FIRST, s_street_name#15 ASC NULLS FIRST, s_street_type#16 ASC NULLS FIRST, s_suite_number#17 ASC NULLS FIRST, s_city#18 ASC NULLS FIRST, s_county#19 ASC NULLS FIRST, s_state#20 ASC NULLS FIRST, s_zip#21 ASC NULLS FIRST], [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, 30 days #45, 31 - 60 days #46, 61 - 90 days #47, 91 - 120 days #48, >120 days #49] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/simplified.txt index c3f728d0a..43e777385 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q50/simplified.txt @@ -1,42 +1,48 @@ -TakeOrderedAndProject [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] - WholeStageCodegen - HashAggregate [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) && ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] +TakeOrderedAndProject [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] + WholeStageCodegen (6) + HashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sum,sum,sum,sum,sum] [sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] InputAdapter - Exchange [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] #1 - WholeStageCodegen - HashAggregate [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] - Project [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [s_city,s_company_id,s_county,s_state,s_store_name,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip,sr_returned_date_sk,ss_sold_date_sk] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [sr_returned_date_sk,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [sr_customer_sk,sr_item_sk,sr_ticket_number,ss_customer_sk,ss_item_sk,ss_ticket_number] - Project [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Exchange [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] #1 + WholeStageCodegen (5) + HashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sr_returned_date_sk,ss_sold_date_sk] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,sr_returned_date_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_sold_date_sk,ss_store_sk,sr_returned_date_sk] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,ss_customer_sk,sr_ticket_number,sr_item_sk,sr_customer_sk] + Filter [ss_ticket_number,ss_item_sk,ss_customer_sk,ss_store_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] - Filter [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] [sr_customer_sk,sr_item_sk,sr_returned_date_sk,sr_ticket_number] + WholeStageCodegen (1) + Filter [sr_ticket_number,sr_item_sk,sr_customer_sk,sr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [s_city,s_company_id,s_county,s_state,s_store_name,s_store_sk,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] - Filter [s_store_sk] - Scan parquet default.store [s_city,s_company_id,s_county,s_state,s_store_name,s_store_sk,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] [s_city,s_company_id,s_county,s_state,s_store_name,s_store_sk,s_street_name,s_street_number,s_street_type,s_suite_number,s_zip] + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk] - Scan parquet default.date_dim [d_date_sk] [d_date_sk] + WholeStageCodegen (3) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (4) Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/explain.txt index 38a4ec309..b391d90a3 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/explain.txt @@ -1,41 +1,228 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[item_sk#1 ASC NULLS FIRST,d_date#2 ASC NULLS FIRST], output=[item_sk#1,d_date#2,web_sales#3,store_sales#4,web_cumulative#5,store_cumulative#6]) -+- *(15) Filter ((isnotnull(web_cumulative#5) && isnotnull(store_cumulative#6)) && (web_cumulative#5 > store_cumulative#6)) - +- Window [max(web_sales#3) windowspecdefinition(item_sk#1, d_date#2 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#5, max(store_sales#4) windowspecdefinition(item_sk#1, d_date#2 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#6], [item_sk#1], [d_date#2 ASC NULLS FIRST] - +- *(14) Sort [item_sk#1 ASC NULLS FIRST, d_date#2 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(item_sk#1, 5) - +- *(13) Project [CASE WHEN isnotnull(item_sk#7) THEN item_sk#7 ELSE item_sk#8 END AS item_sk#1, CASE WHEN isnotnull(d_date#9) THEN d_date#9 ELSE d_date#10 END AS d_date#2, cume_sales#11 AS web_sales#3, cume_sales#12 AS store_sales#4] - +- SortMergeJoin [item_sk#7, d_date#9], [item_sk#8, d_date#10], FullOuter - :- *(6) Sort [item_sk#7 ASC NULLS FIRST, d_date#9 ASC NULLS FIRST], false, 0 - : +- Exchange hashpartitioning(item_sk#7, d_date#9, 5) - : +- *(5) Project [item_sk#7, d_date#9, cume_sales#11] - : +- Window [sum(_w0#13) windowspecdefinition(ws_item_sk#14, d_date#9 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#11], [ws_item_sk#14], [d_date#9 ASC NULLS FIRST] - : +- *(4) Sort [ws_item_sk#14 ASC NULLS FIRST, d_date#9 ASC NULLS FIRST], false, 0 - : +- Exchange hashpartitioning(ws_item_sk#14, 5) - : +- *(3) HashAggregate(keys=[ws_item_sk#14, d_date#9], functions=[sum(UnscaledValue(ws_sales_price#15))]) - : +- Exchange hashpartitioning(ws_item_sk#14, d_date#9, 5) - : +- *(2) HashAggregate(keys=[ws_item_sk#14, d_date#9], functions=[partial_sum(UnscaledValue(ws_sales_price#15))]) - : +- *(2) Project [ws_item_sk#14, ws_sales_price#15, d_date#9] - : +- *(2) BroadcastHashJoin [ws_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight - : :- *(2) Project [ws_sold_date_sk#16, ws_item_sk#14, ws_sales_price#15] - : : +- *(2) Filter (isnotnull(ws_item_sk#14) && isnotnull(ws_sold_date_sk#16)) - : : +- *(2) FileScan parquet default.web_sales[ws_sold_date_sk#16,ws_item_sk#14,ws_sales_price#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [d_date_sk#17, d_date#9] - : +- *(1) Filter (((isnotnull(d_month_seq#18) && (d_month_seq#18 >= 1200)) && (d_month_seq#18 <= 1211)) && isnotnull(d_date_sk#17)) - : +- *(1) FileScan parquet default.date_dim[d_date_sk#17,d_date#9,d_month_seq#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct - +- *(12) Sort [item_sk#8 ASC NULLS FIRST, d_date#10 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(item_sk#8, d_date#10, 5) - +- *(11) Project [item_sk#8, d_date#10, cume_sales#12] - +- Window [sum(_w0#19) windowspecdefinition(ss_item_sk#20, d_date#10 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#12], [ss_item_sk#20], [d_date#10 ASC NULLS FIRST] - +- *(10) Sort [ss_item_sk#20 ASC NULLS FIRST, d_date#10 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(ss_item_sk#20, 5) - +- *(9) HashAggregate(keys=[ss_item_sk#20, d_date#10], functions=[sum(UnscaledValue(ss_sales_price#21))]) - +- Exchange hashpartitioning(ss_item_sk#20, d_date#10, 5) - +- *(8) HashAggregate(keys=[ss_item_sk#20, d_date#10], functions=[partial_sum(UnscaledValue(ss_sales_price#21))]) - +- *(8) Project [ss_item_sk#20, ss_sales_price#21, d_date#10] - +- *(8) BroadcastHashJoin [ss_sold_date_sk#22], [d_date_sk#23], Inner, BuildRight - :- *(8) Project [ss_sold_date_sk#22, ss_item_sk#20, ss_sales_price#21] - : +- *(8) Filter (isnotnull(ss_item_sk#20) && isnotnull(ss_sold_date_sk#22)) - : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#22,ss_item_sk#20,ss_sales_price#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - +- ReusedExchange [d_date_sk#23, d_date#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +TakeOrderedAndProject (41) ++- * Filter (40) + +- Window (39) + +- * Sort (38) + +- Exchange (37) + +- * Project (36) + +- SortMergeJoin FullOuter (35) + :- * Sort (19) + : +- Exchange (18) + : +- * Project (17) + : +- Window (16) + : +- * Sort (15) + : +- Exchange (14) + : +- * HashAggregate (13) + : +- Exchange (12) + : +- * HashAggregate (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.web_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (34) + +- Exchange (33) + +- * Project (32) + +- Window (31) + +- * Sort (30) + +- Exchange (29) + +- * HashAggregate (28) + +- Exchange (27) + +- * HashAggregate (26) + +- * Project (25) + +- * BroadcastHashJoin Inner BuildRight (24) + :- * Filter (22) + : +- * ColumnarToRow (21) + : +- Scan parquet default.store_sales (20) + +- ReusedExchange (23) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3] + +(3) Filter [codegen id : 2] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3] +Condition : (isnotnull(ws_item_sk#2) AND isnotnull(ws_sold_date_sk#1)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] +Condition : (((isnotnull(d_month_seq#6) AND (d_month_seq#6 >= 1200)) AND (d_month_seq#6 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#4, d_date#5] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#6] + +(8) BroadcastExchange +Input [2]: [d_date_sk#4, d_date#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 2] +Output [3]: [ws_item_sk#2, ws_sales_price#3, d_date#5] +Input [5]: [ws_sold_date_sk#1, ws_item_sk#2, ws_sales_price#3, d_date_sk#4, d_date#5] + +(11) HashAggregate [codegen id : 2] +Input [3]: [ws_item_sk#2, ws_sales_price#3, d_date#5] +Keys [2]: [ws_item_sk#2, d_date#5] +Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#3))] +Aggregate Attributes [1]: [sum#8] +Results [3]: [ws_item_sk#2, d_date#5, sum#9] + +(12) Exchange +Input [3]: [ws_item_sk#2, d_date#5, sum#9] +Arguments: hashpartitioning(ws_item_sk#2, d_date#5, 5), true, [id=#10] + +(13) HashAggregate [codegen id : 3] +Input [3]: [ws_item_sk#2, d_date#5, sum#9] +Keys [2]: [ws_item_sk#2, d_date#5] +Functions [1]: [sum(UnscaledValue(ws_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#3))#11] +Results [4]: [ws_item_sk#2 AS item_sk#12, d_date#5, MakeDecimal(sum(UnscaledValue(ws_sales_price#3))#11,17,2) AS _w0#13, ws_item_sk#2] + +(14) Exchange +Input [4]: [item_sk#12, d_date#5, _w0#13, ws_item_sk#2] +Arguments: hashpartitioning(ws_item_sk#2, 5), true, [id=#14] + +(15) Sort [codegen id : 4] +Input [4]: [item_sk#12, d_date#5, _w0#13, ws_item_sk#2] +Arguments: [ws_item_sk#2 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(16) Window +Input [4]: [item_sk#12, d_date#5, _w0#13, ws_item_sk#2] +Arguments: [sum(_w0#13) windowspecdefinition(ws_item_sk#2, d_date#5 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#15], [ws_item_sk#2], [d_date#5 ASC NULLS FIRST] + +(17) Project [codegen id : 5] +Output [3]: [item_sk#12, d_date#5, cume_sales#15] +Input [5]: [item_sk#12, d_date#5, _w0#13, ws_item_sk#2, cume_sales#15] + +(18) Exchange +Input [3]: [item_sk#12, d_date#5, cume_sales#15] +Arguments: hashpartitioning(item_sk#12, d_date#5, 5), true, [id=#16] + +(19) Sort [codegen id : 6] +Input [3]: [item_sk#12, d_date#5, cume_sales#15] +Arguments: [item_sk#12 ASC NULLS FIRST, d_date#5 ASC NULLS FIRST], false, 0 + +(20) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#17, ss_item_sk#18, ss_sales_price#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 8] +Input [3]: [ss_sold_date_sk#17, ss_item_sk#18, ss_sales_price#19] + +(22) Filter [codegen id : 8] +Input [3]: [ss_sold_date_sk#17, ss_item_sk#18, ss_sales_price#19] +Condition : (isnotnull(ss_item_sk#18) AND isnotnull(ss_sold_date_sk#17)) + +(23) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#20, d_date#21] + +(24) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#17] +Right keys [1]: [d_date_sk#20] +Join condition: None + +(25) Project [codegen id : 8] +Output [3]: [ss_item_sk#18, ss_sales_price#19, d_date#21] +Input [5]: [ss_sold_date_sk#17, ss_item_sk#18, ss_sales_price#19, d_date_sk#20, d_date#21] + +(26) HashAggregate [codegen id : 8] +Input [3]: [ss_item_sk#18, ss_sales_price#19, d_date#21] +Keys [2]: [ss_item_sk#18, d_date#21] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#19))] +Aggregate Attributes [1]: [sum#22] +Results [3]: [ss_item_sk#18, d_date#21, sum#23] + +(27) Exchange +Input [3]: [ss_item_sk#18, d_date#21, sum#23] +Arguments: hashpartitioning(ss_item_sk#18, d_date#21, 5), true, [id=#24] + +(28) HashAggregate [codegen id : 9] +Input [3]: [ss_item_sk#18, d_date#21, sum#23] +Keys [2]: [ss_item_sk#18, d_date#21] +Functions [1]: [sum(UnscaledValue(ss_sales_price#19))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#19))#25] +Results [4]: [ss_item_sk#18 AS item_sk#26, d_date#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#19))#25,17,2) AS _w0#27, ss_item_sk#18] + +(29) Exchange +Input [4]: [item_sk#26, d_date#21, _w0#27, ss_item_sk#18] +Arguments: hashpartitioning(ss_item_sk#18, 5), true, [id=#28] + +(30) Sort [codegen id : 10] +Input [4]: [item_sk#26, d_date#21, _w0#27, ss_item_sk#18] +Arguments: [ss_item_sk#18 ASC NULLS FIRST, d_date#21 ASC NULLS FIRST], false, 0 + +(31) Window +Input [4]: [item_sk#26, d_date#21, _w0#27, ss_item_sk#18] +Arguments: [sum(_w0#27) windowspecdefinition(ss_item_sk#18, d_date#21 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#29], [ss_item_sk#18], [d_date#21 ASC NULLS FIRST] + +(32) Project [codegen id : 11] +Output [3]: [item_sk#26, d_date#21, cume_sales#29] +Input [5]: [item_sk#26, d_date#21, _w0#27, ss_item_sk#18, cume_sales#29] + +(33) Exchange +Input [3]: [item_sk#26, d_date#21, cume_sales#29] +Arguments: hashpartitioning(item_sk#26, d_date#21, 5), true, [id=#30] + +(34) Sort [codegen id : 12] +Input [3]: [item_sk#26, d_date#21, cume_sales#29] +Arguments: [item_sk#26 ASC NULLS FIRST, d_date#21 ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin +Left keys [2]: [item_sk#12, d_date#5] +Right keys [2]: [item_sk#26, d_date#21] +Join condition: None + +(36) Project [codegen id : 13] +Output [4]: [CASE WHEN isnotnull(item_sk#12) THEN item_sk#12 ELSE item_sk#26 END AS item_sk#31, CASE WHEN isnotnull(d_date#5) THEN d_date#5 ELSE d_date#21 END AS d_date#32, cume_sales#15 AS web_sales#33, cume_sales#29 AS store_sales#34] +Input [6]: [item_sk#12, d_date#5, cume_sales#15, item_sk#26, d_date#21, cume_sales#29] + +(37) Exchange +Input [4]: [item_sk#31, d_date#32, web_sales#33, store_sales#34] +Arguments: hashpartitioning(item_sk#31, 5), true, [id=#35] + +(38) Sort [codegen id : 14] +Input [4]: [item_sk#31, d_date#32, web_sales#33, store_sales#34] +Arguments: [item_sk#31 ASC NULLS FIRST, d_date#32 ASC NULLS FIRST], false, 0 + +(39) Window +Input [4]: [item_sk#31, d_date#32, web_sales#33, store_sales#34] +Arguments: [max(web_sales#33) windowspecdefinition(item_sk#31, d_date#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#36, max(store_sales#34) windowspecdefinition(item_sk#31, d_date#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#37], [item_sk#31], [d_date#32 ASC NULLS FIRST] + +(40) Filter [codegen id : 15] +Input [6]: [item_sk#31, d_date#32, web_sales#33, store_sales#34, web_cumulative#36, store_cumulative#37] +Condition : ((isnotnull(web_cumulative#36) AND isnotnull(store_cumulative#37)) AND (web_cumulative#36 > store_cumulative#37)) + +(41) TakeOrderedAndProject +Input [6]: [item_sk#31, d_date#32, web_sales#33, store_sales#34, web_cumulative#36, store_cumulative#37] +Arguments: 100, [item_sk#31 ASC NULLS FIRST, d_date#32 ASC NULLS FIRST], [item_sk#31, d_date#32, web_sales#33, store_sales#34, web_cumulative#36, store_cumulative#37] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/simplified.txt index 2b126d38a..17f35514f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q51/simplified.txt @@ -1,67 +1,71 @@ -TakeOrderedAndProject [d_date,item_sk,store_cumulative,store_sales,web_cumulative,web_sales] - WholeStageCodegen - Filter [store_cumulative,web_cumulative] +TakeOrderedAndProject [item_sk,d_date,web_sales,store_sales,web_cumulative,store_cumulative] + WholeStageCodegen (15) + Filter [web_cumulative,store_cumulative] InputAdapter - Window [d_date,item_sk,store_sales,web_sales] - WholeStageCodegen - Sort [d_date,item_sk] + Window [web_sales,item_sk,d_date,store_sales] + WholeStageCodegen (14) + Sort [item_sk,d_date] InputAdapter Exchange [item_sk] #1 - WholeStageCodegen - Project [cume_sales,cume_sales,d_date,d_date,item_sk,item_sk] + WholeStageCodegen (13) + Project [item_sk,item_sk,d_date,d_date,cume_sales,cume_sales] InputAdapter - SortMergeJoin [d_date,d_date,item_sk,item_sk] - WholeStageCodegen - Sort [d_date,item_sk] + SortMergeJoin [item_sk,d_date,item_sk,d_date] + WholeStageCodegen (6) + Sort [item_sk,d_date] InputAdapter - Exchange [d_date,item_sk] #2 - WholeStageCodegen - Project [cume_sales,d_date,item_sk] + Exchange [item_sk,d_date] #2 + WholeStageCodegen (5) + Project [item_sk,d_date,cume_sales] InputAdapter - Window [_w0,d_date,ws_item_sk] - WholeStageCodegen - Sort [d_date,ws_item_sk] + Window [_w0,ws_item_sk,d_date] + WholeStageCodegen (4) + Sort [ws_item_sk,d_date] InputAdapter Exchange [ws_item_sk] #3 - WholeStageCodegen - HashAggregate [d_date,sum,sum(UnscaledValue(ws_sales_price)),ws_item_sk] [_w0,item_sk,sum,sum(UnscaledValue(ws_sales_price))] + WholeStageCodegen (3) + HashAggregate [ws_item_sk,d_date,sum] [sum(UnscaledValue(ws_sales_price)),item_sk,_w0,sum] InputAdapter - Exchange [d_date,ws_item_sk] #4 - WholeStageCodegen - HashAggregate [d_date,sum,sum,ws_item_sk,ws_sales_price] [sum,sum] - Project [d_date,ws_item_sk,ws_sales_price] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_item_sk,ws_sales_price,ws_sold_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_item_sk,ws_sales_price,ws_sold_date_sk] [ws_item_sk,ws_sales_price,ws_sold_date_sk] + Exchange [ws_item_sk,d_date] #4 + WholeStageCodegen (2) + HashAggregate [ws_item_sk,d_date,ws_sales_price] [sum,sum] + Project [ws_item_sk,ws_sales_price,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_sales_price] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [d_date,d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] [d_date,d_date_sk,d_month_seq] - WholeStageCodegen - Sort [d_date,item_sk] + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] + WholeStageCodegen (12) + Sort [item_sk,d_date] InputAdapter - Exchange [d_date,item_sk] #6 - WholeStageCodegen - Project [cume_sales,d_date,item_sk] + Exchange [item_sk,d_date] #6 + WholeStageCodegen (11) + Project [item_sk,d_date,cume_sales] InputAdapter - Window [_w0,d_date,ss_item_sk] - WholeStageCodegen - Sort [d_date,ss_item_sk] + Window [_w0,ss_item_sk,d_date] + WholeStageCodegen (10) + Sort [ss_item_sk,d_date] InputAdapter Exchange [ss_item_sk] #7 - WholeStageCodegen - HashAggregate [d_date,ss_item_sk,sum,sum(UnscaledValue(ss_sales_price))] [_w0,item_sk,sum,sum(UnscaledValue(ss_sales_price))] + WholeStageCodegen (9) + HashAggregate [ss_item_sk,d_date,sum] [sum(UnscaledValue(ss_sales_price)),item_sk,_w0,sum] InputAdapter - Exchange [d_date,ss_item_sk] #8 - WholeStageCodegen - HashAggregate [d_date,ss_item_sk,ss_sales_price,sum,sum] [sum,sum] - Project [d_date,ss_item_sk,ss_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_item_sk,ss_sales_price,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk] + Exchange [ss_item_sk,d_date] #8 + WholeStageCodegen (8) + HashAggregate [ss_item_sk,d_date,ss_sales_price] [sum,sum] + Project [ss_item_sk,ss_sales_price,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_sales_price] InputAdapter - ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #5 + ReusedExchange [d_date_sk,d_date] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/explain.txt index 59da37fb3..47235253f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/explain.txt @@ -1,20 +1,122 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[d_year#1 ASC NULLS FIRST,ext_price#2 DESC NULLS LAST,brand_id#3 ASC NULLS FIRST], output=[d_year#1,brand_id#3,brand#4,ext_price#2]) -+- *(4) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[sum(UnscaledValue(ss_ext_sales_price#7))]) - +- Exchange hashpartitioning(d_year#1, i_brand#5, i_brand_id#6, 5) - +- *(3) HashAggregate(keys=[d_year#1, i_brand#5, i_brand_id#6], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#7))]) - +- *(3) Project [d_year#1, ss_ext_sales_price#7, i_brand_id#6, i_brand#5] - +- *(3) BroadcastHashJoin [ss_item_sk#8], [i_item_sk#9], Inner, BuildRight - :- *(3) Project [d_year#1, ss_item_sk#8, ss_ext_sales_price#7] - : +- *(3) BroadcastHashJoin [d_date_sk#10], [ss_sold_date_sk#11], Inner, BuildRight - : :- *(3) Project [d_date_sk#10, d_year#1] - : : +- *(3) Filter ((((isnotnull(d_moy#12) && isnotnull(d_year#1)) && (d_moy#12 = 11)) && (d_year#1 = 2000)) && isnotnull(d_date_sk#10)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#10,d_year#1,d_moy#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [ss_sold_date_sk#11, ss_item_sk#8, ss_ext_sales_price#7] - : +- *(1) Filter (isnotnull(ss_sold_date_sk#11) && isnotnull(ss_item_sk#8)) - : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#8,ss_ext_sales_price#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(2) Project [i_item_sk#9, i_brand_id#6, i_brand#5] - +- *(2) Filter ((isnotnull(i_manager_id#13) && (i_manager_id#13 = 1)) && isnotnull(i_item_sk#9)) - +- *(2) FileScan parquet default.item[i_item_sk#9,i_brand_id#6,i_brand#5,i_manager_id#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.item (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Condition : (isnotnull(ss_sold_date_sk#4) AND isnotnull(ss_item_sk#5)) + +(8) BroadcastExchange +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6] +Input [5]: [d_date_sk#1, d_year#2, ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 1)) AND isnotnull(i_item_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] + +(15) BroadcastExchange +Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [4]: [d_year#2, ss_ext_sales_price#6, i_brand_id#9, i_brand#10] +Input [6]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6, i_item_sk#8, i_brand_id#9, i_brand#10] + +(18) HashAggregate [codegen id : 3] +Input [4]: [d_year#2, ss_ext_sales_price#6, i_brand_id#9, i_brand#10] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum#13] +Results [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] + +(19) Exchange +Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Arguments: hashpartitioning(d_year#2, i_brand#10, i_brand_id#9, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#16] +Results [4]: [d_year#2, i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#16,17,2) AS ext_price#19] + +(21) TakeOrderedAndProject +Input [4]: [d_year#2, brand_id#17, brand#18, ext_price#19] +Arguments: 100, [d_year#2 ASC NULLS FIRST, ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#2, brand_id#17, brand#18, ext_price#19] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/simplified.txt index 583e5acae..ba48ad5b6 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q52/simplified.txt @@ -1,26 +1,31 @@ -TakeOrderedAndProject [brand,brand_id,d_year,ext_price] - WholeStageCodegen - HashAggregate [d_year,i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] +TakeOrderedAndProject [d_year,ext_price,brand_id,brand] + WholeStageCodegen (4) + HashAggregate [d_year,i_brand,i_brand_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,ext_price,sum] InputAdapter Exchange [d_year,i_brand,i_brand_id] #1 - WholeStageCodegen - HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [d_year,i_brand,i_brand_id,ss_ext_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [d_year,ss_ext_sales_price,ss_item_sk] + WholeStageCodegen (3) + HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [d_year,ss_item_sk,ss_ext_sales_price] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] Project [d_date_sk,d_year] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + WholeStageCodegen (1) + Filter [ss_sold_date_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [i_brand,i_brand_id,i_item_sk] - Filter [i_item_sk,i_manager_id] - Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] [i_brand,i_brand_id,i_item_sk,i_manager_id] + WholeStageCodegen (2) + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/explain.txt index eaf2bf3ee..249f74e73 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/explain.txt @@ -1,31 +1,180 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[avg_quarterly_sales#1 ASC NULLS FIRST,sum_sales#2 ASC NULLS FIRST,i_manufact_id#3 ASC NULLS FIRST], output=[i_manufact_id#3,sum_sales#2,avg_quarterly_sales#1]) -+- *(7) Project [i_manufact_id#3, sum_sales#2, avg_quarterly_sales#1] - +- *(7) Filter (CASE WHEN (avg_quarterly_sales#1 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#2 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#1 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#1 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000) - +- Window [avg(_w0#4) windowspecdefinition(i_manufact_id#3, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#1], [i_manufact_id#3] - +- *(6) Sort [i_manufact_id#3 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(i_manufact_id#3, 5) - +- *(5) HashAggregate(keys=[i_manufact_id#3, d_qoy#5], functions=[sum(UnscaledValue(ss_sales_price#6))]) - +- Exchange hashpartitioning(i_manufact_id#3, d_qoy#5, 5) - +- *(4) HashAggregate(keys=[i_manufact_id#3, d_qoy#5], functions=[partial_sum(UnscaledValue(ss_sales_price#6))]) - +- *(4) Project [i_manufact_id#3, ss_sales_price#6, d_qoy#5] - +- *(4) BroadcastHashJoin [ss_store_sk#7], [s_store_sk#8], Inner, BuildRight - :- *(4) Project [i_manufact_id#3, ss_store_sk#7, ss_sales_price#6, d_qoy#5] - : +- *(4) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight - : :- *(4) Project [i_manufact_id#3, ss_sold_date_sk#9, ss_store_sk#7, ss_sales_price#6] - : : +- *(4) BroadcastHashJoin [i_item_sk#11], [ss_item_sk#12], Inner, BuildRight - : : :- *(4) Project [i_item_sk#11, i_manufact_id#3] - : : : +- *(4) Filter ((((i_category#13 IN (Books,Children,Electronics) && i_class#14 IN (personal,portable,reference,self-help)) && i_brand#15 IN (scholaramalgamalg #16,scholaramalgamalg #17,exportiunivamalg #18,scholaramalgamalg #18)) || ((i_category#13 IN (Women,Music,Men) && i_class#14 IN (accessories,classical,fragrances,pants)) && i_brand#15 IN (amalgimporto #19,edu packscholar #19,exportiimporto #19,importoamalg #19))) && isnotnull(i_item_sk#11)) - : : : +- *(4) FileScan parquet default.item[i_item_sk#11,i_brand#15,i_class#14,i_category#13,i_manufact_id#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,reference..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : +- *(1) Project [ss_sold_date_sk#9, ss_item_sk#12, ss_store_sk#7, ss_sales_price#6] - : : +- *(1) Filter ((isnotnull(ss_item_sk#12) && isnotnull(ss_sold_date_sk#9)) && isnotnull(ss_store_sk#7)) - : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#12,ss_store_sk#7,ss_sales_price#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#10, d_qoy#5] - : +- *(2) Filter (d_month_seq#20 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) && isnotnull(d_date_sk#10)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#10,d_month_seq#20,d_qoy#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [s_store_sk#8] - +- *(3) Filter isnotnull(s_store_sk#8) - +- *(3) FileScan parquet default.store[s_store_sk#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (32) ++- * Project (31) + +- * Filter (30) + +- Window (29) + +- * Sort (28) + +- Exchange (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (17) + : +- * BroadcastHashJoin Inner BuildRight (16) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- BroadcastExchange (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet default.store_sales (5) + : +- BroadcastExchange (15) + : +- * Project (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.date_dim (11) + +- BroadcastExchange (21) + +- * Filter (20) + +- * ColumnarToRow (19) + +- Scan parquet default.store (18) + + +(1) Scan parquet default.item +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,reference,self-help])),In(i_brand, [scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8])),And(And(In(i_category, [Women,Music,Men]),In(i_class, [accessories,classical,fragrances,pants])),In(i_brand, [amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] + +(3) Filter [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] +Condition : ((((i_category#4 IN (Books,Children,Electronics) AND i_class#3 IN (personal,portable,reference,self-help)) AND i_brand#2 IN (scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8)) OR ((i_category#4 IN (Women,Music,Men) AND i_class#3 IN (accessories,classical,fragrances,pants)) AND i_brand#2 IN (amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9))) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 4] +Output [2]: [i_item_sk#1, i_manufact_id#5] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] + +(5) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] + +(7) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Condition : ((isnotnull(ss_item_sk#11) AND isnotnull(ss_sold_date_sk#10)) AND isnotnull(ss_store_sk#12)) + +(8) BroadcastExchange +Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#14] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#11] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [i_manufact_id#5, ss_sold_date_sk#10, ss_store_sk#12, ss_sales_price#13] +Input [6]: [i_item_sk#1, i_manufact_id#5, ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] + +(11) Scan parquet default.date_dim +Output [3]: [d_date_sk#15, d_month_seq#16, d_qoy#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#15, d_month_seq#16, d_qoy#17] + +(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#15, d_month_seq#16, d_qoy#17] +Condition : (d_month_seq#16 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) AND isnotnull(d_date_sk#15)) + +(14) Project [codegen id : 2] +Output [2]: [d_date_sk#15, d_qoy#17] +Input [3]: [d_date_sk#15, d_month_seq#16, d_qoy#17] + +(15) BroadcastExchange +Input [2]: [d_date_sk#15, d_qoy#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#10] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(17) Project [codegen id : 4] +Output [4]: [i_manufact_id#5, ss_store_sk#12, ss_sales_price#13, d_qoy#17] +Input [6]: [i_manufact_id#5, ss_sold_date_sk#10, ss_store_sk#12, ss_sales_price#13, d_date_sk#15, d_qoy#17] + +(18) Scan parquet default.store +Output [1]: [s_store_sk#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [1]: [s_store_sk#19] + +(20) Filter [codegen id : 3] +Input [1]: [s_store_sk#19] +Condition : isnotnull(s_store_sk#19) + +(21) BroadcastExchange +Input [1]: [s_store_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#12] +Right keys [1]: [s_store_sk#19] +Join condition: None + +(23) Project [codegen id : 4] +Output [3]: [i_manufact_id#5, ss_sales_price#13, d_qoy#17] +Input [5]: [i_manufact_id#5, ss_store_sk#12, ss_sales_price#13, d_qoy#17, s_store_sk#19] + +(24) HashAggregate [codegen id : 4] +Input [3]: [i_manufact_id#5, ss_sales_price#13, d_qoy#17] +Keys [2]: [i_manufact_id#5, d_qoy#17] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#13))] +Aggregate Attributes [1]: [sum#21] +Results [3]: [i_manufact_id#5, d_qoy#17, sum#22] + +(25) Exchange +Input [3]: [i_manufact_id#5, d_qoy#17, sum#22] +Arguments: hashpartitioning(i_manufact_id#5, d_qoy#17, 5), true, [id=#23] + +(26) HashAggregate [codegen id : 5] +Input [3]: [i_manufact_id#5, d_qoy#17, sum#22] +Keys [2]: [i_manufact_id#5, d_qoy#17] +Functions [1]: [sum(UnscaledValue(ss_sales_price#13))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#13))#24] +Results [3]: [i_manufact_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#24,17,2) AS sum_sales#25, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#24,17,2) AS _w0#26] + +(27) Exchange +Input [3]: [i_manufact_id#5, sum_sales#25, _w0#26] +Arguments: hashpartitioning(i_manufact_id#5, 5), true, [id=#27] + +(28) Sort [codegen id : 6] +Input [3]: [i_manufact_id#5, sum_sales#25, _w0#26] +Arguments: [i_manufact_id#5 ASC NULLS FIRST], false, 0 + +(29) Window +Input [3]: [i_manufact_id#5, sum_sales#25, _w0#26] +Arguments: [avg(_w0#26) windowspecdefinition(i_manufact_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#28], [i_manufact_id#5] + +(30) Filter [codegen id : 7] +Input [4]: [i_manufact_id#5, sum_sales#25, _w0#26, avg_quarterly_sales#28] +Condition : (CASE WHEN (avg_quarterly_sales#28 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#25 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#28 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_quarterly_sales#28 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000) + +(31) Project [codegen id : 7] +Output [3]: [i_manufact_id#5, sum_sales#25, avg_quarterly_sales#28] +Input [4]: [i_manufact_id#5, sum_sales#25, _w0#26, avg_quarterly_sales#28] + +(32) TakeOrderedAndProject +Input [3]: [i_manufact_id#5, sum_sales#25, avg_quarterly_sales#28] +Arguments: 100, [avg_quarterly_sales#28 ASC NULLS FIRST, sum_sales#25 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#25, avg_quarterly_sales#28] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/simplified.txt index ace96279f..cd55b60d2 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q53/simplified.txt @@ -1,43 +1,49 @@ -TakeOrderedAndProject [avg_quarterly_sales,i_manufact_id,sum_sales] - WholeStageCodegen - Project [avg_quarterly_sales,i_manufact_id,sum_sales] +TakeOrderedAndProject [avg_quarterly_sales,sum_sales,i_manufact_id] + WholeStageCodegen (7) + Project [i_manufact_id,sum_sales,avg_quarterly_sales] Filter [avg_quarterly_sales,sum_sales] InputAdapter Window [_w0,i_manufact_id] - WholeStageCodegen + WholeStageCodegen (6) Sort [i_manufact_id] InputAdapter Exchange [i_manufact_id] #1 - WholeStageCodegen - HashAggregate [d_qoy,i_manufact_id,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + WholeStageCodegen (5) + HashAggregate [i_manufact_id,d_qoy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] InputAdapter - Exchange [d_qoy,i_manufact_id] #2 - WholeStageCodegen - HashAggregate [d_qoy,i_manufact_id,ss_sales_price,sum,sum] [sum,sum] - Project [d_qoy,i_manufact_id,ss_sales_price] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [d_qoy,i_manufact_id,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_manufact_id,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Exchange [i_manufact_id,d_qoy] #2 + WholeStageCodegen (4) + HashAggregate [i_manufact_id,d_qoy,ss_sales_price] [sum,sum] + Project [i_manufact_id,ss_sales_price,d_qoy] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_manufact_id,ss_store_sk,ss_sales_price,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_manufact_id,ss_sold_date_sk,ss_store_sk,ss_sales_price] BroadcastHashJoin [i_item_sk,ss_item_sk] Project [i_item_sk,i_manufact_id] - Filter [i_brand,i_category,i_class,i_item_sk] - Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_manufact_id] [i_brand,i_category,i_class,i_item_sk,i_manufact_id] + Filter [i_category,i_class,i_brand,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand,i_class,i_category,i_manufact_id] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date_sk,d_qoy] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq,d_qoy] [d_date_sk,d_month_seq,d_qoy] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_qoy] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_sk] [s_store_sk] + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt index 90bf0d52e..2e93e2605 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/explain.txt @@ -1,88 +1,459 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[segment#1 ASC NULLS FIRST,num_customers#2 ASC NULLS FIRST], output=[segment#1,num_customers#2,segment_base#3]) -+- *(13) HashAggregate(keys=[segment#1], functions=[count(1)]) - +- Exchange hashpartitioning(segment#1, 5) - +- *(12) HashAggregate(keys=[segment#1], functions=[partial_count(1)]) - +- *(12) HashAggregate(keys=[c_customer_sk#4], functions=[sum(UnscaledValue(ss_ext_sales_price#5))]) - +- Exchange hashpartitioning(c_customer_sk#4, 5) - +- *(11) HashAggregate(keys=[c_customer_sk#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#5))]) - +- *(11) Project [c_customer_sk#4, ss_ext_sales_price#5] - +- *(11) BroadcastHashJoin [ss_sold_date_sk#6], [d_date_sk#7], Inner, BuildRight - :- *(11) Project [c_customer_sk#4, ss_sold_date_sk#6, ss_ext_sales_price#5] - : +- *(11) BroadcastHashJoin [ca_county#8, ca_state#9], [s_county#10, s_state#11], Inner, BuildRight - : :- *(11) Project [c_customer_sk#4, ss_sold_date_sk#6, ss_ext_sales_price#5, ca_county#8, ca_state#9] - : : +- *(11) BroadcastHashJoin [c_current_addr_sk#12], [ca_address_sk#13], Inner, BuildRight - : : :- *(11) Project [c_customer_sk#4, c_current_addr_sk#12, ss_sold_date_sk#6, ss_ext_sales_price#5] - : : : +- *(11) BroadcastHashJoin [c_customer_sk#4], [ss_customer_sk#14], Inner, BuildRight - : : : :- *(11) HashAggregate(keys=[c_customer_sk#4, c_current_addr_sk#12], functions=[]) - : : : : +- Exchange hashpartitioning(c_customer_sk#4, c_current_addr_sk#12, 5) - : : : : +- *(6) HashAggregate(keys=[c_customer_sk#4, c_current_addr_sk#12], functions=[]) - : : : : +- *(6) Project [c_customer_sk#4, c_current_addr_sk#12] - : : : : +- *(6) BroadcastHashJoin [customer_sk#15], [c_customer_sk#4], Inner, BuildRight - : : : : :- *(6) Project [customer_sk#15] - : : : : : +- *(6) BroadcastHashJoin [sold_date_sk#16], [d_date_sk#7], Inner, BuildRight - : : : : : :- *(6) Project [sold_date_sk#16, customer_sk#15] - : : : : : : +- *(6) BroadcastHashJoin [item_sk#17], [i_item_sk#18], Inner, BuildRight - : : : : : : :- Union - : : : : : : : :- *(1) Project [cs_sold_date_sk#19 AS sold_date_sk#16, cs_bill_customer_sk#20 AS customer_sk#15, cs_item_sk#21 AS item_sk#17] - : : : : : : : : +- *(1) Filter ((isnotnull(cs_item_sk#21) && isnotnull(cs_sold_date_sk#19)) && isnotnull(cs_bill_customer_sk#20)) - : : : : : : : : +- *(1) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_customer_sk#20,cs_item_sk#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct - : : : : : : : +- *(2) Project [ws_sold_date_sk#22 AS sold_date_sk#23, ws_bill_customer_sk#24 AS customer_sk#25, ws_item_sk#26 AS item_sk#27] - : : : : : : : +- *(2) Filter ((isnotnull(ws_item_sk#26) && isnotnull(ws_sold_date_sk#22)) && isnotnull(ws_bill_customer_sk#24)) - : : : : : : : +- *(2) FileScan parquet default.web_sales[ws_sold_date_sk#22,ws_item_sk#26,ws_bill_customer_sk#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct - : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : +- *(3) Project [i_item_sk#18] - : : : : : : +- *(3) Filter ((((isnotnull(i_category#28) && isnotnull(i_class#29)) && (i_category#28 = Women)) && (i_class#29 = maternity)) && isnotnull(i_item_sk#18)) - : : : : : : +- *(3) FileScan parquet default.item[i_item_sk#18,i_class#29,i_category#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women), EqualTo(i_class,maternity)..., ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(4) Project [d_date_sk#7] - : : : : : +- *(4) Filter ((((isnotnull(d_moy#30) && isnotnull(d_year#31)) && (d_moy#30 = 12)) && (d_year#31 = 1998)) && isnotnull(d_date_sk#7)) - : : : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#7,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(5) Project [c_customer_sk#4, c_current_addr_sk#12] - : : : : +- *(5) Filter (isnotnull(c_customer_sk#4) && isnotnull(c_current_addr_sk#12)) - : : : : +- *(5) FileScan parquet default.customer[c_customer_sk#4,c_current_addr_sk#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : : +- *(7) Project [ss_sold_date_sk#6, ss_customer_sk#14, ss_ext_sales_price#5] - : : : +- *(7) Filter (isnotnull(ss_customer_sk#14) && isnotnull(ss_sold_date_sk#6)) - : : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#6,ss_customer_sk#14,ss_ext_sales_price#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(8) Project [ca_address_sk#13, ca_county#8, ca_state#9] - : : +- *(8) Filter ((isnotnull(ca_address_sk#13) && isnotnull(ca_state#9)) && isnotnull(ca_county#8)) - : : +- *(8) FileScan parquet default.customer_address[ca_address_sk#13,ca_county#8,ca_state#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state), IsNotNull(ca_county)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true])) - : +- *(9) Project [s_county#10, s_state#11] - : +- *(9) Filter (isnotnull(s_state#11) && isnotnull(s_county#10)) - : +- *(9) FileScan parquet default.store[s_county#10,s_state#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_state), IsNotNull(s_county)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(10) Project [d_date_sk#7] - +- *(10) Filter (((isnotnull(d_month_seq#32) && (d_month_seq#32 >= Subquery subquery9542)) && (d_month_seq#32 <= Subquery subquery9543)) && isnotnull(d_date_sk#7)) - : :- Subquery subquery9542 - : : +- *(2) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) - : : +- Exchange hashpartitioning((d_month_seq + 1)#33, 5) - : : +- *(1) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) - : : +- *(1) Project [(d_month_seq#32 + 1) AS (d_month_seq + 1)#33] - : : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) - : : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct - : +- Subquery subquery9543 - : +- *(2) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) - : +- Exchange hashpartitioning((d_month_seq + 3)#34, 5) - : +- *(1) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) - : +- *(1) Project [(d_month_seq#32 + 3) AS (d_month_seq + 3)#34] - : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) - : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct - +- *(10) FileScan parquet default.date_dim[d_date_sk#7,d_month_seq#32] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct - :- Subquery subquery9542 - : +- *(2) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) - : +- Exchange hashpartitioning((d_month_seq + 1)#33, 5) - : +- *(1) HashAggregate(keys=[(d_month_seq + 1)#33], functions=[]) - : +- *(1) Project [(d_month_seq#32 + 1) AS (d_month_seq + 1)#33] - : +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) - : +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct - +- Subquery subquery9543 - +- *(2) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) - +- Exchange hashpartitioning((d_month_seq + 3)#34, 5) - +- *(1) HashAggregate(keys=[(d_month_seq + 3)#34], functions=[]) - +- *(1) Project [(d_month_seq#32 + 3) AS (d_month_seq + 3)#34] - +- *(1) Filter (((isnotnull(d_year#31) && isnotnull(d_moy#30)) && (d_year#31 = 1998)) && (d_moy#30 = 12)) - +- *(1) FileScan parquet default.date_dim[d_month_seq#32,d_year#31,d_moy#30] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (64) ++- * HashAggregate (63) + +- Exchange (62) + +- * HashAggregate (61) + +- * HashAggregate (60) + +- Exchange (59) + +- * HashAggregate (58) + +- * Project (57) + +- * BroadcastHashJoin Inner BuildRight (56) + :- * Project (50) + : +- * BroadcastHashJoin Inner BuildRight (49) + : :- * Project (44) + : : +- * BroadcastHashJoin Inner BuildRight (43) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * HashAggregate (32) + : : : : +- Exchange (31) + : : : : +- * HashAggregate (30) + : : : : +- * Project (29) + : : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : : :- * Project (23) + : : : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : : : :- * Project (16) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : : : :- Union (9) + : : : : : : : :- * Project (4) + : : : : : : : : +- * Filter (3) + : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : : : +- * Project (8) + : : : : : : : +- * Filter (7) + : : : : : : : +- * ColumnarToRow (6) + : : : : : : : +- Scan parquet default.web_sales (5) + : : : : : : +- BroadcastExchange (14) + : : : : : : +- * Project (13) + : : : : : : +- * Filter (12) + : : : : : : +- * ColumnarToRow (11) + : : : : : : +- Scan parquet default.item (10) + : : : : : +- BroadcastExchange (21) + : : : : : +- * Project (20) + : : : : : +- * Filter (19) + : : : : : +- * ColumnarToRow (18) + : : : : : +- Scan parquet default.date_dim (17) + : : : : +- BroadcastExchange (27) + : : : : +- * Filter (26) + : : : : +- * ColumnarToRow (25) + : : : : +- Scan parquet default.customer (24) + : : : +- BroadcastExchange (36) + : : : +- * Filter (35) + : : : +- * ColumnarToRow (34) + : : : +- Scan parquet default.store_sales (33) + : : +- BroadcastExchange (42) + : : +- * Filter (41) + : : +- * ColumnarToRow (40) + : : +- Scan parquet default.customer_address (39) + : +- BroadcastExchange (48) + : +- * Filter (47) + : +- * ColumnarToRow (46) + : +- Scan parquet default.store (45) + +- BroadcastExchange (55) + +- * Project (54) + +- * Filter (53) + +- * ColumnarToRow (52) + +- Scan parquet default.date_dim (51) + + +(1) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] + +(3) Filter [codegen id : 1] +Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] +Condition : ((isnotnull(cs_item_sk#3) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_bill_customer_sk#2)) + +(4) Project [codegen id : 1] +Output [3]: [cs_sold_date_sk#1 AS sold_date_sk#4, cs_bill_customer_sk#2 AS customer_sk#5, cs_item_sk#3 AS item_sk#6] +Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] + +(5) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 2] +Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] + +(7) Filter [codegen id : 2] +Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] +Condition : ((isnotnull(ws_item_sk#8) AND isnotnull(ws_sold_date_sk#7)) AND isnotnull(ws_bill_customer_sk#9)) + +(8) Project [codegen id : 2] +Output [3]: [ws_sold_date_sk#7 AS sold_date_sk#10, ws_bill_customer_sk#9 AS customer_sk#11, ws_item_sk#8 AS item_sk#12] +Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] + +(9) Union + +(10) Scan parquet default.item +Output [3]: [i_item_sk#13, i_class#14, i_category#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women), EqualTo(i_class,maternity), IsNotNull(i_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [3]: [i_item_sk#13, i_class#14, i_category#15] + +(12) Filter [codegen id : 3] +Input [3]: [i_item_sk#13, i_class#14, i_category#15] +Condition : ((((isnotnull(i_category#15) AND isnotnull(i_class#14)) AND (i_category#15 = Women)) AND (i_class#14 = maternity)) AND isnotnull(i_item_sk#13)) + +(13) Project [codegen id : 3] +Output [1]: [i_item_sk#13] +Input [3]: [i_item_sk#13, i_class#14, i_category#15] + +(14) BroadcastExchange +Input [1]: [i_item_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(15) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [item_sk#6] +Right keys [1]: [i_item_sk#13] +Join condition: None + +(16) Project [codegen id : 6] +Output [2]: [sold_date_sk#4, customer_sk#5] +Input [4]: [sold_date_sk#4, customer_sk#5, item_sk#6, i_item_sk#13] + +(17) Scan parquet default.date_dim +Output [3]: [d_date_sk#17, d_year#18, d_moy#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#17, d_year#18, d_moy#19] + +(19) Filter [codegen id : 4] +Input [3]: [d_date_sk#17, d_year#18, d_moy#19] +Condition : ((((isnotnull(d_moy#19) AND isnotnull(d_year#18)) AND (d_moy#19 = 12)) AND (d_year#18 = 1998)) AND isnotnull(d_date_sk#17)) + +(20) Project [codegen id : 4] +Output [1]: [d_date_sk#17] +Input [3]: [d_date_sk#17, d_year#18, d_moy#19] + +(21) BroadcastExchange +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(22) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sold_date_sk#4] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(23) Project [codegen id : 6] +Output [1]: [customer_sk#5] +Input [3]: [sold_date_sk#4, customer_sk#5, d_date_sk#17] + +(24) Scan parquet default.customer +Output [2]: [c_customer_sk#21, c_current_addr_sk#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 5] +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] + +(26) Filter [codegen id : 5] +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_current_addr_sk#22)) + +(27) BroadcastExchange +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [customer_sk#5] +Right keys [1]: [c_customer_sk#21] +Join condition: None + +(29) Project [codegen id : 6] +Output [2]: [c_customer_sk#21, c_current_addr_sk#22] +Input [3]: [customer_sk#5, c_customer_sk#21, c_current_addr_sk#22] + +(30) HashAggregate [codegen id : 6] +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] +Keys [2]: [c_customer_sk#21, c_current_addr_sk#22] +Functions: [] +Aggregate Attributes: [] +Results [2]: [c_customer_sk#21, c_current_addr_sk#22] + +(31) Exchange +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] +Arguments: hashpartitioning(c_customer_sk#21, c_current_addr_sk#22, 5), true, [id=#24] + +(32) HashAggregate [codegen id : 11] +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] +Keys [2]: [c_customer_sk#21, c_current_addr_sk#22] +Functions: [] +Aggregate Attributes: [] +Results [2]: [c_customer_sk#21, c_current_addr_sk#22] + +(33) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 7] +Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] + +(35) Filter [codegen id : 7] +Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] +Condition : (isnotnull(ss_customer_sk#26) AND isnotnull(ss_sold_date_sk#25)) + +(36) BroadcastExchange +Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#28] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_customer_sk#21] +Right keys [1]: [ss_customer_sk#26] +Join condition: None + +(38) Project [codegen id : 11] +Output [4]: [c_customer_sk#21, c_current_addr_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27] +Input [5]: [c_customer_sk#21, c_current_addr_sk#22, ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] + +(39) Scan parquet default.customer_address +Output [3]: [ca_address_sk#29, ca_county#30, ca_state#31] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county), IsNotNull(ca_state)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 8] +Input [3]: [ca_address_sk#29, ca_county#30, ca_state#31] + +(41) Filter [codegen id : 8] +Input [3]: [ca_address_sk#29, ca_county#30, ca_state#31] +Condition : ((isnotnull(ca_address_sk#29) AND isnotnull(ca_county#30)) AND isnotnull(ca_state#31)) + +(42) BroadcastExchange +Input [3]: [ca_address_sk#29, ca_county#30, ca_state#31] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#32] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_current_addr_sk#22] +Right keys [1]: [ca_address_sk#29] +Join condition: None + +(44) Project [codegen id : 11] +Output [5]: [c_customer_sk#21, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_county#30, ca_state#31] +Input [7]: [c_customer_sk#21, c_current_addr_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_address_sk#29, ca_county#30, ca_state#31] + +(45) Scan parquet default.store +Output [2]: [s_county#33, s_state#34] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_county), IsNotNull(s_state)] +ReadSchema: struct + +(46) ColumnarToRow [codegen id : 9] +Input [2]: [s_county#33, s_state#34] + +(47) Filter [codegen id : 9] +Input [2]: [s_county#33, s_state#34] +Condition : (isnotnull(s_county#33) AND isnotnull(s_state#34)) + +(48) BroadcastExchange +Input [2]: [s_county#33, s_state#34] +Arguments: HashedRelationBroadcastMode(List(input[0, string, false], input[1, string, false]),false), [id=#35] + +(49) BroadcastHashJoin [codegen id : 11] +Left keys [2]: [ca_county#30, ca_state#31] +Right keys [2]: [s_county#33, s_state#34] +Join condition: None + +(50) Project [codegen id : 11] +Output [3]: [c_customer_sk#21, ss_sold_date_sk#25, ss_ext_sales_price#27] +Input [7]: [c_customer_sk#21, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_county#30, ca_state#31, s_county#33, s_state#34] + +(51) Scan parquet default.date_dim +Output [2]: [d_date_sk#17, d_month_seq#36] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 10] +Input [2]: [d_date_sk#17, d_month_seq#36] + +(53) Filter [codegen id : 10] +Input [2]: [d_date_sk#17, d_month_seq#36] +Condition : (((isnotnull(d_month_seq#36) AND (d_month_seq#36 >= Subquery scalar-subquery#37, [id=#38])) AND (d_month_seq#36 <= Subquery scalar-subquery#39, [id=#40])) AND isnotnull(d_date_sk#17)) + +(54) Project [codegen id : 10] +Output [1]: [d_date_sk#17] +Input [2]: [d_date_sk#17, d_month_seq#36] + +(55) BroadcastExchange +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#41] + +(56) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_sold_date_sk#25] +Right keys [1]: [d_date_sk#17] +Join condition: None + +(57) Project [codegen id : 11] +Output [2]: [c_customer_sk#21, ss_ext_sales_price#27] +Input [4]: [c_customer_sk#21, ss_sold_date_sk#25, ss_ext_sales_price#27, d_date_sk#17] + +(58) HashAggregate [codegen id : 11] +Input [2]: [c_customer_sk#21, ss_ext_sales_price#27] +Keys [1]: [c_customer_sk#21] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#27))] +Aggregate Attributes [1]: [sum#42] +Results [2]: [c_customer_sk#21, sum#43] + +(59) Exchange +Input [2]: [c_customer_sk#21, sum#43] +Arguments: hashpartitioning(c_customer_sk#21, 5), true, [id=#44] + +(60) HashAggregate [codegen id : 12] +Input [2]: [c_customer_sk#21, sum#43] +Keys [1]: [c_customer_sk#21] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#27))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#27))#45] +Results [1]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#27))#45,17,2)) / 50.00), DecimalType(21,6), true) as int) AS segment#46] + +(61) HashAggregate [codegen id : 12] +Input [1]: [segment#46] +Keys [1]: [segment#46] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#47] +Results [2]: [segment#46, count#48] + +(62) Exchange +Input [2]: [segment#46, count#48] +Arguments: hashpartitioning(segment#46, 5), true, [id=#49] + +(63) HashAggregate [codegen id : 13] +Input [2]: [segment#46, count#48] +Keys [1]: [segment#46] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#50] +Results [3]: [segment#46, count(1)#50 AS num_customers#51, (segment#46 * 50) AS segment_base#52] + +(64) TakeOrderedAndProject +Input [3]: [segment#46, num_customers#51, segment_base#52] +Arguments: 100, [segment#46 ASC NULLS FIRST, num_customers#51 ASC NULLS FIRST], [segment#46, num_customers#51, segment_base#52] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 53 Hosting Expression = Subquery scalar-subquery#37, [id=#38] +* HashAggregate (71) ++- Exchange (70) + +- * HashAggregate (69) + +- * Project (68) + +- * Filter (67) + +- * ColumnarToRow (66) + +- Scan parquet default.date_dim (65) + + +(65) Scan parquet default.date_dim +Output [3]: [d_month_seq#36, d_year#18, d_moy#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] +ReadSchema: struct + +(66) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#36, d_year#18, d_moy#19] + +(67) Filter [codegen id : 1] +Input [3]: [d_month_seq#36, d_year#18, d_moy#19] +Condition : (((isnotnull(d_year#18) AND isnotnull(d_moy#19)) AND (d_year#18 = 1998)) AND (d_moy#19 = 12)) + +(68) Project [codegen id : 1] +Output [1]: [(d_month_seq#36 + 1) AS (d_month_seq + 1)#53] +Input [3]: [d_month_seq#36, d_year#18, d_moy#19] + +(69) HashAggregate [codegen id : 1] +Input [1]: [(d_month_seq + 1)#53] +Keys [1]: [(d_month_seq + 1)#53] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 1)#53] + +(70) Exchange +Input [1]: [(d_month_seq + 1)#53] +Arguments: hashpartitioning((d_month_seq + 1)#53, 5), true, [id=#54] + +(71) HashAggregate [codegen id : 2] +Input [1]: [(d_month_seq + 1)#53] +Keys [1]: [(d_month_seq + 1)#53] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 1)#53] + +Subquery:2 Hosting operator id = 53 Hosting Expression = Subquery scalar-subquery#39, [id=#40] +* HashAggregate (78) ++- Exchange (77) + +- * HashAggregate (76) + +- * Project (75) + +- * Filter (74) + +- * ColumnarToRow (73) + +- Scan parquet default.date_dim (72) + + +(72) Scan parquet default.date_dim +Output [3]: [d_month_seq#36, d_year#18, d_moy#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] +ReadSchema: struct + +(73) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#36, d_year#18, d_moy#19] + +(74) Filter [codegen id : 1] +Input [3]: [d_month_seq#36, d_year#18, d_moy#19] +Condition : (((isnotnull(d_year#18) AND isnotnull(d_moy#19)) AND (d_year#18 = 1998)) AND (d_moy#19 = 12)) + +(75) Project [codegen id : 1] +Output [1]: [(d_month_seq#36 + 3) AS (d_month_seq + 3)#55] +Input [3]: [d_month_seq#36, d_year#18, d_moy#19] + +(76) HashAggregate [codegen id : 1] +Input [1]: [(d_month_seq + 3)#55] +Keys [1]: [(d_month_seq + 3)#55] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 3)#55] + +(77) Exchange +Input [1]: [(d_month_seq + 3)#55] +Arguments: hashpartitioning((d_month_seq + 3)#55, 5), true, [id=#56] + +(78) HashAggregate [codegen id : 2] +Input [1]: [(d_month_seq + 3)#55] +Keys [1]: [(d_month_seq + 3)#55] +Functions: [] +Aggregate Attributes: [] +Results [1]: [(d_month_seq + 3)#55] + + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/simplified.txt index 4abf2165f..dd92c52db 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q54/simplified.txt @@ -1,123 +1,121 @@ -TakeOrderedAndProject [num_customers,segment,segment_base] - WholeStageCodegen - HashAggregate [count,count(1),segment] [count,count(1),num_customers,segment_base] +TakeOrderedAndProject [segment,num_customers,segment_base] + WholeStageCodegen (13) + HashAggregate [segment,count] [count(1),num_customers,segment_base,count] InputAdapter Exchange [segment] #1 - WholeStageCodegen - HashAggregate [count,count,segment] [count,count] - HashAggregate [c_customer_sk,sum,sum(UnscaledValue(ss_ext_sales_price))] [segment,sum,sum(UnscaledValue(ss_ext_sales_price))] + WholeStageCodegen (12) + HashAggregate [segment] [count,count] + HashAggregate [c_customer_sk,sum] [sum(UnscaledValue(ss_ext_sales_price)),segment,sum] InputAdapter Exchange [c_customer_sk] #2 - WholeStageCodegen - HashAggregate [c_customer_sk,ss_ext_sales_price,sum,sum] [sum,sum] + WholeStageCodegen (11) + HashAggregate [c_customer_sk,ss_ext_sales_price] [sum,sum] Project [c_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [c_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_sk,ss_sold_date_sk,ss_ext_sales_price] BroadcastHashJoin [ca_county,ca_state,s_county,s_state] - Project [c_customer_sk,ca_county,ca_state,ss_ext_sales_price,ss_sold_date_sk] + Project [c_customer_sk,ss_sold_date_sk,ss_ext_sales_price,ca_county,ca_state] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,c_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + Project [c_customer_sk,c_current_addr_sk,ss_sold_date_sk,ss_ext_sales_price] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - HashAggregate [c_current_addr_sk,c_customer_sk] + HashAggregate [c_customer_sk,c_current_addr_sk] InputAdapter - Exchange [c_current_addr_sk,c_customer_sk] #3 - WholeStageCodegen - HashAggregate [c_current_addr_sk,c_customer_sk] - Project [c_current_addr_sk,c_customer_sk] - BroadcastHashJoin [c_customer_sk,customer_sk] + Exchange [c_customer_sk,c_current_addr_sk] #3 + WholeStageCodegen (6) + HashAggregate [c_customer_sk,c_current_addr_sk] + Project [c_customer_sk,c_current_addr_sk] + BroadcastHashJoin [customer_sk,c_customer_sk] Project [customer_sk] - BroadcastHashJoin [d_date_sk,sold_date_sk] - Project [customer_sk,sold_date_sk] - BroadcastHashJoin [i_item_sk,item_sk] + BroadcastHashJoin [sold_date_sk,d_date_sk] + Project [sold_date_sk,customer_sk] + BroadcastHashJoin [item_sk,i_item_sk] InputAdapter Union - WholeStageCodegen - Project [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - WholeStageCodegen - Project [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] - Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] + WholeStageCodegen (1) + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + Filter [cs_item_sk,cs_sold_date_sk,cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + WholeStageCodegen (2) + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (3) Project [i_item_sk] Filter [i_category,i_class,i_item_sk] - Scan parquet default.item [i_category,i_class,i_item_sk] [i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_class,i_category] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (4) Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #6 - WholeStageCodegen - Project [c_current_addr_sk,c_customer_sk] - Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] + WholeStageCodegen (5) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] InputAdapter BroadcastExchange #7 - WholeStageCodegen - Project [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] - Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] [ss_customer_sk,ss_ext_sales_price,ss_sold_date_sk] + WholeStageCodegen (7) + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #8 - WholeStageCodegen - Project [ca_address_sk,ca_county,ca_state] - Filter [ca_address_sk,ca_county,ca_state] - Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state] [ca_address_sk,ca_county,ca_state] + WholeStageCodegen (8) + Filter [ca_address_sk,ca_county,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state] InputAdapter BroadcastExchange #9 - WholeStageCodegen - Project [s_county,s_state] - Filter [s_county,s_state] - Scan parquet default.store [s_county,s_state] [s_county,s_state] + WholeStageCodegen (9) + Filter [s_county,s_state] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_county,s_state] InputAdapter BroadcastExchange #10 - WholeStageCodegen + WholeStageCodegen (10) Project [d_date_sk] - Filter [d_date_sk,d_month_seq] + Filter [d_month_seq,d_date_sk] Subquery #1 - WholeStageCodegen + WholeStageCodegen (2) HashAggregate [(d_month_seq + 1)] InputAdapter Exchange [(d_month_seq + 1)] #11 - WholeStageCodegen + WholeStageCodegen (1) HashAggregate [(d_month_seq + 1)] Project [d_month_seq] - Filter [d_moy,d_year] - Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] Subquery #2 - WholeStageCodegen + WholeStageCodegen (2) HashAggregate [(d_month_seq + 3)] InputAdapter Exchange [(d_month_seq + 3)] #12 - WholeStageCodegen + WholeStageCodegen (1) HashAggregate [(d_month_seq + 3)] Project [d_month_seq] - Filter [d_moy,d_year] - Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] - Subquery #1 - WholeStageCodegen - HashAggregate [(d_month_seq + 1)] - InputAdapter - Exchange [(d_month_seq + 1)] #11 - WholeStageCodegen - HashAggregate [(d_month_seq + 1)] - Project [d_month_seq] - Filter [d_moy,d_year] - Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] - Subquery #2 - WholeStageCodegen - HashAggregate [(d_month_seq + 3)] - InputAdapter - Exchange [(d_month_seq + 3)] #12 - WholeStageCodegen - HashAggregate [(d_month_seq + 3)] - Project [d_month_seq] - Filter [d_moy,d_year] - Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/explain.txt index c950727b3..99c79d204 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/explain.txt @@ -1,20 +1,122 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[ext_price#1 DESC NULLS LAST,brand_id#2 ASC NULLS FIRST], output=[brand_id#2,brand#3,ext_price#1]) -+- *(4) HashAggregate(keys=[i_brand#4, i_brand_id#5], functions=[sum(UnscaledValue(ss_ext_sales_price#6))]) - +- Exchange hashpartitioning(i_brand#4, i_brand_id#5, 5) - +- *(3) HashAggregate(keys=[i_brand#4, i_brand_id#5], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#6))]) - +- *(3) Project [ss_ext_sales_price#6, i_brand_id#5, i_brand#4] - +- *(3) BroadcastHashJoin [ss_item_sk#7], [i_item_sk#8], Inner, BuildRight - :- *(3) Project [ss_item_sk#7, ss_ext_sales_price#6] - : +- *(3) BroadcastHashJoin [d_date_sk#9], [ss_sold_date_sk#10], Inner, BuildRight - : :- *(3) Project [d_date_sk#9] - : : +- *(3) Filter ((((isnotnull(d_moy#11) && isnotnull(d_year#12)) && (d_moy#11 = 11)) && (d_year#12 = 1999)) && isnotnull(d_date_sk#9)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#9,d_year#12,d_moy#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [ss_sold_date_sk#10, ss_item_sk#7, ss_ext_sales_price#6] - : +- *(1) Filter (isnotnull(ss_sold_date_sk#10) && isnotnull(ss_item_sk#7)) - : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#10,ss_item_sk#7,ss_ext_sales_price#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(2) Project [i_item_sk#8, i_brand_id#5, i_brand#4] - +- *(2) Filter ((isnotnull(i_manager_id#13) && (i_manager_id#13 = 28)) && isnotnull(i_item_sk#8)) - +- *(2) FileScan parquet default.item[i_item_sk#8,i_brand_id#5,i_brand#4,i_manager_id#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (21) ++- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Project (17) + +- * BroadcastHashJoin Inner BuildRight (16) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- BroadcastExchange (8) + : +- * Filter (7) + : +- * ColumnarToRow (6) + : +- Scan parquet default.store_sales (5) + +- BroadcastExchange (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.item (11) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(3) Filter [codegen id : 3] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1999)) AND isnotnull(d_date_sk#1)) + +(4) Project [codegen id : 3] +Output [1]: [d_date_sk#1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] + +(5) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(7) Filter [codegen id : 1] +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Condition : (isnotnull(ss_sold_date_sk#4) AND isnotnull(ss_item_sk#5)) + +(8) BroadcastExchange +Input [3]: [ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ss_item_sk#5, ss_ext_sales_price#6] +Input [4]: [d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_ext_sales_price#6] + +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] + +(13) Filter [codegen id : 2] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 28)) AND isnotnull(i_item_sk#8)) + +(14) Project [codegen id : 2] +Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] + +(15) BroadcastExchange +Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#8] +Join condition: None + +(17) Project [codegen id : 3] +Output [3]: [ss_ext_sales_price#6, i_brand_id#9, i_brand#10] +Input [5]: [ss_item_sk#5, ss_ext_sales_price#6, i_item_sk#8, i_brand_id#9, i_brand#10] + +(18) HashAggregate [codegen id : 3] +Input [3]: [ss_ext_sales_price#6, i_brand_id#9, i_brand#10] +Keys [2]: [i_brand#10, i_brand_id#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum#13] +Results [3]: [i_brand#10, i_brand_id#9, sum#14] + +(19) Exchange +Input [3]: [i_brand#10, i_brand_id#9, sum#14] +Arguments: hashpartitioning(i_brand#10, i_brand_id#9, 5), true, [id=#15] + +(20) HashAggregate [codegen id : 4] +Input [3]: [i_brand#10, i_brand_id#9, sum#14] +Keys [2]: [i_brand#10, i_brand_id#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#16] +Results [3]: [i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#16,17,2) AS ext_price#19] + +(21) TakeOrderedAndProject +Input [3]: [brand_id#17, brand#18, ext_price#19] +Arguments: 100, [ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [brand_id#17, brand#18, ext_price#19] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/simplified.txt index 478e4a54b..bb2ef8e82 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q55/simplified.txt @@ -1,26 +1,31 @@ -TakeOrderedAndProject [brand,brand_id,ext_price] - WholeStageCodegen - HashAggregate [i_brand,i_brand_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ss_ext_sales_price))] +TakeOrderedAndProject [ext_price,brand_id,brand] + WholeStageCodegen (4) + HashAggregate [i_brand,i_brand_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),brand_id,brand,ext_price,sum] InputAdapter Exchange [i_brand,i_brand_id] #1 - WholeStageCodegen - HashAggregate [i_brand,i_brand_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [i_brand,i_brand_id,ss_ext_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk] + WholeStageCodegen (3) + HashAggregate [i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_brand_id,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] BroadcastHashJoin [d_date_sk,ss_sold_date_sk] Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + WholeStageCodegen (1) + Filter [ss_sold_date_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [i_brand,i_brand_id,i_item_sk] - Filter [i_item_sk,i_manager_id] - Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] [i_brand,i_brand_id,i_item_sk,i_manager_id] + WholeStageCodegen (2) + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/explain.txt index dd20ce6d4..d0d647211 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/explain.txt @@ -1,65 +1,378 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[total_sales#1 ASC NULLS FIRST], output=[i_item_id#2,total_sales#1]) -+- *(20) HashAggregate(keys=[i_item_id#2], functions=[sum(total_sales#3)]) - +- Exchange hashpartitioning(i_item_id#2, 5) - +- *(19) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(total_sales#3)]) - +- Union - :- *(6) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- Exchange hashpartitioning(i_item_id#2, 5) - : +- *(5) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- *(5) Project [ss_ext_sales_price#4, i_item_id#2] - : +- *(5) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight - : :- *(5) Project [ss_item_sk#5, ss_ext_sales_price#4] - : : +- *(5) BroadcastHashJoin [ss_addr_sk#7], [ca_address_sk#8], Inner, BuildRight - : : :- *(5) Project [ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] - : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight - : : : :- *(5) Project [ss_sold_date_sk#9, ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] - : : : : +- *(5) Filter ((isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#7)) && isnotnull(ss_item_sk#5)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#10] - : : : +- *(1) Filter ((((isnotnull(d_year#11) && isnotnull(d_moy#12)) && (d_year#11 = 2001)) && (d_moy#12 = 2)) && isnotnull(d_date_sk#10)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [ca_address_sk#8] - : : +- *(2) Filter ((isnotnull(ca_gmt_offset#13) && (ca_gmt_offset#13 = -5.00)) && isnotnull(ca_address_sk#8)) - : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) BroadcastHashJoin [i_item_id#2], [i_item_id#2#14], LeftSemi, BuildRight - : :- *(4) Project [i_item_sk#6, i_item_id#2] - : : +- *(4) Filter isnotnull(i_item_sk#6) - : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_item_id#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : +- *(3) Project [i_item_id#2 AS i_item_id#2#14] - : +- *(3) Filter i_color#15 IN (slate,blanched,burnished) - : +- *(3) FileScan parquet default.item[i_item_id#2,i_color#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [In(i_color, [slate,blanched,burnished])], ReadSchema: struct - :- *(12) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) - : +- Exchange hashpartitioning(i_item_id#2, 5) - : +- *(11) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) - : +- *(11) Project [cs_ext_sales_price#16, i_item_id#2] - : +- *(11) BroadcastHashJoin [cs_item_sk#17], [i_item_sk#6], Inner, BuildRight - : :- *(11) Project [cs_item_sk#17, cs_ext_sales_price#16] - : : +- *(11) BroadcastHashJoin [cs_bill_addr_sk#18], [ca_address_sk#8], Inner, BuildRight - : : :- *(11) Project [cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] - : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#19], [d_date_sk#10], Inner, BuildRight - : : : :- *(11) Project [cs_sold_date_sk#19, cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] - : : : : +- *(11) Filter ((isnotnull(cs_sold_date_sk#19) && isnotnull(cs_bill_addr_sk#18)) && isnotnull(cs_item_sk#17)) - : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [i_item_sk#6, i_item_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(18) HashAggregate(keys=[i_item_id#2], functions=[sum(UnscaledValue(ws_ext_sales_price#20))]) - +- Exchange hashpartitioning(i_item_id#2, 5) - +- *(17) HashAggregate(keys=[i_item_id#2], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#20))]) - +- *(17) Project [ws_ext_sales_price#20, i_item_id#2] - +- *(17) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#6], Inner, BuildRight - :- *(17) Project [ws_item_sk#21, ws_ext_sales_price#20] - : +- *(17) BroadcastHashJoin [ws_bill_addr_sk#22], [ca_address_sk#8], Inner, BuildRight - : :- *(17) Project [ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] - : : +- *(17) BroadcastHashJoin [ws_sold_date_sk#23], [d_date_sk#10], Inner, BuildRight - : : :- *(17) Project [ws_sold_date_sk#23, ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] - : : : +- *(17) Filter ((isnotnull(ws_sold_date_sk#23) && isnotnull(ws_bill_addr_sk#22)) && isnotnull(ws_item_sk#21)) - : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [i_item_sk#6, i_item_id#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +TakeOrderedAndProject (67) ++- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- Union (63) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.customer_address (11) + : +- BroadcastExchange (27) + : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : :- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.item (18) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet default.item (21) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.catalog_sales (33) + : : : +- ReusedExchange (36) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- * HashAggregate (62) + +- Exchange (61) + +- * HashAggregate (60) + +- * Project (59) + +- * BroadcastHashJoin Inner BuildRight (58) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * Project (53) + : : +- * BroadcastHashJoin Inner BuildRight (52) + : : :- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet default.web_sales (48) + : : +- ReusedExchange (51) + : +- ReusedExchange (54) + +- ReusedExchange (57) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] + +(3) Filter [codegen id : 5] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 2001)) AND (d_moy#7 = 2)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 5] +Output [3]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, d_date_sk#5] + +(11) Scan parquet default.customer_address +Output [2]: [ca_address_sk#9, ca_gmt_offset#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] +Condition : ((isnotnull(ca_gmt_offset#10) AND (ca_gmt_offset#10 = -5.00)) AND isnotnull(ca_address_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [ca_address_sk#9] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(15) BroadcastExchange +Input [1]: [ca_address_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(17) Project [codegen id : 5] +Output [2]: [ss_item_sk#2, ss_ext_sales_price#4] +Input [4]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, ca_address_sk#9] + +(18) Scan parquet default.item +Output [2]: [i_item_sk#12, i_item_id#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#12, i_item_id#13] + +(20) Filter [codegen id : 4] +Input [2]: [i_item_sk#12, i_item_id#13] +Condition : isnotnull(i_item_sk#12) + +(21) Scan parquet default.item +Output [2]: [i_item_id#13, i_color#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_color, [slate,blanched,burnished])] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_id#13, i_color#14] + +(23) Filter [codegen id : 3] +Input [2]: [i_item_id#13, i_color#14] +Condition : i_color#14 IN (slate,blanched,burnished) + +(24) Project [codegen id : 3] +Output [1]: [i_item_id#13 AS i_item_id#13#15] +Input [2]: [i_item_id#13, i_color#14] + +(25) BroadcastExchange +Input [1]: [i_item_id#13#15] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#16] + +(26) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_id#13] +Right keys [1]: [i_item_id#13#15] +Join condition: None + +(27) BroadcastExchange +Input [2]: [i_item_sk#12, i_item_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(29) Project [codegen id : 5] +Output [2]: [ss_ext_sales_price#4, i_item_id#13] +Input [4]: [ss_item_sk#2, ss_ext_sales_price#4, i_item_sk#12, i_item_id#13] + +(30) HashAggregate [codegen id : 5] +Input [2]: [ss_ext_sales_price#4, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [1]: [sum#18] +Results [2]: [i_item_id#13, sum#19] + +(31) Exchange +Input [2]: [i_item_id#13, sum#19] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#20] + +(32) HashAggregate [codegen id : 6] +Input [2]: [i_item_id#13, sum#19] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#4))#21] +Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#21,17,2) AS total_sales#22] + +(33) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] + +(35) Filter [codegen id : 11] +Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Condition : ((isnotnull(cs_sold_date_sk#23) AND isnotnull(cs_bill_addr_sk#24)) AND isnotnull(cs_item_sk#25)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#23] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Input [5]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, d_date_sk#5] + +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_addr_sk#24] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(41) Project [codegen id : 11] +Output [2]: [cs_item_sk#25, cs_ext_sales_price#26] +Input [4]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, ca_address_sk#9] + +(42) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#12, i_item_id#13] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#25] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(44) Project [codegen id : 11] +Output [2]: [cs_ext_sales_price#26, i_item_id#13] +Input [4]: [cs_item_sk#25, cs_ext_sales_price#26, i_item_sk#12, i_item_id#13] + +(45) HashAggregate [codegen id : 11] +Input [2]: [cs_ext_sales_price#26, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#26))] +Aggregate Attributes [1]: [sum#27] +Results [2]: [i_item_id#13, sum#28] + +(46) Exchange +Input [2]: [i_item_id#13, sum#28] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#29] + +(47) HashAggregate [codegen id : 12] +Input [2]: [i_item_id#13, sum#28] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#26))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#26))#30] +Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#26))#30,17,2) AS total_sales#31] + +(48) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 17] +Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] + +(50) Filter [codegen id : 17] +Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Condition : ((isnotnull(ws_sold_date_sk#32) AND isnotnull(ws_bill_addr_sk#34)) AND isnotnull(ws_item_sk#33)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(52) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(53) Project [codegen id : 17] +Output [3]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Input [5]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, d_date_sk#5] + +(54) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] + +(55) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_bill_addr_sk#34] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(56) Project [codegen id : 17] +Output [2]: [ws_item_sk#33, ws_ext_sales_price#35] +Input [4]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, ca_address_sk#9] + +(57) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#12, i_item_id#13] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#33] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(59) Project [codegen id : 17] +Output [2]: [ws_ext_sales_price#35, i_item_id#13] +Input [4]: [ws_item_sk#33, ws_ext_sales_price#35, i_item_sk#12, i_item_id#13] + +(60) HashAggregate [codegen id : 17] +Input [2]: [ws_ext_sales_price#35, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#35))] +Aggregate Attributes [1]: [sum#36] +Results [2]: [i_item_id#13, sum#37] + +(61) Exchange +Input [2]: [i_item_id#13, sum#37] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#38] + +(62) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#13, sum#37] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#35))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#35))#39] +Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#35))#39,17,2) AS total_sales#40] + +(63) Union + +(64) HashAggregate [codegen id : 19] +Input [2]: [i_item_id#13, total_sales#22] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(total_sales#22)] +Aggregate Attributes [2]: [sum#41, isEmpty#42] +Results [3]: [i_item_id#13, sum#43, isEmpty#44] + +(65) Exchange +Input [3]: [i_item_id#13, sum#43, isEmpty#44] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#45] + +(66) HashAggregate [codegen id : 20] +Input [3]: [i_item_id#13, sum#43, isEmpty#44] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(total_sales#22)] +Aggregate Attributes [1]: [sum(total_sales#22)#46] +Results [2]: [i_item_id#13, sum(total_sales#22)#46 AS total_sales#47] + +(67) TakeOrderedAndProject +Input [2]: [i_item_id#13, total_sales#47] +Arguments: 100, [total_sales#47 ASC NULLS FIRST], [i_item_id#13, total_sales#47] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/simplified.txt index 0433f198d..5825c6f6e 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q56/simplified.txt @@ -1,91 +1,101 @@ -TakeOrderedAndProject [i_item_id,total_sales] - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(total_sales)] [sum,sum(total_sales),total_sales] +TakeOrderedAndProject [total_sales,i_item_id] + WholeStageCodegen (20) + HashAggregate [i_item_id,sum,isEmpty] [sum(total_sales),total_sales,sum,isEmpty] InputAdapter Exchange [i_item_id] #1 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum,total_sales] [sum,sum] + WholeStageCodegen (19) + HashAggregate [i_item_id,total_sales] [sum,isEmpty,sum,isEmpty] InputAdapter Union - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_ext_sales_price)),total_sales] + WholeStageCodegen (6) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] InputAdapter Exchange [i_item_id] #2 - WholeStageCodegen - HashAggregate [i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [i_item_id,ss_ext_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - Filter [ss_addr_sk,ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + WholeStageCodegen (5) + HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_addr_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (2) Project [ca_address_sk] - Filter [ca_address_sk,ca_gmt_offset] - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] + Filter [ca_gmt_offset,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (4) BroadcastHashJoin [i_item_id,i_item_id] - Project [i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id] InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (3) Project [i_item_id] Filter [i_color] - Scan parquet default.item [i_color,i_item_id] [i_color,i_item_id] - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum,sum(UnscaledValue(cs_ext_sales_price)),total_sales] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_color] + WholeStageCodegen (12) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] InputAdapter Exchange [i_item_id] #7 - WholeStageCodegen - HashAggregate [cs_ext_sales_price,i_item_id,sum,sum] [sum,sum] + WholeStageCodegen (11) + HashAggregate [i_item_id,cs_ext_sales_price] [sum,sum] Project [cs_ext_sales_price,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_ext_sales_price,cs_item_sk] - BroadcastHashJoin [ca_address_sk,cs_bill_addr_sk] - Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk] + Project [cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] + Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] - Filter [cs_bill_addr_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + Filter [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 + ReusedExchange [d_date_sk] #3 InputAdapter - ReusedExchange [ca_address_sk] [ca_address_sk] #4 + ReusedExchange [ca_address_sk] #4 InputAdapter - ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #5 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),total_sales] + ReusedExchange [i_item_sk,i_item_id] #5 + WholeStageCodegen (18) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] InputAdapter Exchange [i_item_id] #8 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum,ws_ext_sales_price] [sum,sum] - Project [i_item_id,ws_ext_sales_price] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_ext_sales_price,ws_item_sk] - BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] - Filter [ws_bill_addr_sk,ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + WholeStageCodegen (17) + HashAggregate [i_item_id,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_bill_addr_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 + ReusedExchange [d_date_sk] #3 InputAdapter - ReusedExchange [ca_address_sk] [ca_address_sk] #4 + ReusedExchange [ca_address_sk] #4 InputAdapter - ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #5 + ReusedExchange [i_item_sk,i_item_id] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/explain.txt index 8092d1687..4e1123185 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/explain.txt @@ -1,51 +1,278 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,cc_name#3 ASC NULLS FIRST], output=[i_category#4,i_brand#5,cc_name#3,d_year#6,d_moy#7,avg_monthly_sales#2,sum_sales#1,psum#8,nsum#9]) -+- *(22) Project [i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, avg_monthly_sales#2, sum_sales#1, sum_sales#10 AS psum#8, sum_sales#11 AS nsum#9] - +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, cc_name#3, rn#12], [i_category#13, i_brand#14, cc_name#15, (rn#16 - 1)], Inner, BuildRight - :- *(22) Project [i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, sum_sales#1, avg_monthly_sales#2, rn#12, sum_sales#10] - : +- *(22) BroadcastHashJoin [i_category#4, i_brand#5, cc_name#3, rn#12], [i_category#17, i_brand#18, cc_name#19, (rn#20 + 1)], Inner, BuildRight - : :- *(22) Project [i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, sum_sales#1, avg_monthly_sales#2, rn#12] - : : +- *(22) Filter (((isnotnull(avg_monthly_sales#2) && (avg_monthly_sales#2 > 0.000000)) && (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000)) && isnotnull(rn#12)) - : : +- Window [avg(_w0#21) windowspecdefinition(i_category#4, i_brand#5, cc_name#3, d_year#6, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#5, cc_name#3, d_year#6] - : : +- *(7) Filter (isnotnull(d_year#6) && (d_year#6 = 1999)) - : : +- Window [rank(d_year#6, d_moy#7) windowspecdefinition(i_category#4, i_brand#5, cc_name#3, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#12], [i_category#4, i_brand#5, cc_name#3], [d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST] - : : +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#5 ASC NULLS FIRST, cc_name#3 ASC NULLS FIRST, d_year#6 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST], false, 0 - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, 5) - : : +- *(5) HashAggregate(keys=[i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7], functions=[sum(UnscaledValue(cs_sales_price#22))]) - : : +- Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, 5) - : : +- *(4) HashAggregate(keys=[i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7], functions=[partial_sum(UnscaledValue(cs_sales_price#22))]) - : : +- *(4) Project [i_brand#5, i_category#4, cs_sales_price#22, d_year#6, d_moy#7, cc_name#3] - : : +- *(4) BroadcastHashJoin [cs_call_center_sk#23], [cc_call_center_sk#24], Inner, BuildRight - : : :- *(4) Project [i_brand#5, i_category#4, cs_call_center_sk#23, cs_sales_price#22, d_year#6, d_moy#7] - : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#25], [d_date_sk#26], Inner, BuildRight - : : : :- *(4) Project [i_brand#5, i_category#4, cs_sold_date_sk#25, cs_call_center_sk#23, cs_sales_price#22] - : : : : +- *(4) BroadcastHashJoin [i_item_sk#27], [cs_item_sk#28], Inner, BuildRight - : : : : :- *(4) Project [i_item_sk#27, i_brand#5, i_category#4] - : : : : : +- *(4) Filter ((isnotnull(i_item_sk#27) && isnotnull(i_brand#5)) && isnotnull(i_category#4)) - : : : : : +- *(4) FileScan parquet default.item[i_item_sk#27,i_brand#5,i_category#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand), IsNotNull(i_category)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) - : : : : +- *(1) Project [cs_sold_date_sk#25, cs_call_center_sk#23, cs_item_sk#28, cs_sales_price#22] - : : : : +- *(1) Filter ((isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#25)) && isnotnull(cs_call_center_sk#23)) - : : : : +- *(1) FileScan parquet default.catalog_sales[cs_sold_date_sk#25,cs_call_center_sk#23,cs_item_sk#28,cs_sales_price#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_call_center_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [d_date_sk#26, d_year#6, d_moy#7] - : : : +- *(2) Filter ((((d_year#6 = 1999) || ((d_year#6 = 1998) && (d_moy#7 = 12))) || ((d_year#6 = 2000) && (d_moy#7 = 1))) && isnotnull(d_date_sk#26)) - : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#26,d_year#6,d_moy#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000)..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [cc_call_center_sk#24, cc_name#3] - : : +- *(3) Filter (isnotnull(cc_call_center_sk#24) && isnotnull(cc_name#3)) - : : +- *(3) FileScan parquet default.call_center[cc_call_center_sk#24,cc_name#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] + 1))) - : +- *(14) Project [i_category#17, i_brand#18, cc_name#19, sum_sales#10, rn#20] - : +- *(14) Filter isnotnull(rn#20) - : +- Window [rank(d_year#29, d_moy#30) windowspecdefinition(i_category#17, i_brand#18, cc_name#19, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#20], [i_category#17, i_brand#18, cc_name#19], [d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] - : +- *(13) Sort [i_category#17 ASC NULLS FIRST, i_brand#18 ASC NULLS FIRST, cc_name#19 ASC NULLS FIRST, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST], false, 0 - : +- Exchange hashpartitioning(i_category#17, i_brand#18, cc_name#19, 5) - : +- *(12) HashAggregate(keys=[i_category#17, i_brand#18, cc_name#19, d_year#29, d_moy#30], functions=[sum(UnscaledValue(cs_sales_price#22))]) - : +- ReusedExchange [i_category#17, i_brand#18, cc_name#19, d_year#29, d_moy#30, sum#31], Exchange hashpartitioning(i_category#4, i_brand#5, cc_name#3, d_year#6, d_moy#7, 5) - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] - 1))) - +- *(21) Project [i_category#13, i_brand#14, cc_name#15, sum_sales#11, rn#16] - +- *(21) Filter isnotnull(rn#16) - +- Window [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#13, i_brand#14, cc_name#15, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#16], [i_category#13, i_brand#14, cc_name#15], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] - +- *(20) Sort [i_category#13 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, cc_name#15 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 - +- ReusedExchange [i_category#13, i_brand#14, cc_name#15, d_year#32, d_moy#33, sum_sales#11], Exchange hashpartitioning(i_category#17, i_brand#18, cc_name#19, 5) \ No newline at end of file +TakeOrderedAndProject (51) ++- * Project (50) + +- * BroadcastHashJoin Inner BuildRight (49) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Filter (32) + : : +- Window (31) + : : +- * Sort (30) + : : +- Exchange (29) + : : +- * Project (28) + : : +- Window (27) + : : +- * Sort (26) + : : +- Exchange (25) + : : +- * HashAggregate (24) + : : +- Exchange (23) + : : +- * HashAggregate (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.item (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.catalog_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet default.call_center (16) + : +- BroadcastExchange (40) + : +- * Project (39) + : +- * Filter (38) + : +- Window (37) + : +- * Sort (36) + : +- Exchange (35) + : +- * HashAggregate (34) + : +- ReusedExchange (33) + +- BroadcastExchange (48) + +- * Project (47) + +- * Filter (46) + +- Window (45) + +- * Sort (44) + +- ReusedExchange (43) + + +(1) Scan parquet default.item +Output [3]: [i_item_sk#1, i_brand#2, i_category#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] + +(3) Filter [codegen id : 4] +Input [3]: [i_item_sk#1, i_brand#2, i_category#3] +Condition : ((isnotnull(i_item_sk#1) AND isnotnull(i_category#3)) AND isnotnull(i_brand#2)) + +(4) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_call_center_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] + +(6) Filter [codegen id : 1] +Input [4]: [cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] +Condition : ((isnotnull(cs_item_sk#6) AND isnotnull(cs_sold_date_sk#4)) AND isnotnull(cs_call_center_sk#5)) + +(7) BroadcastExchange +Input [4]: [cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [cs_item_sk#6] +Join condition: None + +(9) Project [codegen id : 4] +Output [5]: [i_brand#2, i_category#3, cs_sold_date_sk#4, cs_call_center_sk#5, cs_sales_price#7] +Input [7]: [i_item_sk#1, i_brand#2, i_category#3, cs_sold_date_sk#4, cs_call_center_sk#5, cs_item_sk#6, cs_sales_price#7] + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#4] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(15) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, cs_call_center_sk#5, cs_sales_price#7, d_year#10, d_moy#11] +Input [8]: [i_brand#2, i_category#3, cs_sold_date_sk#4, cs_call_center_sk#5, cs_sales_price#7, d_date_sk#9, d_year#10, d_moy#11] + +(16) Scan parquet default.call_center +Output [2]: [cc_call_center_sk#13, cc_name#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [cc_call_center_sk#13, cc_name#14] + +(18) Filter [codegen id : 3] +Input [2]: [cc_call_center_sk#13, cc_name#14] +Condition : (isnotnull(cc_call_center_sk#13) AND isnotnull(cc_name#14)) + +(19) BroadcastExchange +Input [2]: [cc_call_center_sk#13, cc_name#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_call_center_sk#5] +Right keys [1]: [cc_call_center_sk#13] +Join condition: None + +(21) Project [codegen id : 4] +Output [6]: [i_brand#2, i_category#3, cs_sales_price#7, d_year#10, d_moy#11, cc_name#14] +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#5, cs_sales_price#7, d_year#10, d_moy#11, cc_call_center_sk#13, cc_name#14] + +(22) HashAggregate [codegen id : 4] +Input [6]: [i_brand#2, i_category#3, cs_sales_price#7, d_year#10, d_moy#11, cc_name#14] +Keys [5]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11] +Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#7))] +Aggregate Attributes [1]: [sum#16] +Results [6]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum#17] + +(23) Exchange +Input [6]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum#17] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, 5), true, [id=#18] + +(24) HashAggregate [codegen id : 5] +Input [6]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum#17] +Keys [5]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11] +Functions [1]: [sum(UnscaledValue(cs_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#7))#19] +Results [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, MakeDecimal(sum(UnscaledValue(cs_sales_price#7))#19,17,2) AS sum_sales#20, MakeDecimal(sum(UnscaledValue(cs_sales_price#7))#19,17,2) AS _w0#21] + +(25) Exchange +Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, _w0#21] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#14, d_year#10, 5), true, [id=#22] + +(26) Sort [codegen id : 6] +Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, _w0#21] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#14 ASC NULLS FIRST, d_year#10 ASC NULLS FIRST], false, 0 + +(27) Window +Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, _w0#21] +Arguments: [avg(_w0#21) windowspecdefinition(i_category#3, i_brand#2, cc_name#14, d_year#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#23], [i_category#3, i_brand#2, cc_name#14, d_year#10] + +(28) Project [codegen id : 7] +Output [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23] +Input [8]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, _w0#21, avg_monthly_sales#23] + +(29) Exchange +Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#14, 5), true, [id=#24] + +(30) Sort [codegen id : 8] +Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#14 ASC NULLS FIRST, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST], false, 0 + +(31) Window +Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23] +Arguments: [rank(d_year#10, d_moy#11) windowspecdefinition(i_category#3, i_brand#2, cc_name#14, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#25], [i_category#3, i_brand#2, cc_name#14], [d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST] + +(32) Filter [codegen id : 23] +Input [8]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23, rn#25] +Condition : (((((isnotnull(d_year#10) AND isnotnull(avg_monthly_sales#23)) AND (d_year#10 = 1999)) AND (avg_monthly_sales#23 > 0.000000)) AND (CASE WHEN (avg_monthly_sales#23 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000)) AND isnotnull(rn#25)) + +(33) ReusedExchange [Reuses operator id: 23] +Output [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum#31] + +(34) HashAggregate [codegen id : 13] +Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum#31] +Keys [5]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30] +Functions [1]: [sum(UnscaledValue(cs_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#7))#32] +Results [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, MakeDecimal(sum(UnscaledValue(cs_sales_price#7))#32,17,2) AS sum_sales#33] + +(35) Exchange +Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33] +Arguments: hashpartitioning(i_category#26, i_brand#27, cc_name#28, 5), true, [id=#34] + +(36) Sort [codegen id : 14] +Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33] +Arguments: [i_category#26 ASC NULLS FIRST, i_brand#27 ASC NULLS FIRST, cc_name#28 ASC NULLS FIRST, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST], false, 0 + +(37) Window +Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33] +Arguments: [rank(d_year#29, d_moy#30) windowspecdefinition(i_category#26, i_brand#27, cc_name#28, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#26, i_brand#27, cc_name#28], [d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] + +(38) Filter [codegen id : 15] +Input [7]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33, rn#35] +Condition : isnotnull(rn#35) + +(39) Project [codegen id : 15] +Output [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#33, rn#35] +Input [7]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#33, rn#35] + +(40) BroadcastExchange +Input [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#33, rn#35] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] + 1)),false), [id=#36] + +(41) BroadcastHashJoin [codegen id : 23] +Left keys [4]: [i_category#3, i_brand#2, cc_name#14, rn#25] +Right keys [4]: [i_category#26, i_brand#27, cc_name#28, (rn#35 + 1)] +Join condition: None + +(42) Project [codegen id : 23] +Output [9]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23, rn#25, sum_sales#33] +Input [13]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23, rn#25, i_category#26, i_brand#27, cc_name#28, sum_sales#33, rn#35] + +(43) ReusedExchange [Reuses operator id: 35] +Output [6]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42] + +(44) Sort [codegen id : 21] +Input [6]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42] +Arguments: [i_category#37 ASC NULLS FIRST, i_brand#38 ASC NULLS FIRST, cc_name#39 ASC NULLS FIRST, d_year#40 ASC NULLS FIRST, d_moy#41 ASC NULLS FIRST], false, 0 + +(45) Window +Input [6]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42] +Arguments: [rank(d_year#40, d_moy#41) windowspecdefinition(i_category#37, i_brand#38, cc_name#39, d_year#40 ASC NULLS FIRST, d_moy#41 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#43], [i_category#37, i_brand#38, cc_name#39], [d_year#40 ASC NULLS FIRST, d_moy#41 ASC NULLS FIRST] + +(46) Filter [codegen id : 22] +Input [7]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42, rn#43] +Condition : isnotnull(rn#43) + +(47) Project [codegen id : 22] +Output [5]: [i_category#37, i_brand#38, cc_name#39, sum_sales#42, rn#43] +Input [7]: [i_category#37, i_brand#38, cc_name#39, d_year#40, d_moy#41, sum_sales#42, rn#43] + +(48) BroadcastExchange +Input [5]: [i_category#37, i_brand#38, cc_name#39, sum_sales#42, rn#43] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, true] - 1)),false), [id=#44] + +(49) BroadcastHashJoin [codegen id : 23] +Left keys [4]: [i_category#3, i_brand#2, cc_name#14, rn#25] +Right keys [4]: [i_category#37, i_brand#38, cc_name#39, (rn#43 - 1)] +Join condition: None + +(50) Project [codegen id : 23] +Output [9]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, avg_monthly_sales#23, sum_sales#20, sum_sales#33 AS psum#45, sum_sales#42 AS nsum#46] +Input [14]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, sum_sales#20, avg_monthly_sales#23, rn#25, sum_sales#33, i_category#37, i_brand#38, cc_name#39, sum_sales#42, rn#43] + +(51) TakeOrderedAndProject +Input [9]: [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, avg_monthly_sales#23, sum_sales#20, psum#45, nsum#46] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, cc_name#14 ASC NULLS FIRST], [i_category#3, i_brand#2, cc_name#14, d_year#10, d_moy#11, avg_monthly_sales#23, sum_sales#20, psum#45, nsum#46] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/simplified.txt index 3d71eb1d9..432441bb7 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q57/simplified.txt @@ -1,77 +1,84 @@ -TakeOrderedAndProject [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,nsum,psum,sum_sales] - WholeStageCodegen - Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,sum_sales,sum_sales,sum_sales] - BroadcastHashJoin [cc_name,cc_name,i_brand,i_brand,i_category,i_category,rn,rn] - Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,rn,sum_sales,sum_sales] - BroadcastHashJoin [cc_name,cc_name,i_brand,i_brand,i_category,i_category,rn,rn] - Project [avg_monthly_sales,cc_name,d_moy,d_year,i_brand,i_category,rn,sum_sales] - Filter [avg_monthly_sales,rn,sum_sales] - InputAdapter - Window [_w0,cc_name,d_year,i_brand,i_category] - WholeStageCodegen - Filter [d_year] - InputAdapter - Window [cc_name,d_moy,d_year,i_brand,i_category] - WholeStageCodegen - Sort [cc_name,d_moy,d_year,i_brand,i_category] - InputAdapter - Exchange [cc_name,i_brand,i_category] #1 - WholeStageCodegen - HashAggregate [cc_name,d_moy,d_year,i_brand,i_category,sum,sum(UnscaledValue(cs_sales_price))] [_w0,sum,sum(UnscaledValue(cs_sales_price)),sum_sales] - InputAdapter - Exchange [cc_name,d_moy,d_year,i_brand,i_category] #2 - WholeStageCodegen - HashAggregate [cc_name,cs_sales_price,d_moy,d_year,i_brand,i_category,sum,sum] [sum,sum] - Project [cc_name,cs_sales_price,d_moy,d_year,i_brand,i_category] - BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] - Project [cs_call_center_sk,cs_sales_price,d_moy,d_year,i_brand,i_category] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_call_center_sk,cs_sales_price,cs_sold_date_sk,i_brand,i_category] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [i_brand,i_category,i_item_sk] - Filter [i_brand,i_category,i_item_sk] - Scan parquet default.item [i_brand,i_category,i_item_sk] [i_brand,i_category,i_item_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] - Filter [cs_call_center_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] [cs_call_center_sk,cs_item_sk,cs_sales_price,cs_sold_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk,d_moy,d_year] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - Project [cc_call_center_sk,cc_name] - Filter [cc_call_center_sk,cc_name] - Scan parquet default.call_center [cc_call_center_sk,cc_name] [cc_call_center_sk,cc_name] +TakeOrderedAndProject [sum_sales,avg_monthly_sales,cc_name,i_category,i_brand,d_year,d_moy,psum,nsum] + WholeStageCodegen (23) + Project [i_category,i_brand,cc_name,d_year,d_moy,avg_monthly_sales,sum_sales,sum_sales,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales,rn,sum_sales] + BroadcastHashJoin [i_category,i_brand,cc_name,rn,i_category,i_brand,cc_name,rn] + Filter [d_year,avg_monthly_sales,sum_sales,rn] + InputAdapter + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (8) + Sort [i_category,i_brand,cc_name,d_year,d_moy] + InputAdapter + Exchange [i_category,i_brand,cc_name] #1 + WholeStageCodegen (7) + Project [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,avg_monthly_sales] + InputAdapter + Window [_w0,i_category,i_brand,cc_name,d_year] + WholeStageCodegen (6) + Sort [i_category,i_brand,cc_name,d_year] + InputAdapter + Exchange [i_category,i_brand,cc_name,d_year] #2 + WholeStageCodegen (5) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,_w0,sum] + InputAdapter + Exchange [i_category,i_brand,cc_name,d_year,d_moy] #3 + WholeStageCodegen (4) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,cs_sales_price] [sum,sum] + Project [i_brand,i_category,cs_sales_price,d_year,d_moy,cc_name] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [i_brand,i_category,cs_call_center_sk,cs_sales_price,d_year,d_moy] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [i_brand,i_category,cs_sold_date_sk,cs_call_center_sk,cs_sales_price] + BroadcastHashJoin [i_item_sk,cs_item_sk] + Filter [i_item_sk,i_category,i_brand] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand,i_category] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [cs_item_sk,cs_sold_date_sk,cs_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_call_center_sk,cs_item_sk,cs_sales_price] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Filter [cc_call_center_sk,cc_name] + ColumnarToRow + InputAdapter + Scan parquet default.call_center [cc_call_center_sk,cc_name] InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [cc_name,i_brand,i_category,rn,sum_sales] + BroadcastExchange #7 + WholeStageCodegen (15) + Project [i_category,i_brand,cc_name,sum_sales,rn] Filter [rn] InputAdapter - Window [cc_name,d_moy,d_year,i_brand,i_category] - WholeStageCodegen - Sort [cc_name,d_moy,d_year,i_brand,i_category] + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (14) + Sort [i_category,i_brand,cc_name,d_year,d_moy] InputAdapter - Exchange [cc_name,i_brand,i_category] #7 - WholeStageCodegen - HashAggregate [cc_name,d_moy,d_year,i_brand,i_category,sum,sum(UnscaledValue(cs_sales_price))] [sum,sum(UnscaledValue(cs_sales_price)),sum_sales] + Exchange [i_category,i_brand,cc_name] #8 + WholeStageCodegen (13) + HashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum] [sum(UnscaledValue(cs_sales_price)),sum_sales,sum] InputAdapter - ReusedExchange [cc_name,d_moy,d_year,i_brand,i_category,sum] [cc_name,d_moy,d_year,i_brand,i_category,sum] #2 + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #3 InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [cc_name,i_brand,i_category,rn,sum_sales] + BroadcastExchange #9 + WholeStageCodegen (22) + Project [i_category,i_brand,cc_name,sum_sales,rn] Filter [rn] InputAdapter - Window [cc_name,d_moy,d_year,i_brand,i_category] - WholeStageCodegen - Sort [cc_name,d_moy,d_year,i_brand,i_category] + Window [d_year,d_moy,i_category,i_brand,cc_name] + WholeStageCodegen (21) + Sort [i_category,i_brand,cc_name,d_year,d_moy] InputAdapter - ReusedExchange [cc_name,d_moy,d_year,i_brand,i_category,sum_sales] [cc_name,d_moy,d_year,i_brand,i_category,sum_sales] #7 + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #8 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt index f0a3a143c..40a6836ae 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/explain.txt @@ -1,101 +1,477 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,ss_item_rev#2 ASC NULLS FIRST], output=[item_id#1,ss_item_rev#2,ss_dev#3,cs_item_rev#4,cs_dev#5,ws_item_rev#6,ws_dev#7,average#8]) -+- *(15) Project [item_id#1, ss_item_rev#2, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ss_dev#3, cs_item_rev#4, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(cs_item_rev#4 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS cs_dev#5, ws_item_rev#6, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ws_item_rev#6 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ws_dev#7, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#2 as decimal(18,2))) + promote_precision(cast(cs_item_rev#4 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#6 as decimal(19,2)))), DecimalType(19,2))) / 3.00), DecimalType(23,6)) AS average#8] - +- *(15) BroadcastHashJoin [item_id#1], [item_id#9], Inner, BuildRight, ((((((((cast(ss_item_rev#2 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#6)), DecimalType(19,3))) && (cast(ss_item_rev#2 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#6)), DecimalType(20,3)))) && (cast(cs_item_rev#4 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#6)), DecimalType(19,3)))) && (cast(cs_item_rev#4 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#6)), DecimalType(20,3)))) && (cast(ws_item_rev#6 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#2)), DecimalType(19,3)))) && (cast(ws_item_rev#6 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#2)), DecimalType(20,3)))) && (cast(ws_item_rev#6 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#4)), DecimalType(19,3)))) && (cast(ws_item_rev#6 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#4)), DecimalType(20,3)))) - :- *(15) Project [item_id#1, ss_item_rev#2, cs_item_rev#4] - : +- *(15) BroadcastHashJoin [item_id#1], [item_id#10], Inner, BuildRight, ((((cast(ss_item_rev#2 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#4)), DecimalType(19,3))) && (cast(ss_item_rev#2 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#4)), DecimalType(20,3)))) && (cast(cs_item_rev#4 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#2)), DecimalType(19,3)))) && (cast(cs_item_rev#4 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#2)), DecimalType(20,3)))) - : :- *(15) Filter isnotnull(ss_item_rev#2) - : : +- *(15) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(ss_ext_sales_price#12))]) - : : +- Exchange hashpartitioning(i_item_id#11, 5) - : : +- *(4) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#12))]) - : : +- *(4) Project [ss_ext_sales_price#12, i_item_id#11] - : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight - : : :- *(4) Project [ss_sold_date_sk#13, ss_ext_sales_price#12, i_item_id#11] - : : : +- *(4) BroadcastHashJoin [ss_item_sk#15], [i_item_sk#16], Inner, BuildRight - : : : :- *(4) Project [ss_sold_date_sk#13, ss_item_sk#15, ss_ext_sales_price#12] - : : : : +- *(4) Filter (isnotnull(ss_item_sk#15) && isnotnull(ss_sold_date_sk#13)) - : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_item_sk#15,ss_ext_sales_price#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [i_item_sk#16, i_item_id#11] - : : : +- *(1) Filter (isnotnull(i_item_sk#16) && isnotnull(i_item_id#11)) - : : : +- *(1) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [d_date_sk#14] - : : +- *(3) BroadcastHashJoin [d_date#17], [d_date#17#18], LeftSemi, BuildRight - : : :- *(3) Project [d_date_sk#14, d_date#17] - : : : +- *(3) Filter isnotnull(d_date_sk#14) - : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) - : : +- *(2) Project [d_date#17 AS d_date#17#18] - : : +- *(2) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery11869)) - : : : +- Subquery subquery11869 - : : : +- *(1) Project [d_week_seq#19] - : : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) - : : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct - : : +- *(2) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct - : : +- Subquery subquery11869 - : : +- *(1) Project [d_week_seq#19] - : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) - : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : +- *(9) Filter isnotnull(cs_item_rev#4) - : +- *(9) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(cs_ext_sales_price#20))]) - : +- Exchange hashpartitioning(i_item_id#11, 5) - : +- *(8) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#20))]) - : +- *(8) Project [cs_ext_sales_price#20, i_item_id#11] - : +- *(8) BroadcastHashJoin [cs_sold_date_sk#21], [d_date_sk#14], Inner, BuildRight - : :- *(8) Project [cs_sold_date_sk#21, cs_ext_sales_price#20, i_item_id#11] - : : +- *(8) BroadcastHashJoin [cs_item_sk#22], [i_item_sk#16], Inner, BuildRight - : : :- *(8) Project [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#20] - : : : +- *(8) Filter (isnotnull(cs_item_sk#22) && isnotnull(cs_sold_date_sk#21)) - : : : +- *(8) FileScan parquet default.catalog_sales[cs_sold_date_sk#21,cs_item_sk#22,cs_ext_sales_price#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [i_item_sk#16, i_item_id#11], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(7) Project [d_date_sk#14] - : +- *(7) BroadcastHashJoin [d_date#17], [d_date#17#23], LeftSemi, BuildRight - : :- *(7) Project [d_date_sk#14, d_date#17] - : : +- *(7) Filter isnotnull(d_date_sk#14) - : : +- *(7) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) - : +- *(6) Project [d_date#17 AS d_date#17#23] - : +- *(6) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery11873)) - : : +- Subquery subquery11873 - : : +- *(1) Project [d_week_seq#19] - : : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) - : : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct - : +- *(6) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct - : +- Subquery subquery11873 - : +- *(1) Project [d_week_seq#19] - : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) - : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - +- *(14) Filter isnotnull(ws_item_rev#6) - +- *(14) HashAggregate(keys=[i_item_id#11], functions=[sum(UnscaledValue(ws_ext_sales_price#24))]) - +- Exchange hashpartitioning(i_item_id#11, 5) - +- *(13) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#24))]) - +- *(13) Project [ws_ext_sales_price#24, i_item_id#11] - +- *(13) BroadcastHashJoin [ws_sold_date_sk#25], [d_date_sk#14], Inner, BuildRight - :- *(13) Project [ws_sold_date_sk#25, ws_ext_sales_price#24, i_item_id#11] - : +- *(13) BroadcastHashJoin [ws_item_sk#26], [i_item_sk#16], Inner, BuildRight - : :- *(13) Project [ws_sold_date_sk#25, ws_item_sk#26, ws_ext_sales_price#24] - : : +- *(13) Filter (isnotnull(ws_item_sk#26) && isnotnull(ws_sold_date_sk#25)) - : : +- *(13) FileScan parquet default.web_sales[ws_sold_date_sk#25,ws_item_sk#26,ws_ext_sales_price#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : +- ReusedExchange [i_item_sk#16, i_item_id#11], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(12) Project [d_date_sk#14] - +- *(12) BroadcastHashJoin [d_date#17], [d_date#17#27], LeftSemi, BuildRight - :- *(12) Project [d_date_sk#14, d_date#17] - : +- *(12) Filter isnotnull(d_date_sk#14) - : +- *(12) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) - +- *(11) Project [d_date#17 AS d_date#17#27] - +- *(11) Filter (isnotnull(d_week_seq#19) && (d_week_seq#19 = Subquery subquery11877)) - : +- Subquery subquery11877 - : +- *(1) Project [d_week_seq#19] - : +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) - : +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct - +- *(11) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq)], ReadSchema: struct - +- Subquery subquery11877 - +- *(1) Project [d_week_seq#19] - +- *(1) Filter (isnotnull(d_date#17) && (cast(d_date#17 as string) = 2000-01-03)) - +- *(1) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (79) ++- * Project (78) + +- * BroadcastHashJoin Inner BuildRight (77) + :- * Project (52) + : +- * BroadcastHashJoin Inner BuildRight (51) + : :- * Filter (26) + : : +- * HashAggregate (25) + : : +- Exchange (24) + : : +- * HashAggregate (23) + : : +- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.item (4) + : : +- BroadcastExchange (20) + : : +- * Project (19) + : : +- * BroadcastHashJoin LeftSemi BuildRight (18) + : : :- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (17) + : : +- * Project (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet default.date_dim (13) + : +- BroadcastExchange (50) + : +- * Filter (49) + : +- * HashAggregate (48) + : +- Exchange (47) + : +- * HashAggregate (46) + : +- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Project (32) + : : +- * BroadcastHashJoin Inner BuildRight (31) + : : :- * Filter (29) + : : : +- * ColumnarToRow (28) + : : : +- Scan parquet default.catalog_sales (27) + : : +- ReusedExchange (30) + : +- BroadcastExchange (43) + : +- * Project (42) + : +- * BroadcastHashJoin LeftSemi BuildRight (41) + : :- * Filter (35) + : : +- * ColumnarToRow (34) + : : +- Scan parquet default.date_dim (33) + : +- BroadcastExchange (40) + : +- * Project (39) + : +- * Filter (38) + : +- * ColumnarToRow (37) + : +- Scan parquet default.date_dim (36) + +- BroadcastExchange (76) + +- * Filter (75) + +- * HashAggregate (74) + +- Exchange (73) + +- * HashAggregate (72) + +- * Project (71) + +- * BroadcastHashJoin Inner BuildRight (70) + :- * Project (58) + : +- * BroadcastHashJoin Inner BuildRight (57) + : :- * Filter (55) + : : +- * ColumnarToRow (54) + : : +- Scan parquet default.web_sales (53) + : +- ReusedExchange (56) + +- BroadcastExchange (69) + +- * Project (68) + +- * BroadcastHashJoin LeftSemi BuildRight (67) + :- * Filter (61) + : +- * ColumnarToRow (60) + : +- Scan parquet default.date_dim (59) + +- BroadcastExchange (66) + +- * Project (65) + +- * Filter (64) + +- * ColumnarToRow (63) + +- Scan parquet default.date_dim (62) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] + +(3) Filter [codegen id : 4] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [2]: [i_item_sk#4, i_item_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#4, i_item_id#5] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#4, i_item_id#5] +Condition : (isnotnull(i_item_sk#4) AND isnotnull(i_item_id#5)) + +(7) BroadcastExchange +Input [2]: [i_item_sk#4, i_item_id#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(9) Project [codegen id : 4] +Output [3]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_item_id#5] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#4, i_item_id#5] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_date#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#7, d_date#8] + +(12) Filter [codegen id : 3] +Input [2]: [d_date_sk#7, d_date#8] +Condition : isnotnull(d_date_sk#7) + +(13) Scan parquet default.date_dim +Output [2]: [d_date#8, d_week_seq#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 2] +Input [2]: [d_date#8, d_week_seq#9] + +(15) Filter [codegen id : 2] +Input [2]: [d_date#8, d_week_seq#9] +Condition : (isnotnull(d_week_seq#9) AND (d_week_seq#9 = Subquery scalar-subquery#10, [id=#11])) + +(16) Project [codegen id : 2] +Output [1]: [d_date#8 AS d_date#8#12] +Input [2]: [d_date#8, d_week_seq#9] + +(17) BroadcastExchange +Input [1]: [d_date#8#12] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#13] + +(18) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date#8] +Right keys [1]: [d_date#8#12] +Join condition: None + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#8] + +(20) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(22) Project [codegen id : 4] +Output [2]: [ss_ext_sales_price#3, i_item_id#5] +Input [4]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_item_id#5, d_date_sk#7] + +(23) HashAggregate [codegen id : 4] +Input [2]: [ss_ext_sales_price#3, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#15] +Results [2]: [i_item_id#5, sum#16] + +(24) Exchange +Input [2]: [i_item_id#5, sum#16] +Arguments: hashpartitioning(i_item_id#5, 5), true, [id=#17] + +(25) HashAggregate [codegen id : 15] +Input [2]: [i_item_id#5, sum#16] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#18] +Results [2]: [i_item_id#5 AS item_id#19, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#18,17,2) AS ss_item_rev#20] + +(26) Filter [codegen id : 15] +Input [2]: [item_id#19, ss_item_rev#20] +Condition : isnotnull(ss_item_rev#20) + +(27) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 8] +Input [3]: [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#23] + +(29) Filter [codegen id : 8] +Input [3]: [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#23] +Condition : (isnotnull(cs_item_sk#22) AND isnotnull(cs_sold_date_sk#21)) + +(30) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#4, i_item_id#5] + +(31) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_item_sk#22] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(32) Project [codegen id : 8] +Output [3]: [cs_sold_date_sk#21, cs_ext_sales_price#23, i_item_id#5] +Input [5]: [cs_sold_date_sk#21, cs_item_sk#22, cs_ext_sales_price#23, i_item_sk#4, i_item_id#5] + +(33) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_date#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 7] +Input [2]: [d_date_sk#7, d_date#8] + +(35) Filter [codegen id : 7] +Input [2]: [d_date_sk#7, d_date#8] +Condition : isnotnull(d_date_sk#7) + +(36) Scan parquet default.date_dim +Output [2]: [d_date#8, d_week_seq#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [2]: [d_date#8, d_week_seq#9] + +(38) Filter [codegen id : 6] +Input [2]: [d_date#8, d_week_seq#9] +Condition : (isnotnull(d_week_seq#9) AND (d_week_seq#9 = ReusedSubquery Subquery scalar-subquery#10, [id=#11])) + +(39) Project [codegen id : 6] +Output [1]: [d_date#8 AS d_date#8#24] +Input [2]: [d_date#8, d_week_seq#9] + +(40) BroadcastExchange +Input [1]: [d_date#8#24] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#25] + +(41) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [d_date#8] +Right keys [1]: [d_date#8#24] +Join condition: None + +(42) Project [codegen id : 7] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#8] + +(43) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] + +(44) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cs_sold_date_sk#21] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(45) Project [codegen id : 8] +Output [2]: [cs_ext_sales_price#23, i_item_id#5] +Input [4]: [cs_sold_date_sk#21, cs_ext_sales_price#23, i_item_id#5, d_date_sk#7] + +(46) HashAggregate [codegen id : 8] +Input [2]: [cs_ext_sales_price#23, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#23))] +Aggregate Attributes [1]: [sum#27] +Results [2]: [i_item_id#5, sum#28] + +(47) Exchange +Input [2]: [i_item_id#5, sum#28] +Arguments: hashpartitioning(i_item_id#5, 5), true, [id=#29] + +(48) HashAggregate [codegen id : 9] +Input [2]: [i_item_id#5, sum#28] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#23))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#23))#30] +Results [2]: [i_item_id#5 AS item_id#31, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#23))#30,17,2) AS cs_item_rev#32] + +(49) Filter [codegen id : 9] +Input [2]: [item_id#31, cs_item_rev#32] +Condition : isnotnull(cs_item_rev#32) + +(50) BroadcastExchange +Input [2]: [item_id#31, cs_item_rev#32] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#33] + +(51) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [item_id#19] +Right keys [1]: [item_id#31] +Join condition: ((((cast(ss_item_rev#20 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#32)), DecimalType(19,3), true)) AND (cast(ss_item_rev#20 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#32)), DecimalType(20,3), true))) AND (cast(cs_item_rev#32 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#20)), DecimalType(19,3), true))) AND (cast(cs_item_rev#32 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#20)), DecimalType(20,3), true))) + +(52) Project [codegen id : 15] +Output [3]: [item_id#19, ss_item_rev#20, cs_item_rev#32] +Input [4]: [item_id#19, ss_item_rev#20, item_id#31, cs_item_rev#32] + +(53) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#34, ws_item_sk#35, ws_ext_sales_price#36] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(54) ColumnarToRow [codegen id : 13] +Input [3]: [ws_sold_date_sk#34, ws_item_sk#35, ws_ext_sales_price#36] + +(55) Filter [codegen id : 13] +Input [3]: [ws_sold_date_sk#34, ws_item_sk#35, ws_ext_sales_price#36] +Condition : (isnotnull(ws_item_sk#35) AND isnotnull(ws_sold_date_sk#34)) + +(56) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#4, i_item_id#5] + +(57) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_item_sk#35] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(58) Project [codegen id : 13] +Output [3]: [ws_sold_date_sk#34, ws_ext_sales_price#36, i_item_id#5] +Input [5]: [ws_sold_date_sk#34, ws_item_sk#35, ws_ext_sales_price#36, i_item_sk#4, i_item_id#5] + +(59) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_date#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(60) ColumnarToRow [codegen id : 12] +Input [2]: [d_date_sk#7, d_date#8] + +(61) Filter [codegen id : 12] +Input [2]: [d_date_sk#7, d_date#8] +Condition : isnotnull(d_date_sk#7) + +(62) Scan parquet default.date_dim +Output [2]: [d_date#8, d_week_seq#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq)] +ReadSchema: struct + +(63) ColumnarToRow [codegen id : 11] +Input [2]: [d_date#8, d_week_seq#9] + +(64) Filter [codegen id : 11] +Input [2]: [d_date#8, d_week_seq#9] +Condition : (isnotnull(d_week_seq#9) AND (d_week_seq#9 = ReusedSubquery Subquery scalar-subquery#10, [id=#11])) + +(65) Project [codegen id : 11] +Output [1]: [d_date#8 AS d_date#8#37] +Input [2]: [d_date#8, d_week_seq#9] + +(66) BroadcastExchange +Input [1]: [d_date#8#37] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#38] + +(67) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [d_date#8] +Right keys [1]: [d_date#8#37] +Join condition: None + +(68) Project [codegen id : 12] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#8] + +(69) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#39] + +(70) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ws_sold_date_sk#34] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(71) Project [codegen id : 13] +Output [2]: [ws_ext_sales_price#36, i_item_id#5] +Input [4]: [ws_sold_date_sk#34, ws_ext_sales_price#36, i_item_id#5, d_date_sk#7] + +(72) HashAggregate [codegen id : 13] +Input [2]: [ws_ext_sales_price#36, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#36))] +Aggregate Attributes [1]: [sum#40] +Results [2]: [i_item_id#5, sum#41] + +(73) Exchange +Input [2]: [i_item_id#5, sum#41] +Arguments: hashpartitioning(i_item_id#5, 5), true, [id=#42] + +(74) HashAggregate [codegen id : 14] +Input [2]: [i_item_id#5, sum#41] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#36))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#36))#43] +Results [2]: [i_item_id#5 AS item_id#44, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#36))#43,17,2) AS ws_item_rev#45] + +(75) Filter [codegen id : 14] +Input [2]: [item_id#44, ws_item_rev#45] +Condition : isnotnull(ws_item_rev#45) + +(76) BroadcastExchange +Input [2]: [item_id#44, ws_item_rev#45] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#46] + +(77) BroadcastHashJoin [codegen id : 15] +Left keys [1]: [item_id#19] +Right keys [1]: [item_id#44] +Join condition: ((((((((cast(ss_item_rev#20 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#45)), DecimalType(19,3), true)) AND (cast(ss_item_rev#20 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#45)), DecimalType(20,3), true))) AND (cast(cs_item_rev#32 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#45)), DecimalType(19,3), true))) AND (cast(cs_item_rev#32 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#45)), DecimalType(20,3), true))) AND (cast(ws_item_rev#45 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#20)), DecimalType(19,3), true))) AND (cast(ws_item_rev#45 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#20)), DecimalType(20,3), true))) AND (cast(ws_item_rev#45 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#32)), DecimalType(19,3), true))) AND (cast(ws_item_rev#45 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#32)), DecimalType(20,3), true))) + +(78) Project [codegen id : 15] +Output [8]: [item_id#19, ss_item_rev#20, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(18,2))) + promote_precision(cast(cs_item_rev#32 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#45 as decimal(19,2)))), DecimalType(19,2), true))), DecimalType(38,21), true)) / 3.000000000000000000000), DecimalType(38,21), true)) * 100.000000000000000000000), DecimalType(38,17), true) AS ss_dev#47, cs_item_rev#32, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(cs_item_rev#32 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(18,2))) + promote_precision(cast(cs_item_rev#32 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#45 as decimal(19,2)))), DecimalType(19,2), true))), DecimalType(38,21), true)) / 3.000000000000000000000), DecimalType(38,21), true)) * 100.000000000000000000000), DecimalType(38,17), true) AS cs_dev#48, ws_item_rev#45, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ws_item_rev#45 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(18,2))) + promote_precision(cast(cs_item_rev#32 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#45 as decimal(19,2)))), DecimalType(19,2), true))), DecimalType(38,21), true)) / 3.000000000000000000000), DecimalType(38,21), true)) * 100.000000000000000000000), DecimalType(38,17), true) AS ws_dev#49, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#20 as decimal(18,2))) + promote_precision(cast(cs_item_rev#32 as decimal(18,2)))), DecimalType(18,2), true) as decimal(19,2))) + promote_precision(cast(ws_item_rev#45 as decimal(19,2)))), DecimalType(19,2), true)) / 3.00), DecimalType(23,6), true) AS average#50] +Input [5]: [item_id#19, ss_item_rev#20, cs_item_rev#32, item_id#44, ws_item_rev#45] + +(79) TakeOrderedAndProject +Input [8]: [item_id#19, ss_item_rev#20, ss_dev#47, cs_item_rev#32, cs_dev#48, ws_item_rev#45, ws_dev#49, average#50] +Arguments: 100, [item_id#19 ASC NULLS FIRST, ss_item_rev#20 ASC NULLS FIRST], [item_id#19, ss_item_rev#20, ss_dev#47, cs_item_rev#32, cs_dev#48, ws_item_rev#45, ws_dev#49, average#50] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 15 Hosting Expression = Subquery scalar-subquery#10, [id=#11] +* Project (83) ++- * Filter (82) + +- * ColumnarToRow (81) + +- Scan parquet default.date_dim (80) + + +(80) Scan parquet default.date_dim +Output [2]: [d_date#8, d_week_seq#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), EqualTo(d_date,2000-01-03)] +ReadSchema: struct + +(81) ColumnarToRow [codegen id : 1] +Input [2]: [d_date#8, d_week_seq#9] + +(82) Filter [codegen id : 1] +Input [2]: [d_date#8, d_week_seq#9] +Condition : (isnotnull(d_date#8) AND (d_date#8 = 10959)) + +(83) Project [codegen id : 1] +Output [1]: [d_week_seq#9] +Input [2]: [d_date#8, d_week_seq#9] + +Subquery:2 Hosting operator id = 38 Hosting Expression = ReusedSubquery Subquery scalar-subquery#10, [id=#11] + +Subquery:3 Hosting operator id = 64 Hosting Expression = ReusedSubquery Subquery scalar-subquery#10, [id=#11] + + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/simplified.txt index ea45e8665..5081efe94 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q58/simplified.txt @@ -1,133 +1,125 @@ -TakeOrderedAndProject [average,cs_dev,cs_item_rev,item_id,ss_dev,ss_item_rev,ws_dev,ws_item_rev] - WholeStageCodegen - Project [cs_item_rev,item_id,ss_item_rev,ws_item_rev] - BroadcastHashJoin [cs_item_rev,item_id,item_id,ss_item_rev,ws_item_rev] - Project [cs_item_rev,item_id,ss_item_rev] - BroadcastHashJoin [cs_item_rev,item_id,item_id,ss_item_rev] +TakeOrderedAndProject [item_id,ss_item_rev,ss_dev,cs_item_rev,cs_dev,ws_item_rev,ws_dev,average] + WholeStageCodegen (15) + Project [item_id,ss_item_rev,cs_item_rev,ws_item_rev] + BroadcastHashJoin [item_id,item_id,ss_item_rev,ws_item_rev,cs_item_rev] + Project [item_id,ss_item_rev,cs_item_rev] + BroadcastHashJoin [item_id,item_id,ss_item_rev,cs_item_rev] Filter [ss_item_rev] - HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [item_id,ss_item_rev,sum,sum(UnscaledValue(ss_ext_sales_price))] + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),item_id,ss_item_rev,sum] InputAdapter Exchange [i_item_id] #1 - WholeStageCodegen - HashAggregate [i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [i_item_id,ss_ext_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_item_id,ss_ext_sales_price,ss_sold_date_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + WholeStageCodegen (4) + HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_item_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_ext_sales_price,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [i_item_id,i_item_sk] - Filter [i_item_id,i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] + WholeStageCodegen (1) + Filter [i_item_sk,i_item_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (3) Project [d_date_sk] BroadcastHashJoin [d_date,d_date] - Project [d_date,d_date_sk] - Filter [d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date] Filter [d_week_seq] Subquery #1 - WholeStageCodegen + WholeStageCodegen (1) Project [d_week_seq] Filter [d_date] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] - Subquery #1 - WholeStageCodegen - Project [d_week_seq] - Filter [d_date] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (9) Filter [cs_item_rev] - HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [cs_item_rev,item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] + HashAggregate [i_item_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),item_id,cs_item_rev,sum] InputAdapter Exchange [i_item_id] #6 - WholeStageCodegen - HashAggregate [cs_ext_sales_price,i_item_id,sum,sum] [sum,sum] + WholeStageCodegen (8) + HashAggregate [i_item_id,cs_ext_sales_price] [sum,sum] Project [cs_ext_sales_price,i_item_id] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_ext_sales_price,cs_sold_date_sk,i_item_id] + Project [cs_sold_date_sk,cs_ext_sales_price,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] - Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_ext_sales_price] InputAdapter - ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #2 + ReusedExchange [i_item_sk,i_item_id] #2 InputAdapter BroadcastExchange #7 - WholeStageCodegen + WholeStageCodegen (7) Project [d_date_sk] BroadcastHashJoin [d_date,d_date] - Project [d_date,d_date_sk] - Filter [d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter BroadcastExchange #8 - WholeStageCodegen + WholeStageCodegen (6) Project [d_date] Filter [d_week_seq] - Subquery #2 - WholeStageCodegen - Project [d_week_seq] - Filter [d_date] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] - Subquery #2 - WholeStageCodegen - Project [d_week_seq] - Filter [d_date] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + ReusedSubquery [d_week_seq] #1 + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] InputAdapter BroadcastExchange #9 - WholeStageCodegen + WholeStageCodegen (14) Filter [ws_item_rev] - HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [item_id,sum,sum(UnscaledValue(ws_ext_sales_price)),ws_item_rev] + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),item_id,ws_item_rev,sum] InputAdapter Exchange [i_item_id] #10 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum,ws_ext_sales_price] [sum,sum] - Project [i_item_id,ws_ext_sales_price] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [i_item_id,ws_ext_sales_price,ws_sold_date_sk] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + WholeStageCodegen (13) + HashAggregate [i_item_id,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,i_item_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_ext_sales_price,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_sales_price] InputAdapter - ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #2 + ReusedExchange [i_item_sk,i_item_id] #2 InputAdapter BroadcastExchange #11 - WholeStageCodegen + WholeStageCodegen (12) Project [d_date_sk] BroadcastHashJoin [d_date,d_date] - Project [d_date,d_date_sk] - Filter [d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter BroadcastExchange #12 - WholeStageCodegen + WholeStageCodegen (11) Project [d_date] Filter [d_week_seq] - Subquery #3 - WholeStageCodegen - Project [d_week_seq] - Filter [d_date] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] - Subquery #3 - WholeStageCodegen - Project [d_week_seq] - Filter [d_date] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + ReusedSubquery [d_week_seq] #1 + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/explain.txt index 1f9cdf6b6..6edd0e4b0 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/explain.txt @@ -1,43 +1,249 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[s_store_name1#1 ASC NULLS FIRST,s_store_id1#2 ASC NULLS FIRST,d_week_seq1#3 ASC NULLS FIRST], output=[s_store_name1#1,s_store_id1#2,d_week_seq1#3,(sun_sales1 / sun_sales2)#4,(mon_sales1 / mon_sales2)#5,(tue_sales1 / tue_sales2)#6,(wed_sales1 / wed_sales2)#7,(thu_sales1 / thu_sales2)#8,(fri_sales1 / fri_sales2)#9,(sat_sales1 / sat_sales2)#10]) -+- *(10) Project [s_store_name1#1, s_store_id1#2, d_week_seq1#3, CheckOverflow((promote_precision(sun_sales1#11) / promote_precision(sun_sales2#12)), DecimalType(37,20)) AS (sun_sales1 / sun_sales2)#4, CheckOverflow((promote_precision(mon_sales1#13) / promote_precision(mon_sales2#14)), DecimalType(37,20)) AS (mon_sales1 / mon_sales2)#5, CheckOverflow((promote_precision(tue_sales1#15) / promote_precision(tue_sales2#16)), DecimalType(37,20)) AS (tue_sales1 / tue_sales2)#6, CheckOverflow((promote_precision(wed_sales1#17) / promote_precision(wed_sales2#18)), DecimalType(37,20)) AS (wed_sales1 / wed_sales2)#7, CheckOverflow((promote_precision(thu_sales1#19) / promote_precision(thu_sales2#20)), DecimalType(37,20)) AS (thu_sales1 / thu_sales2)#8, CheckOverflow((promote_precision(fri_sales1#21) / promote_precision(fri_sales2#22)), DecimalType(37,20)) AS (fri_sales1 / fri_sales2)#9, CheckOverflow((promote_precision(sat_sales1#23) / promote_precision(sat_sales2#24)), DecimalType(37,20)) AS (sat_sales1 / sat_sales2)#10] - +- *(10) BroadcastHashJoin [s_store_id1#2, d_week_seq1#3], [s_store_id2#25, (d_week_seq2#26 - 52)], Inner, BuildRight - :- *(10) Project [s_store_name#27 AS s_store_name1#1, d_week_seq#28 AS d_week_seq1#3, s_store_id#29 AS s_store_id1#2, sun_sales#30 AS sun_sales1#11, mon_sales#31 AS mon_sales1#13, tue_sales#32 AS tue_sales1#15, wed_sales#33 AS wed_sales1#17, thu_sales#34 AS thu_sales1#19, fri_sales#35 AS fri_sales1#21, sat_sales#36 AS sat_sales1#23] - : +- *(10) BroadcastHashJoin [d_week_seq#28], [d_week_seq#37], Inner, BuildRight - : :- *(10) Project [d_week_seq#28, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#29, s_store_name#27] - : : +- *(10) BroadcastHashJoin [ss_store_sk#38], [s_store_sk#39], Inner, BuildRight - : : :- *(10) HashAggregate(keys=[d_week_seq#28, ss_store_sk#38], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#40 = Sunday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Monday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Tuesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Wednesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Thursday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Friday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Saturday) THEN ss_sales_price#41 ELSE null END))]) - : : : +- Exchange hashpartitioning(d_week_seq#28, ss_store_sk#38, 5) - : : : +- *(2) HashAggregate(keys=[d_week_seq#28, ss_store_sk#38], functions=[partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Sunday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Monday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Tuesday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Wednesday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Thursday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Friday) THEN ss_sales_price#41 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#40 = Saturday) THEN ss_sales_price#41 ELSE null END))]) - : : : +- *(2) Project [ss_store_sk#38, ss_sales_price#41, d_week_seq#28, d_day_name#40] - : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#42], [d_date_sk#43], Inner, BuildRight - : : : :- *(2) Project [ss_sold_date_sk#42, ss_store_sk#38, ss_sales_price#41] - : : : : +- *(2) Filter (isnotnull(ss_sold_date_sk#42) && isnotnull(ss_store_sk#38)) - : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#42,ss_store_sk#38,ss_sales_price#41] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#43, d_week_seq#28, d_day_name#40] - : : : +- *(1) Filter (isnotnull(d_date_sk#43) && isnotnull(d_week_seq#28)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#43,d_week_seq#28,d_day_name#40] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [s_store_sk#39, s_store_id#29, s_store_name#27] - : : +- *(3) Filter (isnotnull(s_store_sk#39) && isnotnull(s_store_id#29)) - : : +- *(3) FileScan parquet default.store[s_store_sk#39,s_store_id#29,s_store_name#27] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [d_week_seq#37] - : +- *(4) Filter (((isnotnull(d_month_seq#44) && (d_month_seq#44 >= 1212)) && (d_month_seq#44 <= 1223)) && isnotnull(d_week_seq#37)) - : +- *(4) FileScan parquet default.date_dim[d_month_seq#44,d_week_seq#37] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223),..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52))) - +- *(9) Project [d_week_seq#28 AS d_week_seq2#26, s_store_id#29 AS s_store_id2#25, sun_sales#30 AS sun_sales2#12, mon_sales#31 AS mon_sales2#14, tue_sales#32 AS tue_sales2#16, wed_sales#33 AS wed_sales2#18, thu_sales#34 AS thu_sales2#20, fri_sales#35 AS fri_sales2#22, sat_sales#36 AS sat_sales2#24] - +- *(9) BroadcastHashJoin [d_week_seq#28], [d_week_seq#45], Inner, BuildRight - :- *(9) Project [d_week_seq#28, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#29] - : +- *(9) BroadcastHashJoin [ss_store_sk#38], [s_store_sk#39], Inner, BuildRight - : :- *(9) HashAggregate(keys=[d_week_seq#28, ss_store_sk#38], functions=[sum(UnscaledValue(CASE WHEN (d_day_name#40 = Sunday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Monday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Tuesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Wednesday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Thursday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Friday) THEN ss_sales_price#41 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#40 = Saturday) THEN ss_sales_price#41 ELSE null END))]) - : : +- ReusedExchange [d_week_seq#28, ss_store_sk#38, sum#46, sum#47, sum#48, sum#49, sum#50, sum#51, sum#52], Exchange hashpartitioning(d_week_seq#28, ss_store_sk#38, 5) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(7) Project [s_store_sk#39, s_store_id#29] - : +- *(7) Filter (isnotnull(s_store_sk#39) && isnotnull(s_store_id#29)) - : +- *(7) FileScan parquet default.store[s_store_sk#39,s_store_id#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(8) Project [d_week_seq#45] - +- *(8) Filter (((isnotnull(d_month_seq#53) && (d_month_seq#53 >= 1224)) && (d_month_seq#53 <= 1235)) && isnotnull(d_week_seq#45)) - +- *(8) FileScan parquet default.date_dim[d_month_seq#53,d_week_seq#45] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1224), LessThanOrEqual(d_month_seq,1235),..., ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (44) ++- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (18) + : : +- * BroadcastHashJoin Inner BuildRight (17) + : : :- * HashAggregate (12) + : : : +- Exchange (11) + : : : +- * HashAggregate (10) + : : : +- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet default.store (13) + : +- BroadcastExchange (23) + : +- * Project (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.date_dim (19) + +- BroadcastExchange (41) + +- * Project (40) + +- * BroadcastHashJoin Inner BuildRight (39) + :- * Project (33) + : +- * BroadcastHashJoin Inner BuildRight (32) + : :- * HashAggregate (27) + : : +- ReusedExchange (26) + : +- BroadcastExchange (31) + : +- * Filter (30) + : +- * ColumnarToRow (29) + : +- Scan parquet default.store (28) + +- BroadcastExchange (38) + +- * Project (37) + +- * Filter (36) + +- * ColumnarToRow (35) + +- Scan parquet default.date_dim (34) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] + +(3) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk), IsNotNull(d_week_seq)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Condition : (isnotnull(d_date_sk#4) AND isnotnull(d_week_seq#5)) + +(7) BroadcastExchange +Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(9) Project [codegen id : 2] +Output [4]: [ss_store_sk#2, ss_sales_price#3, d_week_seq#5, d_day_name#6] +Input [6]: [ss_sold_date_sk#1, ss_store_sk#2, ss_sales_price#3, d_date_sk#4, d_week_seq#5, d_day_name#6] + +(10) HashAggregate [codegen id : 2] +Input [4]: [ss_store_sk#2, ss_sales_price#3, d_week_seq#5, d_day_name#6] +Keys [2]: [d_week_seq#5, ss_store_sk#2] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] +Aggregate Attributes [7]: [sum#8, sum#9, sum#10, sum#11, sum#12, sum#13, sum#14] +Results [9]: [d_week_seq#5, ss_store_sk#2, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] + +(11) Exchange +Input [9]: [d_week_seq#5, ss_store_sk#2, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] +Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#2, 5), true, [id=#22] + +(12) HashAggregate [codegen id : 10] +Input [9]: [d_week_seq#5, ss_store_sk#2, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] +Keys [2]: [d_week_seq#5, ss_store_sk#2] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#29] +Results [9]: [d_week_seq#5, ss_store_sk#2, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#23,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#24,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#25,17,2) AS tue_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#26,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#27,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#28,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#29,17,2) AS sat_sales#36] + +(13) Scan parquet default.store +Output [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] + +(15) Filter [codegen id : 3] +Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Condition : (isnotnull(s_store_sk#37) AND isnotnull(s_store_id#38)) + +(16) BroadcastExchange +Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#40] + +(17) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#37] +Join condition: None + +(18) Project [codegen id : 10] +Output [10]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39] +Input [12]: [d_week_seq#5, ss_store_sk#2, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#37, s_store_id#38, s_store_name#39] + +(19) Scan parquet default.date_dim +Output [2]: [d_month_seq#41, d_week_seq#42] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_week_seq)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 4] +Input [2]: [d_month_seq#41, d_week_seq#42] + +(21) Filter [codegen id : 4] +Input [2]: [d_month_seq#41, d_week_seq#42] +Condition : (((isnotnull(d_month_seq#41) AND (d_month_seq#41 >= 1212)) AND (d_month_seq#41 <= 1223)) AND isnotnull(d_week_seq#42)) + +(22) Project [codegen id : 4] +Output [1]: [d_week_seq#42] +Input [2]: [d_month_seq#41, d_week_seq#42] + +(23) BroadcastExchange +Input [1]: [d_week_seq#42] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#43] + +(24) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [d_week_seq#5] +Right keys [1]: [d_week_seq#42] +Join condition: None + +(25) Project [codegen id : 10] +Output [10]: [s_store_name#39 AS s_store_name1#44, d_week_seq#5 AS d_week_seq1#45, s_store_id#38 AS s_store_id1#46, sun_sales#30 AS sun_sales1#47, mon_sales#31 AS mon_sales1#48, tue_sales#32 AS tue_sales1#49, wed_sales#33 AS wed_sales1#50, thu_sales#34 AS thu_sales1#51, fri_sales#35 AS fri_sales1#52, sat_sales#36 AS sat_sales1#53] +Input [11]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39, d_week_seq#42] + +(26) ReusedExchange [Reuses operator id: 11] +Output [9]: [d_week_seq#5, ss_store_sk#2, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59, sum#60] + +(27) HashAggregate [codegen id : 9] +Input [9]: [d_week_seq#5, ss_store_sk#2, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59, sum#60] +Keys [2]: [d_week_seq#5, ss_store_sk#2] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#61, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#62, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#63, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#64, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#65, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#66, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#67] +Results [9]: [d_week_seq#5, ss_store_sk#2, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday) THEN ss_sales_price#3 ELSE null END))#61,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday) THEN ss_sales_price#3 ELSE null END))#62,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday) THEN ss_sales_price#3 ELSE null END))#63,17,2) AS tue_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#3 ELSE null END))#64,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday) THEN ss_sales_price#3 ELSE null END))#65,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday) THEN ss_sales_price#3 ELSE null END))#66,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday) THEN ss_sales_price#3 ELSE null END))#67,17,2) AS sat_sales#36] + +(28) Scan parquet default.store +Output [2]: [s_store_sk#37, s_store_id#38] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 7] +Input [2]: [s_store_sk#37, s_store_id#38] + +(30) Filter [codegen id : 7] +Input [2]: [s_store_sk#37, s_store_id#38] +Condition : (isnotnull(s_store_sk#37) AND isnotnull(s_store_id#38)) + +(31) BroadcastExchange +Input [2]: [s_store_sk#37, s_store_id#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#68] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#37] +Join condition: None + +(33) Project [codegen id : 9] +Output [9]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38] +Input [11]: [d_week_seq#5, ss_store_sk#2, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#37, s_store_id#38] + +(34) Scan parquet default.date_dim +Output [2]: [d_month_seq#69, d_week_seq#70] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1224), LessThanOrEqual(d_month_seq,1235), IsNotNull(d_week_seq)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 8] +Input [2]: [d_month_seq#69, d_week_seq#70] + +(36) Filter [codegen id : 8] +Input [2]: [d_month_seq#69, d_week_seq#70] +Condition : (((isnotnull(d_month_seq#69) AND (d_month_seq#69 >= 1224)) AND (d_month_seq#69 <= 1235)) AND isnotnull(d_week_seq#70)) + +(37) Project [codegen id : 8] +Output [1]: [d_week_seq#70] +Input [2]: [d_month_seq#69, d_week_seq#70] + +(38) BroadcastExchange +Input [1]: [d_week_seq#70] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#71] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [d_week_seq#5] +Right keys [1]: [d_week_seq#70] +Join condition: None + +(40) Project [codegen id : 9] +Output [9]: [d_week_seq#5 AS d_week_seq2#72, s_store_id#38 AS s_store_id2#73, sun_sales#30 AS sun_sales2#74, mon_sales#31 AS mon_sales2#75, tue_sales#32 AS tue_sales2#76, wed_sales#33 AS wed_sales2#77, thu_sales#34 AS thu_sales2#78, fri_sales#35 AS fri_sales2#79, sat_sales#36 AS sat_sales2#80] +Input [10]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, d_week_seq#70] + +(41) BroadcastExchange +Input [9]: [d_week_seq2#72, s_store_id2#73, sun_sales2#74, mon_sales2#75, tue_sales2#76, wed_sales2#77, thu_sales2#78, fri_sales2#79, sat_sales2#80] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52)),false), [id=#81] + +(42) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [s_store_id1#46, d_week_seq1#45] +Right keys [2]: [s_store_id2#73, (d_week_seq2#72 - 52)] +Join condition: None + +(43) Project [codegen id : 10] +Output [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, CheckOverflow((promote_precision(sun_sales1#47) / promote_precision(sun_sales2#74)), DecimalType(37,20), true) AS (sun_sales1 / sun_sales2)#82, CheckOverflow((promote_precision(mon_sales1#48) / promote_precision(mon_sales2#75)), DecimalType(37,20), true) AS (mon_sales1 / mon_sales2)#83, CheckOverflow((promote_precision(tue_sales1#49) / promote_precision(tue_sales2#76)), DecimalType(37,20), true) AS (tue_sales1 / tue_sales2)#84, CheckOverflow((promote_precision(wed_sales1#50) / promote_precision(wed_sales2#77)), DecimalType(37,20), true) AS (wed_sales1 / wed_sales2)#85, CheckOverflow((promote_precision(thu_sales1#51) / promote_precision(thu_sales2#78)), DecimalType(37,20), true) AS (thu_sales1 / thu_sales2)#86, CheckOverflow((promote_precision(fri_sales1#52) / promote_precision(fri_sales2#79)), DecimalType(37,20), true) AS (fri_sales1 / fri_sales2)#87, CheckOverflow((promote_precision(sat_sales1#53) / promote_precision(sat_sales2#80)), DecimalType(37,20), true) AS (sat_sales1 / sat_sales2)#88] +Input [19]: [s_store_name1#44, d_week_seq1#45, s_store_id1#46, sun_sales1#47, mon_sales1#48, tue_sales1#49, wed_sales1#50, thu_sales1#51, fri_sales1#52, sat_sales1#53, d_week_seq2#72, s_store_id2#73, sun_sales2#74, mon_sales2#75, tue_sales2#76, wed_sales2#77, thu_sales2#78, fri_sales2#79, sat_sales2#80] + +(44) TakeOrderedAndProject +Input [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#82, (mon_sales1 / mon_sales2)#83, (tue_sales1 / tue_sales2)#84, (wed_sales1 / wed_sales2)#85, (thu_sales1 / thu_sales2)#86, (fri_sales1 / fri_sales2)#87, (sat_sales1 / sat_sales2)#88] +Arguments: 100, [s_store_name1#44 ASC NULLS FIRST, s_store_id1#46 ASC NULLS FIRST, d_week_seq1#45 ASC NULLS FIRST], [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#82, (mon_sales1 / mon_sales2)#83, (tue_sales1 / tue_sales2)#84, (wed_sales1 / wed_sales2)#85, (thu_sales1 / thu_sales2)#86, (fri_sales1 / fri_sales2)#87, (sat_sales1 / sat_sales2)#88] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/simplified.txt index 038219a53..3f3cc409e 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q59/simplified.txt @@ -1,58 +1,66 @@ -TakeOrderedAndProject [(fri_sales1 / fri_sales2),(mon_sales1 / mon_sales2),(sat_sales1 / sat_sales2),(sun_sales1 / sun_sales2),(thu_sales1 / thu_sales2),(tue_sales1 / tue_sales2),(wed_sales1 / wed_sales2),d_week_seq1,s_store_id1,s_store_name1] - WholeStageCodegen - Project [d_week_seq1,fri_sales1,fri_sales2,mon_sales1,mon_sales2,s_store_id1,s_store_name1,sat_sales1,sat_sales2,sun_sales1,sun_sales2,thu_sales1,thu_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2] - BroadcastHashJoin [d_week_seq1,d_week_seq2,s_store_id1,s_store_id2] - Project [d_week_seq,fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] +TakeOrderedAndProject [s_store_name1,s_store_id1,d_week_seq1,(sun_sales1 / sun_sales2),(mon_sales1 / mon_sales2),(tue_sales1 / tue_sales2),(wed_sales1 / wed_sales2),(thu_sales1 / thu_sales2),(fri_sales1 / fri_sales2),(sat_sales1 / sat_sales2)] + WholeStageCodegen (10) + Project [s_store_name1,s_store_id1,d_week_seq1,sun_sales1,sun_sales2,mon_sales1,mon_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2,thu_sales1,thu_sales2,fri_sales1,fri_sales2,sat_sales1,sat_sales2] + BroadcastHashJoin [s_store_id1,d_week_seq1,s_store_id2,d_week_seq2] + Project [s_store_name,d_week_seq,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - Project [d_week_seq,fri_sales,mon_sales,s_store_id,s_store_name,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] - BroadcastHashJoin [s_store_sk,ss_store_sk] - HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] InputAdapter Exchange [d_week_seq,ss_store_sk] #1 - WholeStageCodegen - HashAggregate [d_day_name,d_week_seq,ss_sales_price,ss_store_sk,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [d_day_name,d_week_seq,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_sales_price,ss_sold_date_sk,ss_store_sk] + WholeStageCodegen (2) + HashAggregate [d_week_seq,ss_store_sk,d_day_name,ss_sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [ss_store_sk,ss_sales_price,d_week_seq,d_day_name] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_sales_price] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [d_date_sk,d_day_name,d_week_seq] - Filter [d_date_sk,d_week_seq] - Scan parquet default.date_dim [d_date_sk,d_day_name,d_week_seq] [d_date_sk,d_day_name,d_week_seq] + WholeStageCodegen (1) + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq,d_day_name] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [s_store_id,s_store_name,s_store_sk] - Filter [s_store_id,s_store_sk] - Scan parquet default.store [s_store_id,s_store_name,s_store_sk] [s_store_id,s_store_name,s_store_sk] + WholeStageCodegen (3) + Filter [s_store_sk,s_store_id] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id,s_store_name] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (4) Project [d_week_seq] Filter [d_month_seq,d_week_seq] - Scan parquet default.date_dim [d_month_seq,d_week_seq] [d_month_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_week_seq] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [d_week_seq,fri_sales,mon_sales,s_store_id,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] + WholeStageCodegen (9) + Project [d_week_seq,s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] BroadcastHashJoin [d_week_seq,d_week_seq] - Project [d_week_seq,fri_sales,mon_sales,s_store_id,sat_sales,sun_sales,thu_sales,tue_sales,wed_sales] - BroadcastHashJoin [s_store_sk,ss_store_sk] - HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END))] [fri_sales,mon_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum,sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sun_sales,thu_sales,tue_sales,wed_sales] + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,s_store_id] + BroadcastHashJoin [ss_store_sk,s_store_sk] + HashAggregate [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday ) THEN ss_sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday ) THEN ss_sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] InputAdapter - ReusedExchange [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] #1 + ReusedExchange [d_week_seq,ss_store_sk,sum,sum,sum,sum,sum,sum,sum] #1 InputAdapter BroadcastExchange #6 - WholeStageCodegen - Project [s_store_id,s_store_sk] - Filter [s_store_id,s_store_sk] - Scan parquet default.store [s_store_id,s_store_sk] [s_store_id,s_store_sk] + WholeStageCodegen (7) + Filter [s_store_sk,s_store_id] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id] InputAdapter BroadcastExchange #7 - WholeStageCodegen + WholeStageCodegen (8) Project [d_week_seq] Filter [d_month_seq,d_week_seq] - Scan parquet default.date_dim [d_month_seq,d_week_seq] [d_month_seq,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_week_seq] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/explain.txt index 68c396079..822d24b2f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/explain.txt @@ -1,58 +1,301 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[cnt#1 ASC NULLS FIRST], output=[state#2,cnt#1]) -+- *(8) Project [state#2, cnt#1] - +- *(8) Filter (count(1)#3 >= 10) - +- *(8) HashAggregate(keys=[ca_state#4], functions=[count(1)]) - +- Exchange hashpartitioning(ca_state#4, 5) - +- *(7) HashAggregate(keys=[ca_state#4], functions=[partial_count(1)]) - +- *(7) Project [ca_state#4] - +- *(7) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight - :- *(7) Project [ca_state#4, ss_item_sk#5] - : +- *(7) BroadcastHashJoin [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight - : :- *(7) Project [ca_state#4, ss_sold_date_sk#7, ss_item_sk#5] - : : +- *(7) BroadcastHashJoin [c_customer_sk#9], [ss_customer_sk#10], Inner, BuildRight - : : :- *(7) Project [ca_state#4, c_customer_sk#9] - : : : +- *(7) BroadcastHashJoin [ca_address_sk#11], [c_current_addr_sk#12], Inner, BuildRight - : : : :- *(7) Project [ca_address_sk#11, ca_state#4] - : : : : +- *(7) Filter isnotnull(ca_address_sk#11) - : : : : +- *(7) FileScan parquet default.customer_address[ca_address_sk#11,ca_state#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : : +- *(1) Project [c_customer_sk#9, c_current_addr_sk#12] - : : : +- *(1) Filter (isnotnull(c_current_addr_sk#12) && isnotnull(c_customer_sk#9)) - : : : +- *(1) FileScan parquet default.customer[c_customer_sk#9,c_current_addr_sk#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) - : : +- *(2) Project [ss_sold_date_sk#7, ss_item_sk#5, ss_customer_sk#10] - : : +- *(2) Filter ((isnotnull(ss_customer_sk#10) && isnotnull(ss_sold_date_sk#7)) && isnotnull(ss_item_sk#5)) - : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#7,ss_item_sk#5,ss_customer_sk#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [d_date_sk#8] - : +- *(3) Filter ((isnotnull(d_month_seq#13) && (d_month_seq#13 = Subquery subquery982)) && isnotnull(d_date_sk#8)) - : : +- Subquery subquery982 - : : +- *(2) HashAggregate(keys=[d_month_seq#13], functions=[]) - : : +- Exchange hashpartitioning(d_month_seq#13, 5) - : : +- *(1) HashAggregate(keys=[d_month_seq#13], functions=[]) - : : +- *(1) Project [d_month_seq#13] - : : +- *(1) Filter (((isnotnull(d_year#14) && isnotnull(d_moy#15)) && (d_year#14 = 2000)) && (d_moy#15 = 1)) - : : +- *(1) FileScan parquet default.date_dim[d_month_seq#13,d_year#14,d_moy#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)], ReadSchema: struct - : +- *(3) FileScan parquet default.date_dim[d_date_sk#8,d_month_seq#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)], ReadSchema: struct - : +- Subquery subquery982 - : +- *(2) HashAggregate(keys=[d_month_seq#13], functions=[]) - : +- Exchange hashpartitioning(d_month_seq#13, 5) - : +- *(1) HashAggregate(keys=[d_month_seq#13], functions=[]) - : +- *(1) Project [d_month_seq#13] - : +- *(1) Filter (((isnotnull(d_year#14) && isnotnull(d_moy#15)) && (d_year#14 = 2000)) && (d_moy#15 = 1)) - : +- *(1) FileScan parquet default.date_dim[d_month_seq#13,d_year#14,d_moy#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(6) Project [i_item_sk#6] - +- *(6) Filter (cast(i_current_price#16 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#17)), DecimalType(14,7))) - +- *(6) BroadcastHashJoin [i_category#18], [i_category#18#19], LeftOuter, BuildRight - :- *(6) Project [i_item_sk#6, i_current_price#16, i_category#18] - : +- *(6) Filter (isnotnull(i_current_price#16) && isnotnull(i_item_sk#6)) - : +- *(6) FileScan parquet default.item[i_item_sk#6,i_current_price#16,i_category#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) - +- *(5) HashAggregate(keys=[i_category#18], functions=[avg(UnscaledValue(i_current_price#16))]) - +- Exchange hashpartitioning(i_category#18, 5) - +- *(4) HashAggregate(keys=[i_category#18], functions=[partial_avg(UnscaledValue(i_current_price#16))]) - +- *(4) Project [i_current_price#16, i_category#18] - +- *(4) Filter isnotnull(i_category#18) - +- *(4) FileScan parquet default.item[i_current_price#16,i_category#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (44) ++- * Project (43) + +- * Filter (42) + +- * HashAggregate (41) + +- Exchange (40) + +- * HashAggregate (39) + +- * Project (38) + +- * BroadcastHashJoin Inner BuildRight (37) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.customer_address (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.customer (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.store_sales (10) + : +- BroadcastExchange (20) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.date_dim (16) + +- BroadcastExchange (36) + +- * Project (35) + +- * Filter (34) + +- * BroadcastHashJoin LeftOuter BuildRight (33) + :- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.item (23) + +- BroadcastExchange (32) + +- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Filter (28) + +- * ColumnarToRow (27) + +- Scan parquet default.item (26) + + +(1) Scan parquet default.customer_address +Output [2]: [ca_address_sk#1, ca_state#2] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#1, ca_state#2] + +(3) Filter [codegen id : 7] +Input [2]: [ca_address_sk#1, ca_state#2] +Condition : isnotnull(ca_address_sk#1) + +(4) Scan parquet default.customer +Output [2]: [c_customer_sk#3, c_current_addr_sk#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] + +(6) Filter [codegen id : 1] +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Condition : (isnotnull(c_current_addr_sk#4) AND isnotnull(c_customer_sk#3)) + +(7) BroadcastExchange +Input [2]: [c_customer_sk#3, c_current_addr_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#5] + +(8) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ca_address_sk#1] +Right keys [1]: [c_current_addr_sk#4] +Join condition: None + +(9) Project [codegen id : 7] +Output [2]: [ca_state#2, c_customer_sk#3] +Input [4]: [ca_address_sk#1, ca_state#2, c_customer_sk#3, c_current_addr_sk#4] + +(10) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] + +(12) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] +Condition : ((isnotnull(ss_customer_sk#8) AND isnotnull(ss_sold_date_sk#6)) AND isnotnull(ss_item_sk#7)) + +(13) BroadcastExchange +Input [3]: [ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, false] as bigint)),false), [id=#9] + +(14) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_customer_sk#3] +Right keys [1]: [ss_customer_sk#8] +Join condition: None + +(15) Project [codegen id : 7] +Output [3]: [ca_state#2, ss_sold_date_sk#6, ss_item_sk#7] +Input [5]: [ca_state#2, c_customer_sk#3, ss_sold_date_sk#6, ss_item_sk#7, ss_customer_sk#8] + +(16) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_month_seq#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#10, d_month_seq#11] + +(18) Filter [codegen id : 3] +Input [2]: [d_date_sk#10, d_month_seq#11] +Condition : ((isnotnull(d_month_seq#11) AND (d_month_seq#11 = Subquery scalar-subquery#12, [id=#13])) AND isnotnull(d_date_sk#10)) + +(19) Project [codegen id : 3] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_month_seq#11] + +(20) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(21) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_sold_date_sk#6] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(22) Project [codegen id : 7] +Output [2]: [ca_state#2, ss_item_sk#7] +Input [4]: [ca_state#2, ss_sold_date_sk#6, ss_item_sk#7, d_date_sk#10] + +(23) Scan parquet default.item +Output [3]: [i_item_sk#15, i_current_price#16, i_category#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 6] +Input [3]: [i_item_sk#15, i_current_price#16, i_category#17] + +(25) Filter [codegen id : 6] +Input [3]: [i_item_sk#15, i_current_price#16, i_category#17] +Condition : (isnotnull(i_current_price#16) AND isnotnull(i_item_sk#15)) + +(26) Scan parquet default.item +Output [2]: [i_current_price#16, i_category#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 4] +Input [2]: [i_current_price#16, i_category#17] + +(28) Filter [codegen id : 4] +Input [2]: [i_current_price#16, i_category#17] +Condition : isnotnull(i_category#17) + +(29) HashAggregate [codegen id : 4] +Input [2]: [i_current_price#16, i_category#17] +Keys [1]: [i_category#17] +Functions [1]: [partial_avg(UnscaledValue(i_current_price#16))] +Aggregate Attributes [2]: [sum#18, count#19] +Results [3]: [i_category#17, sum#20, count#21] + +(30) Exchange +Input [3]: [i_category#17, sum#20, count#21] +Arguments: hashpartitioning(i_category#17, 5), true, [id=#22] + +(31) HashAggregate [codegen id : 5] +Input [3]: [i_category#17, sum#20, count#21] +Keys [1]: [i_category#17] +Functions [1]: [avg(UnscaledValue(i_current_price#16))] +Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#16))#23] +Results [2]: [cast((avg(UnscaledValue(i_current_price#16))#23 / 100.0) as decimal(11,6)) AS avg(i_current_price)#24, i_category#17 AS i_category#17#25] + +(32) BroadcastExchange +Input [2]: [avg(i_current_price)#24, i_category#17#25] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#26] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_category#17] +Right keys [1]: [i_category#17#25] +Join condition: None + +(34) Filter [codegen id : 6] +Input [5]: [i_item_sk#15, i_current_price#16, i_category#17, avg(i_current_price)#24, i_category#17#25] +Condition : (cast(i_current_price#16 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#24)), DecimalType(14,7), true)) + +(35) Project [codegen id : 6] +Output [1]: [i_item_sk#15] +Input [5]: [i_item_sk#15, i_current_price#16, i_category#17, avg(i_current_price)#24, i_category#17#25] + +(36) BroadcastExchange +Input [1]: [i_item_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] + +(37) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#7] +Right keys [1]: [i_item_sk#15] +Join condition: None + +(38) Project [codegen id : 7] +Output [1]: [ca_state#2] +Input [3]: [ca_state#2, ss_item_sk#7, i_item_sk#15] + +(39) HashAggregate [codegen id : 7] +Input [1]: [ca_state#2] +Keys [1]: [ca_state#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#28] +Results [2]: [ca_state#2, count#29] + +(40) Exchange +Input [2]: [ca_state#2, count#29] +Arguments: hashpartitioning(ca_state#2, 5), true, [id=#30] + +(41) HashAggregate [codegen id : 8] +Input [2]: [ca_state#2, count#29] +Keys [1]: [ca_state#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#31] +Results [3]: [ca_state#2 AS state#32, count(1)#31 AS cnt#33, count(1)#31 AS count(1)#34] + +(42) Filter [codegen id : 8] +Input [3]: [state#32, cnt#33, count(1)#34] +Condition : (count(1)#34 >= 10) + +(43) Project [codegen id : 8] +Output [2]: [state#32, cnt#33] +Input [3]: [state#32, cnt#33, count(1)#34] + +(44) TakeOrderedAndProject +Input [2]: [state#32, cnt#33] +Arguments: 100, [cnt#33 ASC NULLS FIRST], [state#32, cnt#33] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 18 Hosting Expression = Subquery scalar-subquery#12, [id=#13] +* HashAggregate (51) ++- Exchange (50) + +- * HashAggregate (49) + +- * Project (48) + +- * Filter (47) + +- * ColumnarToRow (46) + +- Scan parquet default.date_dim (45) + + +(45) Scan parquet default.date_dim +Output [3]: [d_month_seq#11, d_year#35, d_moy#36] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)] +ReadSchema: struct + +(46) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#11, d_year#35, d_moy#36] + +(47) Filter [codegen id : 1] +Input [3]: [d_month_seq#11, d_year#35, d_moy#36] +Condition : (((isnotnull(d_year#35) AND isnotnull(d_moy#36)) AND (d_year#35 = 2000)) AND (d_moy#36 = 1)) + +(48) Project [codegen id : 1] +Output [1]: [d_month_seq#11] +Input [3]: [d_month_seq#11, d_year#35, d_moy#36] + +(49) HashAggregate [codegen id : 1] +Input [1]: [d_month_seq#11] +Keys [1]: [d_month_seq#11] +Functions: [] +Aggregate Attributes: [] +Results [1]: [d_month_seq#11] + +(50) Exchange +Input [1]: [d_month_seq#11] +Arguments: hashpartitioning(d_month_seq#11, 5), true, [id=#37] + +(51) HashAggregate [codegen id : 2] +Input [1]: [d_month_seq#11] +Keys [1]: [d_month_seq#11] +Functions: [] +Aggregate Attributes: [] +Results [1]: [d_month_seq#11] + + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/simplified.txt index 2bf134280..97ba16294 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q6/simplified.txt @@ -1,78 +1,77 @@ TakeOrderedAndProject [cnt,state] - WholeStageCodegen - Project [cnt,state] + WholeStageCodegen (8) + Project [state,cnt] Filter [count(1)] - HashAggregate [ca_state,count,count(1)] [cnt,count,count(1),count(1),state] + HashAggregate [ca_state,count] [count(1),state,cnt,count(1),count] InputAdapter Exchange [ca_state] #1 - WholeStageCodegen - HashAggregate [ca_state,count,count] [count,count] + WholeStageCodegen (7) + HashAggregate [ca_state] [count,count] Project [ca_state] - BroadcastHashJoin [i_item_sk,ss_item_sk] + BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ca_state,ss_item_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ca_state,ss_item_sk,ss_sold_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ca_state,ss_sold_date_sk,ss_item_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_customer_sk,ca_state] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [ca_address_sk,ca_state] - Filter [ca_address_sk] - Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + Project [ca_state,c_customer_sk] + BroadcastHashJoin [ca_address_sk,c_current_addr_sk] + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [c_current_addr_sk,c_customer_sk] - Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] + WholeStageCodegen (1) + Filter [c_current_addr_sk,c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [ss_customer_sk,ss_item_sk,ss_sold_date_sk] - Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk] [ss_customer_sk,ss_item_sk,ss_sold_date_sk] + WholeStageCodegen (2) + Filter [ss_customer_sk,ss_sold_date_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (3) Project [d_date_sk] - Filter [d_date_sk,d_month_seq] + Filter [d_month_seq,d_date_sk] Subquery #1 - WholeStageCodegen + WholeStageCodegen (2) HashAggregate [d_month_seq] InputAdapter Exchange [d_month_seq] #5 - WholeStageCodegen + WholeStageCodegen (1) HashAggregate [d_month_seq] Project [d_month_seq] - Filter [d_moy,d_year] - Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] - Subquery #1 - WholeStageCodegen - HashAggregate [d_month_seq] - InputAdapter - Exchange [d_month_seq] #5 - WholeStageCodegen - HashAggregate [d_month_seq] - Project [d_month_seq] - Filter [d_moy,d_year] - Scan parquet default.date_dim [d_month_seq,d_moy,d_year] [d_month_seq,d_moy,d_year] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (6) Project [i_item_sk] - Filter [avg(i_current_price),i_current_price] + Filter [i_current_price,avg(i_current_price)] BroadcastHashJoin [i_category,i_category] - Project [i_category,i_current_price,i_item_sk] - Filter [i_current_price,i_item_sk] - Scan parquet default.item [i_category,i_current_price,i_item_sk] [i_category,i_current_price,i_item_sk] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_current_price,i_category] InputAdapter BroadcastExchange #7 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(i_current_price)),count,i_category,sum] [avg(UnscaledValue(i_current_price)),avg(i_current_price),count,i_category,sum] + WholeStageCodegen (5) + HashAggregate [i_category,sum,count] [avg(UnscaledValue(i_current_price)),avg(i_current_price),i_category,sum,count] InputAdapter Exchange [i_category] #8 - WholeStageCodegen - HashAggregate [count,count,i_category,i_current_price,sum,sum] [count,count,sum,sum] - Project [i_category,i_current_price] - Filter [i_category] - Scan parquet default.item [i_category,i_current_price] [i_category,i_current_price] + WholeStageCodegen (4) + HashAggregate [i_category,i_current_price] [sum,count,sum,count] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_current_price,i_category] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/explain.txt index cdee71697..f838f8f1a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/explain.txt @@ -1,65 +1,378 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST,total_sales#2 ASC NULLS FIRST], output=[i_item_id#1,total_sales#2]) -+- *(20) HashAggregate(keys=[i_item_id#1], functions=[sum(total_sales#3)]) - +- Exchange hashpartitioning(i_item_id#1, 5) - +- *(19) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(total_sales#3)]) - +- Union - :- *(6) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- Exchange hashpartitioning(i_item_id#1, 5) - : +- *(5) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- *(5) Project [ss_ext_sales_price#4, i_item_id#1] - : +- *(5) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight - : :- *(5) Project [ss_item_sk#5, ss_ext_sales_price#4] - : : +- *(5) BroadcastHashJoin [ss_addr_sk#7], [ca_address_sk#8], Inner, BuildRight - : : :- *(5) Project [ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] - : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight - : : : :- *(5) Project [ss_sold_date_sk#9, ss_item_sk#5, ss_addr_sk#7, ss_ext_sales_price#4] - : : : : +- *(5) Filter ((isnotnull(ss_sold_date_sk#9) && isnotnull(ss_addr_sk#7)) && isnotnull(ss_item_sk#5)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#5,ss_addr_sk#7,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#10] - : : : +- *(1) Filter ((((isnotnull(d_year#11) && isnotnull(d_moy#12)) && (d_year#11 = 1998)) && (d_moy#12 = 9)) && isnotnull(d_date_sk#10)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#10,d_year#11,d_moy#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,9), IsNotNull(d_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [ca_address_sk#8] - : : +- *(2) Filter ((isnotnull(ca_gmt_offset#13) && (ca_gmt_offset#13 = -5.00)) && isnotnull(ca_address_sk#8)) - : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) BroadcastHashJoin [i_item_id#1], [i_item_id#1#14], LeftSemi, BuildRight - : :- *(4) Project [i_item_sk#6, i_item_id#1] - : : +- *(4) Filter isnotnull(i_item_sk#6) - : : +- *(4) FileScan parquet default.item[i_item_sk#6,i_item_id#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : +- *(3) Project [i_item_id#1 AS i_item_id#1#14] - : +- *(3) Filter (isnotnull(i_category#15) && (i_category#15 = Music)) - : +- *(3) FileScan parquet default.item[i_item_id#1,i_category#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Music)], ReadSchema: struct - :- *(12) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(cs_ext_sales_price#16))]) - : +- Exchange hashpartitioning(i_item_id#1, 5) - : +- *(11) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#16))]) - : +- *(11) Project [cs_ext_sales_price#16, i_item_id#1] - : +- *(11) BroadcastHashJoin [cs_item_sk#17], [i_item_sk#6], Inner, BuildRight - : :- *(11) Project [cs_item_sk#17, cs_ext_sales_price#16] - : : +- *(11) BroadcastHashJoin [cs_bill_addr_sk#18], [ca_address_sk#8], Inner, BuildRight - : : :- *(11) Project [cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] - : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#19], [d_date_sk#10], Inner, BuildRight - : : : :- *(11) Project [cs_sold_date_sk#19, cs_bill_addr_sk#18, cs_item_sk#17, cs_ext_sales_price#16] - : : : : +- *(11) Filter ((isnotnull(cs_sold_date_sk#19) && isnotnull(cs_bill_addr_sk#18)) && isnotnull(cs_item_sk#17)) - : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#19,cs_bill_addr_sk#18,cs_item_sk#17,cs_ext_sales_price#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [i_item_sk#6, i_item_id#1], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(18) HashAggregate(keys=[i_item_id#1], functions=[sum(UnscaledValue(ws_ext_sales_price#20))]) - +- Exchange hashpartitioning(i_item_id#1, 5) - +- *(17) HashAggregate(keys=[i_item_id#1], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#20))]) - +- *(17) Project [ws_ext_sales_price#20, i_item_id#1] - +- *(17) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#6], Inner, BuildRight - :- *(17) Project [ws_item_sk#21, ws_ext_sales_price#20] - : +- *(17) BroadcastHashJoin [ws_bill_addr_sk#22], [ca_address_sk#8], Inner, BuildRight - : :- *(17) Project [ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] - : : +- *(17) BroadcastHashJoin [ws_sold_date_sk#23], [d_date_sk#10], Inner, BuildRight - : : :- *(17) Project [ws_sold_date_sk#23, ws_item_sk#21, ws_bill_addr_sk#22, ws_ext_sales_price#20] - : : : +- *(17) Filter ((isnotnull(ws_sold_date_sk#23) && isnotnull(ws_bill_addr_sk#22)) && isnotnull(ws_item_sk#21)) - : : : +- *(17) FileScan parquet default.web_sales[ws_sold_date_sk#23,ws_item_sk#21,ws_bill_addr_sk#22,ws_ext_sales_price#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#10], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [ca_address_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [i_item_sk#6, i_item_id#1], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +TakeOrderedAndProject (67) ++- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- Union (63) + :- * HashAggregate (32) + : +- Exchange (31) + : +- * HashAggregate (30) + : +- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.customer_address (11) + : +- BroadcastExchange (27) + : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : :- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.item (18) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet default.item (21) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (41) + : : +- * BroadcastHashJoin Inner BuildRight (40) + : : :- * Project (38) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.catalog_sales (33) + : : : +- ReusedExchange (36) + : : +- ReusedExchange (39) + : +- ReusedExchange (42) + +- * HashAggregate (62) + +- Exchange (61) + +- * HashAggregate (60) + +- * Project (59) + +- * BroadcastHashJoin Inner BuildRight (58) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * Project (53) + : : +- * BroadcastHashJoin Inner BuildRight (52) + : : :- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet default.web_sales (48) + : : +- ReusedExchange (51) + : +- ReusedExchange (54) + +- ReusedExchange (57) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] + +(3) Filter [codegen id : 5] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,9), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 9)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 5] +Output [3]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, d_date_sk#5] + +(11) Scan parquet default.customer_address +Output [2]: [ca_address_sk#9, ca_gmt_offset#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] +Condition : ((isnotnull(ca_gmt_offset#10) AND (ca_gmt_offset#10 = -5.00)) AND isnotnull(ca_address_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [ca_address_sk#9] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(15) BroadcastExchange +Input [1]: [ca_address_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(17) Project [codegen id : 5] +Output [2]: [ss_item_sk#2, ss_ext_sales_price#4] +Input [4]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, ca_address_sk#9] + +(18) Scan parquet default.item +Output [2]: [i_item_sk#12, i_item_id#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#12, i_item_id#13] + +(20) Filter [codegen id : 4] +Input [2]: [i_item_sk#12, i_item_id#13] +Condition : isnotnull(i_item_sk#12) + +(21) Scan parquet default.item +Output [2]: [i_item_id#13, i_category#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Music)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_id#13, i_category#14] + +(23) Filter [codegen id : 3] +Input [2]: [i_item_id#13, i_category#14] +Condition : (isnotnull(i_category#14) AND (i_category#14 = Music)) + +(24) Project [codegen id : 3] +Output [1]: [i_item_id#13 AS i_item_id#13#15] +Input [2]: [i_item_id#13, i_category#14] + +(25) BroadcastExchange +Input [1]: [i_item_id#13#15] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#16] + +(26) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_id#13] +Right keys [1]: [i_item_id#13#15] +Join condition: None + +(27) BroadcastExchange +Input [2]: [i_item_sk#12, i_item_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(29) Project [codegen id : 5] +Output [2]: [ss_ext_sales_price#4, i_item_id#13] +Input [4]: [ss_item_sk#2, ss_ext_sales_price#4, i_item_sk#12, i_item_id#13] + +(30) HashAggregate [codegen id : 5] +Input [2]: [ss_ext_sales_price#4, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [1]: [sum#18] +Results [2]: [i_item_id#13, sum#19] + +(31) Exchange +Input [2]: [i_item_id#13, sum#19] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#20] + +(32) HashAggregate [codegen id : 6] +Input [2]: [i_item_id#13, sum#19] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#4))#21] +Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#21,17,2) AS total_sales#22] + +(33) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] + +(35) Filter [codegen id : 11] +Input [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Condition : ((isnotnull(cs_sold_date_sk#23) AND isnotnull(cs_bill_addr_sk#24)) AND isnotnull(cs_item_sk#25)) + +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#23] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] +Input [5]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, d_date_sk#5] + +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_addr_sk#24] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(41) Project [codegen id : 11] +Output [2]: [cs_item_sk#25, cs_ext_sales_price#26] +Input [4]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, ca_address_sk#9] + +(42) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#12, i_item_id#13] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#25] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(44) Project [codegen id : 11] +Output [2]: [cs_ext_sales_price#26, i_item_id#13] +Input [4]: [cs_item_sk#25, cs_ext_sales_price#26, i_item_sk#12, i_item_id#13] + +(45) HashAggregate [codegen id : 11] +Input [2]: [cs_ext_sales_price#26, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#26))] +Aggregate Attributes [1]: [sum#27] +Results [2]: [i_item_id#13, sum#28] + +(46) Exchange +Input [2]: [i_item_id#13, sum#28] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#29] + +(47) HashAggregate [codegen id : 12] +Input [2]: [i_item_id#13, sum#28] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#26))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#26))#30] +Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#26))#30,17,2) AS total_sales#31] + +(48) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 17] +Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] + +(50) Filter [codegen id : 17] +Input [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Condition : ((isnotnull(ws_sold_date_sk#32) AND isnotnull(ws_bill_addr_sk#34)) AND isnotnull(ws_item_sk#33)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(52) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(53) Project [codegen id : 17] +Output [3]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] +Input [5]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, d_date_sk#5] + +(54) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] + +(55) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_bill_addr_sk#34] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(56) Project [codegen id : 17] +Output [2]: [ws_item_sk#33, ws_ext_sales_price#35] +Input [4]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, ca_address_sk#9] + +(57) ReusedExchange [Reuses operator id: 27] +Output [2]: [i_item_sk#12, i_item_id#13] + +(58) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#33] +Right keys [1]: [i_item_sk#12] +Join condition: None + +(59) Project [codegen id : 17] +Output [2]: [ws_ext_sales_price#35, i_item_id#13] +Input [4]: [ws_item_sk#33, ws_ext_sales_price#35, i_item_sk#12, i_item_id#13] + +(60) HashAggregate [codegen id : 17] +Input [2]: [ws_ext_sales_price#35, i_item_id#13] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#35))] +Aggregate Attributes [1]: [sum#36] +Results [2]: [i_item_id#13, sum#37] + +(61) Exchange +Input [2]: [i_item_id#13, sum#37] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#38] + +(62) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#13, sum#37] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#35))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#35))#39] +Results [2]: [i_item_id#13, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#35))#39,17,2) AS total_sales#40] + +(63) Union + +(64) HashAggregate [codegen id : 19] +Input [2]: [i_item_id#13, total_sales#22] +Keys [1]: [i_item_id#13] +Functions [1]: [partial_sum(total_sales#22)] +Aggregate Attributes [2]: [sum#41, isEmpty#42] +Results [3]: [i_item_id#13, sum#43, isEmpty#44] + +(65) Exchange +Input [3]: [i_item_id#13, sum#43, isEmpty#44] +Arguments: hashpartitioning(i_item_id#13, 5), true, [id=#45] + +(66) HashAggregate [codegen id : 20] +Input [3]: [i_item_id#13, sum#43, isEmpty#44] +Keys [1]: [i_item_id#13] +Functions [1]: [sum(total_sales#22)] +Aggregate Attributes [1]: [sum(total_sales#22)#46] +Results [2]: [i_item_id#13, sum(total_sales#22)#46 AS total_sales#47] + +(67) TakeOrderedAndProject +Input [2]: [i_item_id#13, total_sales#47] +Arguments: 100, [i_item_id#13 ASC NULLS FIRST, total_sales#47 ASC NULLS FIRST], [i_item_id#13, total_sales#47] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/simplified.txt index 01ea963ce..fb9e4e507 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q60/simplified.txt @@ -1,91 +1,101 @@ TakeOrderedAndProject [i_item_id,total_sales] - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(total_sales)] [sum,sum(total_sales),total_sales] + WholeStageCodegen (20) + HashAggregate [i_item_id,sum,isEmpty] [sum(total_sales),total_sales,sum,isEmpty] InputAdapter Exchange [i_item_id] #1 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum,total_sales] [sum,sum] + WholeStageCodegen (19) + HashAggregate [i_item_id,total_sales] [sum,isEmpty,sum,isEmpty] InputAdapter Union - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_ext_sales_price)),total_sales] + WholeStageCodegen (6) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ss_ext_sales_price)),total_sales,sum] InputAdapter Exchange [i_item_id] #2 - WholeStageCodegen - HashAggregate [i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [i_item_id,ss_ext_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - Filter [ss_addr_sk,ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_addr_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + WholeStageCodegen (5) + HashAggregate [i_item_id,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_addr_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_addr_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (2) Project [ca_address_sk] - Filter [ca_address_sk,ca_gmt_offset] - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] + Filter [ca_gmt_offset,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (4) BroadcastHashJoin [i_item_id,i_item_id] - Project [i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id] InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (3) Project [i_item_id] Filter [i_category] - Scan parquet default.item [i_category,i_item_id] [i_category,i_item_id] - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(UnscaledValue(cs_ext_sales_price))] [sum,sum(UnscaledValue(cs_ext_sales_price)),total_sales] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_id,i_category] + WholeStageCodegen (12) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] InputAdapter Exchange [i_item_id] #7 - WholeStageCodegen - HashAggregate [cs_ext_sales_price,i_item_id,sum,sum] [sum,sum] + WholeStageCodegen (11) + HashAggregate [i_item_id,cs_ext_sales_price] [sum,sum] Project [cs_ext_sales_price,i_item_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_ext_sales_price,cs_item_sk] - BroadcastHashJoin [ca_address_sk,cs_bill_addr_sk] - Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk] + Project [cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] + Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] - Filter [cs_bill_addr_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] [cs_bill_addr_sk,cs_ext_sales_price,cs_item_sk,cs_sold_date_sk] + Filter [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 + ReusedExchange [d_date_sk] #3 InputAdapter - ReusedExchange [ca_address_sk] [ca_address_sk] #4 + ReusedExchange [ca_address_sk] #4 InputAdapter - ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #5 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(UnscaledValue(ws_ext_sales_price))] [sum,sum(UnscaledValue(ws_ext_sales_price)),total_sales] + ReusedExchange [i_item_sk,i_item_id] #5 + WholeStageCodegen (18) + HashAggregate [i_item_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] InputAdapter Exchange [i_item_id] #8 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum,ws_ext_sales_price] [sum,sum] - Project [i_item_id,ws_ext_sales_price] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_ext_sales_price,ws_item_sk] - BroadcastHashJoin [ca_address_sk,ws_bill_addr_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] - Filter [ws_bill_addr_sk,ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] [ws_bill_addr_sk,ws_ext_sales_price,ws_item_sk,ws_sold_date_sk] + WholeStageCodegen (17) + HashAggregate [i_item_id,ws_ext_sales_price] [sum,sum] + Project [ws_ext_sales_price,i_item_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] + Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_bill_addr_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 + ReusedExchange [d_date_sk] #3 InputAdapter - ReusedExchange [ca_address_sk] [ca_address_sk] #4 + ReusedExchange [ca_address_sk] #4 InputAdapter - ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #5 + ReusedExchange [i_item_sk,i_item_id] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/explain.txt index 7197bef72..f56f48726 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/explain.txt @@ -1,68 +1,396 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[promotions#1 ASC NULLS FIRST,total#2 ASC NULLS FIRST], output=[promotions#1,total#2,(CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#3]) -+- *(16) Project [promotions#1, total#2, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#1 as decimal(15,4))) / promote_precision(cast(total#2 as decimal(15,4)))), DecimalType(35,20))) * 100.00000000000000000000), DecimalType(38,19)) AS (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#3] - +- BroadcastNestedLoopJoin BuildRight, Inner - :- *(8) HashAggregate(keys=[], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- Exchange SinglePartition - : +- *(7) HashAggregate(keys=[], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) - : +- *(7) Project [ss_ext_sales_price#4] - : +- *(7) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight - : :- *(7) Project [ss_item_sk#5, ss_ext_sales_price#4] - : : +- *(7) BroadcastHashJoin [c_current_addr_sk#7], [ca_address_sk#8], Inner, BuildRight - : : :- *(7) Project [ss_item_sk#5, ss_ext_sales_price#4, c_current_addr_sk#7] - : : : +- *(7) BroadcastHashJoin [ss_customer_sk#9], [c_customer_sk#10], Inner, BuildRight - : : : :- *(7) Project [ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] - : : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#11], [d_date_sk#12], Inner, BuildRight - : : : : :- *(7) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] - : : : : : +- *(7) BroadcastHashJoin [ss_promo_sk#13], [p_promo_sk#14], Inner, BuildRight - : : : : : :- *(7) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_promo_sk#13, ss_ext_sales_price#4] - : : : : : : +- *(7) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight - : : : : : : :- *(7) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_store_sk#15, ss_promo_sk#13, ss_ext_sales_price#4] - : : : : : : : +- *(7) Filter ((((isnotnull(ss_store_sk#15) && isnotnull(ss_promo_sk#13)) && isnotnull(ss_sold_date_sk#11)) && isnotnull(ss_customer_sk#9)) && isnotnull(ss_item_sk#5)) - : : : : : : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#5,ss_customer_sk#9,ss_store_sk#15,ss_promo_sk#13,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_custome..., ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(2) Project [p_promo_sk#14] - : : : : : +- *(2) Filter ((((p_channel_dmail#18 = Y) || (p_channel_email#19 = Y)) || (p_channel_tv#20 = Y)) && isnotnull(p_promo_sk#14)) - : : : : : +- *(2) FileScan parquet default.promotion[p_promo_sk#14,p_channel_dmail#18,p_channel_email#19,p_channel_tv#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/promotion], PartitionFilters: [], PushedFilters: [Or(Or(EqualTo(p_channel_dmail,Y),EqualTo(p_channel_email,Y)),EqualTo(p_channel_tv,Y)), IsNotNull..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(3) Project [d_date_sk#12] - : : : : +- *(3) Filter ((((isnotnull(d_year#21) && isnotnull(d_moy#22)) && (d_year#21 = 1998)) && (d_moy#22 = 11)) && isnotnull(d_date_sk#12)) - : : : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_year#21,d_moy#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(4) Project [c_customer_sk#10, c_current_addr_sk#7] - : : : +- *(4) Filter (isnotnull(c_customer_sk#10) && isnotnull(c_current_addr_sk#7)) - : : : +- *(4) FileScan parquet default.customer[c_customer_sk#10,c_current_addr_sk#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(5) Project [ca_address_sk#8] - : : +- *(5) Filter ((isnotnull(ca_gmt_offset#23) && (ca_gmt_offset#23 = -5.00)) && isnotnull(ca_address_sk#8)) - : : +- *(5) FileScan parquet default.customer_address[ca_address_sk#8,ca_gmt_offset#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(6) Project [i_item_sk#6] - : +- *(6) Filter ((isnotnull(i_category#24) && (i_category#24 = Jewelry)) && isnotnull(i_item_sk#6)) - : +- *(6) FileScan parquet default.item[i_item_sk#6,i_category#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry), IsNotNull(i_item_sk)], ReadSchema: struct - +- BroadcastExchange IdentityBroadcastMode - +- *(15) HashAggregate(keys=[], functions=[sum(UnscaledValue(ss_ext_sales_price#4))]) - +- Exchange SinglePartition - +- *(14) HashAggregate(keys=[], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#4))]) - +- *(14) Project [ss_ext_sales_price#4] - +- *(14) BroadcastHashJoin [ss_item_sk#5], [i_item_sk#6], Inner, BuildRight - :- *(14) Project [ss_item_sk#5, ss_ext_sales_price#4] - : +- *(14) BroadcastHashJoin [c_current_addr_sk#7], [ca_address_sk#8], Inner, BuildRight - : :- *(14) Project [ss_item_sk#5, ss_ext_sales_price#4, c_current_addr_sk#7] - : : +- *(14) BroadcastHashJoin [ss_customer_sk#9], [c_customer_sk#10], Inner, BuildRight - : : :- *(14) Project [ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] - : : : +- *(14) BroadcastHashJoin [ss_sold_date_sk#11], [d_date_sk#12], Inner, BuildRight - : : : :- *(14) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_ext_sales_price#4] - : : : : +- *(14) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight - : : : : :- *(14) Project [ss_sold_date_sk#11, ss_item_sk#5, ss_customer_sk#9, ss_store_sk#15, ss_ext_sales_price#4] - : : : : : +- *(14) Filter (((isnotnull(ss_store_sk#15) && isnotnull(ss_sold_date_sk#11)) && isnotnull(ss_customer_sk#9)) && isnotnull(ss_item_sk#5)) - : : : : : +- *(14) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#5,ss_customer_sk#9,ss_store_sk#15,ss_ext_sales_price#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item..., ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6] + +(3) Filter [codegen id : 7] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6] +Condition : ((((isnotnull(ss_store_sk#4) AND isnotnull(ss_promo_sk#5)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.store +Output [2]: [s_store_sk#7, s_gmt_offset#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [s_store_sk#7, s_gmt_offset#8] + +(6) Filter [codegen id : 1] +Input [2]: [s_store_sk#7, s_gmt_offset#8] +Condition : ((isnotnull(s_gmt_offset#8) AND (s_gmt_offset#8 = -5.00)) AND isnotnull(s_store_sk#7)) + +(7) Project [codegen id : 1] +Output [1]: [s_store_sk#7] +Input [2]: [s_store_sk#7, s_gmt_offset#8] + +(8) BroadcastExchange +Input [1]: [s_store_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#7] +Join condition: None + +(10) Project [codegen id : 7] +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_promo_sk#5, ss_ext_sales_price#6] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_promo_sk#5, ss_ext_sales_price#6, s_store_sk#7] + +(11) Scan parquet default.promotion +Output [4]: [p_promo_sk#10, p_channel_dmail#11, p_channel_email#12, p_channel_tv#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [Or(Or(EqualTo(p_channel_dmail,Y),EqualTo(p_channel_email,Y)),EqualTo(p_channel_tv,Y)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [4]: [p_promo_sk#10, p_channel_dmail#11, p_channel_email#12, p_channel_tv#13] + +(13) Filter [codegen id : 2] +Input [4]: [p_promo_sk#10, p_channel_dmail#11, p_channel_email#12, p_channel_tv#13] +Condition : ((((p_channel_dmail#11 = Y) OR (p_channel_email#12 = Y)) OR (p_channel_tv#13 = Y)) AND isnotnull(p_promo_sk#10)) + +(14) Project [codegen id : 2] +Output [1]: [p_promo_sk#10] +Input [4]: [p_promo_sk#10, p_channel_dmail#11, p_channel_email#12, p_channel_tv#13] + +(15) BroadcastExchange +Input [1]: [p_promo_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(16) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_promo_sk#5] +Right keys [1]: [p_promo_sk#10] +Join condition: None + +(17) Project [codegen id : 7] +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_promo_sk#5, ss_ext_sales_price#6, p_promo_sk#10] + +(18) Scan parquet default.date_dim +Output [3]: [d_date_sk#15, d_year#16, d_moy#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] + +(20) Filter [codegen id : 3] +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] +Condition : ((((isnotnull(d_year#16) AND isnotnull(d_moy#17)) AND (d_year#16 = 1998)) AND (d_moy#17 = 11)) AND isnotnull(d_date_sk#15)) + +(21) Project [codegen id : 3] +Output [1]: [d_date_sk#15] +Input [3]: [d_date_sk#15, d_year#16, d_moy#17] + +(22) BroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(23) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(24) Project [codegen id : 7] +Output [3]: [ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6, d_date_sk#15] + +(25) Scan parquet default.customer +Output [2]: [c_customer_sk#19, c_current_addr_sk#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 4] +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] + +(27) Filter [codegen id : 4] +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Condition : (isnotnull(c_customer_sk#19) AND isnotnull(c_current_addr_sk#20)) + +(28) BroadcastExchange +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] + +(29) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#19] +Join condition: None + +(30) Project [codegen id : 7] +Output [3]: [ss_item_sk#2, ss_ext_sales_price#6, c_current_addr_sk#20] +Input [5]: [ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6, c_customer_sk#19, c_current_addr_sk#20] + +(31) Scan parquet default.customer_address +Output [2]: [ca_address_sk#22, ca_gmt_offset#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 5] +Input [2]: [ca_address_sk#22, ca_gmt_offset#23] + +(33) Filter [codegen id : 5] +Input [2]: [ca_address_sk#22, ca_gmt_offset#23] +Condition : ((isnotnull(ca_gmt_offset#23) AND (ca_gmt_offset#23 = -5.00)) AND isnotnull(ca_address_sk#22)) + +(34) Project [codegen id : 5] +Output [1]: [ca_address_sk#22] +Input [2]: [ca_address_sk#22, ca_gmt_offset#23] + +(35) BroadcastExchange +Input [1]: [ca_address_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] + +(36) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#20] +Right keys [1]: [ca_address_sk#22] +Join condition: None + +(37) Project [codegen id : 7] +Output [2]: [ss_item_sk#2, ss_ext_sales_price#6] +Input [4]: [ss_item_sk#2, ss_ext_sales_price#6, c_current_addr_sk#20, ca_address_sk#22] + +(38) Scan parquet default.item +Output [2]: [i_item_sk#25, i_category#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry), IsNotNull(i_item_sk)] +ReadSchema: struct + +(39) ColumnarToRow [codegen id : 6] +Input [2]: [i_item_sk#25, i_category#26] + +(40) Filter [codegen id : 6] +Input [2]: [i_item_sk#25, i_category#26] +Condition : ((isnotnull(i_category#26) AND (i_category#26 = Jewelry)) AND isnotnull(i_item_sk#25)) + +(41) Project [codegen id : 6] +Output [1]: [i_item_sk#25] +Input [2]: [i_item_sk#25, i_category#26] + +(42) BroadcastExchange +Input [1]: [i_item_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] + +(43) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#25] +Join condition: None + +(44) Project [codegen id : 7] +Output [1]: [ss_ext_sales_price#6] +Input [3]: [ss_item_sk#2, ss_ext_sales_price#6, i_item_sk#25] + +(45) HashAggregate [codegen id : 7] +Input [1]: [ss_ext_sales_price#6] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum#28] +Results [1]: [sum#29] + +(46) Exchange +Input [1]: [sum#29] +Arguments: SinglePartition, true, [id=#30] + +(47) HashAggregate [codegen id : 8] +Input [1]: [sum#29] +Keys: [] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#31] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#31,17,2) AS promotions#32] + +(48) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 14] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] + +(50) Filter [codegen id : 14] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] +Condition : (((isnotnull(ss_store_sk#4) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_item_sk#2)) + +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [s_store_sk#7] + +(52) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#7] +Join condition: None + +(53) Project [codegen id : 14] +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6, s_store_sk#7] + +(54) ReusedExchange [Reuses operator id: 22] +Output [1]: [d_date_sk#15] + +(55) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(56) Project [codegen id : 14] +Output [3]: [ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6, d_date_sk#15] + +(57) ReusedExchange [Reuses operator id: 28] +Output [2]: [c_customer_sk#19, c_current_addr_sk#20] + +(58) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#19] +Join condition: None + +(59) Project [codegen id : 14] +Output [3]: [ss_item_sk#2, ss_ext_sales_price#6, c_current_addr_sk#20] +Input [5]: [ss_item_sk#2, ss_customer_sk#3, ss_ext_sales_price#6, c_customer_sk#19, c_current_addr_sk#20] + +(60) ReusedExchange [Reuses operator id: 35] +Output [1]: [ca_address_sk#22] + +(61) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_current_addr_sk#20] +Right keys [1]: [ca_address_sk#22] +Join condition: None + +(62) Project [codegen id : 14] +Output [2]: [ss_item_sk#2, ss_ext_sales_price#6] +Input [4]: [ss_item_sk#2, ss_ext_sales_price#6, c_current_addr_sk#20, ca_address_sk#22] + +(63) ReusedExchange [Reuses operator id: 42] +Output [1]: [i_item_sk#25] + +(64) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#25] +Join condition: None + +(65) Project [codegen id : 14] +Output [1]: [ss_ext_sales_price#6] +Input [3]: [ss_item_sk#2, ss_ext_sales_price#6, i_item_sk#25] + +(66) HashAggregate [codegen id : 14] +Input [1]: [ss_ext_sales_price#6] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum#33] +Results [1]: [sum#34] + +(67) Exchange +Input [1]: [sum#34] +Arguments: SinglePartition, true, [id=#35] + +(68) HashAggregate [codegen id : 15] +Input [1]: [sum#34] +Keys: [] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#36] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#36,17,2) AS total#37] + +(69) BroadcastExchange +Input [1]: [total#37] +Arguments: IdentityBroadcastMode, [id=#38] + +(70) BroadcastNestedLoopJoin +Join condition: None + +(71) Project [codegen id : 16] +Output [3]: [promotions#32, total#37, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#32 as decimal(15,4))) / promote_precision(cast(total#37 as decimal(15,4)))), DecimalType(35,20), true)) * 100.00000000000000000000), DecimalType(38,19), true) AS (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] +Input [2]: [promotions#32, total#37] + +(72) TakeOrderedAndProject +Input [3]: [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] +Arguments: 100, [promotions#32 ASC NULLS FIRST, total#37 ASC NULLS FIRST], [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/simplified.txt index 7a6550c72..da7565167 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q61/simplified.txt @@ -1,92 +1,105 @@ -TakeOrderedAndProject [(CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20))),promotions,total] - WholeStageCodegen +TakeOrderedAndProject [promotions,total,(CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))] + WholeStageCodegen (16) Project [promotions,total] InputAdapter BroadcastNestedLoopJoin - WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(ss_ext_sales_price))] [promotions,sum,sum(UnscaledValue(ss_ext_sales_price))] + WholeStageCodegen (8) + HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum] InputAdapter Exchange #1 - WholeStageCodegen - HashAggregate [ss_ext_sales_price,sum,sum] [sum,sum] + WholeStageCodegen (7) + HashAggregate [ss_ext_sales_price] [sum,sum] Project [ss_ext_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - BroadcastHashJoin [p_promo_sk,ss_promo_sk] - Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_promo_sk,ss_sold_date_sk] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] - Filter [ss_customer_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] + Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_promo_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_promo_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (1) Project [s_store_sk] Filter [s_gmt_offset,s_store_sk] - Scan parquet default.store [s_gmt_offset,s_store_sk] [s_gmt_offset,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_gmt_offset] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (2) Project [p_promo_sk] Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] - Scan parquet default.promotion [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (3) Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [c_current_addr_sk,c_customer_sk] - Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] + WholeStageCodegen (4) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (5) Project [ca_address_sk] - Filter [ca_address_sk,ca_gmt_offset] - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] + Filter [ca_gmt_offset,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] InputAdapter BroadcastExchange #7 - WholeStageCodegen + WholeStageCodegen (6) Project [i_item_sk] Filter [i_category,i_item_sk] - Scan parquet default.item [i_category,i_item_sk] [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_category] BroadcastExchange #8 - WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(ss_ext_sales_price))] [sum,sum(UnscaledValue(ss_ext_sales_price)),total] + WholeStageCodegen (15) + HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum] InputAdapter Exchange #9 - WholeStageCodegen - HashAggregate [ss_ext_sales_price,sum,sum] [sum,sum] + WholeStageCodegen (14) + HashAggregate [ss_ext_sales_price] [sum,sum] Project [ss_ext_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] - Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] [ss_customer_sk,ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] + Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] InputAdapter - ReusedExchange [s_store_sk] [s_store_sk] #2 + ReusedExchange [s_store_sk] #2 InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #4 + ReusedExchange [d_date_sk] #4 InputAdapter - ReusedExchange [c_current_addr_sk,c_customer_sk] [c_current_addr_sk,c_customer_sk] #5 + ReusedExchange [c_customer_sk,c_current_addr_sk] #5 InputAdapter - ReusedExchange [ca_address_sk] [ca_address_sk] #6 + ReusedExchange [ca_address_sk] #6 InputAdapter - ReusedExchange [i_item_sk] [i_item_sk] #7 + ReusedExchange [i_item_sk] #7 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/explain.txt index f8c84a70e..05ce467c3 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/explain.txt @@ -1,32 +1,183 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[substring(w_warehouse_name, 1, 20)#1 ASC NULLS FIRST,sm_type#2 ASC NULLS FIRST,web_name#3 ASC NULLS FIRST], output=[substring(w_warehouse_name, 1, 20)#1,sm_type#2,web_name#3,30 days #4,31 - 60 days #5,61 - 90 days #6,91 - 120 days #7,>120 days #8]) -+- *(6) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3], functions=[sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 30) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 60) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 90) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) - +- Exchange hashpartitioning(substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3, 5) - +- *(5) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20) AS substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, web_name#3], functions=[partial_sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 30) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 60) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 90) && ((ws_ship_date_sk#11 - ws_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((ws_ship_date_sk#11 - ws_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) - +- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, w_warehouse_name#9, sm_type#2, web_name#3] - +- *(5) BroadcastHashJoin [ws_ship_date_sk#11], [d_date_sk#13], Inner, BuildRight - :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, w_warehouse_name#9, sm_type#2, web_name#3] - : +- *(5) BroadcastHashJoin [ws_web_site_sk#14], [web_site_sk#15], Inner, BuildRight - : :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, ws_web_site_sk#14, w_warehouse_name#9, sm_type#2] - : : +- *(5) BroadcastHashJoin [ws_ship_mode_sk#16], [sm_ship_mode_sk#17], Inner, BuildRight - : : :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, ws_web_site_sk#14, ws_ship_mode_sk#16, w_warehouse_name#9] - : : : +- *(5) BroadcastHashJoin [ws_warehouse_sk#18], [w_warehouse_sk#19], Inner, BuildRight - : : : :- *(5) Project [ws_sold_date_sk#12, ws_ship_date_sk#11, ws_web_site_sk#14, ws_ship_mode_sk#16, ws_warehouse_sk#18] - : : : : +- *(5) Filter (((isnotnull(ws_warehouse_sk#18) && isnotnull(ws_ship_mode_sk#16)) && isnotnull(ws_web_site_sk#14)) && isnotnull(ws_ship_date_sk#11)) - : : : : +- *(5) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_ship_date_sk#11,ws_web_site_sk#14,ws_ship_mode_sk#16,ws_warehouse_sk#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_ship_mode_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [sm_ship_mode_sk#17, sm_type#2] - : : +- *(2) Filter isnotnull(sm_ship_mode_sk#17) - : : +- *(2) FileScan parquet default.ship_mode[sm_ship_mode_sk#17,sm_type#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/ship_mode], PartitionFilters: [], PushedFilters: [IsNotNull(sm_ship_mode_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [web_site_sk#15, web_name#3] - : +- *(3) Filter isnotnull(web_site_sk#15) - : +- *(3) FileScan parquet default.web_site[web_site_sk#15,web_name#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(4) Project [d_date_sk#13] - +- *(4) Filter (((isnotnull(d_month_seq#20) && (d_month_seq#20 >= 1200)) && (d_month_seq#20 <= 1211)) && isnotnull(d_date_sk#13)) - +- *(4) FileScan parquet default.date_dim[d_date_sk#13,d_month_seq#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Project (28) + +- * BroadcastHashJoin Inner BuildRight (27) + :- * Project (21) + : +- * BroadcastHashJoin Inner BuildRight (20) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.warehouse (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.ship_mode (10) + : +- BroadcastExchange (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.web_site (16) + +- BroadcastExchange (26) + +- * Project (25) + +- * Filter (24) + +- * ColumnarToRow (23) + +- Scan parquet default.date_dim (22) + + +(1) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_ship_mode_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_ship_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5] + +(3) Filter [codegen id : 5] +Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5] +Condition : (((isnotnull(ws_warehouse_sk#5) AND isnotnull(ws_ship_mode_sk#4)) AND isnotnull(ws_web_site_sk#3)) AND isnotnull(ws_ship_date_sk#2)) + +(4) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] + +(6) Filter [codegen id : 1] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(7) BroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_warehouse_sk#5] +Right keys [1]: [w_warehouse_sk#6] +Join condition: None + +(9) Project [codegen id : 5] +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, w_warehouse_name#7] +Input [7]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5, w_warehouse_sk#6, w_warehouse_name#7] + +(10) Scan parquet default.ship_mode +Output [2]: [sm_ship_mode_sk#9, sm_type#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/ship_mode] +PushedFilters: [IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [sm_ship_mode_sk#9, sm_type#10] + +(12) Filter [codegen id : 2] +Input [2]: [sm_ship_mode_sk#9, sm_type#10] +Condition : isnotnull(sm_ship_mode_sk#9) + +(13) BroadcastExchange +Input [2]: [sm_ship_mode_sk#9, sm_type#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_ship_mode_sk#4] +Right keys [1]: [sm_ship_mode_sk#9] +Join condition: None + +(15) Project [codegen id : 5] +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, w_warehouse_name#7, sm_type#10] +Input [7]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, w_warehouse_name#7, sm_ship_mode_sk#9, sm_type#10] + +(16) Scan parquet default.web_site +Output [2]: [web_site_sk#12, web_name#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [web_site_sk#12, web_name#13] + +(18) Filter [codegen id : 3] +Input [2]: [web_site_sk#12, web_name#13] +Condition : isnotnull(web_site_sk#12) + +(19) BroadcastExchange +Input [2]: [web_site_sk#12, web_name#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_web_site_sk#3] +Right keys [1]: [web_site_sk#12] +Join condition: None + +(21) Project [codegen id : 5] +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#7, sm_type#10, web_name#13] +Input [7]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, w_warehouse_name#7, sm_type#10, web_site_sk#12, web_name#13] + +(22) Scan parquet default.date_dim +Output [2]: [d_date_sk#15, d_month_seq#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#15, d_month_seq#16] + +(24) Filter [codegen id : 4] +Input [2]: [d_date_sk#15, d_month_seq#16] +Condition : (((isnotnull(d_month_seq#16) AND (d_month_seq#16 >= 1200)) AND (d_month_seq#16 <= 1211)) AND isnotnull(d_date_sk#15)) + +(25) Project [codegen id : 4] +Output [1]: [d_date_sk#15] +Input [2]: [d_date_sk#15, d_month_seq#16] + +(26) BroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(27) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_ship_date_sk#2] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(28) Project [codegen id : 5] +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#7, sm_type#10, web_name#13] +Input [6]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#7, sm_type#10, web_name#13, d_date_sk#15] + +(29) HashAggregate [codegen id : 5] +Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#7, sm_type#10, web_name#13] +Keys [3]: [substr(w_warehouse_name#7, 1, 20) AS substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13] +Functions [5]: [partial_sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] +Results [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] + +(30) Exchange +Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] +Arguments: hashpartitioning(substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, 5), true, [id=#29] + +(31) HashAggregate [codegen id : 6] +Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] +Keys [3]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13] +Functions [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34] +Results [8]: [substr(w_warehouse_name#7, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, web_name#13, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] + +(32) TakeOrderedAndProject +Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#10, web_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] +Arguments: 100, [substr(w_warehouse_name, 1, 20)#35 ASC NULLS FIRST, sm_type#10 ASC NULLS FIRST, web_name#13 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#35, sm_type#10, web_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/simplified.txt index b8879730e..803326b2a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q62/simplified.txt @@ -1,42 +1,48 @@ -TakeOrderedAndProject [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sm_type,substring(w_warehouse_name, 1, 20),web_name] - WholeStageCodegen - HashAggregate [sm_type,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),web_name] [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) && ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) && ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) && ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint))] +TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,web_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] + WholeStageCodegen (6) + HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,web_name,sum,sum,sum,sum,sum] [sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] InputAdapter - Exchange [sm_type,substring(w_warehouse_name, 1, 20),web_name] #1 - WholeStageCodegen - HashAggregate [sm_type,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_warehouse_name,web_name,ws_ship_date_sk,ws_sold_date_sk] [substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [sm_type,w_warehouse_name,web_name,ws_ship_date_sk,ws_sold_date_sk] - BroadcastHashJoin [d_date_sk,ws_ship_date_sk] - Project [sm_type,w_warehouse_name,web_name,ws_ship_date_sk,ws_sold_date_sk] - BroadcastHashJoin [web_site_sk,ws_web_site_sk] - Project [sm_type,w_warehouse_name,ws_ship_date_sk,ws_sold_date_sk,ws_web_site_sk] - BroadcastHashJoin [sm_ship_mode_sk,ws_ship_mode_sk] - Project [w_warehouse_name,ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_web_site_sk] - BroadcastHashJoin [w_warehouse_sk,ws_warehouse_sk] - Project [ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_web_site_sk] - Filter [ws_ship_date_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_web_site_sk] - Scan parquet default.web_sales [ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_web_site_sk] [ws_ship_date_sk,ws_ship_mode_sk,ws_sold_date_sk,ws_warehouse_sk,ws_web_site_sk] + Exchange [substr(w_warehouse_name, 1, 20),sm_type,web_name] #1 + WholeStageCodegen (5) + HashAggregate [w_warehouse_name,sm_type,web_name,ws_ship_date_sk,ws_sold_date_sk] [sum,sum,sum,sum,sum,substr(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum] + Project [ws_sold_date_sk,ws_ship_date_sk,w_warehouse_name,sm_type,web_name] + BroadcastHashJoin [ws_ship_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_ship_date_sk,w_warehouse_name,sm_type,web_name] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_sold_date_sk,ws_ship_date_sk,ws_web_site_sk,w_warehouse_name,sm_type] + BroadcastHashJoin [ws_ship_mode_sk,sm_ship_mode_sk] + Project [ws_sold_date_sk,ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,w_warehouse_name] + BroadcastHashJoin [ws_warehouse_sk,w_warehouse_sk] + Filter [ws_warehouse_sk,ws_ship_mode_sk,ws_web_site_sk,ws_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [w_warehouse_name,w_warehouse_sk] - Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] + WholeStageCodegen (1) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [sm_ship_mode_sk,sm_type] - Filter [sm_ship_mode_sk] - Scan parquet default.ship_mode [sm_ship_mode_sk,sm_type] [sm_ship_mode_sk,sm_type] + WholeStageCodegen (2) + Filter [sm_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet default.ship_mode [sm_ship_mode_sk,sm_type] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [web_name,web_site_sk] - Filter [web_site_sk] - Scan parquet default.web_site [web_name,web_site_sk] [web_name,web_site_sk] + WholeStageCodegen (3) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_name] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (4) Project [d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/explain.txt index 7025e4c24..284a9203a 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/explain.txt @@ -1,31 +1,180 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_manager_id#1 ASC NULLS FIRST,avg_monthly_sales#2 ASC NULLS FIRST,sum_sales#3 ASC NULLS FIRST], output=[i_manager_id#1,sum_sales#3,avg_monthly_sales#2]) -+- *(7) Project [i_manager_id#1, sum_sales#3, avg_monthly_sales#2] - +- *(7) Filter (CASE WHEN (avg_monthly_sales#2 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#3 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000) - +- Window [avg(_w0#4) windowspecdefinition(i_manager_id#1, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_manager_id#1] - +- *(6) Sort [i_manager_id#1 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(i_manager_id#1, 5) - +- *(5) HashAggregate(keys=[i_manager_id#1, d_moy#5], functions=[sum(UnscaledValue(ss_sales_price#6))]) - +- Exchange hashpartitioning(i_manager_id#1, d_moy#5, 5) - +- *(4) HashAggregate(keys=[i_manager_id#1, d_moy#5], functions=[partial_sum(UnscaledValue(ss_sales_price#6))]) - +- *(4) Project [i_manager_id#1, ss_sales_price#6, d_moy#5] - +- *(4) BroadcastHashJoin [ss_store_sk#7], [s_store_sk#8], Inner, BuildRight - :- *(4) Project [i_manager_id#1, ss_store_sk#7, ss_sales_price#6, d_moy#5] - : +- *(4) BroadcastHashJoin [ss_sold_date_sk#9], [d_date_sk#10], Inner, BuildRight - : :- *(4) Project [i_manager_id#1, ss_sold_date_sk#9, ss_store_sk#7, ss_sales_price#6] - : : +- *(4) BroadcastHashJoin [i_item_sk#11], [ss_item_sk#12], Inner, BuildRight - : : :- *(4) Project [i_item_sk#11, i_manager_id#1] - : : : +- *(4) Filter ((((i_category#13 IN (Books,Children,Electronics) && i_class#14 IN (personal,portable,refernece,self-help)) && i_brand#15 IN (scholaramalgamalg #16,scholaramalgamalg #17,exportiunivamalg #18,scholaramalgamalg #18)) || ((i_category#13 IN (Women,Music,Men) && i_class#14 IN (accessories,classical,fragrances,pants)) && i_brand#15 IN (amalgimporto #19,edu packscholar #19,exportiimporto #19,importoamalg #19))) && isnotnull(i_item_sk#11)) - : : : +- *(4) FileScan parquet default.item[i_item_sk#11,i_brand#15,i_class#14,i_category#13,i_manager_id#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,refernece..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : +- *(1) Project [ss_sold_date_sk#9, ss_item_sk#12, ss_store_sk#7, ss_sales_price#6] - : : +- *(1) Filter ((isnotnull(ss_item_sk#12) && isnotnull(ss_sold_date_sk#9)) && isnotnull(ss_store_sk#7)) - : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#9,ss_item_sk#12,ss_store_sk#7,ss_sales_price#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#10, d_moy#5] - : +- *(2) Filter (d_month_seq#20 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) && isnotnull(d_date_sk#10)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#10,d_month_seq#20,d_moy#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [s_store_sk#8] - +- *(3) Filter isnotnull(s_store_sk#8) - +- *(3) FileScan parquet default.store[s_store_sk#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (32) ++- * Project (31) + +- * Filter (30) + +- Window (29) + +- * Sort (28) + +- Exchange (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Project (23) + +- * BroadcastHashJoin Inner BuildRight (22) + :- * Project (17) + : +- * BroadcastHashJoin Inner BuildRight (16) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- BroadcastExchange (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet default.store_sales (5) + : +- BroadcastExchange (15) + : +- * Project (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.date_dim (11) + +- BroadcastExchange (21) + +- * Filter (20) + +- * ColumnarToRow (19) + +- Scan parquet default.store (18) + + +(1) Scan parquet default.item +Output [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [Or(And(And(In(i_category, [Books,Children,Electronics]),In(i_class, [personal,portable,refernece,self-help])),In(i_brand, [scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8])),And(And(In(i_category, [Women,Music,Men]),In(i_class, [accessories,classical,fragrances,pants])),In(i_brand, [amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] + +(3) Filter [codegen id : 4] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] +Condition : ((((i_category#4 IN (Books,Children,Electronics) AND i_class#3 IN (personal,portable,refernece,self-help)) AND i_brand#2 IN (scholaramalgamalg #6,scholaramalgamalg #7,exportiunivamalg #8,scholaramalgamalg #8)) OR ((i_category#4 IN (Women,Music,Men) AND i_class#3 IN (accessories,classical,fragrances,pants)) AND i_brand#2 IN (amalgimporto #9,edu packscholar #9,exportiimporto #9,importoamalg #9))) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 4] +Output [2]: [i_item_sk#1, i_manager_id#5] +Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] + +(5) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] + +(7) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Condition : ((isnotnull(ss_item_sk#11) AND isnotnull(ss_sold_date_sk#10)) AND isnotnull(ss_store_sk#12)) + +(8) BroadcastExchange +Input [4]: [ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#14] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#11] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [i_manager_id#5, ss_sold_date_sk#10, ss_store_sk#12, ss_sales_price#13] +Input [6]: [i_item_sk#1, i_manager_id#5, ss_sold_date_sk#10, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] + +(11) Scan parquet default.date_dim +Output [3]: [d_date_sk#15, d_month_seq#16, d_moy#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_month_seq, [1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#15, d_month_seq#16, d_moy#17] + +(13) Filter [codegen id : 2] +Input [3]: [d_date_sk#15, d_month_seq#16, d_moy#17] +Condition : (d_month_seq#16 INSET (1200,1211,1205,1201,1206,1210,1207,1202,1209,1203,1208,1204) AND isnotnull(d_date_sk#15)) + +(14) Project [codegen id : 2] +Output [2]: [d_date_sk#15, d_moy#17] +Input [3]: [d_date_sk#15, d_month_seq#16, d_moy#17] + +(15) BroadcastExchange +Input [2]: [d_date_sk#15, d_moy#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#10] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(17) Project [codegen id : 4] +Output [4]: [i_manager_id#5, ss_store_sk#12, ss_sales_price#13, d_moy#17] +Input [6]: [i_manager_id#5, ss_sold_date_sk#10, ss_store_sk#12, ss_sales_price#13, d_date_sk#15, d_moy#17] + +(18) Scan parquet default.store +Output [1]: [s_store_sk#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [1]: [s_store_sk#19] + +(20) Filter [codegen id : 3] +Input [1]: [s_store_sk#19] +Condition : isnotnull(s_store_sk#19) + +(21) BroadcastExchange +Input [1]: [s_store_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#12] +Right keys [1]: [s_store_sk#19] +Join condition: None + +(23) Project [codegen id : 4] +Output [3]: [i_manager_id#5, ss_sales_price#13, d_moy#17] +Input [5]: [i_manager_id#5, ss_store_sk#12, ss_sales_price#13, d_moy#17, s_store_sk#19] + +(24) HashAggregate [codegen id : 4] +Input [3]: [i_manager_id#5, ss_sales_price#13, d_moy#17] +Keys [2]: [i_manager_id#5, d_moy#17] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#13))] +Aggregate Attributes [1]: [sum#21] +Results [3]: [i_manager_id#5, d_moy#17, sum#22] + +(25) Exchange +Input [3]: [i_manager_id#5, d_moy#17, sum#22] +Arguments: hashpartitioning(i_manager_id#5, d_moy#17, 5), true, [id=#23] + +(26) HashAggregate [codegen id : 5] +Input [3]: [i_manager_id#5, d_moy#17, sum#22] +Keys [2]: [i_manager_id#5, d_moy#17] +Functions [1]: [sum(UnscaledValue(ss_sales_price#13))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#13))#24] +Results [3]: [i_manager_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#24,17,2) AS sum_sales#25, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#24,17,2) AS _w0#26] + +(27) Exchange +Input [3]: [i_manager_id#5, sum_sales#25, _w0#26] +Arguments: hashpartitioning(i_manager_id#5, 5), true, [id=#27] + +(28) Sort [codegen id : 6] +Input [3]: [i_manager_id#5, sum_sales#25, _w0#26] +Arguments: [i_manager_id#5 ASC NULLS FIRST], false, 0 + +(29) Window +Input [3]: [i_manager_id#5, sum_sales#25, _w0#26] +Arguments: [avg(_w0#26) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#28], [i_manager_id#5] + +(30) Filter [codegen id : 7] +Input [4]: [i_manager_id#5, sum_sales#25, _w0#26, avg_monthly_sales#28] +Condition : (CASE WHEN (avg_monthly_sales#28 > 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#25 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#28 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#28 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000) + +(31) Project [codegen id : 7] +Output [3]: [i_manager_id#5, sum_sales#25, avg_monthly_sales#28] +Input [4]: [i_manager_id#5, sum_sales#25, _w0#26, avg_monthly_sales#28] + +(32) TakeOrderedAndProject +Input [3]: [i_manager_id#5, sum_sales#25, avg_monthly_sales#28] +Arguments: 100, [i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#28 ASC NULLS FIRST, sum_sales#25 ASC NULLS FIRST], [i_manager_id#5, sum_sales#25, avg_monthly_sales#28] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/simplified.txt index fc602334c..7272c01ab 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q63/simplified.txt @@ -1,43 +1,49 @@ -TakeOrderedAndProject [avg_monthly_sales,i_manager_id,sum_sales] - WholeStageCodegen - Project [avg_monthly_sales,i_manager_id,sum_sales] +TakeOrderedAndProject [i_manager_id,avg_monthly_sales,sum_sales] + WholeStageCodegen (7) + Project [i_manager_id,sum_sales,avg_monthly_sales] Filter [avg_monthly_sales,sum_sales] InputAdapter Window [_w0,i_manager_id] - WholeStageCodegen + WholeStageCodegen (6) Sort [i_manager_id] InputAdapter Exchange [i_manager_id] #1 - WholeStageCodegen - HashAggregate [d_moy,i_manager_id,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + WholeStageCodegen (5) + HashAggregate [i_manager_id,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] InputAdapter - Exchange [d_moy,i_manager_id] #2 - WholeStageCodegen - HashAggregate [d_moy,i_manager_id,ss_sales_price,sum,sum] [sum,sum] - Project [d_moy,i_manager_id,ss_sales_price] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [d_moy,i_manager_id,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_manager_id,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Exchange [i_manager_id,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [i_manager_id,d_moy,ss_sales_price] [sum,sum] + Project [i_manager_id,ss_sales_price,d_moy] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_manager_id,ss_store_sk,ss_sales_price,d_moy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_manager_id,ss_sold_date_sk,ss_store_sk,ss_sales_price] BroadcastHashJoin [i_item_sk,ss_item_sk] Project [i_item_sk,i_manager_id] - Filter [i_brand,i_category,i_class,i_item_sk] - Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_manager_id] [i_brand,i_category,i_class,i_item_sk,i_manager_id] + Filter [i_category,i_class,i_brand,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand,i_class,i_category,i_manager_id] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date_sk,d_moy] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy] [d_date_sk,d_month_seq,d_moy] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_sk] [s_store_sk] + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt index 5a211f617..0e658ff99 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/explain.txt @@ -1,170 +1,918 @@ == Physical Plan == -*(43) Sort [product_name#1 ASC NULLS FIRST, store_name#2 ASC NULLS FIRST, cnt#3 ASC NULLS FIRST], true, 0 -+- Exchange rangepartitioning(product_name#1 ASC NULLS FIRST, store_name#2 ASC NULLS FIRST, cnt#3 ASC NULLS FIRST, 5) - +- *(42) Project [product_name#1, store_name#2, store_zip#4, b_street_number#5, b_streen_name#6, b_city#7, b_zip#8, c_street_number#9, c_street_name#10, c_city#11, c_zip#12, syear#13, cnt#14, s1#15, s2#16, s3#17, s1#18, s2#19, s3#20, syear#21, cnt#3] - +- *(42) BroadcastHashJoin [item_sk#22, store_name#2, store_zip#4], [item_sk#23, store_name#24, store_zip#25], Inner, BuildRight, (cnt#3 <= cnt#14) - :- *(42) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[count(1), sum(UnscaledValue(ss_wholesale_cost#41)), sum(UnscaledValue(ss_list_price#42)), sum(UnscaledValue(ss_coupon_amt#43))]) - : +- Exchange hashpartitioning(i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40, 5) - : +- *(20) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#41)), partial_sum(UnscaledValue(ss_list_price#42)), partial_sum(UnscaledValue(ss_coupon_amt#43))]) - : +- *(20) Project [ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, d_year#39, d_year#40, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, i_item_sk#27, i_product_name#26] - : +- *(20) BroadcastHashJoin [ss_item_sk#44], [i_item_sk#27], Inner, BuildRight - : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] - : : +- *(20) BroadcastHashJoin [hd_income_band_sk#45], [ib_income_band_sk#46], Inner, BuildRight - : : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] - : : : +- *(20) BroadcastHashJoin [hd_income_band_sk#47], [ib_income_band_sk#48], Inner, BuildRight - : : : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] - : : : : +- *(20) BroadcastHashJoin [c_current_addr_sk#49], [ca_address_sk#50], Inner, BuildRight - : : : : :- *(20) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33] - : : : : : +- *(20) BroadcastHashJoin [ss_addr_sk#51], [ca_address_sk#52], Inner, BuildRight - : : : : : :- *(20) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45] - : : : : : : +- *(20) BroadcastHashJoin [c_current_hdemo_sk#53], [hd_demo_sk#54], Inner, BuildRight - : : : : : : :- *(20) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47] - : : : : : : : +- *(20) BroadcastHashJoin [ss_hdemo_sk#55], [hd_demo_sk#56], Inner, BuildRight - : : : : : : : :- *(20) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] - : : : : : : : : +- *(20) BroadcastHashJoin [ss_promo_sk#57], [p_promo_sk#58], Inner, BuildRight - : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] - : : : : : : : : : +- *(20) BroadcastHashJoin [c_current_cdemo_sk#59], [cd_demo_sk#60], Inner, BuildRight, NOT (cd_marital_status#61 = cd_marital_status#62) - : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, cd_marital_status#61] - : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_cdemo_sk#63], [cd_demo_sk#64], Inner, BuildRight - : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] - : : : : : : : : : : : +- *(20) BroadcastHashJoin [c_first_shipto_date_sk#65], [d_date_sk#66], Inner, BuildRight - : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, d_year#39] - : : : : : : : : : : : : +- *(20) BroadcastHashJoin [c_first_sales_date_sk#67], [d_date_sk#68], Inner, BuildRight - : : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67] - : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_customer_sk#69], [c_customer_sk#70], Inner, BuildRight - : : : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29] - : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_store_sk#71], [s_store_sk#72], Inner, BuildRight - : : : : : : : : : : : : : : :- *(20) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38] - : : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_sold_date_sk#73], [d_date_sk#74], Inner, BuildRight - : : : : : : : : : : : : : : : :- *(20) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] - : : : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [ss_item_sk#44], [cs_item_sk#75], Inner, BuildRight - : : : : : : : : : : : : : : : : :- *(20) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] - : : : : : : : : : : : : : : : : : +- *(20) BroadcastHashJoin [cast(ss_item_sk#44 as bigint), cast(ss_ticket_number#76 as bigint)], [sr_item_sk#77, sr_ticket_number#78], Inner, BuildRight - : : : : : : : : : : : : : : : : : :- *(20) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_ticket_number#76, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] - : : : : : : : : : : : : : : : : : : +- *(20) Filter ((((((((isnotnull(ss_item_sk#44) && isnotnull(ss_ticket_number#76)) && isnotnull(ss_sold_date_sk#73)) && isnotnull(ss_store_sk#71)) && isnotnull(ss_customer_sk#69)) && isnotnull(ss_cdemo_sk#63)) && isnotnull(ss_promo_sk#57)) && isnotnull(ss_hdemo_sk#55)) && isnotnull(ss_addr_sk#51)) - : : : : : : : : : : : : : : : : : : +- *(20) FileScan parquet default.store_sales[ss_sold_date_sk#73,ss_item_sk#44,ss_customer_sk#69,ss_cdemo_sk#63,ss_hdemo_sk#55,ss_addr_sk#51,ss_store_sk#71,ss_promo_sk#57,ss_ticket_number#76,ss_wholesale_cost#41,ss_list_price#42,ss_coupon_amt#43] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_sto..., ReadSchema: struct - : : : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : : : : : : : : : +- *(4) Project [cs_item_sk#75] - : : : : : : : : : : : : : : : : +- *(4) Filter (isnotnull(sum(cs_ext_list_price#79)#80) && (cast(sum(cs_ext_list_price#79)#80 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))#84)), DecimalType(21,2)))) - : : : : : : : : : : : : : : : : +- *(4) HashAggregate(keys=[cs_item_sk#75], functions=[sum(UnscaledValue(cs_ext_list_price#79)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))]) - : : : : : : : : : : : : : : : : +- Exchange hashpartitioning(cs_item_sk#75, 5) - : : : : : : : : : : : : : : : : +- *(3) HashAggregate(keys=[cs_item_sk#75], functions=[partial_sum(UnscaledValue(cs_ext_list_price#79)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#81 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#82 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#83 as decimal(9,2)))), DecimalType(9,2)))]) - : : : : : : : : : : : : : : : : +- *(3) Project [cs_item_sk#75, cs_ext_list_price#79, cr_refunded_cash#81, cr_reversed_charge#82, cr_store_credit#83] - : : : : : : : : : : : : : : : : +- *(3) BroadcastHashJoin [cs_item_sk#75, cs_order_number#85], [cr_item_sk#86, cr_order_number#87], Inner, BuildRight - : : : : : : : : : : : : : : : : :- *(3) Project [cs_item_sk#75, cs_order_number#85, cs_ext_list_price#79] - : : : : : : : : : : : : : : : : : +- *(3) Filter (isnotnull(cs_item_sk#75) && isnotnull(cs_order_number#85)) - : : : : : : : : : : : : : : : : : +- *(3) FileScan parquet default.catalog_sales[cs_item_sk#75,cs_order_number#85,cs_ext_list_price#79] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)], ReadSchema: struct - : : : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295)))) - : : : : : : : : : : : : : : : : +- *(2) Project [cr_item_sk#86, cr_order_number#87, cr_refunded_cash#81, cr_reversed_charge#82, cr_store_credit#83] - : : : : : : : : : : : : : : : : +- *(2) Filter (isnotnull(cr_item_sk#86) && isnotnull(cr_order_number#87)) - : : : : : : : : : : : : : : : : +- *(2) FileScan parquet default.catalog_returns[cr_item_sk#86,cr_order_number#87,cr_refunded_cash#81,cr_reversed_charge#82,cr_store_credit#83] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct - : : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : : : : : : : +- *(6) Project [s_store_sk#72, s_store_name#28, s_zip#29] - : : : : : : : : : : : : : : +- *(6) Filter ((isnotnull(s_store_sk#72) && isnotnull(s_store_name#28)) && isnotnull(s_zip#29)) - : : : : : : : : : : : : : : +- *(6) FileScan parquet default.store[s_store_sk#72,s_store_name#28,s_zip#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_zip)], ReadSchema: struct - : : : : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : : : : : : +- *(7) Project [c_customer_sk#70, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67] - : : : : : : : : : : : : : +- *(7) Filter (((((isnotnull(c_customer_sk#70) && isnotnull(c_first_sales_date_sk#67)) && isnotnull(c_first_shipto_date_sk#65)) && isnotnull(c_current_cdemo_sk#59)) && isnotnull(c_current_hdemo_sk#53)) && isnotnull(c_current_addr_sk#49)) - : : : : : : : : : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#70,c_current_cdemo_sk#59,c_current_hdemo_sk#53,c_current_addr_sk#49,c_first_shipto_date_sk#65,c_first_sales_date_sk#67] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), I..., ReadSchema: struct - : : : : : : : : : : : +- ReusedExchange [d_date_sk#66, d_year#40], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : : : +- *(10) Project [cd_demo_sk#64, cd_marital_status#61] - : : : : : : : : : : +- *(10) Filter (isnotnull(cd_demo_sk#64) && isnotnull(cd_marital_status#61)) - : : : : : : : : : : +- *(10) FileScan parquet default.customer_demographics[cd_demo_sk#64,cd_marital_status#61] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)], ReadSchema: struct - : : : : : : : : : +- ReusedExchange [cd_demo_sk#60, cd_marital_status#62], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : +- *(12) Project [p_promo_sk#58] - : : : : : : : : +- *(12) Filter isnotnull(p_promo_sk#58) - : : : : : : : : +- *(12) FileScan parquet default.promotion[p_promo_sk#58] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_promo_sk)], ReadSchema: struct - : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : +- *(13) Project [hd_demo_sk#56, hd_income_band_sk#47] - : : : : : : : +- *(13) Filter (isnotnull(hd_demo_sk#56) && isnotnull(hd_income_band_sk#47)) - : : : : : : : +- *(13) FileScan parquet default.household_demographics[hd_demo_sk#56,hd_income_band_sk#47] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)], ReadSchema: struct - : : : : : : +- ReusedExchange [hd_demo_sk#54, hd_income_band_sk#45], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(15) Project [ca_address_sk#52, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33] - : : : : : +- *(15) Filter isnotnull(ca_address_sk#52) - : : : : : +- *(15) FileScan parquet default.customer_address[ca_address_sk#52,ca_street_number#30,ca_street_name#31,ca_city#32,ca_zip#33] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct - : : : : +- ReusedExchange [ca_address_sk#50, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(17) Project [ib_income_band_sk#48] - : : : +- *(17) Filter isnotnull(ib_income_band_sk#48) - : : : +- *(17) FileScan parquet default.income_band[ib_income_band_sk#48] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/income_band], PartitionFilters: [], PushedFilters: [IsNotNull(ib_income_band_sk)], ReadSchema: struct - : : +- ReusedExchange [ib_income_band_sk#46], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(19) Project [i_item_sk#27, i_product_name#26] - : +- *(19) Filter ((((((isnotnull(i_current_price#88) && i_color#89 IN (purple,burlywood,indian,spring,floral,medium)) && (i_current_price#88 >= 64.00)) && (cast(i_current_price#88 as decimal(12,2)) <= 74.00)) && (cast(i_current_price#88 as decimal(12,2)) >= 65.00)) && (cast(i_current_price#88 as decimal(12,2)) <= 79.00)) && isnotnull(i_item_sk#27)) - : +- *(19) FileScan parquet default.item[i_item_sk#27,i_current_price#88,i_color#89,i_product_name#26] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), In(i_color, [purple,burlywood,indian,spring,floral,medium]), Greater..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, string, true], input[2, string, true])) - +- *(41) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[count(1), sum(UnscaledValue(ss_wholesale_cost#41)), sum(UnscaledValue(ss_list_price#42)), sum(UnscaledValue(ss_coupon_amt#43))]) - +- Exchange hashpartitioning(i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40, 5) - +- *(40) HashAggregate(keys=[i_product_name#26, i_item_sk#27, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, d_year#38, d_year#39, d_year#40], functions=[partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#41)), partial_sum(UnscaledValue(ss_list_price#42)), partial_sum(UnscaledValue(ss_coupon_amt#43))]) - +- *(40) Project [ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, d_year#39, d_year#40, s_store_name#28, s_zip#29, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37, i_item_sk#27, i_product_name#26] - +- *(40) BroadcastHashJoin [ss_item_sk#44], [i_item_sk#27], Inner, BuildRight - :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] - : +- *(40) BroadcastHashJoin [hd_income_band_sk#45], [ib_income_band_sk#46], Inner, BuildRight - : :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] - : : +- *(40) BroadcastHashJoin [hd_income_band_sk#47], [ib_income_band_sk#48], Inner, BuildRight - : : :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37] - : : : +- *(40) BroadcastHashJoin [c_current_addr_sk#49], [ca_address_sk#50], Inner, BuildRight - : : : :- *(40) Project [ss_item_sk#44, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33] - : : : : +- *(40) BroadcastHashJoin [ss_addr_sk#51], [ca_address_sk#52], Inner, BuildRight - : : : : :- *(40) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47, hd_income_band_sk#45] - : : : : : +- *(40) BroadcastHashJoin [c_current_hdemo_sk#53], [hd_demo_sk#54], Inner, BuildRight - : : : : : :- *(40) Project [ss_item_sk#44, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, hd_income_band_sk#47] - : : : : : : +- *(40) BroadcastHashJoin [ss_hdemo_sk#55], [hd_demo_sk#56], Inner, BuildRight - : : : : : : :- *(40) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] - : : : : : : : +- *(40) BroadcastHashJoin [ss_promo_sk#57], [p_promo_sk#58], Inner, BuildRight - : : : : : : : :- *(40) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] - : : : : : : : : +- *(40) BroadcastHashJoin [c_current_cdemo_sk#59], [cd_demo_sk#60], Inner, BuildRight, NOT (cd_marital_status#61 = cd_marital_status#62) - : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40, cd_marital_status#61] - : : : : : : : : : +- *(40) BroadcastHashJoin [ss_cdemo_sk#63], [cd_demo_sk#64], Inner, BuildRight - : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, d_year#39, d_year#40] - : : : : : : : : : : +- *(40) BroadcastHashJoin [c_first_shipto_date_sk#65], [d_date_sk#66], Inner, BuildRight - : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, d_year#39] - : : : : : : : : : : : +- *(40) BroadcastHashJoin [c_first_sales_date_sk#67], [d_date_sk#68], Inner, BuildRight - : : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67] - : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_customer_sk#69], [c_customer_sk#70], Inner, BuildRight - : : : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38, s_store_name#28, s_zip#29] - : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_store_sk#71], [s_store_sk#72], Inner, BuildRight - : : : : : : : : : : : : : :- *(40) Project [ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43, d_year#38] - : : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_sold_date_sk#73], [d_date_sk#74], Inner, BuildRight - : : : : : : : : : : : : : : :- *(40) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] - : : : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [ss_item_sk#44], [cs_item_sk#75], Inner, BuildRight - : : : : : : : : : : : : : : : :- *(40) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] - : : : : : : : : : : : : : : : : +- *(40) BroadcastHashJoin [cast(ss_item_sk#44 as bigint), cast(ss_ticket_number#76 as bigint)], [sr_item_sk#77, sr_ticket_number#78], Inner, BuildRight - : : : : : : : : : : : : : : : : :- *(40) Project [ss_sold_date_sk#73, ss_item_sk#44, ss_customer_sk#69, ss_cdemo_sk#63, ss_hdemo_sk#55, ss_addr_sk#51, ss_store_sk#71, ss_promo_sk#57, ss_ticket_number#76, ss_wholesale_cost#41, ss_list_price#42, ss_coupon_amt#43] - : : : : : : : : : : : : : : : : : +- *(40) Filter ((((((((isnotnull(ss_item_sk#44) && isnotnull(ss_ticket_number#76)) && isnotnull(ss_sold_date_sk#73)) && isnotnull(ss_store_sk#71)) && isnotnull(ss_customer_sk#69)) && isnotnull(ss_cdemo_sk#63)) && isnotnull(ss_promo_sk#57)) && isnotnull(ss_hdemo_sk#55)) && isnotnull(ss_addr_sk#51)) - : : : : : : : : : : : : : : : : : +- *(40) FileScan parquet default.store_sales[ss_sold_date_sk#73,ss_item_sk#44,ss_customer_sk#69,ss_cdemo_sk#63,ss_hdemo_sk#55,ss_addr_sk#51,ss_store_sk#71,ss_promo_sk#57,ss_ticket_number#76,ss_wholesale_cost#41,ss_list_price#42,ss_coupon_amt#43] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_sto..., ReadSchema: struct - : : : : : : : : : : : : : +- ReusedExchange [s_store_sk#72, s_store_name#28, s_zip#29], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : : : : : +- ReusedExchange [c_customer_sk#70, c_current_cdemo_sk#59, c_current_hdemo_sk#53, c_current_addr_sk#49, c_first_shipto_date_sk#65, c_first_sales_date_sk#67], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : : : : +- ReusedExchange [d_date_sk#68, d_year#39], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : : : +- ReusedExchange [d_date_sk#66, d_year#40], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : : +- ReusedExchange [cd_demo_sk#64, cd_marital_status#61], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : +- ReusedExchange [cd_demo_sk#60, cd_marital_status#62], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : +- ReusedExchange [p_promo_sk#58], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : +- ReusedExchange [hd_demo_sk#56, hd_income_band_sk#47], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- ReusedExchange [hd_demo_sk#54, hd_income_band_sk#45], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- ReusedExchange [ca_address_sk#52, ca_street_number#30, ca_street_name#31, ca_city#32, ca_zip#33], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- ReusedExchange [ca_address_sk#50, ca_street_number#34, ca_street_name#35, ca_city#36, ca_zip#37], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- ReusedExchange [ib_income_band_sk#48], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [ib_income_band_sk#46], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [i_item_sk#27, i_product_name#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +* Sort (170) ++- Exchange (169) + +- * Project (168) + +- * BroadcastHashJoin Inner BuildRight (167) + :- * HashAggregate (105) + : +- Exchange (104) + : +- * HashAggregate (103) + : +- * Project (102) + : +- * BroadcastHashJoin Inner BuildRight (101) + : :- * Project (95) + : : +- * BroadcastHashJoin Inner BuildRight (94) + : : :- * Project (92) + : : : +- * BroadcastHashJoin Inner BuildRight (91) + : : : :- * Project (86) + : : : : +- * BroadcastHashJoin Inner BuildRight (85) + : : : : :- * Project (83) + : : : : : +- * BroadcastHashJoin Inner BuildRight (82) + : : : : : :- * Project (77) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (76) + : : : : : : :- * Project (74) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (73) + : : : : : : : :- * Project (68) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (67) + : : : : : : : : :- * Project (62) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (61) + : : : : : : : : : :- * Project (59) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : : : : : : : : :- * Project (53) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : : : : : : : : : :- * Project (50) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (49) + : : : : : : : : : : : : :- * Project (44) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (43) + : : : : : : : : : : : : : :- * Project (38) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : : : : : : : : : : : :- * Project (32) + : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : : : : : : : : : : : : :- * Project (26) + : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : : : : : : : : : : : : : : :- * Project (9) + : : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : : : : : : : : : : : : :- * Filter (3) + : : : : : : : : : : : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : : : : : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : : : : : : : : : : : : +- BroadcastExchange (7) + : : : : : : : : : : : : : : : : : +- * Filter (6) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (5) + : : : : : : : : : : : : : : : : : +- Scan parquet default.store_returns (4) + : : : : : : : : : : : : : : : : +- BroadcastExchange (24) + : : : : : : : : : : : : : : : : +- * Project (23) + : : : : : : : : : : : : : : : : +- * Filter (22) + : : : : : : : : : : : : : : : : +- * HashAggregate (21) + : : : : : : : : : : : : : : : : +- Exchange (20) + : : : : : : : : : : : : : : : : +- * HashAggregate (19) + : : : : : : : : : : : : : : : : +- * Project (18) + : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (17) + : : : : : : : : : : : : : : : : :- * Filter (12) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (11) + : : : : : : : : : : : : : : : : : +- Scan parquet default.catalog_sales (10) + : : : : : : : : : : : : : : : : +- BroadcastExchange (16) + : : : : : : : : : : : : : : : : +- * Filter (15) + : : : : : : : : : : : : : : : : +- * ColumnarToRow (14) + : : : : : : : : : : : : : : : : +- Scan parquet default.catalog_returns (13) + : : : : : : : : : : : : : : : +- BroadcastExchange (30) + : : : : : : : : : : : : : : : +- * Filter (29) + : : : : : : : : : : : : : : : +- * ColumnarToRow (28) + : : : : : : : : : : : : : : : +- Scan parquet default.date_dim (27) + : : : : : : : : : : : : : : +- BroadcastExchange (36) + : : : : : : : : : : : : : : +- * Filter (35) + : : : : : : : : : : : : : : +- * ColumnarToRow (34) + : : : : : : : : : : : : : : +- Scan parquet default.store (33) + : : : : : : : : : : : : : +- BroadcastExchange (42) + : : : : : : : : : : : : : +- * Filter (41) + : : : : : : : : : : : : : +- * ColumnarToRow (40) + : : : : : : : : : : : : : +- Scan parquet default.customer (39) + : : : : : : : : : : : : +- BroadcastExchange (48) + : : : : : : : : : : : : +- * Filter (47) + : : : : : : : : : : : : +- * ColumnarToRow (46) + : : : : : : : : : : : : +- Scan parquet default.date_dim (45) + : : : : : : : : : : : +- ReusedExchange (51) + : : : : : : : : : : +- BroadcastExchange (57) + : : : : : : : : : : +- * Filter (56) + : : : : : : : : : : +- * ColumnarToRow (55) + : : : : : : : : : : +- Scan parquet default.customer_demographics (54) + : : : : : : : : : +- ReusedExchange (60) + : : : : : : : : +- BroadcastExchange (66) + : : : : : : : : +- * Filter (65) + : : : : : : : : +- * ColumnarToRow (64) + : : : : : : : : +- Scan parquet default.promotion (63) + : : : : : : : +- BroadcastExchange (72) + : : : : : : : +- * Filter (71) + : : : : : : : +- * ColumnarToRow (70) + : : : : : : : +- Scan parquet default.household_demographics (69) + : : : : : : +- ReusedExchange (75) + : : : : : +- BroadcastExchange (81) + : : : : : +- * Filter (80) + : : : : : +- * ColumnarToRow (79) + : : : : : +- Scan parquet default.customer_address (78) + : : : : +- ReusedExchange (84) + : : : +- BroadcastExchange (90) + : : : +- * Filter (89) + : : : +- * ColumnarToRow (88) + : : : +- Scan parquet default.income_band (87) + : : +- ReusedExchange (93) + : +- BroadcastExchange (100) + : +- * Project (99) + : +- * Filter (98) + : +- * ColumnarToRow (97) + : +- Scan parquet default.item (96) + +- BroadcastExchange (166) + +- * HashAggregate (165) + +- Exchange (164) + +- * HashAggregate (163) + +- * Project (162) + +- * BroadcastHashJoin Inner BuildRight (161) + :- * Project (159) + : +- * BroadcastHashJoin Inner BuildRight (158) + : :- * Project (156) + : : +- * BroadcastHashJoin Inner BuildRight (155) + : : :- * Project (153) + : : : +- * BroadcastHashJoin Inner BuildRight (152) + : : : :- * Project (150) + : : : : +- * BroadcastHashJoin Inner BuildRight (149) + : : : : :- * Project (147) + : : : : : +- * BroadcastHashJoin Inner BuildRight (146) + : : : : : :- * Project (144) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (143) + : : : : : : :- * Project (141) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (140) + : : : : : : : :- * Project (138) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (137) + : : : : : : : : :- * Project (135) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (134) + : : : : : : : : : :- * Project (132) + : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (131) + : : : : : : : : : : :- * Project (129) + : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (128) + : : : : : : : : : : : :- * Project (126) + : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (125) + : : : : : : : : : : : : :- * Project (123) + : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (122) + : : : : : : : : : : : : : :- * Project (120) + : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (119) + : : : : : : : : : : : : : : :- * Project (114) + : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (113) + : : : : : : : : : : : : : : : :- * Project (111) + : : : : : : : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (110) + : : : : : : : : : : : : : : : : :- * Filter (108) + : : : : : : : : : : : : : : : : : +- * ColumnarToRow (107) + : : : : : : : : : : : : : : : : : +- Scan parquet default.store_sales (106) + : : : : : : : : : : : : : : : : +- ReusedExchange (109) + : : : : : : : : : : : : : : : +- ReusedExchange (112) + : : : : : : : : : : : : : : +- BroadcastExchange (118) + : : : : : : : : : : : : : : +- * Filter (117) + : : : : : : : : : : : : : : +- * ColumnarToRow (116) + : : : : : : : : : : : : : : +- Scan parquet default.date_dim (115) + : : : : : : : : : : : : : +- ReusedExchange (121) + : : : : : : : : : : : : +- ReusedExchange (124) + : : : : : : : : : : : +- ReusedExchange (127) + : : : : : : : : : : +- ReusedExchange (130) + : : : : : : : : : +- ReusedExchange (133) + : : : : : : : : +- ReusedExchange (136) + : : : : : : : +- ReusedExchange (139) + : : : : : : +- ReusedExchange (142) + : : : : : +- ReusedExchange (145) + : : : : +- ReusedExchange (148) + : : : +- ReusedExchange (151) + : : +- ReusedExchange (154) + : +- ReusedExchange (157) + +- ReusedExchange (160) + + +(1) Scan parquet default.store_sales +Output [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 20] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] + +(3) Filter [codegen id : 20] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Condition : ((((((((isnotnull(ss_item_sk#2) AND isnotnull(ss_ticket_number#9)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_cdemo_sk#4)) AND isnotnull(ss_promo_sk#8)) AND isnotnull(ss_hdemo_sk#5)) AND isnotnull(ss_addr_sk#6)) + +(4) Scan parquet default.store_returns +Output [2]: [sr_item_sk#13, sr_ticket_number#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [sr_item_sk#13, sr_ticket_number#14] + +(6) Filter [codegen id : 1] +Input [2]: [sr_item_sk#13, sr_ticket_number#14] +Condition : (isnotnull(sr_item_sk#13) AND isnotnull(sr_ticket_number#14)) + +(7) BroadcastExchange +Input [2]: [sr_item_sk#13, sr_ticket_number#14] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [id=#15] + +(8) BroadcastHashJoin [codegen id : 20] +Left keys [2]: [cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#9 as bigint)] +Right keys [2]: [sr_item_sk#13, sr_ticket_number#14] +Join condition: None + +(9) Project [codegen id : 20] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, sr_item_sk#13, sr_ticket_number#14] + +(10) Scan parquet default.catalog_sales +Output [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 3] +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] + +(12) Filter [codegen id : 3] +Input [3]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18] +Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_order_number#17)) + +(13) Scan parquet default.catalog_returns +Output [5]: [cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 2] +Input [5]: [cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] + +(15) Filter [codegen id : 2] +Input [5]: [cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] +Condition : (isnotnull(cr_item_sk#19) AND isnotnull(cr_order_number#20)) + +(16) BroadcastExchange +Input [5]: [cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [id=#24] + +(17) BroadcastHashJoin [codegen id : 3] +Left keys [2]: [cs_item_sk#16, cs_order_number#17] +Right keys [2]: [cr_item_sk#19, cr_order_number#20] +Join condition: None + +(18) Project [codegen id : 3] +Output [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] +Input [8]: [cs_item_sk#16, cs_order_number#17, cs_ext_list_price#18, cr_item_sk#19, cr_order_number#20, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] + +(19) HashAggregate [codegen id : 3] +Input [5]: [cs_item_sk#16, cs_ext_list_price#18, cr_refunded_cash#21, cr_reversed_charge#22, cr_store_credit#23] +Keys [1]: [cs_item_sk#16] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#18)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))] +Aggregate Attributes [3]: [sum#25, sum#26, isEmpty#27] +Results [4]: [cs_item_sk#16, sum#28, sum#29, isEmpty#30] + +(20) Exchange +Input [4]: [cs_item_sk#16, sum#28, sum#29, isEmpty#30] +Arguments: hashpartitioning(cs_item_sk#16, 5), true, [id=#31] + +(21) HashAggregate [codegen id : 4] +Input [4]: [cs_item_sk#16, sum#28, sum#29, isEmpty#30] +Keys [1]: [cs_item_sk#16] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#18)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#18))#32, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#33] +Results [3]: [cs_item_sk#16, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#18))#32,17,2) AS sum(cs_ext_list_price#18)#34, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#33 AS sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#35] + +(22) Filter [codegen id : 4] +Input [3]: [cs_item_sk#16, sum(cs_ext_list_price#18)#34, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#35] +Condition : (isnotnull(sum(cs_ext_list_price#18)#34) AND (cast(sum(cs_ext_list_price#18)#34 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#35)), DecimalType(21,2), true))) + +(23) Project [codegen id : 4] +Output [1]: [cs_item_sk#16] +Input [3]: [cs_item_sk#16, sum(cs_ext_list_price#18)#34, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#21 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#22 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit#23 as decimal(9,2)))), DecimalType(9,2), true))#35] + +(24) BroadcastExchange +Input [1]: [cs_item_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#36] + +(25) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [cs_item_sk#16] +Join condition: None + +(26) Project [codegen id : 20] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, cs_item_sk#16] + +(27) Scan parquet default.date_dim +Output [2]: [d_date_sk#37, d_year#38] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#37, d_year#38] + +(29) Filter [codegen id : 5] +Input [2]: [d_date_sk#37, d_year#38] +Condition : ((isnotnull(d_year#38) AND (d_year#38 = 1999)) AND isnotnull(d_date_sk#37)) + +(30) BroadcastExchange +Input [2]: [d_date_sk#37, d_year#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#39] + +(31) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#37] +Join condition: None + +(32) Project [codegen id : 20] +Output [11]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38] +Input [13]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_date_sk#37, d_year#38] + +(33) Scan parquet default.store +Output [3]: [s_store_sk#40, s_store_name#41, s_zip#42] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_zip)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 6] +Input [3]: [s_store_sk#40, s_store_name#41, s_zip#42] + +(35) Filter [codegen id : 6] +Input [3]: [s_store_sk#40, s_store_name#41, s_zip#42] +Condition : ((isnotnull(s_store_sk#40) AND isnotnull(s_store_name#41)) AND isnotnull(s_zip#42)) + +(36) BroadcastExchange +Input [3]: [s_store_sk#40, s_store_name#41, s_zip#42] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#43] + +(37) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#40] +Join condition: None + +(38) Project [codegen id : 20] +Output [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42] +Input [14]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_sk#40, s_store_name#41, s_zip#42] + +(39) Scan parquet default.customer +Output [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 7] +Input [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] + +(41) Filter [codegen id : 7] +Input [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] +Condition : (((((isnotnull(c_customer_sk#44) AND isnotnull(c_first_sales_date_sk#49)) AND isnotnull(c_first_shipto_date_sk#48)) AND isnotnull(c_current_cdemo_sk#45)) AND isnotnull(c_current_hdemo_sk#46)) AND isnotnull(c_current_addr_sk#47)) + +(42) BroadcastExchange +Input [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#50] + +(43) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#44] +Join condition: None + +(44) Project [codegen id : 20] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] +Input [18]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] + +(45) Scan parquet default.date_dim +Output [2]: [d_date_sk#51, d_year#52] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(46) ColumnarToRow [codegen id : 8] +Input [2]: [d_date_sk#51, d_year#52] + +(47) Filter [codegen id : 8] +Input [2]: [d_date_sk#51, d_year#52] +Condition : isnotnull(d_date_sk#51) + +(48) BroadcastExchange +Input [2]: [d_date_sk#51, d_year#52] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#53] + +(49) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [c_first_sales_date_sk#49] +Right keys [1]: [d_date_sk#51] +Join condition: None + +(50) Project [codegen id : 20] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, d_year#52] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49, d_date_sk#51, d_year#52] + +(51) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#54, d_year#55] + +(52) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [c_first_shipto_date_sk#48] +Right keys [1]: [d_date_sk#54] +Join condition: None + +(53) Project [codegen id : 20] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, d_year#52, d_date_sk#54, d_year#55] + +(54) Scan parquet default.customer_demographics +Output [2]: [cd_demo_sk#56, cd_marital_status#57] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 10] +Input [2]: [cd_demo_sk#56, cd_marital_status#57] + +(56) Filter [codegen id : 10] +Input [2]: [cd_demo_sk#56, cd_marital_status#57] +Condition : (isnotnull(cd_demo_sk#56) AND isnotnull(cd_marital_status#57)) + +(57) BroadcastExchange +Input [2]: [cd_demo_sk#56, cd_marital_status#57] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#58] + +(58) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#56] +Join condition: None + +(59) Project [codegen id : 20] +Output [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, cd_marital_status#57] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, cd_demo_sk#56, cd_marital_status#57] + +(60) ReusedExchange [Reuses operator id: 57] +Output [2]: [cd_demo_sk#59, cd_marital_status#60] + +(61) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [c_current_cdemo_sk#45] +Right keys [1]: [cd_demo_sk#59] +Join condition: NOT (cd_marital_status#57 = cd_marital_status#60) + +(62) Project [codegen id : 20] +Output [14]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55] +Input [18]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, cd_marital_status#57, cd_demo_sk#59, cd_marital_status#60] + +(63) Scan parquet default.promotion +Output [1]: [p_promo_sk#61] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(64) ColumnarToRow [codegen id : 12] +Input [1]: [p_promo_sk#61] + +(65) Filter [codegen id : 12] +Input [1]: [p_promo_sk#61] +Condition : isnotnull(p_promo_sk#61) + +(66) BroadcastExchange +Input [1]: [p_promo_sk#61] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] + +(67) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_promo_sk#8] +Right keys [1]: [p_promo_sk#61] +Join condition: None + +(68) Project [codegen id : 20] +Output [13]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, p_promo_sk#61] + +(69) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#63, hd_income_band_sk#64] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] +ReadSchema: struct + +(70) ColumnarToRow [codegen id : 13] +Input [2]: [hd_demo_sk#63, hd_income_band_sk#64] + +(71) Filter [codegen id : 13] +Input [2]: [hd_demo_sk#63, hd_income_band_sk#64] +Condition : (isnotnull(hd_demo_sk#63) AND isnotnull(hd_income_band_sk#64)) + +(72) BroadcastExchange +Input [2]: [hd_demo_sk#63, hd_income_band_sk#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#65] + +(73) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_hdemo_sk#5] +Right keys [1]: [hd_demo_sk#63] +Join condition: None + +(74) Project [codegen id : 20] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, hd_demo_sk#63, hd_income_band_sk#64] + +(75) ReusedExchange [Reuses operator id: 72] +Output [2]: [hd_demo_sk#66, hd_income_band_sk#67] + +(76) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [c_current_hdemo_sk#46] +Right keys [1]: [hd_demo_sk#66] +Join condition: None + +(77) Project [codegen id : 20] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67] +Input [15]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_demo_sk#66, hd_income_band_sk#67] + +(78) Scan parquet default.customer_address +Output [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(79) ColumnarToRow [codegen id : 15] +Input [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] + +(80) Filter [codegen id : 15] +Input [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] +Condition : isnotnull(ca_address_sk#68) + +(81) BroadcastExchange +Input [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#73] + +(82) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_addr_sk#6] +Right keys [1]: [ca_address_sk#68] +Join condition: None + +(83) Project [codegen id : 20] +Output [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] +Input [18]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] + +(84) ReusedExchange [Reuses operator id: 81] +Output [5]: [ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] + +(85) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [c_current_addr_sk#47] +Right keys [1]: [ca_address_sk#74] +Join condition: None + +(86) Project [codegen id : 20] +Output [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Input [21]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_address_sk#74, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] + +(87) Scan parquet default.income_band +Output [1]: [ib_income_band_sk#79] +Batched: true +Location [not included in comparison]/{warehouse_dir}/income_band] +PushedFilters: [IsNotNull(ib_income_band_sk)] +ReadSchema: struct + +(88) ColumnarToRow [codegen id : 17] +Input [1]: [ib_income_band_sk#79] + +(89) Filter [codegen id : 17] +Input [1]: [ib_income_band_sk#79] +Condition : isnotnull(ib_income_band_sk#79) + +(90) BroadcastExchange +Input [1]: [ib_income_band_sk#79] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#80] + +(91) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [hd_income_band_sk#64] +Right keys [1]: [ib_income_band_sk#79] +Join condition: None + +(92) Project [codegen id : 20] +Output [18]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Input [20]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, hd_income_band_sk#64, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ib_income_band_sk#79] + +(93) ReusedExchange [Reuses operator id: 90] +Output [1]: [ib_income_band_sk#81] + +(94) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [hd_income_band_sk#67] +Right keys [1]: [ib_income_band_sk#81] +Join condition: None + +(95) Project [codegen id : 20] +Output [17]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, hd_income_band_sk#67, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, ib_income_band_sk#81] + +(96) Scan parquet default.item +Output [4]: [i_item_sk#82, i_current_price#83, i_color#84, i_product_name#85] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), In(i_color, [purple,burlywood,indian,spring,floral,medium]), GreaterThanOrEqual(i_current_price,64.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(97) ColumnarToRow [codegen id : 19] +Input [4]: [i_item_sk#82, i_current_price#83, i_color#84, i_product_name#85] + +(98) Filter [codegen id : 19] +Input [4]: [i_item_sk#82, i_current_price#83, i_color#84, i_product_name#85] +Condition : ((((((isnotnull(i_current_price#83) AND i_color#84 IN (purple,burlywood,indian,spring,floral,medium)) AND (i_current_price#83 >= 64.00)) AND (cast(i_current_price#83 as decimal(12,2)) <= 74.00)) AND (cast(i_current_price#83 as decimal(12,2)) >= 65.00)) AND (cast(i_current_price#83 as decimal(12,2)) <= 79.00)) AND isnotnull(i_item_sk#82)) + +(99) Project [codegen id : 19] +Output [2]: [i_item_sk#82, i_product_name#85] +Input [4]: [i_item_sk#82, i_current_price#83, i_color#84, i_product_name#85] + +(100) BroadcastExchange +Input [2]: [i_item_sk#82, i_product_name#85] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#86] + +(101) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#82] +Join condition: None + +(102) Project [codegen id : 20] +Output [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, d_year#52, d_year#55, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, i_item_sk#82, i_product_name#85] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#52, d_year#55, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, i_item_sk#82, i_product_name#85] + +(103) HashAggregate [codegen id : 20] +Input [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, d_year#52, d_year#55, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, i_item_sk#82, i_product_name#85] +Keys [15]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#10)), partial_sum(UnscaledValue(ss_list_price#11)), partial_sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count#87, sum#88, sum#89, sum#90] +Results [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55, count#91, sum#92, sum#93, sum#94] + +(104) Exchange +Input [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55, count#91, sum#92, sum#93, sum#94] +Arguments: hashpartitioning(i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55, 5), true, [id=#95] + +(105) HashAggregate [codegen id : 42] +Input [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55, count#91, sum#92, sum#93, sum#94] +Keys [15]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#75, ca_street_name#76, ca_city#77, ca_zip#78, d_year#38, d_year#52, d_year#55] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#10)), sum(UnscaledValue(ss_list_price#11)), sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count(1)#96, sum(UnscaledValue(ss_wholesale_cost#10))#97, sum(UnscaledValue(ss_list_price#11))#98, sum(UnscaledValue(ss_coupon_amt#12))#99] +Results [17]: [i_product_name#85 AS product_name#100, i_item_sk#82 AS item_sk#101, s_store_name#41 AS store_name#102, s_zip#42 AS store_zip#103, ca_street_number#69 AS b_street_number#104, ca_street_name#70 AS b_streen_name#105, ca_city#71 AS b_city#106, ca_zip#72 AS b_zip#107, ca_street_number#75 AS c_street_number#108, ca_street_name#76 AS c_street_name#109, ca_city#77 AS c_city#110, ca_zip#78 AS c_zip#111, d_year#38 AS syear#112, count(1)#96 AS cnt#113, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#10))#97,17,2) AS s1#114, MakeDecimal(sum(UnscaledValue(ss_list_price#11))#98,17,2) AS s2#115, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#12))#99,17,2) AS s3#116] + +(106) Scan parquet default.store_sales +Output [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] +ReadSchema: struct + +(107) ColumnarToRow [codegen id : 40] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] + +(108) Filter [codegen id : 40] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Condition : ((((((((isnotnull(ss_item_sk#2) AND isnotnull(ss_ticket_number#9)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_cdemo_sk#4)) AND isnotnull(ss_promo_sk#8)) AND isnotnull(ss_hdemo_sk#5)) AND isnotnull(ss_addr_sk#6)) + +(109) ReusedExchange [Reuses operator id: 7] +Output [2]: [sr_item_sk#13, sr_ticket_number#14] + +(110) BroadcastHashJoin [codegen id : 40] +Left keys [2]: [cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#9 as bigint)] +Right keys [2]: [sr_item_sk#13, sr_ticket_number#14] +Join condition: None + +(111) Project [codegen id : 40] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_ticket_number#9, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, sr_item_sk#13, sr_ticket_number#14] + +(112) ReusedExchange [Reuses operator id: 24] +Output [1]: [cs_item_sk#16] + +(113) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [cs_item_sk#16] +Join condition: None + +(114) Project [codegen id : 40] +Output [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12] +Input [12]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, cs_item_sk#16] + +(115) Scan parquet default.date_dim +Output [2]: [d_date_sk#37, d_year#38] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(116) ColumnarToRow [codegen id : 25] +Input [2]: [d_date_sk#37, d_year#38] + +(117) Filter [codegen id : 25] +Input [2]: [d_date_sk#37, d_year#38] +Condition : ((isnotnull(d_year#38) AND (d_year#38 = 2000)) AND isnotnull(d_date_sk#37)) + +(118) BroadcastExchange +Input [2]: [d_date_sk#37, d_year#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#117] + +(119) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#37] +Join condition: None + +(120) Project [codegen id : 40] +Output [11]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38] +Input [13]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_date_sk#37, d_year#38] + +(121) ReusedExchange [Reuses operator id: 36] +Output [3]: [s_store_sk#40, s_store_name#41, s_zip#42] + +(122) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#40] +Join condition: None + +(123) Project [codegen id : 40] +Output [12]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42] +Input [14]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_store_sk#7, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_sk#40, s_store_name#41, s_zip#42] + +(124) ReusedExchange [Reuses operator id: 42] +Output [6]: [c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] + +(125) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#44] +Join condition: None + +(126) Project [codegen id : 40] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] +Input [18]: [ss_item_sk#2, ss_customer_sk#3, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_customer_sk#44, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49] + +(127) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#118, d_year#119] + +(128) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [c_first_sales_date_sk#49] +Right keys [1]: [d_date_sk#118] +Join condition: None + +(129) Project [codegen id : 40] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, d_year#119] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, c_first_sales_date_sk#49, d_date_sk#118, d_year#119] + +(130) ReusedExchange [Reuses operator id: 48] +Output [2]: [d_date_sk#120, d_year#121] + +(131) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [c_first_shipto_date_sk#48] +Right keys [1]: [d_date_sk#120] +Join condition: None + +(132) Project [codegen id : 40] +Output [16]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, c_first_shipto_date_sk#48, d_year#119, d_date_sk#120, d_year#121] + +(133) ReusedExchange [Reuses operator id: 57] +Output [2]: [cd_demo_sk#56, cd_marital_status#57] + +(134) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_cdemo_sk#4] +Right keys [1]: [cd_demo_sk#56] +Join condition: None + +(135) Project [codegen id : 40] +Output [16]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, cd_marital_status#57] +Input [18]: [ss_item_sk#2, ss_cdemo_sk#4, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, cd_demo_sk#56, cd_marital_status#57] + +(136) ReusedExchange [Reuses operator id: 57] +Output [2]: [cd_demo_sk#122, cd_marital_status#123] + +(137) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [c_current_cdemo_sk#45] +Right keys [1]: [cd_demo_sk#122] +Join condition: NOT (cd_marital_status#57 = cd_marital_status#123) + +(138) Project [codegen id : 40] +Output [14]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121] +Input [18]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_cdemo_sk#45, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, cd_marital_status#57, cd_demo_sk#122, cd_marital_status#123] + +(139) ReusedExchange [Reuses operator id: 66] +Output [1]: [p_promo_sk#61] + +(140) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_promo_sk#8] +Right keys [1]: [p_promo_sk#61] +Join condition: None + +(141) Project [codegen id : 40] +Output [13]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_promo_sk#8, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, p_promo_sk#61] + +(142) ReusedExchange [Reuses operator id: 72] +Output [2]: [hd_demo_sk#63, hd_income_band_sk#64] + +(143) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_hdemo_sk#5] +Right keys [1]: [hd_demo_sk#63] +Join condition: None + +(144) Project [codegen id : 40] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64] +Input [15]: [ss_item_sk#2, ss_hdemo_sk#5, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, hd_demo_sk#63, hd_income_band_sk#64] + +(145) ReusedExchange [Reuses operator id: 72] +Output [2]: [hd_demo_sk#124, hd_income_band_sk#125] + +(146) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [c_current_hdemo_sk#46] +Right keys [1]: [hd_demo_sk#124] +Join condition: None + +(147) Project [codegen id : 40] +Output [13]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125] +Input [15]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_hdemo_sk#46, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_demo_sk#124, hd_income_band_sk#125] + +(148) ReusedExchange [Reuses operator id: 81] +Output [5]: [ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] + +(149) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_addr_sk#6] +Right keys [1]: [ca_address_sk#68] +Join condition: None + +(150) Project [codegen id : 40] +Output [16]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] +Input [18]: [ss_item_sk#2, ss_addr_sk#6, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_address_sk#68, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72] + +(151) ReusedExchange [Reuses operator id: 81] +Output [5]: [ca_address_sk#126, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] + +(152) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [c_current_addr_sk#47] +Right keys [1]: [ca_address_sk#126] +Join condition: None + +(153) Project [codegen id : 40] +Output [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] +Input [21]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, c_current_addr_sk#47, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_address_sk#126, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] + +(154) ReusedExchange [Reuses operator id: 90] +Output [1]: [ib_income_band_sk#79] + +(155) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [hd_income_band_sk#64] +Right keys [1]: [ib_income_band_sk#79] +Join condition: None + +(156) Project [codegen id : 40] +Output [18]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] +Input [20]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, hd_income_band_sk#64, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, ib_income_band_sk#79] + +(157) ReusedExchange [Reuses operator id: 90] +Output [1]: [ib_income_band_sk#131] + +(158) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [hd_income_band_sk#125] +Right keys [1]: [ib_income_band_sk#131] +Join condition: None + +(159) Project [codegen id : 40] +Output [17]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, hd_income_band_sk#125, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, ib_income_band_sk#131] + +(160) ReusedExchange [Reuses operator id: 100] +Output [2]: [i_item_sk#82, i_product_name#85] + +(161) BroadcastHashJoin [codegen id : 40] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#82] +Join condition: None + +(162) Project [codegen id : 40] +Output [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, d_year#119, d_year#121, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, i_item_sk#82, i_product_name#85] +Input [19]: [ss_item_sk#2, ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, s_store_name#41, s_zip#42, d_year#119, d_year#121, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, i_item_sk#82, i_product_name#85] + +(163) HashAggregate [codegen id : 40] +Input [18]: [ss_wholesale_cost#10, ss_list_price#11, ss_coupon_amt#12, d_year#38, d_year#119, d_year#121, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, i_item_sk#82, i_product_name#85] +Keys [15]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#10)), partial_sum(UnscaledValue(ss_list_price#11)), partial_sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count#132, sum#133, sum#134, sum#135] +Results [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121, count#136, sum#137, sum#138, sum#139] + +(164) Exchange +Input [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121, count#136, sum#137, sum#138, sum#139] +Arguments: hashpartitioning(i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121, 5), true, [id=#140] + +(165) HashAggregate [codegen id : 41] +Input [19]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121, count#136, sum#137, sum#138, sum#139] +Keys [15]: [i_product_name#85, i_item_sk#82, s_store_name#41, s_zip#42, ca_street_number#69, ca_street_name#70, ca_city#71, ca_zip#72, ca_street_number#127, ca_street_name#128, ca_city#129, ca_zip#130, d_year#38, d_year#119, d_year#121] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#10)), sum(UnscaledValue(ss_list_price#11)), sum(UnscaledValue(ss_coupon_amt#12))] +Aggregate Attributes [4]: [count(1)#141, sum(UnscaledValue(ss_wholesale_cost#10))#142, sum(UnscaledValue(ss_list_price#11))#143, sum(UnscaledValue(ss_coupon_amt#12))#144] +Results [8]: [i_item_sk#82 AS item_sk#145, s_store_name#41 AS store_name#146, s_zip#42 AS store_zip#147, d_year#38 AS syear#148, count(1)#141 AS cnt#149, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#10))#142,17,2) AS s1#150, MakeDecimal(sum(UnscaledValue(ss_list_price#11))#143,17,2) AS s2#151, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#12))#144,17,2) AS s3#152] + +(166) BroadcastExchange +Input [8]: [item_sk#145, store_name#146, store_zip#147, syear#148, cnt#149, s1#150, s2#151, s3#152] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, string, true], input[2, string, true]),false), [id=#153] + +(167) BroadcastHashJoin [codegen id : 42] +Left keys [3]: [item_sk#101, store_name#102, store_zip#103] +Right keys [3]: [item_sk#145, store_name#146, store_zip#147] +Join condition: (cnt#149 <= cnt#113) + +(168) Project [codegen id : 42] +Output [21]: [product_name#100, store_name#102, store_zip#103, b_street_number#104, b_streen_name#105, b_city#106, b_zip#107, c_street_number#108, c_street_name#109, c_city#110, c_zip#111, syear#112, cnt#113, s1#114, s2#115, s3#116, s1#150, s2#151, s3#152, syear#148, cnt#149] +Input [25]: [product_name#100, item_sk#101, store_name#102, store_zip#103, b_street_number#104, b_streen_name#105, b_city#106, b_zip#107, c_street_number#108, c_street_name#109, c_city#110, c_zip#111, syear#112, cnt#113, s1#114, s2#115, s3#116, item_sk#145, store_name#146, store_zip#147, syear#148, cnt#149, s1#150, s2#151, s3#152] + +(169) Exchange +Input [21]: [product_name#100, store_name#102, store_zip#103, b_street_number#104, b_streen_name#105, b_city#106, b_zip#107, c_street_number#108, c_street_name#109, c_city#110, c_zip#111, syear#112, cnt#113, s1#114, s2#115, s3#116, s1#150, s2#151, s3#152, syear#148, cnt#149] +Arguments: rangepartitioning(product_name#100 ASC NULLS FIRST, store_name#102 ASC NULLS FIRST, cnt#149 ASC NULLS FIRST, 5), true, [id=#154] + +(170) Sort [codegen id : 43] +Input [21]: [product_name#100, store_name#102, store_zip#103, b_street_number#104, b_streen_name#105, b_city#106, b_zip#107, c_street_number#108, c_street_name#109, c_city#110, c_zip#111, syear#112, cnt#113, s1#114, s2#115, s3#116, s1#150, s2#151, s3#152, syear#148, cnt#149] +Arguments: [product_name#100 ASC NULLS FIRST, store_name#102 ASC NULLS FIRST, cnt#149 ASC NULLS FIRST], true, 0 + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/simplified.txt index 2955e0f78..4c40a359c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q64/simplified.txt @@ -1,229 +1,246 @@ -WholeStageCodegen - Sort [cnt,product_name,store_name] +WholeStageCodegen (43) + Sort [product_name,store_name,cnt] InputAdapter - Exchange [cnt,product_name,store_name] #1 - WholeStageCodegen - Project [b_city,b_streen_name,b_street_number,b_zip,c_city,c_street_name,c_street_number,c_zip,cnt,cnt,product_name,s1,s1,s2,s2,s3,s3,store_name,store_zip,syear,syear] - BroadcastHashJoin [cnt,cnt,item_sk,item_sk,store_name,store_name,store_zip,store_zip] - HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,count(1),d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost))] [b_city,b_streen_name,b_street_number,b_zip,c_city,c_street_name,c_street_number,c_zip,cnt,count,count(1),item_sk,product_name,s1,s2,s3,store_name,store_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost)),syear] + Exchange [product_name,store_name,cnt] #1 + WholeStageCodegen (42) + Project [product_name,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,s1,s2,s3,syear,cnt] + BroadcastHashJoin [item_sk,store_name,store_zip,item_sk,store_name,store_zip,cnt,cnt] + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum] [count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt)),product_name,item_sk,store_name,store_zip,b_street_number,b_streen_name,b_city,b_zip,c_street_number,c_street_name,c_city,c_zip,syear,cnt,s1,s2,s3,count,sum,sum,sum] InputAdapter - Exchange [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip] #2 - WholeStageCodegen - HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,count,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost,sum,sum,sum,sum,sum,sum] [count,count,sum,sum,sum,sum,sum,sum] - Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + Exchange [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year] #2 + WholeStageCodegen (20) + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [count,sum,sum,sum,count,sum,sum,sum] + Project [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,ca_city,ca_street_name,ca_street_number,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [c_current_addr_sk,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [p_promo_sk,ss_promo_sk] - Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cd_marital_status,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] - BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_shipto_date_sk,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [d_year,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_store_sk,ss_wholesale_cost] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] - BroadcastHashJoin [cs_item_sk,ss_item_sk] - Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] - Filter [ss_addr_sk,ss_cdemo_sk,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + BroadcastHashJoin [ss_item_sk,cs_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + Filter [ss_item_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [sr_item_sk,sr_ticket_number] - Filter [sr_item_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] + WholeStageCodegen (1) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (4) Project [cs_item_sk] - Filter [sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(cs_ext_list_price)] - HashAggregate [cs_item_sk,sum,sum,sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(UnscaledValue(cs_ext_list_price))] [sum,sum,sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2))),sum(UnscaledValue(cs_ext_list_price)),sum(cs_ext_list_price)] + Filter [sum(cs_ext_list_price),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true))] + HashAggregate [cs_item_sk,sum,sum,isEmpty] [sum(UnscaledValue(cs_ext_list_price)),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sum(cs_ext_list_price),sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash as decimal(8,2))) + promote_precision(cast(cr_reversed_charge as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) + promote_precision(cast(cr_store_credit as decimal(9,2)))), DecimalType(9,2), true)),sum,sum,isEmpty] InputAdapter Exchange [cs_item_sk] #5 - WholeStageCodegen - HashAggregate [cr_refunded_cash,cr_reversed_charge,cr_store_credit,cs_ext_list_price,cs_item_sk,sum,sum,sum,sum] [sum,sum,sum,sum] - Project [cr_refunded_cash,cr_reversed_charge,cr_store_credit,cs_ext_list_price,cs_item_sk] - BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] - Project [cs_ext_list_price,cs_item_sk,cs_order_number] - Filter [cs_item_sk,cs_order_number] - Scan parquet default.catalog_sales [cs_ext_list_price,cs_item_sk,cs_order_number] [cs_ext_list_price,cs_item_sk,cs_order_number] + WholeStageCodegen (3) + HashAggregate [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] [sum,sum,isEmpty,sum,sum,isEmpty] + Project [cs_item_sk,cs_ext_list_price,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + Filter [cs_item_sk,cs_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_order_number,cs_ext_list_price] InputAdapter BroadcastExchange #6 - WholeStageCodegen - Project [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] - Filter [cr_item_sk,cr_order_number] - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] + WholeStageCodegen (2) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_refunded_cash,cr_reversed_charge,cr_store_credit] InputAdapter BroadcastExchange #7 - WholeStageCodegen - Project [d_date_sk,d_year] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen (5) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #8 - WholeStageCodegen - Project [s_store_name,s_store_sk,s_zip] - Filter [s_store_name,s_store_sk,s_zip] - Scan parquet default.store [s_store_name,s_store_sk,s_zip] [s_store_name,s_store_sk,s_zip] + WholeStageCodegen (6) + Filter [s_store_sk,s_store_name,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_name,s_zip] InputAdapter BroadcastExchange #9 - WholeStageCodegen - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] - Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] - Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] + WholeStageCodegen (7) + Filter [c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] InputAdapter BroadcastExchange #10 - WholeStageCodegen - Project [d_date_sk,d_year] - Filter [d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen (8) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #10 + ReusedExchange [d_date_sk,d_year] #10 InputAdapter BroadcastExchange #11 - WholeStageCodegen - Project [cd_demo_sk,cd_marital_status] - Filter [cd_demo_sk,cd_marital_status] - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] + WholeStageCodegen (10) + Filter [cd_demo_sk,cd_marital_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] InputAdapter - ReusedExchange [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] #11 + ReusedExchange [cd_demo_sk,cd_marital_status] #11 InputAdapter BroadcastExchange #12 - WholeStageCodegen - Project [p_promo_sk] - Filter [p_promo_sk] - Scan parquet default.promotion [p_promo_sk] [p_promo_sk] + WholeStageCodegen (12) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk] InputAdapter BroadcastExchange #13 - WholeStageCodegen - Project [hd_demo_sk,hd_income_band_sk] - Filter [hd_demo_sk,hd_income_band_sk] - Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] + WholeStageCodegen (13) + Filter [hd_demo_sk,hd_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] InputAdapter - ReusedExchange [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] #13 + ReusedExchange [hd_demo_sk,hd_income_band_sk] #13 InputAdapter BroadcastExchange #14 - WholeStageCodegen - Project [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] - Filter [ca_address_sk] - Scan parquet default.customer_address [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] + WholeStageCodegen (15) + Filter [ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] InputAdapter - ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #14 + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #14 InputAdapter BroadcastExchange #15 - WholeStageCodegen - Project [ib_income_band_sk] - Filter [ib_income_band_sk] - Scan parquet default.income_band [ib_income_band_sk] [ib_income_band_sk] + WholeStageCodegen (17) + Filter [ib_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet default.income_band [ib_income_band_sk] InputAdapter - ReusedExchange [ib_income_band_sk] [ib_income_band_sk] #15 + ReusedExchange [ib_income_band_sk] #15 InputAdapter BroadcastExchange #16 - WholeStageCodegen + WholeStageCodegen (19) Project [i_item_sk,i_product_name] - Filter [i_color,i_current_price,i_item_sk] - Scan parquet default.item [i_color,i_current_price,i_item_sk,i_product_name] [i_color,i_current_price,i_item_sk,i_product_name] + Filter [i_current_price,i_color,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_current_price,i_color,i_product_name] InputAdapter BroadcastExchange #17 - WholeStageCodegen - HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,count(1),d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost))] [cnt,count,count(1),item_sk,s1,s2,s3,store_name,store_zip,sum,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_wholesale_cost)),syear] + WholeStageCodegen (41) + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,count,sum,sum,sum] [count(1),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_list_price)),sum(UnscaledValue(ss_coupon_amt)),item_sk,store_name,store_zip,syear,cnt,s1,s2,s3,count,sum,sum,sum] InputAdapter - Exchange [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip] #18 - WholeStageCodegen - HashAggregate [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,count,count,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost,sum,sum,sum,sum,sum,sum] [count,count,sum,sum,sum,sum,sum,sum] - Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,i_item_sk,i_product_name,s_store_name,s_zip,ss_coupon_amt,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + Exchange [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year] #18 + WholeStageCodegen (40) + HashAggregate [i_product_name,i_item_sk,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,d_year,d_year,d_year,ss_wholesale_cost,ss_list_price,ss_coupon_amt] [count,sum,sum,sum,count,sum,sum,sum] + Project [ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,d_year,d_year,s_store_name,s_zip,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip,i_item_sk,i_product_name] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] - Project [ca_city,ca_city,ca_street_name,ca_street_name,ca_street_number,ca_street_number,ca_zip,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip,ca_street_number,ca_street_name,ca_city,ca_zip] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,ca_city,ca_street_name,ca_street_number,ca_zip,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [c_current_addr_sk,d_year,d_year,d_year,hd_income_band_sk,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] + Project [ss_item_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk,ca_street_number,ca_street_name,ca_city,ca_zip] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_addr_sk,d_year,d_year,hd_income_band_sk,hd_income_band_sk] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,hd_income_band_sk,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_wholesale_cost] - BroadcastHashJoin [p_promo_sk,ss_promo_sk] - Project [c_current_addr_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + Project [ss_item_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,hd_income_band_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk,cd_marital_status,cd_marital_status] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cd_marital_status,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] - BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,d_year,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + Project [ss_item_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year,cd_marital_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,d_year,d_year] BroadcastHashJoin [c_first_shipto_date_sk,d_date_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_shipto_date_sk,d_year,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,d_year] BroadcastHashJoin [c_first_sales_date_sk,d_date_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_first_sales_date_sk,c_first_shipto_date_sk,d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [d_year,s_store_name,s_zip,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_wholesale_cost] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [d_year,ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_store_sk,ss_wholesale_cost] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] - BroadcastHashJoin [cs_item_sk,ss_item_sk] - Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_wholesale_cost] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] - Filter [ss_addr_sk,ss_cdemo_sk,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] [ss_addr_sk,ss_cdemo_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_item_sk,ss_list_price,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number,ss_wholesale_cost] + Project [ss_item_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + BroadcastHashJoin [ss_item_sk,cs_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + Filter [ss_item_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_wholesale_cost,ss_list_price,ss_coupon_amt] InputAdapter - ReusedExchange [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] #3 + ReusedExchange [sr_item_sk,sr_ticket_number] #3 InputAdapter - ReusedExchange [cs_item_sk] [cs_item_sk] #4 + ReusedExchange [cs_item_sk] #4 InputAdapter BroadcastExchange #19 - WholeStageCodegen - Project [d_date_sk,d_year] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen (25) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter - ReusedExchange [s_store_name,s_store_sk,s_zip] [s_store_name,s_store_sk,s_zip] #8 + ReusedExchange [s_store_sk,s_store_name,s_zip] #8 InputAdapter - ReusedExchange [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk,c_first_sales_date_sk,c_first_shipto_date_sk] #9 + ReusedExchange [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_shipto_date_sk,c_first_sales_date_sk] #9 InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #10 + ReusedExchange [d_date_sk,d_year] #10 InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #10 + ReusedExchange [d_date_sk,d_year] #10 InputAdapter - ReusedExchange [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] #11 + ReusedExchange [cd_demo_sk,cd_marital_status] #11 InputAdapter - ReusedExchange [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] #11 + ReusedExchange [cd_demo_sk,cd_marital_status] #11 InputAdapter - ReusedExchange [p_promo_sk] [p_promo_sk] #12 + ReusedExchange [p_promo_sk] #12 InputAdapter - ReusedExchange [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] #13 + ReusedExchange [hd_demo_sk,hd_income_band_sk] #13 InputAdapter - ReusedExchange [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] #13 + ReusedExchange [hd_demo_sk,hd_income_band_sk] #13 InputAdapter - ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #14 + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #14 InputAdapter - ReusedExchange [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] [ca_address_sk,ca_city,ca_street_name,ca_street_number,ca_zip] #14 + ReusedExchange [ca_address_sk,ca_street_number,ca_street_name,ca_city,ca_zip] #14 InputAdapter - ReusedExchange [ib_income_band_sk] [ib_income_band_sk] #15 + ReusedExchange [ib_income_band_sk] #15 InputAdapter - ReusedExchange [ib_income_band_sk] [ib_income_band_sk] #15 + ReusedExchange [ib_income_band_sk] #15 InputAdapter - ReusedExchange [i_item_sk,i_product_name] [i_item_sk,i_product_name] #16 + ReusedExchange [i_item_sk,i_product_name] #16 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/explain.txt index f29686b9f..ab87816b8 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/explain.txt @@ -1,42 +1,245 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST,i_item_desc#2 ASC NULLS FIRST], output=[s_store_name#1,i_item_desc#2,revenue#3,i_current_price#4,i_wholesale_cost#5,i_brand#6]) -+- *(9) Project [s_store_name#1, i_item_desc#2, revenue#3, i_current_price#4, i_wholesale_cost#5, i_brand#6] - +- *(9) BroadcastHashJoin [ss_store_sk#7], [ss_store_sk#8], Inner, BuildRight, (cast(revenue#3 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#9)), DecimalType(23,7))) - :- *(9) Project [s_store_name#1, ss_store_sk#7, revenue#3, i_item_desc#2, i_current_price#4, i_wholesale_cost#5, i_brand#6] - : +- *(9) BroadcastHashJoin [ss_item_sk#10], [i_item_sk#11], Inner, BuildRight - : :- *(9) Project [s_store_name#1, ss_store_sk#7, ss_item_sk#10, revenue#3] - : : +- *(9) BroadcastHashJoin [s_store_sk#12], [ss_store_sk#7], Inner, BuildRight - : : :- *(9) Project [s_store_sk#12, s_store_name#1] - : : : +- *(9) Filter isnotnull(s_store_sk#12) - : : : +- *(9) FileScan parquet default.store[s_store_sk#12,s_store_name#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Filter isnotnull(revenue#3) - : : +- *(3) HashAggregate(keys=[ss_store_sk#7, ss_item_sk#10], functions=[sum(UnscaledValue(ss_sales_price#13))]) - : : +- Exchange hashpartitioning(ss_store_sk#7, ss_item_sk#10, 5) - : : +- *(2) HashAggregate(keys=[ss_store_sk#7, ss_item_sk#10], functions=[partial_sum(UnscaledValue(ss_sales_price#13))]) - : : +- *(2) Project [ss_item_sk#10, ss_store_sk#7, ss_sales_price#13] - : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight - : : :- *(2) Project [ss_sold_date_sk#14, ss_item_sk#10, ss_store_sk#7, ss_sales_price#13] - : : : +- *(2) Filter ((isnotnull(ss_sold_date_sk#14) && isnotnull(ss_store_sk#7)) && isnotnull(ss_item_sk#10)) - : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#14,ss_item_sk#10,ss_store_sk#7,ss_sales_price#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [d_date_sk#15] - : : +- *(1) Filter (((isnotnull(d_month_seq#16) && (d_month_seq#16 >= 1176)) && (d_month_seq#16 <= 1187)) && isnotnull(d_date_sk#15)) - : : +- *(1) FileScan parquet default.date_dim[d_date_sk#15,d_month_seq#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1176), LessThanOrEqual(d_month_seq,1187),..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [i_item_sk#11, i_item_desc#2, i_current_price#4, i_wholesale_cost#5, i_brand#6] - : +- *(4) Filter isnotnull(i_item_sk#11) - : +- *(4) FileScan parquet default.item[i_item_sk#11,i_item_desc#2,i_current_price#4,i_wholesale_cost#5,i_brand#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - +- ReusedExchange [d_date_sk#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +TakeOrderedAndProject (42) ++- * Project (41) + +- * BroadcastHashJoin Inner BuildRight (40) + :- * Project (26) + : +- * BroadcastHashJoin Inner BuildRight (25) + : :- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store (1) + : : +- BroadcastExchange (18) + : : +- * Filter (17) + : : +- * HashAggregate (16) + : : +- Exchange (15) + : : +- * HashAggregate (14) + : : +- * Project (13) + : : +- * BroadcastHashJoin Inner BuildRight (12) + : : :- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.store_sales (4) + : : +- BroadcastExchange (11) + : : +- * Project (10) + : : +- * Filter (9) + : : +- * ColumnarToRow (8) + : : +- Scan parquet default.date_dim (7) + : +- BroadcastExchange (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet default.item (21) + +- BroadcastExchange (39) + +- * HashAggregate (38) + +- Exchange (37) + +- * HashAggregate (36) + +- * HashAggregate (35) + +- Exchange (34) + +- * HashAggregate (33) + +- * Project (32) + +- * BroadcastHashJoin Inner BuildRight (31) + :- * Filter (29) + : +- * ColumnarToRow (28) + : +- Scan parquet default.store_sales (27) + +- ReusedExchange (30) + + +(1) Scan parquet default.store +Output [2]: [s_store_sk#1, s_store_name#2] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [2]: [s_store_sk#1, s_store_name#2] + +(3) Filter [codegen id : 9] +Input [2]: [s_store_sk#1, s_store_name#2] +Condition : isnotnull(s_store_sk#1) + +(4) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [4]: [ss_sold_date_sk#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] + +(6) Filter [codegen id : 2] +Input [4]: [ss_sold_date_sk#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] +Condition : ((isnotnull(ss_sold_date_sk#3) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_item_sk#4)) + +(7) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_month_seq#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1176), LessThanOrEqual(d_month_seq,1187), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#7, d_month_seq#8] + +(9) Filter [codegen id : 1] +Input [2]: [d_date_sk#7, d_month_seq#8] +Condition : (((isnotnull(d_month_seq#8) AND (d_month_seq#8 >= 1176)) AND (d_month_seq#8 <= 1187)) AND isnotnull(d_date_sk#7)) + +(10) Project [codegen id : 1] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_month_seq#8] + +(11) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(12) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#3] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(13) Project [codegen id : 2] +Output [3]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] +Input [5]: [ss_sold_date_sk#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, d_date_sk#7] + +(14) HashAggregate [codegen id : 2] +Input [3]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6] +Keys [2]: [ss_store_sk#5, ss_item_sk#4] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [1]: [sum#10] +Results [3]: [ss_store_sk#5, ss_item_sk#4, sum#11] + +(15) Exchange +Input [3]: [ss_store_sk#5, ss_item_sk#4, sum#11] +Arguments: hashpartitioning(ss_store_sk#5, ss_item_sk#4, 5), true, [id=#12] + +(16) HashAggregate [codegen id : 3] +Input [3]: [ss_store_sk#5, ss_item_sk#4, sum#11] +Keys [2]: [ss_store_sk#5, ss_item_sk#4] +Functions [1]: [sum(UnscaledValue(ss_sales_price#6))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#6))#13] +Results [3]: [ss_store_sk#5, ss_item_sk#4, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#13,17,2) AS revenue#14] + +(17) Filter [codegen id : 3] +Input [3]: [ss_store_sk#5, ss_item_sk#4, revenue#14] +Condition : isnotnull(revenue#14) + +(18) BroadcastExchange +Input [3]: [ss_store_sk#5, ss_item_sk#4, revenue#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(19) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [s_store_sk#1] +Right keys [1]: [ss_store_sk#5] +Join condition: None + +(20) Project [codegen id : 9] +Output [4]: [s_store_name#2, ss_store_sk#5, ss_item_sk#4, revenue#14] +Input [5]: [s_store_sk#1, s_store_name#2, ss_store_sk#5, ss_item_sk#4, revenue#14] + +(21) Scan parquet default.item +Output [5]: [i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] + +(23) Filter [codegen id : 4] +Input [5]: [i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] +Condition : isnotnull(i_item_sk#16) + +(24) BroadcastExchange +Input [5]: [i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] + +(25) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_item_sk#4] +Right keys [1]: [i_item_sk#16] +Join condition: None + +(26) Project [codegen id : 9] +Output [7]: [s_store_name#2, ss_store_sk#5, revenue#14, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] +Input [9]: [s_store_name#2, ss_store_sk#5, ss_item_sk#4, revenue#14, i_item_sk#16, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20] + +(27) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#22, ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(28) ColumnarToRow [codegen id : 6] +Input [4]: [ss_sold_date_sk#22, ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] + +(29) Filter [codegen id : 6] +Input [4]: [ss_sold_date_sk#22, ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] +Condition : (isnotnull(ss_sold_date_sk#22) AND isnotnull(ss_store_sk#24)) + +(30) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#7] + +(31) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#22] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(32) Project [codegen id : 6] +Output [3]: [ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] +Input [5]: [ss_sold_date_sk#22, ss_item_sk#23, ss_store_sk#24, ss_sales_price#25, d_date_sk#7] + +(33) HashAggregate [codegen id : 6] +Input [3]: [ss_item_sk#23, ss_store_sk#24, ss_sales_price#25] +Keys [2]: [ss_store_sk#24, ss_item_sk#23] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#25))] +Aggregate Attributes [1]: [sum#26] +Results [3]: [ss_store_sk#24, ss_item_sk#23, sum#27] + +(34) Exchange +Input [3]: [ss_store_sk#24, ss_item_sk#23, sum#27] +Arguments: hashpartitioning(ss_store_sk#24, ss_item_sk#23, 5), true, [id=#28] + +(35) HashAggregate [codegen id : 7] +Input [3]: [ss_store_sk#24, ss_item_sk#23, sum#27] +Keys [2]: [ss_store_sk#24, ss_item_sk#23] +Functions [1]: [sum(UnscaledValue(ss_sales_price#25))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#25))#29] +Results [2]: [ss_store_sk#24, MakeDecimal(sum(UnscaledValue(ss_sales_price#25))#29,17,2) AS revenue#30] + +(36) HashAggregate [codegen id : 7] +Input [2]: [ss_store_sk#24, revenue#30] +Keys [1]: [ss_store_sk#24] +Functions [1]: [partial_avg(revenue#30)] +Aggregate Attributes [2]: [sum#31, count#32] +Results [3]: [ss_store_sk#24, sum#33, count#34] + +(37) Exchange +Input [3]: [ss_store_sk#24, sum#33, count#34] +Arguments: hashpartitioning(ss_store_sk#24, 5), true, [id=#35] + +(38) HashAggregate [codegen id : 8] +Input [3]: [ss_store_sk#24, sum#33, count#34] +Keys [1]: [ss_store_sk#24] +Functions [1]: [avg(revenue#30)] +Aggregate Attributes [1]: [avg(revenue#30)#36] +Results [2]: [ss_store_sk#24, avg(revenue#30)#36 AS ave#37] + +(39) BroadcastExchange +Input [2]: [ss_store_sk#24, ave#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#38] + +(40) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [ss_store_sk#24] +Join condition: (cast(revenue#14 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#37)), DecimalType(23,7), true)) + +(41) Project [codegen id : 9] +Output [6]: [s_store_name#2, i_item_desc#17, revenue#14, i_current_price#18, i_wholesale_cost#19, i_brand#20] +Input [9]: [s_store_name#2, ss_store_sk#5, revenue#14, i_item_desc#17, i_current_price#18, i_wholesale_cost#19, i_brand#20, ss_store_sk#24, ave#37] + +(42) TakeOrderedAndProject +Input [6]: [s_store_name#2, i_item_desc#17, revenue#14, i_current_price#18, i_wholesale_cost#19, i_brand#20] +Arguments: 100, [s_store_name#2 ASC NULLS FIRST, i_item_desc#17 ASC NULLS FIRST], [s_store_name#2, i_item_desc#17, revenue#14, i_current_price#18, i_wholesale_cost#19, i_brand#20] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/simplified.txt index 2250a433c..a4b468ffe 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q65/simplified.txt @@ -1,57 +1,63 @@ -TakeOrderedAndProject [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name] - WholeStageCodegen - Project [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name] - BroadcastHashJoin [ave,revenue,ss_store_sk,ss_store_sk] - Project [i_brand,i_current_price,i_item_desc,i_wholesale_cost,revenue,s_store_name,ss_store_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [revenue,s_store_name,ss_item_sk,ss_store_sk] +TakeOrderedAndProject [s_store_name,i_item_desc,revenue,i_current_price,i_wholesale_cost,i_brand] + WholeStageCodegen (9) + Project [s_store_name,i_item_desc,revenue,i_current_price,i_wholesale_cost,i_brand] + BroadcastHashJoin [ss_store_sk,ss_store_sk,revenue,ave] + Project [s_store_name,ss_store_sk,revenue,i_item_desc,i_current_price,i_wholesale_cost,i_brand] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [s_store_name,ss_store_sk,ss_item_sk,revenue] BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [s_store_name,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_name,s_store_sk] [s_store_name,s_store_sk] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_name] InputAdapter BroadcastExchange #1 - WholeStageCodegen + WholeStageCodegen (3) Filter [revenue] - HashAggregate [ss_item_sk,ss_store_sk,sum,sum(UnscaledValue(ss_sales_price))] [revenue,sum,sum(UnscaledValue(ss_sales_price))] + HashAggregate [ss_store_sk,ss_item_sk,sum] [sum(UnscaledValue(ss_sales_price)),revenue,sum] InputAdapter - Exchange [ss_item_sk,ss_store_sk] #2 - WholeStageCodegen - HashAggregate [ss_item_sk,ss_sales_price,ss_store_sk,sum,sum] [sum,sum] - Project [ss_item_sk,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Exchange [ss_store_sk,ss_item_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_store_sk,ss_item_sk,ss_sales_price] [sum,sum] + Project [ss_item_sk,ss_store_sk,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [i_brand,i_current_price,i_item_desc,i_item_sk,i_wholesale_cost] - Filter [i_item_sk] - Scan parquet default.item [i_brand,i_current_price,i_item_desc,i_item_sk,i_wholesale_cost] [i_brand,i_current_price,i_item_desc,i_item_sk,i_wholesale_cost] + WholeStageCodegen (4) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_desc,i_current_price,i_wholesale_cost,i_brand] InputAdapter BroadcastExchange #5 - WholeStageCodegen - HashAggregate [avg(revenue),count,ss_store_sk,sum] [ave,avg(revenue),count,sum] + WholeStageCodegen (8) + HashAggregate [ss_store_sk,sum,count] [avg(revenue),ave,sum,count] InputAdapter Exchange [ss_store_sk] #6 - WholeStageCodegen - HashAggregate [count,count,revenue,ss_store_sk,sum,sum] [count,count,sum,sum] - HashAggregate [ss_item_sk,ss_store_sk,sum,sum(UnscaledValue(ss_sales_price))] [revenue,sum,sum(UnscaledValue(ss_sales_price))] + WholeStageCodegen (7) + HashAggregate [ss_store_sk,revenue] [sum,count,sum,count] + HashAggregate [ss_store_sk,ss_item_sk,sum] [sum(UnscaledValue(ss_sales_price)),revenue,sum] InputAdapter - Exchange [ss_item_sk,ss_store_sk] #7 - WholeStageCodegen - HashAggregate [ss_item_sk,ss_sales_price,ss_store_sk,sum,sum] [sum,sum] - Project [ss_item_sk,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Exchange [ss_store_sk,ss_item_sk] #7 + WholeStageCodegen (6) + HashAggregate [ss_store_sk,ss_item_sk,ss_sales_price] [sum,sum] + Project [ss_item_sk,ss_store_sk,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 + ReusedExchange [d_date_sk] #3 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/explain.txt index 497af0999..fc18efd3d 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/explain.txt @@ -1,54 +1,310 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[w_warehouse_name#1 ASC NULLS FIRST], output=[w_warehouse_name#1,w_warehouse_sq_ft#2,w_city#3,w_county#4,w_state#5,w_country#6,ship_carriers#7,year#8,jan_sales#9,feb_sales#10,mar_sales#11,apr_sales#12,may_sales#13,jun_sales#14,jul_sales#15,aug_sales#16,sep_sales#17,oct_sales#18,nov_sales#19,dec_sales#20,jan_sales_per_sq_foot#21,feb_sales_per_sq_foot#22,mar_sales_per_sq_foot#23,apr_sales_per_sq_foot#24,... 20 more fields]) -+- *(14) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, ship_carriers#7, year#8], functions=[sum(jan_sales#25), sum(feb_sales#26), sum(mar_sales#27), sum(apr_sales#28), sum(may_sales#29), sum(jun_sales#30), sum(jul_sales#31), sum(aug_sales#32), sum(sep_sales#33), sum(oct_sales#34), sum(nov_sales#35), sum(dec_sales#36), sum(CheckOverflow((promote_precision(jan_sales#25) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(feb_sales#26) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(mar_sales#27) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(apr_sales#28) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(may_sales#29) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jun_sales#30) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jul_sales#31) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(aug_sales#32) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(sep_sales#33) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(oct_sales#34) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(nov_sales#35) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(dec_sales#36) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), ... 12 more fields]) - +- Exchange hashpartitioning(w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, ship_carriers#7, year#8, 5) - +- *(13) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, ship_carriers#7, year#8], functions=[partial_sum(jan_sales#25), partial_sum(feb_sales#26), partial_sum(mar_sales#27), partial_sum(apr_sales#28), partial_sum(may_sales#29), partial_sum(jun_sales#30), partial_sum(jul_sales#31), partial_sum(aug_sales#32), partial_sum(sep_sales#33), partial_sum(oct_sales#34), partial_sum(nov_sales#35), partial_sum(dec_sales#36), partial_sum(CheckOverflow((promote_precision(jan_sales#25) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(feb_sales#26) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(mar_sales#27) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(apr_sales#28) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(may_sales#29) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jun_sales#30) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jul_sales#31) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(aug_sales#32) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(sep_sales#33) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(oct_sales#34) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(nov_sales#35) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(dec_sales#36) / promote_precision(cast(cast(w_warehouse_sq_ft#2 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))), ... 12 more fields]) - +- Union - :- *(6) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) - : +- Exchange hashpartitioning(w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, 5) - : +- *(5) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#41 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#40 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) - : +- *(5) Project [ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] - : +- *(5) BroadcastHashJoin [ws_ship_mode_sk#42], [sm_ship_mode_sk#43], Inner, BuildRight - : :- *(5) Project [ws_ship_mode_sk#42, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] - : : +- *(5) BroadcastHashJoin [ws_sold_time_sk#44], [t_time_sk#45], Inner, BuildRight - : : :- *(5) Project [ws_sold_time_sk#44, ws_ship_mode_sk#42, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] - : : : +- *(5) BroadcastHashJoin [ws_sold_date_sk#46], [d_date_sk#47], Inner, BuildRight - : : : :- *(5) Project [ws_sold_date_sk#46, ws_sold_time_sk#44, ws_ship_mode_sk#42, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6] - : : : : +- *(5) BroadcastHashJoin [ws_warehouse_sk#48], [w_warehouse_sk#49], Inner, BuildRight - : : : : :- *(5) Project [ws_sold_date_sk#46, ws_sold_time_sk#44, ws_ship_mode_sk#42, ws_warehouse_sk#48, ws_quantity#40, ws_ext_sales_price#39, ws_net_paid#41] - : : : : : +- *(5) Filter (((isnotnull(ws_warehouse_sk#48) && isnotnull(ws_sold_date_sk#46)) && isnotnull(ws_sold_time_sk#44)) && isnotnull(ws_ship_mode_sk#42)) - : : : : : +- *(5) FileScan parquet default.web_sales[ws_sold_date_sk#46,ws_sold_time_sk#44,ws_ship_mode_sk#42,ws_warehouse_sk#48,ws_quantity#40,ws_ext_sales_price#39,ws_net_paid#41] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [t_time_sk#45] - : : +- *(3) Filter (((isnotnull(t_time#50) && (t_time#50 >= 30838)) && (t_time#50 <= 59638)) && isnotnull(t_time_sk#45)) - : : +- *(3) FileScan parquet default.time_dim[t_time_sk#45,t_time#50] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_time), GreaterThanOrEqual(t_time,30838), LessThanOrEqual(t_time,59638), IsNotNull(t_..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [sm_ship_mode_sk#43] - : +- *(4) Filter (sm_carrier#51 IN (DHL,BARIAN) && isnotnull(sm_ship_mode_sk#43)) - : +- *(4) FileScan parquet default.ship_mode[sm_ship_mode_sk#43,sm_carrier#51] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/ship_mode], PartitionFilters: [], PushedFilters: [In(sm_carrier, [DHL,BARIAN]), IsNotNull(sm_ship_mode_sk)], ReadSchema: struct - +- *(12) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) - +- Exchange hashpartitioning(w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, 5) - +- *(11) HashAggregate(keys=[w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37], functions=[partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#52 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#38 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#54 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#53 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)]) - +- *(11) Project [cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] - +- *(11) BroadcastHashJoin [cs_ship_mode_sk#55], [sm_ship_mode_sk#43], Inner, BuildRight - :- *(11) Project [cs_ship_mode_sk#55, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] - : +- *(11) BroadcastHashJoin [cs_sold_time_sk#56], [t_time_sk#45], Inner, BuildRight - : :- *(11) Project [cs_sold_time_sk#56, cs_ship_mode_sk#55, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6, d_year#37, d_moy#38] - : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#57], [d_date_sk#47], Inner, BuildRight - : : :- *(11) Project [cs_sold_date_sk#57, cs_sold_time_sk#56, cs_ship_mode_sk#55, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54, w_warehouse_name#1, w_warehouse_sq_ft#2, w_city#3, w_county#4, w_state#5, w_country#6] - : : : +- *(11) BroadcastHashJoin [cs_warehouse_sk#58], [w_warehouse_sk#49], Inner, BuildRight - : : : :- *(11) Project [cs_sold_date_sk#57, cs_sold_time_sk#56, cs_ship_mode_sk#55, cs_warehouse_sk#58, cs_quantity#53, cs_sales_price#52, cs_net_paid_inc_tax#54] - : : : : +- *(11) Filter (((isnotnull(cs_warehouse_sk#58) && isnotnull(cs_sold_date_sk#57)) && isnotnull(cs_sold_time_sk#56)) && isnotnull(cs_ship_mode_sk#55)) - : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#57,cs_sold_time_sk#56,cs_ship_mode_sk#55,cs_warehouse_sk#58,cs_quantity#53,cs_sales_price#52,cs_net_paid_inc_tax#54] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_sold_time_sk), IsNotNull(cs..., ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [7]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7] + +(3) Filter [codegen id : 5] +Input [7]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7] +Condition : (((isnotnull(ws_warehouse_sk#4) AND isnotnull(ws_sold_date_sk#1)) AND isnotnull(ws_sold_time_sk#2)) AND isnotnull(ws_ship_mode_sk#3)) + +(4) Scan parquet default.warehouse +Output [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(6) Filter [codegen id : 1] +Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Condition : isnotnull(w_warehouse_sk#8) + +(7) BroadcastExchange +Input [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_warehouse_sk#4] +Right keys [1]: [w_warehouse_sk#8] +Join condition: None + +(9) Project [codegen id : 5] +Output [12]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Input [14]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Condition : ((isnotnull(d_year#17) AND (d_year#17 = 2001)) AND isnotnull(d_date_sk#16)) + +(13) BroadcastExchange +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#16] +Join condition: None + +(15) Project [codegen id : 5] +Output [13]: [ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] +Input [15]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_date_sk#16, d_year#17, d_moy#18] + +(16) Scan parquet default.time_dim +Output [2]: [t_time_sk#20, t_time#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_time), GreaterThanOrEqual(t_time,30838), LessThanOrEqual(t_time,59638), IsNotNull(t_time_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [t_time_sk#20, t_time#21] + +(18) Filter [codegen id : 3] +Input [2]: [t_time_sk#20, t_time#21] +Condition : (((isnotnull(t_time#21) AND (t_time#21 >= 30838)) AND (t_time#21 <= 59638)) AND isnotnull(t_time_sk#20)) + +(19) Project [codegen id : 3] +Output [1]: [t_time_sk#20] +Input [2]: [t_time_sk#20, t_time#21] + +(20) BroadcastExchange +Input [1]: [t_time_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_sold_time_sk#2] +Right keys [1]: [t_time_sk#20] +Join condition: None + +(22) Project [codegen id : 5] +Output [12]: [ws_ship_mode_sk#3, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] +Input [14]: [ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18, t_time_sk#20] + +(23) Scan parquet default.ship_mode +Output [2]: [sm_ship_mode_sk#23, sm_carrier#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/ship_mode] +PushedFilters: [In(sm_carrier, [DHL,BARIAN]), IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [sm_ship_mode_sk#23, sm_carrier#24] + +(25) Filter [codegen id : 4] +Input [2]: [sm_ship_mode_sk#23, sm_carrier#24] +Condition : (sm_carrier#24 IN (DHL,BARIAN) AND isnotnull(sm_ship_mode_sk#23)) + +(26) Project [codegen id : 4] +Output [1]: [sm_ship_mode_sk#23] +Input [2]: [sm_ship_mode_sk#23, sm_carrier#24] + +(27) BroadcastExchange +Input [1]: [sm_ship_mode_sk#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] + +(28) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ws_ship_mode_sk#3] +Right keys [1]: [sm_ship_mode_sk#23] +Join condition: None + +(29) Project [codegen id : 5] +Output [11]: [ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] +Input [13]: [ws_ship_mode_sk#3, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18, sm_ship_mode_sk#23] + +(30) HashAggregate [codegen id : 5] +Input [11]: [ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] +Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17] +Functions [24]: [partial_sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Aggregate Attributes [48]: [sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69, sum#70, isEmpty#71, sum#72, isEmpty#73] +Results [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] + +(31) Exchange +Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] +Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, 5), true, [id=#122] + +(32) HashAggregate [codegen id : 6] +Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] +Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17] +Functions [24]: [sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146] +Results [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, DHL,BARIAN AS ship_carriers#147, d_year#17 AS year#148, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123 AS jan_sales#149, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124 AS feb_sales#150, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125 AS mar_sales#151, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126 AS apr_sales#152, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127 AS may_sales#153, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128 AS jun_sales#154, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129 AS jul_sales#155, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130 AS aug_sales#156, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131 AS sep_sales#157, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132 AS oct_sales#158, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133 AS nov_sales#159, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134 AS dec_sales#160, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135 AS jan_net#161, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136 AS feb_net#162, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137 AS mar_net#163, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138 AS apr_net#164, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139 AS may_net#165, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140 AS jun_net#166, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141 AS jul_net#167, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142 AS aug_net#168, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143 AS sep_net#169, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144 AS oct_net#170, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145 AS nov_net#171, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146 AS dec_net#172] + +(33) Scan parquet default.catalog_sales +Output [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_sold_time_sk), IsNotNull(cs_ship_mode_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 11] +Input [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] + +(35) Filter [codegen id : 11] +Input [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] +Condition : (((isnotnull(cs_warehouse_sk#176) AND isnotnull(cs_sold_date_sk#173)) AND isnotnull(cs_sold_time_sk#174)) AND isnotnull(cs_ship_mode_sk#175)) + +(36) ReusedExchange [Reuses operator id: 7] +Output [7]: [w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(37) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_warehouse_sk#176] +Right keys [1]: [w_warehouse_sk#8] +Join condition: None + +(38) Project [codegen id : 11] +Output [12]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] +Input [14]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_sk#8, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14] + +(39) ReusedExchange [Reuses operator id: 13] +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] + +(40) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#173] +Right keys [1]: [d_date_sk#16] +Join condition: None + +(41) Project [codegen id : 11] +Output [13]: [cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] +Input [15]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_date_sk#16, d_year#17, d_moy#18] + +(42) ReusedExchange [Reuses operator id: 20] +Output [1]: [t_time_sk#20] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_time_sk#174] +Right keys [1]: [t_time_sk#20] +Join condition: None + +(44) Project [codegen id : 11] +Output [12]: [cs_ship_mode_sk#175, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] +Input [14]: [cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18, t_time_sk#20] + +(45) ReusedExchange [Reuses operator id: 27] +Output [1]: [sm_ship_mode_sk#23] + +(46) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_ship_mode_sk#175] +Right keys [1]: [sm_ship_mode_sk#23] +Join condition: None + +(47) Project [codegen id : 11] +Output [11]: [cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] +Input [13]: [cs_ship_mode_sk#175, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18, sm_ship_mode_sk#23] + +(48) HashAggregate [codegen id : 11] +Input [11]: [cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, d_moy#18] +Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17] +Functions [24]: [partial_sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Aggregate Attributes [48]: [sum#180, isEmpty#181, sum#182, isEmpty#183, sum#184, isEmpty#185, sum#186, isEmpty#187, sum#188, isEmpty#189, sum#190, isEmpty#191, sum#192, isEmpty#193, sum#194, isEmpty#195, sum#196, isEmpty#197, sum#198, isEmpty#199, sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205, sum#206, isEmpty#207, sum#208, isEmpty#209, sum#210, isEmpty#211, sum#212, isEmpty#213, sum#214, isEmpty#215, sum#216, isEmpty#217, sum#218, isEmpty#219, sum#220, isEmpty#221, sum#222, isEmpty#223, sum#224, isEmpty#225, sum#226, isEmpty#227] +Results [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237, sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275] + +(49) Exchange +Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237, sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275] +Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, 5), true, [id=#276] + +(50) HashAggregate [codegen id : 12] +Input [55]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237, sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275] +Keys [7]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, d_year#17] +Functions [24]: [sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#277, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#278, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#279, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#280, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#281, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#282, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#283, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#284, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#285, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#286, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#287, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#288, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#295, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300] +Results [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, DHL,BARIAN AS ship_carriers#301, d_year#17 AS year#302, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#277 AS jan_sales#303, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#278 AS feb_sales#304, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#279 AS mar_sales#305, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#280 AS apr_sales#306, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#281 AS may_sales#307, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#282 AS jun_sales#308, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#283 AS jul_sales#309, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#284 AS aug_sales#310, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#285 AS sep_sales#311, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#286 AS oct_sales#312, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#287 AS nov_sales#313, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#178 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#288 AS dec_sales#314, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#289 AS jan_net#315, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#290 AS feb_net#316, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#291 AS mar_net#317, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#292 AS apr_net#318, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#293 AS may_net#319, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#294 AS jun_net#320, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#295 AS jul_net#321, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#296 AS aug_net#322, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#297 AS sep_net#323, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#298 AS oct_net#324, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#299 AS nov_net#325, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#179 as decimal(12,2))) * promote_precision(cast(cast(cs_quantity#177 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#300 AS dec_net#326] + +(51) Union + +(52) HashAggregate [codegen id : 13] +Input [32]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, jan_sales#149, feb_sales#150, mar_sales#151, apr_sales#152, may_sales#153, jun_sales#154, jul_sales#155, aug_sales#156, sep_sales#157, oct_sales#158, nov_sales#159, dec_sales#160, jan_net#161, feb_net#162, mar_net#163, apr_net#164, may_net#165, jun_net#166, jul_net#167, aug_net#168, sep_net#169, oct_net#170, nov_net#171, dec_net#172] +Keys [8]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148] +Functions [36]: [partial_sum(jan_sales#149), partial_sum(feb_sales#150), partial_sum(mar_sales#151), partial_sum(apr_sales#152), partial_sum(may_sales#153), partial_sum(jun_sales#154), partial_sum(jul_sales#155), partial_sum(aug_sales#156), partial_sum(sep_sales#157), partial_sum(oct_sales#158), partial_sum(nov_sales#159), partial_sum(dec_sales#160), partial_sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), partial_sum(jan_net#161), partial_sum(feb_net#162), partial_sum(mar_net#163), partial_sum(apr_net#164), partial_sum(may_net#165), partial_sum(jun_net#166), partial_sum(jul_net#167), partial_sum(aug_net#168), partial_sum(sep_net#169), partial_sum(oct_net#170), partial_sum(nov_net#171), partial_sum(dec_net#172)] +Aggregate Attributes [72]: [sum#327, isEmpty#328, sum#329, isEmpty#330, sum#331, isEmpty#332, sum#333, isEmpty#334, sum#335, isEmpty#336, sum#337, isEmpty#338, sum#339, isEmpty#340, sum#341, isEmpty#342, sum#343, isEmpty#344, sum#345, isEmpty#346, sum#347, isEmpty#348, sum#349, isEmpty#350, sum#351, isEmpty#352, sum#353, isEmpty#354, sum#355, isEmpty#356, sum#357, isEmpty#358, sum#359, isEmpty#360, sum#361, isEmpty#362, sum#363, isEmpty#364, sum#365, isEmpty#366, sum#367, isEmpty#368, sum#369, isEmpty#370, sum#371, isEmpty#372, sum#373, isEmpty#374, sum#375, isEmpty#376, sum#377, isEmpty#378, sum#379, isEmpty#380, sum#381, isEmpty#382, sum#383, isEmpty#384, sum#385, isEmpty#386, sum#387, isEmpty#388, sum#389, isEmpty#390, sum#391, isEmpty#392, sum#393, isEmpty#394, sum#395, isEmpty#396, sum#397, isEmpty#398] +Results [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470] + +(53) Exchange +Input [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470] +Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, 5), true, [id=#471] + +(54) HashAggregate [codegen id : 14] +Input [80]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470] +Keys [8]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148] +Functions [36]: [sum(jan_sales#149), sum(feb_sales#150), sum(mar_sales#151), sum(apr_sales#152), sum(may_sales#153), sum(jun_sales#154), sum(jul_sales#155), sum(aug_sales#156), sum(sep_sales#157), sum(oct_sales#158), sum(nov_sales#159), sum(dec_sales#160), sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)), sum(jan_net#161), sum(feb_net#162), sum(mar_net#163), sum(apr_net#164), sum(may_net#165), sum(jun_net#166), sum(jul_net#167), sum(aug_net#168), sum(sep_net#169), sum(oct_net#170), sum(nov_net#171), sum(dec_net#172)] +Aggregate Attributes [36]: [sum(jan_sales#149)#472, sum(feb_sales#150)#473, sum(mar_sales#151)#474, sum(apr_sales#152)#475, sum(may_sales#153)#476, sum(jun_sales#154)#477, sum(jul_sales#155)#478, sum(aug_sales#156)#479, sum(sep_sales#157)#480, sum(oct_sales#158)#481, sum(nov_sales#159)#482, sum(dec_sales#160)#483, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#484, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#485, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#486, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#487, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#488, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#489, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#490, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#491, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#492, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#493, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#494, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#495, sum(jan_net#161)#496, sum(feb_net#162)#497, sum(mar_net#163)#498, sum(apr_net#164)#499, sum(may_net#165)#500, sum(jun_net#166)#501, sum(jul_net#167)#502, sum(aug_net#168)#503, sum(sep_net#169)#504, sum(oct_net#170)#505, sum(nov_net#171)#506, sum(dec_net#172)#507] +Results [44]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, sum(jan_sales#149)#472 AS jan_sales#508, sum(feb_sales#150)#473 AS feb_sales#509, sum(mar_sales#151)#474 AS mar_sales#510, sum(apr_sales#152)#475 AS apr_sales#511, sum(may_sales#153)#476 AS may_sales#512, sum(jun_sales#154)#477 AS jun_sales#513, sum(jul_sales#155)#478 AS jul_sales#514, sum(aug_sales#156)#479 AS aug_sales#515, sum(sep_sales#157)#480 AS sep_sales#516, sum(oct_sales#158)#481 AS oct_sales#517, sum(nov_sales#159)#482 AS nov_sales#518, sum(dec_sales#160)#483 AS dec_sales#519, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#484 AS jan_sales_per_sq_foot#520, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#485 AS feb_sales_per_sq_foot#521, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#486 AS mar_sales_per_sq_foot#522, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#487 AS apr_sales_per_sq_foot#523, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#488 AS may_sales_per_sq_foot#524, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#489 AS jun_sales_per_sq_foot#525, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#490 AS jul_sales_per_sq_foot#526, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#491 AS aug_sales_per_sq_foot#527, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#492 AS sep_sales_per_sq_foot#528, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#493 AS oct_sales_per_sq_foot#529, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#494 AS nov_sales_per_sq_foot#530, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(cast(w_warehouse_sq_ft#10 as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true))#495 AS dec_sales_per_sq_foot#531, sum(jan_net#161)#496 AS jan_net#532, sum(feb_net#162)#497 AS feb_net#533, sum(mar_net#163)#498 AS mar_net#534, sum(apr_net#164)#499 AS apr_net#535, sum(may_net#165)#500 AS may_net#536, sum(jun_net#166)#501 AS jun_net#537, sum(jul_net#167)#502 AS jul_net#538, sum(aug_net#168)#503 AS aug_net#539, sum(sep_net#169)#504 AS sep_net#540, sum(oct_net#170)#505 AS oct_net#541, sum(nov_net#171)#506 AS nov_net#542, sum(dec_net#172)#507 AS dec_net#543] + +(55) TakeOrderedAndProject +Input [44]: [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, jan_sales#508, feb_sales#509, mar_sales#510, apr_sales#511, may_sales#512, jun_sales#513, jul_sales#514, aug_sales#515, sep_sales#516, oct_sales#517, nov_sales#518, dec_sales#519, jan_sales_per_sq_foot#520, feb_sales_per_sq_foot#521, mar_sales_per_sq_foot#522, apr_sales_per_sq_foot#523, may_sales_per_sq_foot#524, jun_sales_per_sq_foot#525, jul_sales_per_sq_foot#526, aug_sales_per_sq_foot#527, sep_sales_per_sq_foot#528, oct_sales_per_sq_foot#529, nov_sales_per_sq_foot#530, dec_sales_per_sq_foot#531, jan_net#532, feb_net#533, mar_net#534, apr_net#535, may_net#536, jun_net#537, jul_net#538, aug_net#539, sep_net#540, oct_net#541, nov_net#542, dec_net#543] +Arguments: 100, [w_warehouse_name#9 ASC NULLS FIRST], [w_warehouse_name#9, w_warehouse_sq_ft#10, w_city#11, w_county#12, w_state#13, w_country#14, ship_carriers#147, year#148, jan_sales#508, feb_sales#509, mar_sales#510, apr_sales#511, may_sales#512, jun_sales#513, jul_sales#514, aug_sales#515, sep_sales#516, oct_sales#517, nov_sales#518, dec_sales#519, jan_sales_per_sq_foot#520, feb_sales_per_sq_foot#521, mar_sales_per_sq_foot#522, apr_sales_per_sq_foot#523, ... 20 more fields] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/simplified.txt index d964b2833..ac7379973 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q66/simplified.txt @@ -1,75 +1,83 @@ -TakeOrderedAndProject [apr_net,apr_sales,apr_sales_per_sq_foot,aug_net,aug_sales,aug_sales_per_sq_foot,dec_net,dec_sales,dec_sales_per_sq_foot,feb_net,feb_sales,feb_sales_per_sq_foot,jan_net,jan_sales,jan_sales_per_sq_foot,jul_net,jul_sales,jul_sales_per_sq_foot,jun_net,jun_sales,jun_sales_per_sq_foot,mar_net,mar_sales,mar_sales_per_sq_foot,may_net,may_sales,may_sales_per_sq_foot,nov_net,nov_sales,nov_sales_per_sq_foot,oct_net,oct_sales,oct_sales_per_sq_foot,sep_net,sep_sales,sep_sales_per_sq_foot,ship_carriers,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,year] - WholeStageCodegen - HashAggregate [ship_carriers,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(apr_net),sum(apr_sales),sum(aug_net),sum(aug_sales),sum(dec_net),sum(dec_sales),sum(feb_net),sum(feb_sales),sum(jan_net),sum(jan_sales),sum(jul_net),sum(jul_sales),sum(jun_net),sum(jun_sales),sum(mar_net),sum(mar_sales),sum(may_net),sum(may_sales),sum(nov_net),sum(nov_sales),sum(oct_net),sum(oct_sales),sum(sep_net),sum(sep_sales),w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,year] [apr_net,apr_sales,apr_sales_per_sq_foot,aug_net,aug_sales,aug_sales_per_sq_foot,dec_net,dec_sales,dec_sales_per_sq_foot,feb_net,feb_sales,feb_sales_per_sq_foot,jan_net,jan_sales,jan_sales_per_sq_foot,jul_net,jul_sales,jul_sales_per_sq_foot,jun_net,jun_sales,jun_sales_per_sq_foot,mar_net,mar_sales,mar_sales_per_sq_foot,may_net,may_sales,may_sales_per_sq_foot,nov_net,nov_sales,nov_sales_per_sq_foot,oct_net,oct_sales,oct_sales_per_sq_foot,sep_net,sep_sales,sep_sales_per_sq_foot,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12))),sum(apr_net),sum(apr_sales),sum(aug_net),sum(aug_sales),sum(dec_net),sum(dec_sales),sum(feb_net),sum(feb_sales),sum(jan_net),sum(jan_sales),sum(jul_net),sum(jul_sales),sum(jun_net),sum(jun_sales),sum(mar_net),sum(mar_sales),sum(may_net),sum(may_sales),sum(nov_net),sum(nov_sales),sum(oct_net),sum(oct_sales),sum(sep_net),sum(sep_sales)] +TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net] + WholeStageCodegen (14) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(jan_sales),sum(feb_sales),sum(mar_sales),sum(apr_sales),sum(may_sales),sum(jun_sales),sum(jul_sales),sum(aug_sales),sum(sep_sales),sum(oct_sales),sum(nov_sales),sum(dec_sales),sum(CheckOverflow((promote_precision(jan_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(feb_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(mar_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(apr_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(may_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jun_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(jul_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(aug_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(sep_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(oct_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(nov_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(CheckOverflow((promote_precision(dec_sales) / promote_precision(cast(cast(w_warehouse_sq_ft as decimal(10,0)) as decimal(28,2)))), DecimalType(38,12), true)),sum(jan_net),sum(feb_net),sum(mar_net),sum(apr_net),sum(may_net),sum(jun_net),sum(jul_net),sum(aug_net),sum(sep_net),sum(oct_net),sum(nov_net),sum(dec_net),jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_sales_per_sq_foot,feb_sales_per_sq_foot,mar_sales_per_sq_foot,apr_sales_per_sq_foot,may_sales_per_sq_foot,jun_sales_per_sq_foot,jul_sales_per_sq_foot,aug_sales_per_sq_foot,sep_sales_per_sq_foot,oct_sales_per_sq_foot,nov_sales_per_sq_foot,dec_sales_per_sq_foot,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [ship_carriers,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,year] #1 - WholeStageCodegen - HashAggregate [apr_net,apr_sales,aug_net,aug_sales,dec_net,dec_sales,feb_net,feb_sales,jan_net,jan_sales,jul_net,jul_sales,jun_net,jun_sales,mar_net,mar_sales,may_net,may_sales,nov_net,nov_sales,oct_net,oct_sales,sep_net,sep_sales,ship_carriers,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,year] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year] #1 + WholeStageCodegen (13) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Union - WholeStageCodegen - HashAggregate [d_year,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] [apr_net,apr_sales,aug_net,aug_sales,dec_net,dec_sales,feb_net,feb_sales,jan_net,jan_sales,jul_net,jul_sales,jun_net,jun_sales,mar_net,mar_sales,may_net,may_sales,nov_net,nov_sales,oct_net,oct_sales,sep_net,sep_sales,ship_carriers,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),year] + WholeStageCodegen (6) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid as decimal(12,2))) * promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] #2 - WholeStageCodegen - HashAggregate [d_moy,d_year,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,ws_ext_sales_price,ws_net_paid,ws_quantity] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,ws_ext_sales_price,ws_net_paid,ws_quantity] - BroadcastHashJoin [sm_ship_mode_sk,ws_ship_mode_sk] - Project [d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,ws_ext_sales_price,ws_net_paid,ws_quantity,ws_ship_mode_sk] - BroadcastHashJoin [t_time_sk,ws_sold_time_sk] - Project [d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,ws_ext_sales_price,ws_net_paid,ws_quantity,ws_ship_mode_sk,ws_sold_time_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft,ws_ext_sales_price,ws_net_paid,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_sold_time_sk] - BroadcastHashJoin [w_warehouse_sk,ws_warehouse_sk] - Project [ws_ext_sales_price,ws_net_paid,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_sold_time_sk,ws_warehouse_sk] - Filter [ws_ship_mode_sk,ws_sold_date_sk,ws_sold_time_sk,ws_warehouse_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_net_paid,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_sold_time_sk,ws_warehouse_sk] [ws_ext_sales_price,ws_net_paid,ws_quantity,ws_ship_mode_sk,ws_sold_date_sk,ws_sold_time_sk,ws_warehouse_sk] + Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year] #2 + WholeStageCodegen (5) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy,ws_ext_sales_price,ws_quantity,ws_net_paid] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Project [ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + BroadcastHashJoin [ws_ship_mode_sk,sm_ship_mode_sk] + Project [ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk,ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_sold_time_sk,ws_ship_mode_sk,ws_quantity,ws_ext_sales_price,ws_net_paid,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] + BroadcastHashJoin [ws_warehouse_sk,w_warehouse_sk] + Filter [ws_warehouse_sk,ws_sold_date_sk,ws_sold_time_sk,ws_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_sold_time_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sk,w_warehouse_sq_ft] - Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sk,w_warehouse_sq_ft] [w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sk,w_warehouse_sq_ft] + WholeStageCodegen (1) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk,d_moy,d_year] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (3) Project [t_time_sk] Filter [t_time,t_time_sk] - Scan parquet default.time_dim [t_time,t_time_sk] [t_time,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_time] InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (4) Project [sm_ship_mode_sk] Filter [sm_carrier,sm_ship_mode_sk] - Scan parquet default.ship_mode [sm_carrier,sm_ship_mode_sk] [sm_carrier,sm_ship_mode_sk] - WholeStageCodegen - HashAggregate [d_year,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] [apr_net,apr_sales,aug_net,aug_sales,dec_net,dec_sales,feb_net,feb_sales,jan_net,jan_sales,jul_net,jul_sales,jun_net,jun_sales,mar_net,mar_sales,may_net,may_sales,nov_net,nov_sales,oct_net,oct_sales,sep_net,sep_sales,ship_carriers,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END),year] + ColumnarToRow + InputAdapter + Scan parquet default.ship_mode [sm_ship_mode_sk,sm_carrier] + WholeStageCodegen (12) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),sum(CASE WHEN (d_moy = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax as decimal(12,2))) * promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END),ship_carriers,year,jan_sales,feb_sales,mar_sales,apr_sales,may_sales,jun_sales,jul_sales,aug_sales,sep_sales,oct_sales,nov_sales,dec_sales,jan_net,feb_net,mar_net,apr_net,may_net,jun_net,jul_net,aug_net,sep_net,oct_net,nov_net,dec_net,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] #7 - WholeStageCodegen - HashAggregate [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,d_moy,d_year,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] + Exchange [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year] #7 + WholeStageCodegen (11) + HashAggregate [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy,cs_sales_price,cs_quantity,cs_net_paid_inc_tax] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Project [cs_quantity,cs_sales_price,cs_net_paid_inc_tax,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] - Project [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_ship_mode_sk,d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] + Project [cs_ship_mode_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] BroadcastHashJoin [cs_sold_time_sk,t_time_sk] - Project [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_ship_mode_sk,cs_sold_time_sk,d_moy,d_year,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] + Project [cs_sold_time_sk,cs_ship_mode_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country,d_year,d_moy] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_ship_mode_sk,cs_sold_date_sk,cs_sold_time_sk,w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sq_ft] + Project [cs_sold_date_sk,cs_sold_time_sk,cs_ship_mode_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] - Project [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_ship_mode_sk,cs_sold_date_sk,cs_sold_time_sk,cs_warehouse_sk] - Filter [cs_ship_mode_sk,cs_sold_date_sk,cs_sold_time_sk,cs_warehouse_sk] - Scan parquet default.catalog_sales [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_ship_mode_sk,cs_sold_date_sk,cs_sold_time_sk,cs_warehouse_sk] [cs_net_paid_inc_tax,cs_quantity,cs_sales_price,cs_ship_mode_sk,cs_sold_date_sk,cs_sold_time_sk,cs_warehouse_sk] + Filter [cs_warehouse_sk,cs_sold_date_sk,cs_sold_time_sk,cs_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_sold_time_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax] InputAdapter - ReusedExchange [w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sk,w_warehouse_sq_ft] [w_city,w_country,w_county,w_state,w_warehouse_name,w_warehouse_sk,w_warehouse_sq_ft] #3 + ReusedExchange [w_warehouse_sk,w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_state,w_country] #3 InputAdapter - ReusedExchange [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] #4 + ReusedExchange [d_date_sk,d_year,d_moy] #4 InputAdapter - ReusedExchange [t_time_sk] [t_time_sk] #5 + ReusedExchange [t_time_sk] #5 InputAdapter - ReusedExchange [sm_ship_mode_sk] [sm_ship_mode_sk] #6 + ReusedExchange [sm_ship_mode_sk] #6 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/explain.txt index 52478acb0..ae133938b 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/explain.txt @@ -1,31 +1,175 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_category#1 ASC NULLS FIRST,i_class#2 ASC NULLS FIRST,i_brand#3 ASC NULLS FIRST,i_product_name#4 ASC NULLS FIRST,d_year#5 ASC NULLS FIRST,d_qoy#6 ASC NULLS FIRST,d_moy#7 ASC NULLS FIRST,s_store_id#8 ASC NULLS FIRST,sumsales#9 ASC NULLS FIRST,rk#10 ASC NULLS FIRST], output=[i_category#1,i_class#2,i_brand#3,i_product_name#4,d_year#5,d_qoy#6,d_moy#7,s_store_id#8,sumsales#9,rk#10]) -+- *(7) Filter (isnotnull(rk#10) && (rk#10 <= 100)) - +- Window [rank(sumsales#9) windowspecdefinition(i_category#1, sumsales#9 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#10], [i_category#1], [sumsales#9 DESC NULLS LAST] - +- *(6) Sort [i_category#1 ASC NULLS FIRST, sumsales#9 DESC NULLS LAST], false, 0 - +- Exchange hashpartitioning(i_category#1, 5) - +- *(5) HashAggregate(keys=[i_category#1, i_class#2, i_brand#3, i_product_name#4, d_year#5, d_qoy#6, d_moy#7, s_store_id#8, spark_grouping_id#11], functions=[sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#12 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00))]) - +- Exchange hashpartitioning(i_category#1, i_class#2, i_brand#3, i_product_name#4, d_year#5, d_qoy#6, d_moy#7, s_store_id#8, spark_grouping_id#11, 5) - +- *(4) HashAggregate(keys=[i_category#1, i_class#2, i_brand#3, i_product_name#4, d_year#5, d_qoy#6, d_moy#7, s_store_id#8, spark_grouping_id#11], functions=[partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#12 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#13 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00))]) - +- *(4) Expand [List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, i_brand#16, i_product_name#17, d_year#18, d_qoy#19, d_moy#20, s_store_id#21, 0), List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, i_brand#16, i_product_name#17, d_year#18, d_qoy#19, d_moy#20, null, 1), List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, i_brand#16, i_product_name#17, d_year#18, d_qoy#19, null, null, 3), List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, i_brand#16, i_product_name#17, d_year#18, null, null, null, 7), List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, i_brand#16, i_product_name#17, null, null, null, null, 15), List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, i_brand#16, null, null, null, null, null, 31), List(ss_quantity#13, ss_sales_price#12, i_category#14, i_class#15, null, null, null, null, null, null, 63), List(ss_quantity#13, ss_sales_price#12, i_category#14, null, null, null, null, null, null, null, 127), List(ss_quantity#13, ss_sales_price#12, null, null, null, null, null, null, null, null, 255)], [ss_quantity#13, ss_sales_price#12, i_category#1, i_class#2, i_brand#3, i_product_name#4, d_year#5, d_qoy#6, d_moy#7, s_store_id#8, spark_grouping_id#11] - +- *(4) Project [ss_quantity#13, ss_sales_price#12, d_year#18, d_moy#20, d_qoy#19, i_category#22 AS i_category#14, i_class#23 AS i_class#15, i_brand#24 AS i_brand#16, i_product_name#25 AS i_product_name#17, d_year#18, d_qoy#19, d_moy#20, s_store_id#26 AS s_store_id#21] - +- *(4) BroadcastHashJoin [ss_item_sk#27], [i_item_sk#28], Inner, BuildRight - :- *(4) Project [ss_item_sk#27, ss_quantity#13, ss_sales_price#12, d_year#18, d_moy#20, d_qoy#19, s_store_id#26] - : +- *(4) BroadcastHashJoin [ss_store_sk#29], [s_store_sk#30], Inner, BuildRight - : :- *(4) Project [ss_item_sk#27, ss_store_sk#29, ss_quantity#13, ss_sales_price#12, d_year#18, d_moy#20, d_qoy#19] - : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight - : : :- *(4) Project [ss_sold_date_sk#31, ss_item_sk#27, ss_store_sk#29, ss_quantity#13, ss_sales_price#12] - : : : +- *(4) Filter ((isnotnull(ss_sold_date_sk#31) && isnotnull(ss_store_sk#29)) && isnotnull(ss_item_sk#27)) - : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#31,ss_item_sk#27,ss_store_sk#29,ss_quantity#13,ss_sales_price#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)], ReadSchema: struct= 1200)) && (d_month_seq#33 <= 1211)) && isnotnull(d_date_sk#32)) - : : +- *(1) FileScan parquet default.date_dim[d_date_sk#32,d_month_seq#33,d_year#18,d_moy#20,d_qoy#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [s_store_sk#30, s_store_id#26] - : +- *(2) Filter isnotnull(s_store_sk#30) - : +- *(2) FileScan parquet default.store[s_store_sk#30,s_store_id#26] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [i_item_sk#28, i_brand#24, i_class#23, i_category#22, i_product_name#25] - +- *(3) Filter isnotnull(i_item_sk#28) - +- *(3) FileScan parquet default.item[i_item_sk#28,i_brand#24,i_class#23,i_category#22,i_product_name#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (31) ++- * Filter (30) + +- Window (29) + +- * Sort (28) + +- Exchange (27) + +- * HashAggregate (26) + +- Exchange (25) + +- * HashAggregate (24) + +- * Expand (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.store (11) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet default.item (17) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(6) Filter [codegen id : 1] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] +Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 1200)) AND (d_month_seq#7 <= 1211)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Input [5]: [d_date_sk#6, d_month_seq#7, d_year#8, d_moy#9, d_qoy#10] + +(8) BroadcastExchange +Input [4]: [d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [7]: [ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, d_date_sk#6, d_year#8, d_moy#9, d_qoy#10] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#12, s_store_id#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#12, s_store_id#13] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#12, s_store_id#13] +Condition : isnotnull(s_store_sk#12) + +(14) BroadcastExchange +Input [2]: [s_store_sk#12, s_store_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(16) Project [codegen id : 4] +Output [7]: [ss_item_sk#2, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_id#13] +Input [9]: [ss_item_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_sk#12, s_store_id#13] + +(17) Scan parquet default.item +Output [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] + +(19) Filter [codegen id : 3] +Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] +Condition : isnotnull(i_item_sk#15) + +(20) BroadcastExchange +Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#15] +Join condition: None + +(22) Project [codegen id : 4] +Output [10]: [ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Input [12]: [ss_item_sk#2, ss_quantity#4, ss_sales_price#5, d_year#8, d_moy#9, d_qoy#10, s_store_id#13, i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] + +(23) Expand [codegen id : 4] +Input [10]: [ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13] +Arguments: [List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#13, 0), List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, null, 1), List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, null, null, 3), List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, null, null, null, 7), List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, i_product_name#19, null, null, null, null, 15), List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, i_brand#16, null, null, null, null, null, 31), List(ss_quantity#4, ss_sales_price#5, i_category#18, i_class#17, null, null, null, null, null, null, 63), List(ss_quantity#4, ss_sales_price#5, i_category#18, null, null, null, null, null, null, null, 127), List(ss_quantity#4, ss_sales_price#5, null, null, null, null, null, null, null, null, 255)], [ss_quantity#4, ss_sales_price#5, i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] + +(24) HashAggregate [codegen id : 4] +Input [11]: [ss_quantity#4, ss_sales_price#5, i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] +Keys [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] +Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [2]: [sum#30, isEmpty#31] +Results [11]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, sum#32, isEmpty#33] + +(25) Exchange +Input [11]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, sum#32, isEmpty#33] +Arguments: hashpartitioning(i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, 5), true, [id=#34] + +(26) HashAggregate [codegen id : 5] +Input [11]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, sum#32, isEmpty#33] +Keys [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] +Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#35] +Results [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#5 as decimal(12,2))) * promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00))#35 AS sumsales#36] + +(27) Exchange +Input [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36] +Arguments: hashpartitioning(i_category#21, 5), true, [id=#37] + +(28) Sort [codegen id : 6] +Input [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36] +Arguments: [i_category#21 ASC NULLS FIRST, sumsales#36 DESC NULLS LAST], false, 0 + +(29) Window +Input [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36] +Arguments: [rank(sumsales#36) windowspecdefinition(i_category#21, sumsales#36 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#38], [i_category#21], [sumsales#36 DESC NULLS LAST] + +(30) Filter [codegen id : 7] +Input [10]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36, rk#38] +Condition : (isnotnull(rk#38) AND (rk#38 <= 100)) + +(31) TakeOrderedAndProject +Input [10]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36, rk#38] +Arguments: 100, [i_category#21 ASC NULLS FIRST, i_class#22 ASC NULLS FIRST, i_brand#23 ASC NULLS FIRST, i_product_name#24 ASC NULLS FIRST, d_year#25 ASC NULLS FIRST, d_qoy#26 ASC NULLS FIRST, d_moy#27 ASC NULLS FIRST, s_store_id#28 ASC NULLS FIRST, sumsales#36 ASC NULLS FIRST, rk#38 ASC NULLS FIRST], [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36, rk#38] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/simplified.txt index 49dac167f..b343c2d02 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q67/simplified.txt @@ -1,43 +1,48 @@ -TakeOrderedAndProject [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,rk,s_store_id,sumsales] - WholeStageCodegen +TakeOrderedAndProject [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,sumsales,rk] + WholeStageCodegen (7) Filter [rk] InputAdapter - Window [i_category,sumsales] - WholeStageCodegen + Window [sumsales,i_category] + WholeStageCodegen (6) Sort [i_category,sumsales] InputAdapter Exchange [i_category] #1 - WholeStageCodegen - HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,spark_grouping_id,sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00))] [sum,sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2)), 0.00)),sumsales] + WholeStageCodegen (5) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,sum,isEmpty] [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price as decimal(12,2))) * promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true), 0.00)),sumsales,sum,isEmpty] InputAdapter - Exchange [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,spark_grouping_id] #2 - WholeStageCodegen - HashAggregate [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,spark_grouping_id,ss_quantity,ss_sales_price,sum,sum] [sum,sum] - Expand [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,ss_quantity,ss_sales_price] - Project [d_moy,d_qoy,d_year,i_brand,i_category,i_class,i_product_name,s_store_id,ss_quantity,ss_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [d_moy,d_qoy,d_year,s_store_id,ss_item_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [d_moy,d_qoy,d_year,ss_item_sk,ss_quantity,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Exchange [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id,spark_grouping_id,ss_sales_price,ss_quantity] [sum,isEmpty,sum,isEmpty] + Expand [ss_quantity,ss_sales_price,i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] + Project [ss_quantity,ss_sales_price,i_category,i_class,i_brand,i_product_name,d_year,d_qoy,d_moy,s_store_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy,s_store_id] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price,d_year,d_moy,d_qoy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_quantity,ss_sales_price] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk,d_moy,d_qoy,d_year] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq,d_moy,d_qoy,d_year] [d_date_sk,d_month_seq,d_moy,d_qoy,d_year] + WholeStageCodegen (1) + Project [d_date_sk,d_year,d_moy,d_qoy] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq,d_year,d_moy,d_qoy] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [s_store_id,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_id,s_store_sk] [s_store_id,s_store_sk] + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [i_brand,i_category,i_class,i_item_sk,i_product_name] - Filter [i_item_sk] - Scan parquet default.item [i_brand,i_category,i_class,i_item_sk,i_product_name] [i_brand,i_category,i_class,i_item_sk,i_product_name] + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand,i_class,i_category,i_product_name] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/explain.txt index 43c255c55..d0c618bdb 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/explain.txt @@ -1,41 +1,241 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,ss_ticket_number#2 ASC NULLS FIRST], output=[c_last_name#1,c_first_name#3,ca_city#4,bought_city#5,ss_ticket_number#2,extended_price#6,extended_tax#7,list_price#8]) -+- *(8) Project [c_last_name#1, c_first_name#3, ca_city#4, bought_city#5, ss_ticket_number#2, extended_price#6, extended_tax#7, list_price#8] - +- *(8) BroadcastHashJoin [c_current_addr_sk#9], [ca_address_sk#10], Inner, BuildRight, NOT (ca_city#4 = bought_city#5) - :- *(8) Project [ss_ticket_number#2, bought_city#5, extended_price#6, list_price#8, extended_tax#7, c_current_addr_sk#9, c_first_name#3, c_last_name#1] - : +- *(8) BroadcastHashJoin [ss_customer_sk#11], [c_customer_sk#12], Inner, BuildRight - : :- *(8) HashAggregate(keys=[ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4], functions=[sum(UnscaledValue(ss_ext_sales_price#14)), sum(UnscaledValue(ss_ext_list_price#15)), sum(UnscaledValue(ss_ext_tax#16))]) - : : +- Exchange hashpartitioning(ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4, 5) - : : +- *(5) HashAggregate(keys=[ss_ticket_number#2, ss_customer_sk#11, ss_addr_sk#13, ca_city#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#14)), partial_sum(UnscaledValue(ss_ext_list_price#15)), partial_sum(UnscaledValue(ss_ext_tax#16))]) - : : +- *(5) Project [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16, ca_city#4] - : : +- *(5) BroadcastHashJoin [ss_addr_sk#13], [ca_address_sk#10], Inner, BuildRight - : : :- *(5) Project [ss_customer_sk#11, ss_addr_sk#13, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] - : : : +- *(5) BroadcastHashJoin [ss_hdemo_sk#17], [hd_demo_sk#18], Inner, BuildRight - : : : :- *(5) Project [ss_customer_sk#11, ss_hdemo_sk#17, ss_addr_sk#13, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] - : : : : +- *(5) BroadcastHashJoin [ss_store_sk#19], [s_store_sk#20], Inner, BuildRight - : : : : :- *(5) Project [ss_customer_sk#11, ss_hdemo_sk#17, ss_addr_sk#13, ss_store_sk#19, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] - : : : : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight - : : : : : :- *(5) Project [ss_sold_date_sk#21, ss_customer_sk#11, ss_hdemo_sk#17, ss_addr_sk#13, ss_store_sk#19, ss_ticket_number#2, ss_ext_sales_price#14, ss_ext_list_price#15, ss_ext_tax#16] - : : : : : : +- *(5) Filter ((((isnotnull(ss_sold_date_sk#21) && isnotnull(ss_store_sk#19)) && isnotnull(ss_hdemo_sk#17)) && isnotnull(ss_addr_sk#13)) && isnotnull(ss_customer_sk#11)) - : : : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#21,ss_customer_sk#11,ss_hdemo_sk#17,ss_addr_sk#13,ss_store_sk#19,ss_ticket_number#2,ss_ext_sales_price#14,ss_ext_list_price#15,ss_ext_tax#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk..., ReadSchema: struct= 1)) && (d_dom#23 <= 2)) && d_year#24 IN (1999,2000,2001)) && isnotnull(d_date_sk#22)) - : : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#22,d_year#24,d_dom#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [s_store_sk#20] - : : : : +- *(2) Filter (s_city#25 IN (Midway,Fairview) && isnotnull(s_store_sk#20)) - : : : : +- *(2) FileScan parquet default.store[s_store_sk#20,s_city#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [In(s_city, [Midway,Fairview]), IsNotNull(s_store_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(3) Project [hd_demo_sk#18] - : : : +- *(3) Filter (((hd_dep_count#26 = 4) || (hd_vehicle_count#27 = 3)) && isnotnull(hd_demo_sk#18)) - : : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#18,hd_dep_count#26,hd_vehicle_count#27] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(4) Project [ca_address_sk#10, ca_city#4] - : : +- *(4) Filter (isnotnull(ca_address_sk#10) && isnotnull(ca_city#4)) - : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_city#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(6) Project [c_customer_sk#12, c_current_addr_sk#9, c_first_name#3, c_last_name#1] - : +- *(6) Filter (isnotnull(c_customer_sk#12) && isnotnull(c_current_addr_sk#9)) - : +- *(6) FileScan parquet default.customer[c_customer_sk#12,c_current_addr_sk#9,c_first_name#3,c_last_name#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)], ReadSchema: struct - +- ReusedExchange [ca_address_sk#10, ca_city#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +TakeOrderedAndProject (43) ++- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * HashAggregate (33) + : : +- Exchange (32) + : : +- * HashAggregate (31) + : : +- * Project (30) + : : +- * BroadcastHashJoin Inner BuildRight (29) + : : :- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.store_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.date_dim (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.store (11) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.household_demographics (18) + : : +- BroadcastExchange (28) + : : +- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet default.customer_address (25) + : +- BroadcastExchange (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet default.customer (34) + +- ReusedExchange (40) + + +(1) Scan parquet default.store_sales +Output [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] + +(3) Filter [codegen id : 5] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Condition : ((((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_addr_sk#4)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_dom#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] +Condition : ((((isnotnull(d_dom#12) AND (d_dom#12 >= 1)) AND (d_dom#12 <= 2)) AND d_year#11 IN (1999,2000,2001)) AND isnotnull(d_date_sk#10)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#11, d_dom#12] + +(8) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(10) Project [codegen id : 5] +Output [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Input [10]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, d_date_sk#10] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#14, s_city#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [In(s_city, [Midway,Fairview]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#14, s_city#15] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#14, s_city#15] +Condition : (s_city#15 IN (Midway,Fairview) AND isnotnull(s_store_sk#14)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#14] +Input [2]: [s_store_sk#14, s_city#15] + +(15) BroadcastExchange +Input [1]: [s_store_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#14] +Join condition: None + +(17) Project [codegen id : 5] +Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Input [9]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, s_store_sk#14] + +(18) Scan parquet default.household_demographics +Output [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] + +(20) Filter [codegen id : 3] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] +Condition : (((hd_dep_count#18 = 4) OR (hd_vehicle_count#19 = 3)) AND isnotnull(hd_demo_sk#17)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#17] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(23) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#17] +Join condition: None + +(24) Project [codegen id : 5] +Output [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9] +Input [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, hd_demo_sk#17] + +(25) Scan parquet default.customer_address +Output [2]: [ca_address_sk#21, ca_city#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#21, ca_city#22] + +(27) Filter [codegen id : 4] +Input [2]: [ca_address_sk#21, ca_city#22] +Condition : (isnotnull(ca_address_sk#21) AND isnotnull(ca_city#22)) + +(28) BroadcastExchange +Input [2]: [ca_address_sk#21, ca_city#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#4] +Right keys [1]: [ca_address_sk#21] +Join condition: None + +(30) Project [codegen id : 5] +Output [7]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, ca_city#22] +Input [8]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, ca_address_sk#21, ca_city#22] + +(31) HashAggregate [codegen id : 5] +Input [7]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_ext_sales_price#7, ss_ext_list_price#8, ss_ext_tax#9, ca_city#22] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#7)), partial_sum(UnscaledValue(ss_ext_list_price#8)), partial_sum(UnscaledValue(ss_ext_tax#9))] +Aggregate Attributes [3]: [sum#24, sum#25, sum#26] +Results [7]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22, sum#27, sum#28, sum#29] + +(32) Exchange +Input [7]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22, sum#27, sum#28, sum#29] +Arguments: hashpartitioning(ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22, 5), true, [id=#30] + +(33) HashAggregate [codegen id : 8] +Input [7]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22, sum#27, sum#28, sum#29] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, ca_city#22] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#7)), sum(UnscaledValue(ss_ext_list_price#8)), sum(UnscaledValue(ss_ext_tax#9))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#7))#31, sum(UnscaledValue(ss_ext_list_price#8))#32, sum(UnscaledValue(ss_ext_tax#9))#33] +Results [6]: [ss_ticket_number#6, ss_customer_sk#2, ca_city#22 AS bought_city#34, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#31,17,2) AS extended_price#35, MakeDecimal(sum(UnscaledValue(ss_ext_list_price#8))#32,17,2) AS list_price#36, MakeDecimal(sum(UnscaledValue(ss_ext_tax#9))#33,17,2) AS extended_tax#37] + +(34) Scan parquet default.customer +Output [4]: [c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 6] +Input [4]: [c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] + +(36) Filter [codegen id : 6] +Input [4]: [c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] +Condition : (isnotnull(c_customer_sk#38) AND isnotnull(c_current_addr_sk#39)) + +(37) BroadcastExchange +Input [4]: [c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#42] + +(38) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#38] +Join condition: None + +(39) Project [codegen id : 8] +Output [8]: [ss_ticket_number#6, bought_city#34, extended_price#35, list_price#36, extended_tax#37, c_current_addr_sk#39, c_first_name#40, c_last_name#41] +Input [10]: [ss_ticket_number#6, ss_customer_sk#2, bought_city#34, extended_price#35, list_price#36, extended_tax#37, c_customer_sk#38, c_current_addr_sk#39, c_first_name#40, c_last_name#41] + +(40) ReusedExchange [Reuses operator id: 28] +Output [2]: [ca_address_sk#21, ca_city#22] + +(41) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [c_current_addr_sk#39] +Right keys [1]: [ca_address_sk#21] +Join condition: NOT (ca_city#22 = bought_city#34) + +(42) Project [codegen id : 8] +Output [8]: [c_last_name#41, c_first_name#40, ca_city#22, bought_city#34, ss_ticket_number#6, extended_price#35, extended_tax#37, list_price#36] +Input [10]: [ss_ticket_number#6, bought_city#34, extended_price#35, list_price#36, extended_tax#37, c_current_addr_sk#39, c_first_name#40, c_last_name#41, ca_address_sk#21, ca_city#22] + +(43) TakeOrderedAndProject +Input [8]: [c_last_name#41, c_first_name#40, ca_city#22, bought_city#34, ss_ticket_number#6, extended_price#35, extended_tax#37, list_price#36] +Arguments: 100, [c_last_name#41 ASC NULLS FIRST, ss_ticket_number#6 ASC NULLS FIRST], [c_last_name#41, c_first_name#40, ca_city#22, bought_city#34, ss_ticket_number#6, extended_price#35, extended_tax#37, list_price#36] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/simplified.txt index 579970f83..819e5504f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q68/simplified.txt @@ -1,54 +1,63 @@ -TakeOrderedAndProject [bought_city,c_first_name,c_last_name,ca_city,extended_price,extended_tax,list_price,ss_ticket_number] - WholeStageCodegen - Project [bought_city,c_first_name,c_last_name,ca_city,extended_price,extended_tax,list_price,ss_ticket_number] - BroadcastHashJoin [bought_city,c_current_addr_sk,ca_address_sk,ca_city] - Project [bought_city,c_current_addr_sk,c_first_name,c_last_name,extended_price,extended_tax,list_price,ss_ticket_number] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum,sum,sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_tax))] [bought_city,extended_price,extended_tax,list_price,sum,sum,sum,sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_tax))] +TakeOrderedAndProject [c_last_name,ss_ticket_number,c_first_name,ca_city,bought_city,extended_price,extended_tax,list_price] + WholeStageCodegen (8) + Project [c_last_name,c_first_name,ca_city,bought_city,ss_ticket_number,extended_price,extended_tax,list_price] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk,ca_city,bought_city] + Project [ss_ticket_number,bought_city,extended_price,list_price,extended_tax,c_current_addr_sk,c_first_name,c_last_name] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,sum,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_ext_list_price)),sum(UnscaledValue(ss_ext_tax)),bought_city,extended_price,list_price,extended_tax,sum,sum,sum] InputAdapter - Exchange [ca_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #1 - WholeStageCodegen - HashAggregate [ca_city,ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Project [ca_city,ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] - BroadcastHashJoin [ca_address_sk,ss_addr_sk] - Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_ticket_number] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_ticket_number] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_store_sk,ss_ticket_number] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Filter [ss_addr_sk,ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_addr_sk,ss_customer_sk,ss_ext_list_price,ss_ext_sales_price,ss_ext_tax,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city] #1 + WholeStageCodegen (5) + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] [sum,sum,sum,sum,sum,sum] + Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax,ca_city] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] + Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk,ss_hdemo_sk,ss_addr_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_ext_sales_price,ss_ext_list_price,ss_ext_tax] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_dom,d_year] - Scan parquet default.date_dim [d_date_sk,d_dom,d_year] [d_date_sk,d_dom,d_year] + Filter [d_dom,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_dom] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (2) Project [s_store_sk] Filter [s_city,s_store_sk] - Scan parquet default.store [s_city,s_store_sk] [s_city,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_city] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [ca_address_sk,ca_city] - Filter [ca_address_sk,ca_city] - Scan parquet default.customer_address [ca_address_sk,ca_city] [ca_address_sk,ca_city] + WholeStageCodegen (4) + Filter [ca_address_sk,ca_city] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city] InputAdapter BroadcastExchange #6 - WholeStageCodegen - Project [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] - Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] [c_current_addr_sk,c_customer_sk,c_first_name,c_last_name] + WholeStageCodegen (6) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_addr_sk,c_first_name,c_last_name] InputAdapter - ReusedExchange [ca_address_sk,ca_city] [ca_address_sk,ca_city] #5 + ReusedExchange [ca_address_sk,ca_city] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/explain.txt index 9a777a055..b21892546 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/explain.txt @@ -1,48 +1,274 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[cd_gender#1 ASC NULLS FIRST,cd_marital_status#2 ASC NULLS FIRST,cd_education_status#3 ASC NULLS FIRST,cd_purchase_estimate#4 ASC NULLS FIRST,cd_credit_rating#5 ASC NULLS FIRST], output=[cd_gender#1,cd_marital_status#2,cd_education_status#3,cnt1#6,cd_purchase_estimate#4,cnt2#7,cd_credit_rating#5,cnt3#8]) -+- *(10) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5], functions=[count(1)]) - +- Exchange hashpartitioning(cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5, 5) - +- *(9) HashAggregate(keys=[cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5], functions=[partial_count(1)]) - +- *(9) Project [cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5] - +- *(9) BroadcastHashJoin [c_current_cdemo_sk#9], [cd_demo_sk#10], Inner, BuildRight - :- *(9) Project [c_current_cdemo_sk#9] - : +- *(9) BroadcastHashJoin [c_current_addr_sk#11], [ca_address_sk#12], Inner, BuildRight - : :- *(9) Project [c_current_cdemo_sk#9, c_current_addr_sk#11] - : : +- *(9) BroadcastHashJoin [c_customer_sk#13], [cs_ship_customer_sk#14], LeftAnti, BuildRight - : : :- *(9) BroadcastHashJoin [c_customer_sk#13], [ws_bill_customer_sk#15], LeftAnti, BuildRight - : : : :- *(9) BroadcastHashJoin [c_customer_sk#13], [ss_customer_sk#16], LeftSemi, BuildRight - : : : : :- *(9) Project [c_customer_sk#13, c_current_cdemo_sk#9, c_current_addr_sk#11] - : : : : : +- *(9) Filter (isnotnull(c_current_addr_sk#11) && isnotnull(c_current_cdemo_sk#9)) - : : : : : +- *(9) FileScan parquet default.customer[c_customer_sk#13,c_current_cdemo_sk#9,c_current_addr_sk#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [ss_customer_sk#16] - : : : : +- *(2) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight - : : : : :- *(2) Project [ss_sold_date_sk#17, ss_customer_sk#16] - : : : : : +- *(2) Filter isnotnull(ss_sold_date_sk#17) - : : : : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_customer_sk#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(1) Project [d_date_sk#18] - : : : : +- *(1) Filter (((((isnotnull(d_year#19) && isnotnull(d_moy#20)) && (d_year#19 = 2001)) && (d_moy#20 >= 4)) && (d_moy#20 <= 6)) && isnotnull(d_date_sk#18)) - : : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_year#19,d_moy#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), GreaterThanOrEqual(d_moy,4), LessThan..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(4) Project [ws_bill_customer_sk#15] - : : : +- *(4) BroadcastHashJoin [ws_sold_date_sk#21], [d_date_sk#18], Inner, BuildRight - : : : :- *(4) Project [ws_sold_date_sk#21, ws_bill_customer_sk#15] - : : : : +- *(4) Filter isnotnull(ws_sold_date_sk#21) - : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(6) Project [cs_ship_customer_sk#14] - : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#22], [d_date_sk#18], Inner, BuildRight - : : :- *(6) Project [cs_sold_date_sk#22, cs_ship_customer_sk#14] - : : : +- *(6) Filter isnotnull(cs_sold_date_sk#22) - : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#22,cs_ship_customer_sk#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(7) Project [ca_address_sk#12] - : +- *(7) Filter (ca_state#23 IN (KY,GA,NM) && isnotnull(ca_address_sk#12)) - : +- *(7) FileScan parquet default.customer_address[ca_address_sk#12,ca_state#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [In(ca_state, [KY,GA,NM]), IsNotNull(ca_address_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(8) Project [cd_demo_sk#10, cd_gender#1, cd_marital_status#2, cd_education_status#3, cd_purchase_estimate#4, cd_credit_rating#5] - +- *(8) Filter isnotnull(cd_demo_sk#10) - +- *(8) FileScan parquet default.customer_demographics[cd_demo_sk#10,cd_gender#1,cd_marital_status#2,cd_education_status#3,cd_purchase_estimate#4,cd_credit_rating#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(3) Filter [codegen id : 9] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] +Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) + +(4) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#4, ss_customer_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ss_sold_date_sk#4, ss_customer_sk#5] + +(6) Filter [codegen id : 2] +Input [2]: [ss_sold_date_sk#4, ss_customer_sk#5] +Condition : isnotnull(ss_sold_date_sk#4) + +(7) Scan parquet default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_moy#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,6), IsNotNull(d_date_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] + +(9) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] +Condition : (((((isnotnull(d_year#7) AND isnotnull(d_moy#8)) AND (d_year#7 = 2001)) AND (d_moy#8 >= 4)) AND (d_moy#8 <= 6)) AND isnotnull(d_date_sk#6)) + +(10) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_moy#8] + +(11) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(12) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#4] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(13) Project [codegen id : 2] +Output [1]: [ss_customer_sk#5] +Input [3]: [ss_sold_date_sk#4, ss_customer_sk#5, d_date_sk#6] + +(14) BroadcastExchange +Input [1]: [ss_customer_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#5] +Join condition: None + +(16) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#11, ws_bill_customer_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 4] +Input [2]: [ws_sold_date_sk#11, ws_bill_customer_sk#12] + +(18) Filter [codegen id : 4] +Input [2]: [ws_sold_date_sk#11, ws_bill_customer_sk#12] +Condition : isnotnull(ws_sold_date_sk#11) + +(19) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#6] + +(20) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_date_sk#11] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(21) Project [codegen id : 4] +Output [1]: [ws_bill_customer_sk#12] +Input [3]: [ws_sold_date_sk#11, ws_bill_customer_sk#12, d_date_sk#6] + +(22) BroadcastExchange +Input [1]: [ws_bill_customer_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(23) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#12] +Join condition: None + +(24) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#14, cs_ship_customer_sk#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 6] +Input [2]: [cs_sold_date_sk#14, cs_ship_customer_sk#15] + +(26) Filter [codegen id : 6] +Input [2]: [cs_sold_date_sk#14, cs_ship_customer_sk#15] +Condition : isnotnull(cs_sold_date_sk#14) + +(27) ReusedExchange [Reuses operator id: 11] +Output [1]: [d_date_sk#6] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cs_sold_date_sk#14] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(29) Project [codegen id : 6] +Output [1]: [cs_ship_customer_sk#15] +Input [3]: [cs_sold_date_sk#14, cs_ship_customer_sk#15, d_date_sk#6] + +(30) BroadcastExchange +Input [1]: [cs_ship_customer_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(31) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [cs_ship_customer_sk#15] +Join condition: None + +(32) Project [codegen id : 9] +Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] +Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] + +(33) Scan parquet default.customer_address +Output [2]: [ca_address_sk#17, ca_state#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [KY,GA,NM]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_state#18] + +(35) Filter [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_state#18] +Condition : (ca_state#18 IN (KY,GA,NM) AND isnotnull(ca_address_sk#17)) + +(36) Project [codegen id : 7] +Output [1]: [ca_address_sk#17] +Input [2]: [ca_address_sk#17, ca_state#18] + +(37) BroadcastExchange +Input [1]: [ca_address_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] + +(38) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#3] +Right keys [1]: [ca_address_sk#17] +Join condition: None + +(39) Project [codegen id : 9] +Output [1]: [c_current_cdemo_sk#2] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#17] + +(40) Scan parquet default.customer_demographics +Output [6]: [cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [6]: [cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] + +(42) Filter [codegen id : 8] +Input [6]: [cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Condition : isnotnull(cd_demo_sk#20) + +(43) BroadcastExchange +Input [6]: [cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] + +(44) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#20] +Join condition: None + +(45) Project [codegen id : 9] +Output [5]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Input [7]: [c_current_cdemo_sk#2, cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] + +(46) HashAggregate [codegen id : 9] +Input [5]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Keys [5]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#27] +Results [6]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25, count#28] + +(47) Exchange +Input [6]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25, count#28] +Arguments: hashpartitioning(cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25, 5), true, [id=#29] + +(48) HashAggregate [codegen id : 10] +Input [6]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25, count#28] +Keys [5]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#30] +Results [8]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, count(1)#30 AS cnt1#31, cd_purchase_estimate#24, count(1)#30 AS cnt2#32, cd_credit_rating#25, count(1)#30 AS cnt3#33] + +(49) TakeOrderedAndProject +Input [8]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cnt1#31, cd_purchase_estimate#24, cnt2#32, cd_credit_rating#25, cnt3#33] +Arguments: 100, [cd_gender#21 ASC NULLS FIRST, cd_marital_status#22 ASC NULLS FIRST, cd_education_status#23 ASC NULLS FIRST, cd_purchase_estimate#24 ASC NULLS FIRST, cd_credit_rating#25 ASC NULLS FIRST], [cd_gender#21, cd_marital_status#22, cd_education_status#23, cnt1#31, cd_purchase_estimate#24, cnt2#32, cd_credit_rating#25, cnt3#33] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/simplified.txt index be4dce4b0..9e34940da 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q69/simplified.txt @@ -1,64 +1,73 @@ -TakeOrderedAndProject [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,cnt1,cnt2,cnt3] - WholeStageCodegen - HashAggregate [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,count,count(1)] [cnt1,cnt2,cnt3,count,count(1)] +TakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,cnt1,cnt2,cnt3] + WholeStageCodegen (10) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating,count] [count(1),cnt1,cnt2,cnt3,count] InputAdapter - Exchange [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] #1 - WholeStageCodegen - HashAggregate [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate,count,count] [count,count] - Project [cd_credit_rating,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] + Exchange [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] #1 + WholeStageCodegen (9) + HashAggregate [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] [count,count] + Project [cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] Project [c_current_cdemo_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,c_current_cdemo_sk] + Project [c_current_cdemo_sk,c_current_addr_sk] BroadcastHashJoin [c_customer_sk,cs_ship_customer_sk] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] - Filter [c_current_addr_sk,c_current_cdemo_sk] - Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] [c_current_addr_sk,c_current_cdemo_sk,c_customer_sk] + Filter [c_current_addr_sk,c_current_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (2) Project [ss_customer_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_customer_sk,ss_sold_date_sk] - Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (4) Project [ws_bill_customer_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_customer_sk,ws_sold_date_sk] - Filter [ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_sold_date_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 + ReusedExchange [d_date_sk] #3 InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (6) Project [cs_ship_customer_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_ship_customer_sk,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ship_customer_sk,cs_sold_date_sk] [cs_ship_customer_sk,cs_sold_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_customer_sk] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 + ReusedExchange [d_date_sk] #3 InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (7) Project [ca_address_sk] - Filter [ca_address_sk,ca_state] - Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] InputAdapter BroadcastExchange #7 - WholeStageCodegen - Project [cd_credit_rating,cd_demo_sk,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] - Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_credit_rating,cd_demo_sk,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] [cd_credit_rating,cd_demo_sk,cd_education_status,cd_gender,cd_marital_status,cd_purchase_estimate] + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status,cd_purchase_estimate,cd_credit_rating] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/explain.txt index f1c2c24b7..24546c68f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/explain.txt @@ -1,32 +1,193 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[i_item_id#1,agg1#2,agg2#3,agg3#4,agg4#5]) -+- *(6) HashAggregate(keys=[i_item_id#1], functions=[avg(cast(ss_quantity#6 as bigint)), avg(UnscaledValue(ss_list_price#7)), avg(UnscaledValue(ss_coupon_amt#8)), avg(UnscaledValue(ss_sales_price#9))]) - +- Exchange hashpartitioning(i_item_id#1, 5) - +- *(5) HashAggregate(keys=[i_item_id#1], functions=[partial_avg(cast(ss_quantity#6 as bigint)), partial_avg(UnscaledValue(ss_list_price#7)), partial_avg(UnscaledValue(ss_coupon_amt#8)), partial_avg(UnscaledValue(ss_sales_price#9))]) - +- *(5) Project [ss_quantity#6, ss_list_price#7, ss_sales_price#9, ss_coupon_amt#8, i_item_id#1] - +- *(5) BroadcastHashJoin [ss_promo_sk#10], [p_promo_sk#11], Inner, BuildRight - :- *(5) Project [ss_promo_sk#10, ss_quantity#6, ss_list_price#7, ss_sales_price#9, ss_coupon_amt#8, i_item_id#1] - : +- *(5) BroadcastHashJoin [ss_item_sk#12], [i_item_sk#13], Inner, BuildRight - : :- *(5) Project [ss_item_sk#12, ss_promo_sk#10, ss_quantity#6, ss_list_price#7, ss_sales_price#9, ss_coupon_amt#8] - : : +- *(5) BroadcastHashJoin [ss_sold_date_sk#14], [d_date_sk#15], Inner, BuildRight - : : :- *(5) Project [ss_sold_date_sk#14, ss_item_sk#12, ss_promo_sk#10, ss_quantity#6, ss_list_price#7, ss_sales_price#9, ss_coupon_amt#8] - : : : +- *(5) BroadcastHashJoin [ss_cdemo_sk#16], [cd_demo_sk#17], Inner, BuildRight - : : : :- *(5) Project [ss_sold_date_sk#14, ss_item_sk#12, ss_cdemo_sk#16, ss_promo_sk#10, ss_quantity#6, ss_list_price#7, ss_sales_price#9, ss_coupon_amt#8] - : : : : +- *(5) Filter (((isnotnull(ss_cdemo_sk#16) && isnotnull(ss_sold_date_sk#14)) && isnotnull(ss_item_sk#12)) && isnotnull(ss_promo_sk#10)) - : : : : +- *(5) FileScan parquet default.store_sales[ss_sold_date_sk#14,ss_item_sk#12,ss_cdemo_sk#16,ss_promo_sk#10,ss_quantity#6,ss_list_price#7,ss_sales_price#9,ss_coupon_amt#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [d_date_sk#15] - : : +- *(2) Filter ((isnotnull(d_year#21) && (d_year#21 = 2000)) && isnotnull(d_date_sk#15)) - : : +- *(2) FileScan parquet default.date_dim[d_date_sk#15,d_year#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [i_item_sk#13, i_item_id#1] - : +- *(3) Filter isnotnull(i_item_sk#13) - : +- *(3) FileScan parquet default.item[i_item_sk#13,i_item_id#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(4) Project [p_promo_sk#11] - +- *(4) Filter (((p_channel_email#22 = N) || (p_channel_event#23 = N)) && isnotnull(p_promo_sk#11)) - +- *(4) FileScan parquet default.promotion[p_promo_sk#11,p_channel_email#22,p_channel_event#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/promotion], PartitionFilters: [], PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.customer_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.date_dim (11) + : +- BroadcastExchange (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.item (18) + +- BroadcastExchange (28) + +- * Project (27) + +- * Filter (26) + +- * ColumnarToRow (25) + +- Scan parquet default.promotion (24) + + +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_cdemo_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(3) Filter [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((isnotnull(ss_cdemo_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_promo_sk#4)) + +(4) Scan parquet default.customer_demographics +Output [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,M), EqualTo(cd_marital_status,S), EqualTo(cd_education_status,College), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(6) Filter [codegen id : 1] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] +Condition : ((((((isnotnull(cd_gender#10) AND isnotnull(cd_marital_status#11)) AND isnotnull(cd_education_status#12)) AND (cd_gender#10 = M)) AND (cd_marital_status#11 = S)) AND (cd_education_status#12 = College)) AND isnotnull(cd_demo_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [cd_demo_sk#9] +Input [4]: [cd_demo_sk#9, cd_gender#10, cd_marital_status#11, cd_education_status#12] + +(8) BroadcastExchange +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(9) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#9] +Join condition: None + +(10) Project [codegen id : 5] +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#9] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#14, d_year#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2000)) AND isnotnull(d_date_sk#14)) + +(14) Project [codegen id : 2] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#15] + +(15) BroadcastExchange +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#14] +Join condition: None + +(17) Project [codegen id : 5] +Output [6]: [ss_item_sk#2, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#14] + +(18) Scan parquet default.item +Output [2]: [i_item_sk#17, i_item_id#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#17, i_item_id#18] + +(20) Filter [codegen id : 3] +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) + +(21) BroadcastExchange +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(22) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#17] +Join condition: None + +(23) Project [codegen id : 5] +Output [6]: [ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#18] +Input [8]: [ss_item_sk#2, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_sk#17, i_item_id#18] + +(24) Scan parquet default.promotion +Output [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(25) ColumnarToRow [codegen id : 4] +Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] + +(26) Filter [codegen id : 4] +Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] +Condition : (((p_channel_email#21 = N) OR (p_channel_event#22 = N)) AND isnotnull(p_promo_sk#20)) + +(27) Project [codegen id : 4] +Output [1]: [p_promo_sk#20] +Input [3]: [p_promo_sk#20, p_channel_email#21, p_channel_event#22] + +(28) BroadcastExchange +Input [1]: [p_promo_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] + +(29) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_promo_sk#4] +Right keys [1]: [p_promo_sk#20] +Join condition: None + +(30) Project [codegen id : 5] +Output [5]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#18] +Input [7]: [ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#18, p_promo_sk#20] + +(31) HashAggregate [codegen id : 5] +Input [5]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#18] +Keys [1]: [i_item_id#18] +Functions [4]: [partial_avg(cast(ss_quantity#5 as bigint)), partial_avg(UnscaledValue(ss_list_price#6)), partial_avg(UnscaledValue(ss_coupon_amt#8)), partial_avg(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [8]: [sum#24, count#25, sum#26, count#27, sum#28, count#29, sum#30, count#31] +Results [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] + +(32) Exchange +Input [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] +Arguments: hashpartitioning(i_item_id#18, 5), true, [id=#40] + +(33) HashAggregate [codegen id : 6] +Input [9]: [i_item_id#18, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] +Keys [1]: [i_item_id#18] +Functions [4]: [avg(cast(ss_quantity#5 as bigint)), avg(UnscaledValue(ss_list_price#6)), avg(UnscaledValue(ss_coupon_amt#8)), avg(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [4]: [avg(cast(ss_quantity#5 as bigint))#41, avg(UnscaledValue(ss_list_price#6))#42, avg(UnscaledValue(ss_coupon_amt#8))#43, avg(UnscaledValue(ss_sales_price#7))#44] +Results [5]: [i_item_id#18, avg(cast(ss_quantity#5 as bigint))#41 AS agg1#45, cast((avg(UnscaledValue(ss_list_price#6))#42 / 100.0) as decimal(11,6)) AS agg2#46, cast((avg(UnscaledValue(ss_coupon_amt#8))#43 / 100.0) as decimal(11,6)) AS agg3#47, cast((avg(UnscaledValue(ss_sales_price#7))#44 / 100.0) as decimal(11,6)) AS agg4#48] + +(34) TakeOrderedAndProject +Input [5]: [i_item_id#18, agg1#45, agg2#46, agg3#47, agg4#48] +Arguments: 100, [i_item_id#18 ASC NULLS FIRST], [i_item_id#18, agg1#45, agg2#46, agg3#47, agg4#48] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/simplified.txt index 2cf9df4fd..075c6d15c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q7/simplified.txt @@ -1,42 +1,50 @@ -TakeOrderedAndProject [agg1,agg2,agg3,agg4,i_item_id] - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,i_item_id,sum,sum,sum,sum] [agg1,agg2,agg3,agg4,avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_sales_price)),avg(cast(ss_quantity as bigint)),count,count,count,count,sum,sum,sum,sum] +TakeOrderedAndProject [i_item_id,agg1,agg2,agg3,agg4] + WholeStageCodegen (6) + HashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count] [avg(cast(ss_quantity as bigint)),avg(UnscaledValue(ss_list_price)),avg(UnscaledValue(ss_coupon_amt)),avg(UnscaledValue(ss_sales_price)),agg1,agg2,agg3,agg4,sum,count,sum,count,sum,count,sum,count] InputAdapter Exchange [i_item_id] #1 - WholeStageCodegen - HashAggregate [count,count,count,count,count,count,count,count,i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price,sum,sum,sum,sum,sum,sum,sum,sum] [count,count,count,count,count,count,count,count,sum,sum,sum,sum,sum,sum,sum,sum] - Project [i_item_id,ss_coupon_amt,ss_list_price,ss_quantity,ss_sales_price] - BroadcastHashJoin [p_promo_sk,ss_promo_sk] - Project [i_item_id,ss_coupon_amt,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] - BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk] - Project [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] - Filter [ss_cdemo_sk,ss_item_sk,ss_promo_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] [ss_cdemo_sk,ss_coupon_amt,ss_item_sk,ss_list_price,ss_promo_sk,ss_quantity,ss_sales_price,ss_sold_date_sk] + WholeStageCodegen (5) + HashAggregate [i_item_id,ss_quantity,ss_list_price,ss_coupon_amt,ss_sales_price] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt,i_item_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] + Filter [ss_cdemo_sk,ss_sold_date_sk,ss_item_sk,ss_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (1) Project [cd_demo_sk] - Filter [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] - Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] [cd_demo_sk,cd_education_status,cd_gender,cd_marital_status] + Filter [cd_gender,cd_marital_status,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_gender,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [i_item_id,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (4) Project [p_promo_sk] Filter [p_channel_email,p_channel_event,p_promo_sk] - Scan parquet default.promotion [p_channel_email,p_channel_event,p_promo_sk] [p_channel_email,p_channel_event,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk,p_channel_email,p_channel_event] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/explain.txt index f1b11f830..05b533aa6 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/explain.txt @@ -1,46 +1,264 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WHEN (cast(lochierarchy#1 as int) = 0) THEN s_state#2 END ASC NULLS FIRST,rank_within_parent#3 ASC NULLS FIRST], output=[total_sum#4,s_state#2,s_county#5,lochierarchy#1,rank_within_parent#3]) -+- *(11) Project [total_sum#4, s_state#2, s_county#5, lochierarchy#1, rank_within_parent#3] - +- Window [rank(_w3#6) windowspecdefinition(_w1#7, _w2#8, _w3#6 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#3], [_w1#7, _w2#8], [_w3#6 DESC NULLS LAST] - +- *(10) Sort [_w1#7 ASC NULLS FIRST, _w2#8 ASC NULLS FIRST, _w3#6 DESC NULLS LAST], false, 0 - +- Exchange hashpartitioning(_w1#7, _w2#8, 5) - +- *(9) HashAggregate(keys=[s_state#2, s_county#5, spark_grouping_id#9], functions=[sum(UnscaledValue(ss_net_profit#10))]) - +- Exchange hashpartitioning(s_state#2, s_county#5, spark_grouping_id#9, 5) - +- *(8) HashAggregate(keys=[s_state#2, s_county#5, spark_grouping_id#9], functions=[partial_sum(UnscaledValue(ss_net_profit#10))]) - +- *(8) Expand [List(ss_net_profit#10, s_state#11, s_county#12, 0), List(ss_net_profit#10, s_state#11, null, 1), List(ss_net_profit#10, null, null, 3)], [ss_net_profit#10, s_state#2, s_county#5, spark_grouping_id#9] - +- *(8) Project [ss_net_profit#10, s_state#13 AS s_state#11, s_county#14 AS s_county#12] - +- *(8) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight - :- *(8) Project [ss_store_sk#15, ss_net_profit#10] - : +- *(8) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight - : :- *(8) Project [ss_sold_date_sk#17, ss_store_sk#15, ss_net_profit#10] - : : +- *(8) Filter (isnotnull(ss_sold_date_sk#17) && isnotnull(ss_store_sk#15)) - : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_store_sk#15,ss_net_profit#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [d_date_sk#18] - : +- *(1) Filter (((isnotnull(d_month_seq#19) && (d_month_seq#19 >= 1200)) && (d_month_seq#19 <= 1211)) && isnotnull(d_date_sk#18)) - : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_month_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(7) BroadcastHashJoin [s_state#13], [s_state#20], LeftSemi, BuildRight - :- *(7) Project [s_store_sk#16, s_county#14, s_state#13] - : +- *(7) Filter isnotnull(s_store_sk#16) - : +- *(7) FileScan parquet default.store[s_store_sk#16,s_county#14,s_state#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - +- *(6) Project [s_state#20] - +- *(6) Filter (isnotnull(ranking#21) && (ranking#21 <= 5)) - +- Window [rank(_w2#22) windowspecdefinition(s_state#13, _w2#22 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#21], [s_state#13], [_w2#22 DESC NULLS LAST] - +- *(5) Sort [s_state#13 ASC NULLS FIRST, _w2#22 DESC NULLS LAST], false, 0 - +- *(5) HashAggregate(keys=[s_state#13], functions=[sum(UnscaledValue(ss_net_profit#10))]) - +- Exchange hashpartitioning(s_state#13, 5) - +- *(4) HashAggregate(keys=[s_state#13], functions=[partial_sum(UnscaledValue(ss_net_profit#10))]) - +- *(4) Project [ss_net_profit#10, s_state#13] - +- *(4) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight - :- *(4) Project [ss_sold_date_sk#17, ss_net_profit#10, s_state#13] - : +- *(4) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight - : :- *(4) Project [ss_sold_date_sk#17, ss_store_sk#15, ss_net_profit#10] - : : +- *(4) Filter (isnotnull(ss_store_sk#15) && isnotnull(ss_sold_date_sk#17)) - : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_store_sk#15,ss_net_profit#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [s_store_sk#16, s_state#13] - : +- *(2) Filter isnotnull(s_store_sk#16) - : +- *(2) FileScan parquet default.store[s_store_sk#16,s_state#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - +- ReusedExchange [d_date_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +TakeOrderedAndProject (47) ++- * Project (46) + +- Window (45) + +- * Sort (44) + +- Exchange (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Expand (39) + +- * Project (38) + +- * BroadcastHashJoin Inner BuildRight (37) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- BroadcastExchange (36) + +- * BroadcastHashJoin LeftSemi BuildRight (35) + :- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.store (11) + +- BroadcastExchange (34) + +- * Project (33) + +- * Filter (32) + +- Window (31) + +- * Sort (30) + +- Exchange (29) + +- * HashAggregate (28) + +- Exchange (27) + +- * HashAggregate (26) + +- * Project (25) + +- * BroadcastHashJoin Inner BuildRight (24) + :- * Project (22) + : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Filter (16) + : : +- * ColumnarToRow (15) + : : +- Scan parquet default.store_sales (14) + : +- BroadcastExchange (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet default.store (17) + +- ReusedExchange (23) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] + +(3) Filter [codegen id : 9] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 9] +Output [2]: [ss_store_sk#2, ss_net_profit#3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3, d_date_sk#4] + +(11) Scan parquet default.store +Output [3]: [s_store_sk#7, s_county#8, s_state#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 8] +Input [3]: [s_store_sk#7, s_county#8, s_state#9] + +(13) Filter [codegen id : 8] +Input [3]: [s_store_sk#7, s_county#8, s_state#9] +Condition : isnotnull(s_store_sk#7) + +(14) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 4] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] + +(16) Filter [codegen id : 4] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Condition : (isnotnull(ss_store_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(17) Scan parquet default.store +Output [2]: [s_store_sk#7, s_state#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#7, s_state#9] + +(19) Filter [codegen id : 2] +Input [2]: [s_store_sk#7, s_state#9] +Condition : isnotnull(s_store_sk#7) + +(20) BroadcastExchange +Input [2]: [s_store_sk#7, s_state#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#7] +Join condition: None + +(22) Project [codegen id : 4] +Output [3]: [ss_sold_date_sk#1, ss_net_profit#3, s_state#9] +Input [5]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3, s_store_sk#7, s_state#9] + +(23) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#4] + +(24) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(25) Project [codegen id : 4] +Output [2]: [ss_net_profit#3, s_state#9] +Input [4]: [ss_sold_date_sk#1, ss_net_profit#3, s_state#9, d_date_sk#4] + +(26) HashAggregate [codegen id : 4] +Input [2]: [ss_net_profit#3, s_state#9] +Keys [1]: [s_state#9] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum#11] +Results [2]: [s_state#9, sum#12] + +(27) Exchange +Input [2]: [s_state#9, sum#12] +Arguments: hashpartitioning(s_state#9, 5), true, [id=#13] + +(28) HashAggregate [codegen id : 5] +Input [2]: [s_state#9, sum#12] +Keys [1]: [s_state#9] +Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#14] +Results [3]: [s_state#9 AS s_state#15, s_state#9, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#14,17,2) AS _w2#16] + +(29) Exchange +Input [3]: [s_state#15, s_state#9, _w2#16] +Arguments: hashpartitioning(s_state#9, 5), true, [id=#17] + +(30) Sort [codegen id : 6] +Input [3]: [s_state#15, s_state#9, _w2#16] +Arguments: [s_state#9 ASC NULLS FIRST, _w2#16 DESC NULLS LAST], false, 0 + +(31) Window +Input [3]: [s_state#15, s_state#9, _w2#16] +Arguments: [rank(_w2#16) windowspecdefinition(s_state#9, _w2#16 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#18], [s_state#9], [_w2#16 DESC NULLS LAST] + +(32) Filter [codegen id : 7] +Input [4]: [s_state#15, s_state#9, _w2#16, ranking#18] +Condition : (isnotnull(ranking#18) AND (ranking#18 <= 5)) + +(33) Project [codegen id : 7] +Output [1]: [s_state#15] +Input [4]: [s_state#15, s_state#9, _w2#16, ranking#18] + +(34) BroadcastExchange +Input [1]: [s_state#15] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#19] + +(35) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [s_state#9] +Right keys [1]: [s_state#15] +Join condition: None + +(36) BroadcastExchange +Input [3]: [s_store_sk#7, s_county#8, s_state#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] + +(37) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#7] +Join condition: None + +(38) Project [codegen id : 9] +Output [3]: [ss_net_profit#3, s_state#9, s_county#8] +Input [5]: [ss_store_sk#2, ss_net_profit#3, s_store_sk#7, s_county#8, s_state#9] + +(39) Expand [codegen id : 9] +Input [3]: [ss_net_profit#3, s_state#9, s_county#8] +Arguments: [List(ss_net_profit#3, s_state#9, s_county#8, 0), List(ss_net_profit#3, s_state#9, null, 1), List(ss_net_profit#3, null, null, 3)], [ss_net_profit#3, s_state#21, s_county#22, spark_grouping_id#23] + +(40) HashAggregate [codegen id : 9] +Input [4]: [ss_net_profit#3, s_state#21, s_county#22, spark_grouping_id#23] +Keys [3]: [s_state#21, s_county#22, spark_grouping_id#23] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum#24] +Results [4]: [s_state#21, s_county#22, spark_grouping_id#23, sum#25] + +(41) Exchange +Input [4]: [s_state#21, s_county#22, spark_grouping_id#23, sum#25] +Arguments: hashpartitioning(s_state#21, s_county#22, spark_grouping_id#23, 5), true, [id=#26] + +(42) HashAggregate [codegen id : 10] +Input [4]: [s_state#21, s_county#22, spark_grouping_id#23, sum#25] +Keys [3]: [s_state#21, s_county#22, spark_grouping_id#23] +Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#27] +Results [7]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#27,17,2) AS total_sum#28, s_state#21, s_county#22, (cast((shiftright(spark_grouping_id#23, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#23, 0) & 1) as tinyint)) AS lochierarchy#29, (cast((shiftright(spark_grouping_id#23, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#23, 0) & 1) as tinyint)) AS _w1#30, CASE WHEN (cast(cast((shiftright(spark_grouping_id#23, 0) & 1) as tinyint) as int) = 0) THEN s_state#21 END AS _w2#31, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#27,17,2) AS _w3#32] + +(43) Exchange +Input [7]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, _w1#30, _w2#31, _w3#32] +Arguments: hashpartitioning(_w1#30, _w2#31, 5), true, [id=#33] + +(44) Sort [codegen id : 11] +Input [7]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, _w1#30, _w2#31, _w3#32] +Arguments: [_w1#30 ASC NULLS FIRST, _w2#31 ASC NULLS FIRST, _w3#32 DESC NULLS LAST], false, 0 + +(45) Window +Input [7]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, _w1#30, _w2#31, _w3#32] +Arguments: [rank(_w3#32) windowspecdefinition(_w1#30, _w2#31, _w3#32 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#34], [_w1#30, _w2#31], [_w3#32 DESC NULLS LAST] + +(46) Project [codegen id : 12] +Output [5]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, rank_within_parent#34] +Input [8]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, _w1#30, _w2#31, _w3#32, rank_within_parent#34] + +(47) TakeOrderedAndProject +Input [5]: [total_sum#28, s_state#21, s_county#22, lochierarchy#29, rank_within_parent#34] +Arguments: 100, [lochierarchy#29 DESC NULLS LAST, CASE WHEN (cast(lochierarchy#29 as int) = 0) THEN s_state#21 END ASC NULLS FIRST, rank_within_parent#34 ASC NULLS FIRST], [total_sum#28, s_state#21, s_county#22, lochierarchy#29, rank_within_parent#34] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/simplified.txt index d2c001785..5d01429ad 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q70/simplified.txt @@ -1,65 +1,74 @@ -TakeOrderedAndProject [lochierarchy,rank_within_parent,s_county,s_state,total_sum] - WholeStageCodegen - Project [lochierarchy,rank_within_parent,s_county,s_state,total_sum] +TakeOrderedAndProject [lochierarchy,s_state,rank_within_parent,total_sum,s_county] + WholeStageCodegen (12) + Project [total_sum,s_state,s_county,lochierarchy,rank_within_parent] InputAdapter - Window [_w1,_w2,_w3] - WholeStageCodegen + Window [_w3,_w1,_w2] + WholeStageCodegen (11) Sort [_w1,_w2,_w3] InputAdapter Exchange [_w1,_w2] #1 - WholeStageCodegen - HashAggregate [s_county,s_state,spark_grouping_id,sum,sum(UnscaledValue(ss_net_profit))] [_w1,_w2,_w3,lochierarchy,sum,sum(UnscaledValue(ss_net_profit)),total_sum] + WholeStageCodegen (10) + HashAggregate [s_state,s_county,spark_grouping_id,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,lochierarchy,_w1,_w2,_w3,sum] InputAdapter - Exchange [s_county,s_state,spark_grouping_id] #2 - WholeStageCodegen - HashAggregate [s_county,s_state,spark_grouping_id,ss_net_profit,sum,sum] [sum,sum] - Expand [s_county,s_state,ss_net_profit] - Project [s_county,s_state,ss_net_profit] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_net_profit,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_net_profit,ss_sold_date_sk,ss_store_sk] - Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_net_profit,ss_sold_date_sk,ss_store_sk] + Exchange [s_state,s_county,spark_grouping_id] #2 + WholeStageCodegen (9) + HashAggregate [s_state,s_county,spark_grouping_id,ss_net_profit] [sum,sum] + Expand [ss_net_profit,s_state,s_county] + Project [ss_net_profit,s_state,s_county] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (8) BroadcastHashJoin [s_state,s_state] - Project [s_county,s_state,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_county,s_state,s_store_sk] [s_county,s_state,s_store_sk] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_county,s_state] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (7) Project [s_state] Filter [ranking] InputAdapter Window [_w2,s_state] - WholeStageCodegen - Sort [_w2,s_state] - HashAggregate [s_state,sum,sum(UnscaledValue(ss_net_profit))] [_w2,s_state,sum,sum(UnscaledValue(ss_net_profit))] - InputAdapter - Exchange [s_state] #6 - WholeStageCodegen - HashAggregate [s_state,ss_net_profit,sum,sum] [sum,sum] - Project [s_state,ss_net_profit] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [s_state,ss_net_profit,ss_sold_date_sk] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_net_profit,ss_sold_date_sk,ss_store_sk] - Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_net_profit,ss_sold_date_sk,ss_store_sk] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [s_state,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_state,s_store_sk] [s_state,s_store_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 + WholeStageCodegen (6) + Sort [s_state,_w2] + InputAdapter + Exchange [s_state] #6 + WholeStageCodegen (5) + HashAggregate [s_state,sum] [sum(UnscaledValue(ss_net_profit)),s_state,_w2,sum] + InputAdapter + Exchange [s_state] #7 + WholeStageCodegen (4) + HashAggregate [s_state,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_state] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_net_profit,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_state] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/explain.txt index ecda81c88..9471377a1 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/explain.txt @@ -1,40 +1,232 @@ == Physical Plan == -*(11) Sort [ext_price#1 DESC NULLS LAST, brand_id#2 ASC NULLS FIRST], true, 0 -+- Exchange rangepartitioning(ext_price#1 DESC NULLS LAST, brand_id#2 ASC NULLS FIRST, 5) - +- *(10) HashAggregate(keys=[i_brand#3, i_brand_id#4, t_hour#5, t_minute#6], functions=[sum(UnscaledValue(ext_price#7))]) - +- Exchange hashpartitioning(i_brand#3, i_brand_id#4, t_hour#5, t_minute#6, 5) - +- *(9) HashAggregate(keys=[i_brand#3, i_brand_id#4, t_hour#5, t_minute#6], functions=[partial_sum(UnscaledValue(ext_price#7))]) - +- *(9) Project [i_brand_id#4, i_brand#3, ext_price#7, t_hour#5, t_minute#6] - +- *(9) BroadcastHashJoin [time_sk#8], [t_time_sk#9], Inner, BuildRight - :- *(9) Project [i_brand_id#4, i_brand#3, ext_price#7, time_sk#8] - : +- *(9) BroadcastHashJoin [i_item_sk#10], [sold_item_sk#11], Inner, BuildLeft - : :- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [i_item_sk#10, i_brand_id#4, i_brand#3] - : : +- *(1) Filter ((isnotnull(i_manager_id#12) && (i_manager_id#12 = 1)) && isnotnull(i_item_sk#10)) - : : +- *(1) FileScan parquet default.item[i_item_sk#10,i_brand_id#4,i_brand#3,i_manager_id#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)], ReadSchema: struct - : +- Union - : :- *(3) Project [ws_ext_sales_price#13 AS ext_price#7, ws_item_sk#14 AS sold_item_sk#11, ws_sold_time_sk#15 AS time_sk#8] - : : +- *(3) BroadcastHashJoin [ws_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight - : : :- *(3) Project [ws_sold_date_sk#16, ws_sold_time_sk#15, ws_item_sk#14, ws_ext_sales_price#13] - : : : +- *(3) Filter ((isnotnull(ws_sold_date_sk#16) && isnotnull(ws_item_sk#14)) && isnotnull(ws_sold_time_sk#15)) - : : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#16,ws_sold_time_sk#15,ws_item_sk#14,ws_ext_sales_price#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_time_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [d_date_sk#17] - : : +- *(2) Filter ((((isnotnull(d_moy#18) && isnotnull(d_year#19)) && (d_moy#18 = 11)) && (d_year#19 = 1999)) && isnotnull(d_date_sk#17)) - : : +- *(2) FileScan parquet default.date_dim[d_date_sk#17,d_year#19,d_moy#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct - : :- *(5) Project [cs_ext_sales_price#20 AS ext_price#21, cs_item_sk#22 AS sold_item_sk#23, cs_sold_time_sk#24 AS time_sk#25] - : : +- *(5) BroadcastHashJoin [cs_sold_date_sk#26], [d_date_sk#17], Inner, BuildRight - : : :- *(5) Project [cs_sold_date_sk#26, cs_sold_time_sk#24, cs_item_sk#22, cs_ext_sales_price#20] - : : : +- *(5) Filter ((isnotnull(cs_sold_date_sk#26) && isnotnull(cs_item_sk#22)) && isnotnull(cs_sold_time_sk#24)) - : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#26,cs_sold_time_sk#24,cs_item_sk#22,cs_ext_sales_price#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_time_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(7) Project [ss_ext_sales_price#27 AS ext_price#28, ss_item_sk#29 AS sold_item_sk#30, ss_sold_time_sk#31 AS time_sk#32] - : +- *(7) BroadcastHashJoin [ss_sold_date_sk#33], [d_date_sk#17], Inner, BuildRight - : :- *(7) Project [ss_sold_date_sk#33, ss_sold_time_sk#31, ss_item_sk#29, ss_ext_sales_price#27] - : : +- *(7) Filter ((isnotnull(ss_sold_date_sk#33) && isnotnull(ss_item_sk#29)) && isnotnull(ss_sold_time_sk#31)) - : : +- *(7) FileScan parquet default.store_sales[ss_sold_date_sk#33,ss_sold_time_sk#31,ss_item_sk#29,ss_ext_sales_price#27] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_sold_time_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(8) Project [t_time_sk#9, t_hour#5, t_minute#6] - +- *(8) Filter (((t_meal_time#34 = breakfast) || (t_meal_time#34 = dinner)) && isnotnull(t_time_sk#9)) - +- *(8) FileScan parquet default.time_dim[t_time_sk#9,t_hour#5,t_minute#6,t_meal_time#34] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [Or(EqualTo(t_meal_time,breakfast),EqualTo(t_meal_time,dinner)), IsNotNull(t_time_sk)], ReadSchema: struct \ No newline at end of file +* Sort (42) ++- Exchange (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * Project (37) + +- * BroadcastHashJoin Inner BuildRight (36) + :- * Project (30) + : +- * BroadcastHashJoin Inner BuildLeft (29) + : :- BroadcastExchange (5) + : : +- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.item (1) + : +- Union (28) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet default.web_sales (6) + : : +- BroadcastExchange (13) + : : +- * Project (12) + : : +- * Filter (11) + : : +- * ColumnarToRow (10) + : : +- Scan parquet default.date_dim (9) + : :- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Filter (18) + : : : +- * ColumnarToRow (17) + : : : +- Scan parquet default.catalog_sales (16) + : : +- ReusedExchange (19) + : +- * Project (27) + : +- * BroadcastHashJoin Inner BuildRight (26) + : :- * Filter (24) + : : +- * ColumnarToRow (23) + : : +- Scan parquet default.store_sales (22) + : +- ReusedExchange (25) + +- BroadcastExchange (35) + +- * Project (34) + +- * Filter (33) + +- * ColumnarToRow (32) + +- Scan parquet default.time_dim (31) + + +(1) Scan parquet default.item +Output [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] + +(3) Filter [codegen id : 1] +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] +Condition : ((isnotnull(i_manager_id#4) AND (i_manager_id#4 = 1)) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 1] +Output [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] +Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] + +(5) BroadcastExchange +Input [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#5] + +(6) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#6, ws_sold_time_sk#7, ws_item_sk#8, ws_ext_sales_price#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_time_sk)] +ReadSchema: struct + +(7) ColumnarToRow [codegen id : 3] +Input [4]: [ws_sold_date_sk#6, ws_sold_time_sk#7, ws_item_sk#8, ws_ext_sales_price#9] + +(8) Filter [codegen id : 3] +Input [4]: [ws_sold_date_sk#6, ws_sold_time_sk#7, ws_item_sk#8, ws_ext_sales_price#9] +Condition : ((isnotnull(ws_sold_date_sk#6) AND isnotnull(ws_item_sk#8)) AND isnotnull(ws_sold_time_sk#7)) + +(9) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(11) Filter [codegen id : 2] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] +Condition : ((((isnotnull(d_moy#12) AND isnotnull(d_year#11)) AND (d_moy#12 = 11)) AND (d_year#11 = 1999)) AND isnotnull(d_date_sk#10)) + +(12) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(13) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#6] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(15) Project [codegen id : 3] +Output [3]: [ws_ext_sales_price#9 AS ext_price#14, ws_item_sk#8 AS sold_item_sk#15, ws_sold_time_sk#7 AS time_sk#16] +Input [5]: [ws_sold_date_sk#6, ws_sold_time_sk#7, ws_item_sk#8, ws_ext_sales_price#9, d_date_sk#10] + +(16) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#17, cs_sold_time_sk#18, cs_item_sk#19, cs_ext_sales_price#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_time_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 5] +Input [4]: [cs_sold_date_sk#17, cs_sold_time_sk#18, cs_item_sk#19, cs_ext_sales_price#20] + +(18) Filter [codegen id : 5] +Input [4]: [cs_sold_date_sk#17, cs_sold_time_sk#18, cs_item_sk#19, cs_ext_sales_price#20] +Condition : ((isnotnull(cs_sold_date_sk#17) AND isnotnull(cs_item_sk#19)) AND isnotnull(cs_sold_time_sk#18)) + +(19) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#10] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#17] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(21) Project [codegen id : 5] +Output [3]: [cs_ext_sales_price#20 AS ext_price#21, cs_item_sk#19 AS sold_item_sk#22, cs_sold_time_sk#18 AS time_sk#23] +Input [5]: [cs_sold_date_sk#17, cs_sold_time_sk#18, cs_item_sk#19, cs_ext_sales_price#20, d_date_sk#10] + +(22) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#24, ss_sold_time_sk#25, ss_item_sk#26, ss_ext_sales_price#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_sold_time_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 7] +Input [4]: [ss_sold_date_sk#24, ss_sold_time_sk#25, ss_item_sk#26, ss_ext_sales_price#27] + +(24) Filter [codegen id : 7] +Input [4]: [ss_sold_date_sk#24, ss_sold_time_sk#25, ss_item_sk#26, ss_ext_sales_price#27] +Condition : ((isnotnull(ss_sold_date_sk#24) AND isnotnull(ss_item_sk#26)) AND isnotnull(ss_sold_time_sk#25)) + +(25) ReusedExchange [Reuses operator id: 13] +Output [1]: [d_date_sk#10] + +(26) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [ss_sold_date_sk#24] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(27) Project [codegen id : 7] +Output [3]: [ss_ext_sales_price#27 AS ext_price#28, ss_item_sk#26 AS sold_item_sk#29, ss_sold_time_sk#25 AS time_sk#30] +Input [5]: [ss_sold_date_sk#24, ss_sold_time_sk#25, ss_item_sk#26, ss_ext_sales_price#27, d_date_sk#10] + +(28) Union + +(29) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [sold_item_sk#15] +Join condition: None + +(30) Project [codegen id : 9] +Output [4]: [i_brand_id#2, i_brand#3, ext_price#14, time_sk#16] +Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, ext_price#14, sold_item_sk#15, time_sk#16] + +(31) Scan parquet default.time_dim +Output [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [Or(EqualTo(t_meal_time,breakfast),EqualTo(t_meal_time,dinner)), IsNotNull(t_time_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 8] +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] + +(33) Filter [codegen id : 8] +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Condition : (((t_meal_time#34 = breakfast) OR (t_meal_time#34 = dinner)) AND isnotnull(t_time_sk#31)) + +(34) Project [codegen id : 8] +Output [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] + +(35) BroadcastExchange +Input [3]: [t_time_sk#31, t_hour#32, t_minute#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#35] + +(36) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [time_sk#16] +Right keys [1]: [t_time_sk#31] +Join condition: None + +(37) Project [codegen id : 9] +Output [5]: [i_brand_id#2, i_brand#3, ext_price#14, t_hour#32, t_minute#33] +Input [7]: [i_brand_id#2, i_brand#3, ext_price#14, time_sk#16, t_time_sk#31, t_hour#32, t_minute#33] + +(38) HashAggregate [codegen id : 9] +Input [5]: [i_brand_id#2, i_brand#3, ext_price#14, t_hour#32, t_minute#33] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] +Functions [1]: [partial_sum(UnscaledValue(ext_price#14))] +Aggregate Attributes [1]: [sum#36] +Results [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#37] + +(39) Exchange +Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#37] +Arguments: hashpartitioning(i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, 5), true, [id=#38] + +(40) HashAggregate [codegen id : 10] +Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#37] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] +Functions [1]: [sum(UnscaledValue(ext_price#14))] +Aggregate Attributes [1]: [sum(UnscaledValue(ext_price#14))#39] +Results [5]: [i_brand_id#2 AS brand_id#40, i_brand#3 AS brand#41, t_hour#32, t_minute#33, MakeDecimal(sum(UnscaledValue(ext_price#14))#39,17,2) AS ext_price#42] + +(41) Exchange +Input [5]: [brand_id#40, brand#41, t_hour#32, t_minute#33, ext_price#42] +Arguments: rangepartitioning(ext_price#42 DESC NULLS LAST, brand_id#40 ASC NULLS FIRST, 5), true, [id=#43] + +(42) Sort [codegen id : 11] +Input [5]: [brand_id#40, brand#41, t_hour#32, t_minute#33, ext_price#42] +Arguments: [ext_price#42 DESC NULLS LAST, brand_id#40 ASC NULLS FIRST], true, 0 + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/simplified.txt index c905e2e5d..7fb18bbd6 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q71/simplified.txt @@ -1,56 +1,65 @@ -WholeStageCodegen - Sort [brand_id,ext_price] +WholeStageCodegen (11) + Sort [ext_price,brand_id] InputAdapter - Exchange [brand_id,ext_price] #1 - WholeStageCodegen - HashAggregate [i_brand,i_brand_id,sum,sum(UnscaledValue(ext_price)),t_hour,t_minute] [brand,brand_id,ext_price,sum,sum(UnscaledValue(ext_price))] + Exchange [ext_price,brand_id] #1 + WholeStageCodegen (10) + HashAggregate [i_brand,i_brand_id,t_hour,t_minute,sum] [sum(UnscaledValue(ext_price)),brand_id,brand,ext_price,sum] InputAdapter Exchange [i_brand,i_brand_id,t_hour,t_minute] #2 - WholeStageCodegen - HashAggregate [ext_price,i_brand,i_brand_id,sum,sum,t_hour,t_minute] [sum,sum] - Project [ext_price,i_brand,i_brand_id,t_hour,t_minute] - BroadcastHashJoin [t_time_sk,time_sk] - Project [ext_price,i_brand,i_brand_id,time_sk] + WholeStageCodegen (9) + HashAggregate [i_brand,i_brand_id,t_hour,t_minute,ext_price] [sum,sum] + Project [i_brand_id,i_brand,ext_price,t_hour,t_minute] + BroadcastHashJoin [time_sk,t_time_sk] + Project [i_brand_id,i_brand,ext_price,time_sk] BroadcastHashJoin [i_item_sk,sold_item_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [i_brand,i_brand_id,i_item_sk] - Filter [i_item_sk,i_manager_id] - Scan parquet default.item [i_brand,i_brand_id,i_item_sk,i_manager_id] [i_brand,i_brand_id,i_item_sk,i_manager_id] + WholeStageCodegen (1) + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] InputAdapter Union - WholeStageCodegen + WholeStageCodegen (3) Project [ws_ext_sales_price,ws_item_sk,ws_sold_time_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] - Filter [ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] [ws_ext_sales_price,ws_item_sk,ws_sold_date_sk,ws_sold_time_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_item_sk,ws_sold_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_sold_time_sk,ws_item_sk,ws_ext_sales_price] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] - WholeStageCodegen + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (5) Project [cs_ext_sales_price,cs_item_sk,cs_sold_time_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] - Filter [cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] - Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] [cs_ext_sales_price,cs_item_sk,cs_sold_date_sk,cs_sold_time_sk] + Filter [cs_sold_date_sk,cs_item_sk,cs_sold_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_sold_time_sk,cs_item_sk,cs_ext_sales_price] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #4 - WholeStageCodegen + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (7) Project [ss_ext_sales_price,ss_item_sk,ss_sold_time_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_sold_time_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_item_sk,ss_sold_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_sold_time_sk,ss_item_sk,ss_ext_sales_price] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #4 + ReusedExchange [d_date_sk] #4 InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [t_hour,t_minute,t_time_sk] + WholeStageCodegen (8) + Project [t_time_sk,t_hour,t_minute] Filter [t_meal_time,t_time_sk] - Scan parquet default.time_dim [t_hour,t_meal_time,t_minute,t_time_sk] [t_hour,t_meal_time,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute,t_meal_time] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt index 99558702e..5ca28c3e3 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/explain.txt @@ -1,68 +1,391 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[total_cnt#1 DESC NULLS LAST,i_item_desc#2 ASC NULLS FIRST,w_warehouse_name#3 ASC NULLS FIRST,d_week_seq#4 ASC NULLS FIRST], output=[i_item_desc#2,w_warehouse_name#3,d_week_seq#4,no_promo#5,promo#6,total_cnt#1]) -+- *(12) HashAggregate(keys=[i_item_desc#2, w_warehouse_name#3, d_week_seq#4], functions=[count(1)]) - +- Exchange hashpartitioning(i_item_desc#2, w_warehouse_name#3, d_week_seq#4, 5) - +- *(11) HashAggregate(keys=[i_item_desc#2, w_warehouse_name#3, d_week_seq#4], functions=[partial_count(1)]) - +- *(11) Project [w_warehouse_name#3, i_item_desc#2, d_week_seq#4] - +- *(11) BroadcastHashJoin [cs_item_sk#7, cs_order_number#8], [cr_item_sk#9, cr_order_number#10], LeftOuter, BuildRight - :- *(11) Project [cs_item_sk#7, cs_order_number#8, w_warehouse_name#3, i_item_desc#2, d_week_seq#4] - : +- *(11) BroadcastHashJoin [cs_promo_sk#11], [p_promo_sk#12], LeftOuter, BuildRight - : :- *(11) Project [cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, w_warehouse_name#3, i_item_desc#2, d_week_seq#4] - : : +- *(11) BroadcastHashJoin [cs_ship_date_sk#13], [d_date_sk#14], Inner, BuildRight, (d_date#15 > cast(cast(d_date#16 as timestamp) + interval 5 days as date)) - : : :- *(11) Project [cs_ship_date_sk#13, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, w_warehouse_name#3, i_item_desc#2, d_date#16, d_week_seq#4] - : : : +- *(11) BroadcastHashJoin [d_week_seq#4, inv_date_sk#17], [d_week_seq#18, d_date_sk#19], Inner, BuildRight - : : : :- *(11) Project [cs_ship_date_sk#13, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2, d_date#16, d_week_seq#4] - : : : : +- *(11) BroadcastHashJoin [cs_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight - : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2] - : : : : : +- *(11) BroadcastHashJoin [cs_bill_hdemo_sk#22], [hd_demo_sk#23], Inner, BuildRight - : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2] - : : : : : : +- *(11) BroadcastHashJoin [cs_bill_cdemo_sk#24], [cd_demo_sk#25], Inner, BuildRight - : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3, i_item_desc#2] - : : : : : : : +- *(11) BroadcastHashJoin [cs_item_sk#7], [i_item_sk#26], Inner, BuildRight - : : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, w_warehouse_name#3] - : : : : : : : : +- *(11) BroadcastHashJoin [inv_warehouse_sk#27], [w_warehouse_sk#28], Inner, BuildRight - : : : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, inv_date_sk#17, inv_warehouse_sk#27] - : : : : : : : : : +- *(11) BroadcastHashJoin [cs_item_sk#7], [inv_item_sk#29], Inner, BuildRight, (inv_quantity_on_hand#30 < cs_quantity#31) - : : : : : : : : : :- *(11) Project [cs_sold_date_sk#20, cs_ship_date_sk#13, cs_bill_cdemo_sk#24, cs_bill_hdemo_sk#22, cs_item_sk#7, cs_promo_sk#11, cs_order_number#8, cs_quantity#31] - : : : : : : : : : : +- *(11) Filter (((((isnotnull(cs_quantity#31) && isnotnull(cs_item_sk#7)) && isnotnull(cs_bill_cdemo_sk#24)) && isnotnull(cs_bill_hdemo_sk#22)) && isnotnull(cs_sold_date_sk#20)) && isnotnull(cs_ship_date_sk#13)) - : : : : : : : : : : +- *(11) FileScan parquet default.catalog_sales[cs_sold_date_sk#20,cs_ship_date_sk#13,cs_bill_cdemo_sk#24,cs_bill_hdemo_sk#22,cs_item_sk#7,cs_promo_sk#11,cs_order_number#8,cs_quantity#31] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hd..., ReadSchema: struct - : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : : +- *(2) Project [w_warehouse_sk#28, w_warehouse_name#3] - : : : : : : : : +- *(2) Filter isnotnull(w_warehouse_sk#28) - : : : : : : : : +- *(2) FileScan parquet default.warehouse[w_warehouse_sk#28,w_warehouse_name#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/warehouse], PartitionFilters: [], PushedFilters: [IsNotNull(w_warehouse_sk)], ReadSchema: struct - : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : : +- *(3) Project [i_item_sk#26, i_item_desc#2] - : : : : : : : +- *(3) Filter isnotnull(i_item_sk#26) - : : : : : : : +- *(3) FileScan parquet default.item[i_item_sk#26,i_item_desc#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : : +- *(4) Project [cd_demo_sk#25] - : : : : : : +- *(4) Filter ((isnotnull(cd_marital_status#32) && (cd_marital_status#32 = D)) && isnotnull(cd_demo_sk#25)) - : : : : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#25,cd_marital_status#32] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,D), IsNotNull(cd_demo_sk)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : : +- *(5) Project [hd_demo_sk#23] - : : : : : +- *(5) Filter ((isnotnull(hd_buy_potential#33) && (hd_buy_potential#33 = >10000)) && isnotnull(hd_demo_sk#23)) - : : : : : +- *(5) FileScan parquet default.household_demographics[hd_demo_sk#23,hd_buy_potential#33] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000), IsNotNull(hd_demo_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(6) Project [d_date_sk#21, d_date#16, d_week_seq#4] - : : : : +- *(6) Filter (((isnotnull(d_year#34) && (d_year#34 = 1999)) && isnotnull(d_date_sk#21)) && isnotnull(d_week_seq#4)) - : : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#21,d_date#16,d_week_seq#4,d_year#34] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) - : : : +- *(7) Project [d_date_sk#19, d_week_seq#18] - : : : +- *(7) Filter (isnotnull(d_week_seq#18) && isnotnull(d_date_sk#19)) - : : : +- *(7) FileScan parquet default.date_dim[d_date_sk#19,d_week_seq#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(8) Project [d_date_sk#14, d_date#15] - : : +- *(8) Filter (isnotnull(d_date#15) && isnotnull(d_date_sk#14)) - : : +- *(8) FileScan parquet default.date_dim[d_date_sk#14,d_date#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(9) Project [p_promo_sk#12] - : +- *(9) Filter isnotnull(p_promo_sk#12) - : +- *(9) FileScan parquet default.promotion[p_promo_sk#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_promo_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295)))) - +- *(10) Project [cr_item_sk#9, cr_order_number#10] - +- *(10) Filter (isnotnull(cr_item_sk#9) && isnotnull(cr_order_number#10)) - +- *(10) FileScan parquet default.catalog_returns[cr_item_sk#9,cr_order_number#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (70) ++- * HashAggregate (69) + +- Exchange (68) + +- * HashAggregate (67) + +- * Project (66) + +- * BroadcastHashJoin LeftOuter BuildRight (65) + :- * Project (60) + : +- * BroadcastHashJoin LeftOuter BuildRight (59) + : :- * Project (54) + : : +- * BroadcastHashJoin Inner BuildRight (53) + : : :- * Project (48) + : : : +- * BroadcastHashJoin Inner BuildRight (47) + : : : :- * Project (42) + : : : : +- * BroadcastHashJoin Inner BuildRight (41) + : : : : :- * Project (35) + : : : : : +- * BroadcastHashJoin Inner BuildRight (34) + : : : : : :- * Project (28) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : : : :- * Project (21) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : : : : :- * Project (15) + : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : : : : :- * Project (9) + : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : : : : :- * Filter (3) + : : : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : : : : : +- BroadcastExchange (7) + : : : : : : : : : +- * Filter (6) + : : : : : : : : : +- * ColumnarToRow (5) + : : : : : : : : : +- Scan parquet default.inventory (4) + : : : : : : : : +- BroadcastExchange (13) + : : : : : : : : +- * Filter (12) + : : : : : : : : +- * ColumnarToRow (11) + : : : : : : : : +- Scan parquet default.warehouse (10) + : : : : : : : +- BroadcastExchange (19) + : : : : : : : +- * Filter (18) + : : : : : : : +- * ColumnarToRow (17) + : : : : : : : +- Scan parquet default.item (16) + : : : : : : +- BroadcastExchange (26) + : : : : : : +- * Project (25) + : : : : : : +- * Filter (24) + : : : : : : +- * ColumnarToRow (23) + : : : : : : +- Scan parquet default.customer_demographics (22) + : : : : : +- BroadcastExchange (33) + : : : : : +- * Project (32) + : : : : : +- * Filter (31) + : : : : : +- * ColumnarToRow (30) + : : : : : +- Scan parquet default.household_demographics (29) + : : : : +- BroadcastExchange (40) + : : : : +- * Project (39) + : : : : +- * Filter (38) + : : : : +- * ColumnarToRow (37) + : : : : +- Scan parquet default.date_dim (36) + : : : +- BroadcastExchange (46) + : : : +- * Filter (45) + : : : +- * ColumnarToRow (44) + : : : +- Scan parquet default.date_dim (43) + : : +- BroadcastExchange (52) + : : +- * Filter (51) + : : +- * ColumnarToRow (50) + : : +- Scan parquet default.date_dim (49) + : +- BroadcastExchange (58) + : +- * Filter (57) + : +- * ColumnarToRow (56) + : +- Scan parquet default.promotion (55) + +- BroadcastExchange (64) + +- * Filter (63) + +- * ColumnarToRow (62) + +- Scan parquet default.catalog_returns (61) + + +(1) Scan parquet default.catalog_sales +Output [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 11] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] + +(3) Filter [codegen id : 11] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] +Condition : (((((isnotnull(cs_quantity#8) AND isnotnull(cs_item_sk#5)) AND isnotnull(cs_bill_cdemo_sk#3)) AND isnotnull(cs_bill_hdemo_sk#4)) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_ship_date_sk#2)) + +(4) Scan parquet default.inventory +Output [4]: [inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/inventory] +PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] + +(6) Filter [codegen id : 1] +Input [4]: [inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] +Condition : (((isnotnull(inv_quantity_on_hand#12) AND isnotnull(inv_item_sk#10)) AND isnotnull(inv_warehouse_sk#11)) AND isnotnull(inv_date_sk#9)) + +(7) BroadcastExchange +Input [4]: [inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#13] + +(8) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#5] +Right keys [1]: [inv_item_sk#10] +Join condition: (inv_quantity_on_hand#12 < cs_quantity#8) + +(9) Project [codegen id : 11] +Output [9]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, inv_warehouse_sk#11] +Input [12]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, inv_date_sk#9, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12] + +(10) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#14, w_warehouse_name#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [w_warehouse_sk#14, w_warehouse_name#15] + +(12) Filter [codegen id : 2] +Input [2]: [w_warehouse_sk#14, w_warehouse_name#15] +Condition : isnotnull(w_warehouse_sk#14) + +(13) BroadcastExchange +Input [2]: [w_warehouse_sk#14, w_warehouse_name#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] + +(14) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [inv_warehouse_sk#11] +Right keys [1]: [w_warehouse_sk#14] +Join condition: None + +(15) Project [codegen id : 11] +Output [9]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15] +Input [11]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, inv_warehouse_sk#11, w_warehouse_sk#14, w_warehouse_name#15] + +(16) Scan parquet default.item +Output [2]: [i_item_sk#17, i_item_desc#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [i_item_sk#17, i_item_desc#18] + +(18) Filter [codegen id : 3] +Input [2]: [i_item_sk#17, i_item_desc#18] +Condition : isnotnull(i_item_sk#17) + +(19) BroadcastExchange +Input [2]: [i_item_sk#17, i_item_desc#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] + +(20) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_item_sk#5] +Right keys [1]: [i_item_sk#17] +Join condition: None + +(21) Project [codegen id : 11] +Output [10]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18] +Input [11]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_sk#17, i_item_desc#18] + +(22) Scan parquet default.customer_demographics +Output [2]: [cd_demo_sk#20, cd_marital_status#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,D), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [2]: [cd_demo_sk#20, cd_marital_status#21] + +(24) Filter [codegen id : 4] +Input [2]: [cd_demo_sk#20, cd_marital_status#21] +Condition : ((isnotnull(cd_marital_status#21) AND (cd_marital_status#21 = D)) AND isnotnull(cd_demo_sk#20)) + +(25) Project [codegen id : 4] +Output [1]: [cd_demo_sk#20] +Input [2]: [cd_demo_sk#20, cd_marital_status#21] + +(26) BroadcastExchange +Input [1]: [cd_demo_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] + +(27) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_cdemo_sk#3] +Right keys [1]: [cd_demo_sk#20] +Join condition: None + +(28) Project [codegen id : 11] +Output [9]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18] +Input [11]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, cd_demo_sk#20] + +(29) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#23, hd_buy_potential#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [2]: [hd_demo_sk#23, hd_buy_potential#24] + +(31) Filter [codegen id : 5] +Input [2]: [hd_demo_sk#23, hd_buy_potential#24] +Condition : ((isnotnull(hd_buy_potential#24) AND (hd_buy_potential#24 = >10000)) AND isnotnull(hd_demo_sk#23)) + +(32) Project [codegen id : 5] +Output [1]: [hd_demo_sk#23] +Input [2]: [hd_demo_sk#23, hd_buy_potential#24] + +(33) BroadcastExchange +Input [1]: [hd_demo_sk#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] + +(34) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_bill_hdemo_sk#4] +Right keys [1]: [hd_demo_sk#23] +Join condition: None + +(35) Project [codegen id : 11] +Output [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18] +Input [10]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, hd_demo_sk#23] + +(36) Scan parquet default.date_dim +Output [4]: [d_date_sk#26, d_date#27, d_week_seq#28, d_year#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [4]: [d_date_sk#26, d_date#27, d_week_seq#28, d_year#29] + +(38) Filter [codegen id : 6] +Input [4]: [d_date_sk#26, d_date#27, d_week_seq#28, d_year#29] +Condition : ((((isnotnull(d_year#29) AND (d_year#29 = 1999)) AND isnotnull(d_date_sk#26)) AND isnotnull(d_week_seq#28)) AND isnotnull(d_date#27)) + +(39) Project [codegen id : 6] +Output [3]: [d_date_sk#26, d_date#27, d_week_seq#28] +Input [4]: [d_date_sk#26, d_date#27, d_week_seq#28, d_year#29] + +(40) BroadcastExchange +Input [3]: [d_date_sk#26, d_date#27, d_week_seq#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] + +(41) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#26] +Join condition: None + +(42) Project [codegen id : 11] +Output [9]: [cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, d_date#27, d_week_seq#28] +Input [11]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, d_date_sk#26, d_date#27, d_week_seq#28] + +(43) Scan parquet default.date_dim +Output [2]: [d_date_sk#31, d_week_seq#32] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 7] +Input [2]: [d_date_sk#31, d_week_seq#32] + +(45) Filter [codegen id : 7] +Input [2]: [d_date_sk#31, d_week_seq#32] +Condition : (isnotnull(d_week_seq#32) AND isnotnull(d_date_sk#31)) + +(46) BroadcastExchange +Input [2]: [d_date_sk#31, d_week_seq#32] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#33] + +(47) BroadcastHashJoin [codegen id : 11] +Left keys [2]: [d_week_seq#28, inv_date_sk#9] +Right keys [2]: [d_week_seq#32, d_date_sk#31] +Join condition: None + +(48) Project [codegen id : 11] +Output [8]: [cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_date#27, d_week_seq#28] +Input [11]: [cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, inv_date_sk#9, w_warehouse_name#15, i_item_desc#18, d_date#27, d_week_seq#28, d_date_sk#31, d_week_seq#32] + +(49) Scan parquet default.date_dim +Output [2]: [d_date_sk#34, d_date#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 8] +Input [2]: [d_date_sk#34, d_date#35] + +(51) Filter [codegen id : 8] +Input [2]: [d_date_sk#34, d_date#35] +Condition : (isnotnull(d_date#35) AND isnotnull(d_date_sk#34)) + +(52) BroadcastExchange +Input [2]: [d_date_sk#34, d_date#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#36] + +(53) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_ship_date_sk#2] +Right keys [1]: [d_date_sk#34] +Join condition: (d_date#35 > d_date#27 + 5 days) + +(54) Project [codegen id : 11] +Output [6]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_week_seq#28] +Input [10]: [cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_date#27, d_week_seq#28, d_date_sk#34, d_date#35] + +(55) Scan parquet default.promotion +Output [1]: [p_promo_sk#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_promo_sk)] +ReadSchema: struct + +(56) ColumnarToRow [codegen id : 9] +Input [1]: [p_promo_sk#37] + +(57) Filter [codegen id : 9] +Input [1]: [p_promo_sk#37] +Condition : isnotnull(p_promo_sk#37) + +(58) BroadcastExchange +Input [1]: [p_promo_sk#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#38] + +(59) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [cs_promo_sk#6] +Right keys [1]: [p_promo_sk#37] +Join condition: None + +(60) Project [codegen id : 11] +Output [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_week_seq#28] +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_week_seq#28, p_promo_sk#37] + +(61) Scan parquet default.catalog_returns +Output [2]: [cr_item_sk#39, cr_order_number#40] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(62) ColumnarToRow [codegen id : 10] +Input [2]: [cr_item_sk#39, cr_order_number#40] + +(63) Filter [codegen id : 10] +Input [2]: [cr_item_sk#39, cr_order_number#40] +Condition : (isnotnull(cr_item_sk#39) AND isnotnull(cr_order_number#40)) + +(64) BroadcastExchange +Input [2]: [cr_item_sk#39, cr_order_number#40] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [id=#41] + +(65) BroadcastHashJoin [codegen id : 11] +Left keys [2]: [cs_item_sk#5, cs_order_number#7] +Right keys [2]: [cr_item_sk#39, cr_order_number#40] +Join condition: None + +(66) Project [codegen id : 11] +Output [3]: [w_warehouse_name#15, i_item_desc#18, d_week_seq#28] +Input [7]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#15, i_item_desc#18, d_week_seq#28, cr_item_sk#39, cr_order_number#40] + +(67) HashAggregate [codegen id : 11] +Input [3]: [w_warehouse_name#15, i_item_desc#18, d_week_seq#28] +Keys [3]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#42] +Results [4]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, count#43] + +(68) Exchange +Input [4]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, count#43] +Arguments: hashpartitioning(i_item_desc#18, w_warehouse_name#15, d_week_seq#28, 5), true, [id=#44] + +(69) HashAggregate [codegen id : 12] +Input [4]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, count#43] +Keys [3]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#45] +Results [6]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, count(1)#45 AS no_promo#46, count(1)#45 AS promo#47, count(1)#45 AS total_cnt#48] + +(70) TakeOrderedAndProject +Input [6]: [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, no_promo#46, promo#47, total_cnt#48] +Arguments: 100, [total_cnt#48 DESC NULLS LAST, i_item_desc#18 ASC NULLS FIRST, w_warehouse_name#15 ASC NULLS FIRST, d_week_seq#28 ASC NULLS FIRST], [i_item_desc#18, w_warehouse_name#15, d_week_seq#28, no_promo#46, promo#47, total_cnt#48] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/simplified.txt index 5a0b8c54a..25f03cbac 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q72/simplified.txt @@ -1,90 +1,104 @@ -TakeOrderedAndProject [d_week_seq,i_item_desc,no_promo,promo,total_cnt,w_warehouse_name] - WholeStageCodegen - HashAggregate [count,count(1),d_week_seq,i_item_desc,w_warehouse_name] [count,count(1),no_promo,promo,total_cnt] +TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_promo,promo] + WholeStageCodegen (12) + HashAggregate [i_item_desc,w_warehouse_name,d_week_seq,count] [count(1),no_promo,promo,total_cnt,count] InputAdapter - Exchange [d_week_seq,i_item_desc,w_warehouse_name] #1 - WholeStageCodegen - HashAggregate [count,count,d_week_seq,i_item_desc,w_warehouse_name] [count,count] - Project [d_week_seq,i_item_desc,w_warehouse_name] - BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] - Project [cs_item_sk,cs_order_number,d_week_seq,i_item_desc,w_warehouse_name] + Exchange [i_item_desc,w_warehouse_name,d_week_seq] #1 + WholeStageCodegen (11) + HashAggregate [i_item_desc,w_warehouse_name,d_week_seq] [count,count] + Project [w_warehouse_name,i_item_desc,d_week_seq] + BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + Project [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_item_sk,cs_order_number,cs_promo_sk,d_week_seq,i_item_desc,w_warehouse_name] - BroadcastHashJoin [cs_ship_date_sk,d_date,d_date,d_date_sk] - Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,d_date,d_week_seq,i_item_desc,w_warehouse_name] - BroadcastHashJoin [d_date_sk,d_week_seq,d_week_seq,inv_date_sk] - Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,d_date,d_week_seq,i_item_desc,inv_date_sk,w_warehouse_name] + Project [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk,d_date,d_date] + Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_date,d_week_seq] + BroadcastHashJoin [d_week_seq,inv_date_sk,d_week_seq,d_date_sk] + Project [cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc,d_date,d_week_seq] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,i_item_desc,inv_date_sk,w_warehouse_name] + Project [cs_sold_date_sk,cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc] BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] - Project [cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,i_item_desc,inv_date_sk,w_warehouse_name] - BroadcastHashJoin [cd_demo_sk,cs_bill_cdemo_sk] - Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,i_item_desc,inv_date_sk,w_warehouse_name] + Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name,i_item_desc] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,w_warehouse_name] + Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,w_warehouse_name] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_ship_date_sk,cs_sold_date_sk,inv_date_sk,inv_warehouse_sk] - BroadcastHashJoin [cs_item_sk,cs_quantity,inv_item_sk,inv_quantity_on_hand] - Project [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] - Filter [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] [cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_order_number,cs_promo_sk,cs_quantity,cs_ship_date_sk,cs_sold_date_sk] + Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,inv_date_sk,inv_warehouse_sk] + BroadcastHashJoin [cs_item_sk,inv_item_sk,inv_quantity_on_hand,cs_quantity] + Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_sold_date_sk,cs_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] [inv_date_sk,inv_item_sk,inv_quantity_on_hand,inv_warehouse_sk] + WholeStageCodegen (1) + Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [w_warehouse_name,w_warehouse_sk] - Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] + WholeStageCodegen (2) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [i_item_desc,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_item_desc,i_item_sk] [i_item_desc,i_item_sk] + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_desc] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (4) Project [cd_demo_sk] - Filter [cd_demo_sk,cd_marital_status] - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] [cd_demo_sk,cd_marital_status] + Filter [cd_marital_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (5) Project [hd_demo_sk] Filter [hd_buy_potential,hd_demo_sk] - Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk] [hd_buy_potential,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] InputAdapter BroadcastExchange #7 - WholeStageCodegen - Project [d_date,d_date_sk,d_week_seq] - Filter [d_date_sk,d_week_seq,d_year] - Scan parquet default.date_dim [d_date,d_date_sk,d_week_seq,d_year] [d_date,d_date_sk,d_week_seq,d_year] + WholeStageCodegen (6) + Project [d_date_sk,d_date,d_week_seq] + Filter [d_year,d_date_sk,d_week_seq,d_date] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] InputAdapter BroadcastExchange #8 - WholeStageCodegen - Project [d_date_sk,d_week_seq] - Filter [d_date_sk,d_week_seq] - Scan parquet default.date_dim [d_date_sk,d_week_seq] [d_date_sk,d_week_seq] + WholeStageCodegen (7) + Filter [d_week_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] InputAdapter BroadcastExchange #9 - WholeStageCodegen - Project [d_date,d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + WholeStageCodegen (8) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter BroadcastExchange #10 - WholeStageCodegen - Project [p_promo_sk] - Filter [p_promo_sk] - Scan parquet default.promotion [p_promo_sk] [p_promo_sk] + WholeStageCodegen (9) + Filter [p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk] InputAdapter BroadcastExchange #11 - WholeStageCodegen - Project [cr_item_sk,cr_order_number] - Filter [cr_item_sk,cr_order_number] - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] [cr_item_sk,cr_order_number] + WholeStageCodegen (10) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/explain.txt index a444e7bf3..f4565c3ed 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/explain.txt @@ -1,34 +1,203 @@ == Physical Plan == -*(7) Sort [cnt#1 DESC NULLS LAST], true, 0 -+- Exchange rangepartitioning(cnt#1 DESC NULLS LAST, 5) - +- *(6) Project [c_last_name#2, c_first_name#3, c_salutation#4, c_preferred_cust_flag#5, ss_ticket_number#6, cnt#1] - +- *(6) BroadcastHashJoin [ss_customer_sk#7], [c_customer_sk#8], Inner, BuildRight - :- *(6) Filter ((cnt#1 >= 1) && (cnt#1 <= 5)) - : +- *(6) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#7], functions=[count(1)]) - : +- Exchange hashpartitioning(ss_ticket_number#6, ss_customer_sk#7, 5) - : +- *(4) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#7], functions=[partial_count(1)]) - : +- *(4) Project [ss_customer_sk#7, ss_ticket_number#6] - : +- *(4) BroadcastHashJoin [ss_hdemo_sk#9], [hd_demo_sk#10], Inner, BuildRight - : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_ticket_number#6] - : : +- *(4) BroadcastHashJoin [ss_store_sk#11], [s_store_sk#12], Inner, BuildRight - : : :- *(4) Project [ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#6] - : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight - : : : :- *(4) Project [ss_sold_date_sk#13, ss_customer_sk#7, ss_hdemo_sk#9, ss_store_sk#11, ss_ticket_number#6] - : : : : +- *(4) Filter (((isnotnull(ss_sold_date_sk#13) && isnotnull(ss_store_sk#11)) && isnotnull(ss_hdemo_sk#9)) && isnotnull(ss_customer_sk#7)) - : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_customer_sk#7,ss_hdemo_sk#9,ss_store_sk#11,ss_ticket_number#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#14] - : : : +- *(1) Filter ((((isnotnull(d_dom#15) && (d_dom#15 >= 1)) && (d_dom#15 <= 2)) && d_year#16 IN (1999,2000,2001)) && isnotnull(d_date_sk#14)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#14,d_year#16,d_dom#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [s_store_sk#12] - : : +- *(2) Filter (s_county#17 IN (Williamson County,Franklin Parish,Bronx County,Orange County) && isnotnull(s_store_sk#12)) - : : +- *(2) FileScan parquet default.store[s_store_sk#12,s_county#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [In(s_county, [Williamson County,Franklin Parish,Bronx County,Orange County]), IsNotNull(s_store_..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [hd_demo_sk#10] - : +- *(3) Filter ((((isnotnull(hd_vehicle_count#18) && ((hd_buy_potential#19 = >10000) || (hd_buy_potential#19 = unknown))) && (hd_vehicle_count#18 > 0)) && (CASE WHEN (hd_vehicle_count#18 > 0) THEN (cast(hd_dep_count#20 as double) / cast(hd_vehicle_count#18 as double)) ELSE null END > 1.0)) && isnotnull(hd_demo_sk#10)) - : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#10,hd_buy_potential#19,hd_dep_count#20,hd_vehicle_count#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknow..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(5) Project [c_customer_sk#8, c_salutation#4, c_first_name#3, c_last_name#2, c_preferred_cust_flag#5] - +- *(5) Filter isnotnull(c_customer_sk#8) - +- *(5) FileScan parquet default.customer[c_customer_sk#8,c_salutation#4,c_first_name#3,c_last_name#2,c_preferred_cust_flag#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] + +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#6, d_year#7, d_dom#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] +Condition : ((((isnotnull(d_dom#8) AND (d_dom#8 >= 1)) AND (d_dom#8 <= 2)) AND d_year#7 IN (1999,2000,2001)) AND isnotnull(d_date_sk#6)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [3]: [d_date_sk#6, d_year#7, d_dom#8] + +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5] +Input [6]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, d_date_sk#6] + +(11) Scan parquet default.store +Output [2]: [s_store_sk#10, s_county#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [In(s_county, [Williamson County,Franklin Parish,Bronx County,Orange County]), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] + +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#10, s_county#11] +Condition : (s_county#11 IN (Williamson County,Franklin Parish,Bronx County,Orange County) AND isnotnull(s_store_sk#10)) + +(14) Project [codegen id : 2] +Output [1]: [s_store_sk#10] +Input [2]: [s_store_sk#10, s_county#11] + +(15) BroadcastExchange +Input [1]: [s_store_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#10] +Join condition: None + +(17) Project [codegen id : 4] +Output [3]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5] +Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, s_store_sk#10] + +(18) Scan parquet default.household_demographics +Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(20) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.0)) AND isnotnull(hd_demo_sk#13)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#13] +Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#13] +Join condition: None + +(24) Project [codegen id : 4] +Output [2]: [ss_customer_sk#2, ss_ticket_number#5] +Input [4]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_ticket_number#5, hd_demo_sk#13] + +(25) HashAggregate [codegen id : 4] +Input [2]: [ss_customer_sk#2, ss_ticket_number#5] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#18] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] + +(26) Exchange +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] + +(27) HashAggregate [codegen id : 6] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] +Keys [2]: [ss_ticket_number#5, ss_customer_sk#2] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#21] +Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count(1)#21 AS cnt#22] + +(28) Filter [codegen id : 6] +Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] +Condition : ((cnt#22 >= 1) AND (cnt#22 <= 5)) + +(29) Scan parquet default.customer +Output [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 5] +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] + +(31) Filter [codegen id : 5] +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Condition : isnotnull(c_customer_sk#23) + +(32) BroadcastExchange +Input [5]: [c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#23] +Join condition: None + +(34) Project [codegen id : 6] +Output [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_salutation#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27] + +(35) Exchange +Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), true, [id=#29] + +(36) Sort [codegen id : 7] +Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] +Arguments: [cnt#22 DESC NULLS LAST], true, 0 + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/simplified.txt index 15bb10f43..46b724156 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q73/simplified.txt @@ -1,46 +1,54 @@ -WholeStageCodegen +WholeStageCodegen (7) Sort [cnt] InputAdapter Exchange [cnt] #1 - WholeStageCodegen - Project [c_first_name,c_last_name,c_preferred_cust_flag,c_salutation,cnt,ss_ticket_number] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] + WholeStageCodegen (6) + Project [c_last_name,c_first_name,c_salutation,c_preferred_cust_flag,ss_ticket_number,cnt] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] Filter [cnt] - HashAggregate [count,count(1),ss_customer_sk,ss_ticket_number] [cnt,count,count(1)] + HashAggregate [ss_ticket_number,ss_customer_sk,count] [count(1),cnt,count] InputAdapter - Exchange [ss_customer_sk,ss_ticket_number] #2 - WholeStageCodegen - HashAggregate [count,count,ss_customer_sk,ss_ticket_number] [count,count] + Exchange [ss_ticket_number,ss_customer_sk] #2 + WholeStageCodegen (4) + HashAggregate [ss_ticket_number,ss_customer_sk] [count,count] Project [ss_customer_sk,ss_ticket_number] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] Project [ss_customer_sk,ss_hdemo_sk,ss_ticket_number] - BroadcastHashJoin [s_store_sk,ss_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk,ss_hdemo_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_hdemo_sk,ss_store_sk,ss_ticket_number] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_dom,d_year] - Scan parquet default.date_dim [d_date_sk,d_dom,d_year] [d_date_sk,d_dom,d_year] + Filter [d_dom,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_dom] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (2) Project [s_store_sk] Filter [s_county,s_store_sk] - Scan parquet default.store [s_county,s_store_sk] [s_county,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_county] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] - Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_buy_potential,hd_demo_sk,hd_dep_count,hd_vehicle_count] + Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] InputAdapter BroadcastExchange #6 - WholeStageCodegen - Project [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] - Filter [c_customer_sk] - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] [c_customer_sk,c_first_name,c_last_name,c_preferred_cust_flag,c_salutation] + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/explain.txt index 8ba775ee8..4c9bad96e 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/explain.txt @@ -1,80 +1,410 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[customer_id#1 ASC NULLS FIRST,customer_id#1 ASC NULLS FIRST,customer_id#1 ASC NULLS FIRST], output=[customer_id#1,customer_first_name#2,customer_last_name#3]) -+- *(17) Project [customer_id#1, customer_first_name#2, customer_last_name#3] - +- *(17) BroadcastHashJoin [customer_id#4], [customer_id#5], Inner, BuildRight, (CASE WHEN (year_total#6 > 0.00) THEN CheckOverflow((promote_precision(year_total#7) / promote_precision(year_total#6)), DecimalType(37,20)) ELSE null END > CASE WHEN (year_total#8 > 0.00) THEN CheckOverflow((promote_precision(year_total#9) / promote_precision(year_total#8)), DecimalType(37,20)) ELSE null END) - :- *(17) Project [customer_id#4, year_total#8, customer_id#1, customer_first_name#2, customer_last_name#3, year_total#9, year_total#6] - : +- *(17) BroadcastHashJoin [customer_id#4], [customer_id#10], Inner, BuildRight - : :- *(17) BroadcastHashJoin [customer_id#4], [customer_id#1], Inner, BuildRight - : : :- Union - : : : :- *(4) Filter (isnotnull(year_total#8) && (year_total#8 > 0.00)) - : : : : +- *(4) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ss_net_paid#15))]) - : : : : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 5) - : : : : +- *(3) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ss_net_paid#15))]) - : : : : +- *(3) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_net_paid#15, d_year#14] - : : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight - : : : : :- *(3) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_sold_date_sk#16, ss_net_paid#15] - : : : : : +- *(3) BroadcastHashJoin [c_customer_sk#18], [ss_customer_sk#19], Inner, BuildRight - : : : : : :- *(3) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] - : : : : : : +- *(3) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) - : : : : : : +- *(3) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : : : : +- *(1) Project [ss_sold_date_sk#16, ss_customer_sk#19, ss_net_paid#15] - : : : : : +- *(1) Filter (isnotnull(ss_customer_sk#19) && isnotnull(ss_sold_date_sk#16)) - : : : : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#16,ss_customer_sk#19,ss_net_paid#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(2) Project [d_date_sk#17, d_year#14] - : : : : +- *(2) Filter (((isnotnull(d_year#14) && d_year#14 IN (2001,2002)) && (d_year#14 = 2001)) && isnotnull(d_date_sk#17)) - : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#17,d_year#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), In(d_year, [2001,2002]), EqualTo(d_year,2001), IsNotNull(d_date_sk)], ReadSchema: struct - : : : +- LocalTableScan , [customer_id#20, year_total#21] - : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : : +- Union - : : :- *(8) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ss_net_paid#15))]) - : : : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 5) - : : : +- *(7) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ss_net_paid#15))]) - : : : +- *(7) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_net_paid#15, d_year#14] - : : : +- *(7) BroadcastHashJoin [ss_sold_date_sk#16], [d_date_sk#17], Inner, BuildRight - : : : :- *(7) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ss_sold_date_sk#16, ss_net_paid#15] - : : : : +- *(7) BroadcastHashJoin [c_customer_sk#18], [ss_customer_sk#19], Inner, BuildRight - : : : : :- *(7) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] - : : : : : +- *(7) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) - : : : : : +- *(7) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - : : : : +- ReusedExchange [ss_sold_date_sk#16, ss_customer_sk#19, ss_net_paid#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(6) Project [d_date_sk#17, d_year#14] - : : : +- *(6) Filter (((isnotnull(d_year#14) && d_year#14 IN (2001,2002)) && (d_year#14 = 2002)) && isnotnull(d_date_sk#17)) - : : : +- *(6) FileScan parquet default.date_dim[d_date_sk#17,d_year#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), In(d_year, [2001,2002]), EqualTo(d_year,2002), IsNotNull(d_date_sk)], ReadSchema: struct - : : +- LocalTableScan , [customer_id#20, customer_first_name#22, customer_last_name#23, year_total#21] - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : +- Union - : :- LocalTableScan , [customer_id#10, year_total#6] - : +- *(12) Filter (isnotnull(year_total#21) && (year_total#21 > 0.00)) - : +- *(12) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ws_net_paid#24))]) - : +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 5) - : +- *(11) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ws_net_paid#24))]) - : +- *(11) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_net_paid#24, d_year#14] - : +- *(11) BroadcastHashJoin [ws_sold_date_sk#25], [d_date_sk#17], Inner, BuildRight - : :- *(11) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_sold_date_sk#25, ws_net_paid#24] - : : +- *(11) BroadcastHashJoin [c_customer_sk#18], [ws_bill_customer_sk#26], Inner, BuildRight - : : :- *(11) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] - : : : +- *(11) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) - : : : +- *(11) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : +- *(9) Project [ws_sold_date_sk#25, ws_bill_customer_sk#26, ws_net_paid#24] - : : +- *(9) Filter (isnotnull(ws_bill_customer_sk#26) && isnotnull(ws_sold_date_sk#25)) - : : +- *(9) FileScan parquet default.web_sales[ws_sold_date_sk#25,ws_bill_customer_sk#26,ws_net_paid#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#17, d_year#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - +- Union - :- LocalTableScan , [customer_id#5, year_total#7] - +- *(16) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[sum(UnscaledValue(ws_net_paid#24))]) - +- Exchange hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14, 5) - +- *(15) HashAggregate(keys=[c_customer_id#11, c_first_name#12, c_last_name#13, d_year#14], functions=[partial_sum(UnscaledValue(ws_net_paid#24))]) - +- *(15) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_net_paid#24, d_year#14] - +- *(15) BroadcastHashJoin [ws_sold_date_sk#25], [d_date_sk#17], Inner, BuildRight - :- *(15) Project [c_customer_id#11, c_first_name#12, c_last_name#13, ws_sold_date_sk#25, ws_net_paid#24] - : +- *(15) BroadcastHashJoin [c_customer_sk#18], [ws_bill_customer_sk#26], Inner, BuildRight - : :- *(15) Project [c_customer_sk#18, c_customer_id#11, c_first_name#12, c_last_name#13] - : : +- *(15) Filter (isnotnull(c_customer_sk#18) && isnotnull(c_customer_id#11)) - : : +- *(15) FileScan parquet default.customer[c_customer_sk#18,c_customer_id#11,c_first_name#12,c_last_name#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)], ReadSchema: struct - : +- ReusedExchange [ws_sold_date_sk#25, ws_bill_customer_sk#26, ws_net_paid#24], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - +- ReusedExchange [d_date_sk#17, d_year#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +TakeOrderedAndProject (72) ++- * Project (71) + +- * BroadcastHashJoin Inner BuildRight (70) + :- * Project (56) + : +- * BroadcastHashJoin Inner BuildRight (55) + : :- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Filter (19) + : : : +- * HashAggregate (18) + : : : +- Exchange (17) + : : : +- * HashAggregate (16) + : : : +- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.store_sales (4) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (35) + : : +- * HashAggregate (34) + : : +- Exchange (33) + : : +- * HashAggregate (32) + : : +- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet default.customer (20) + : : : +- ReusedExchange (23) + : : +- BroadcastExchange (29) + : : +- * Filter (28) + : : +- * ColumnarToRow (27) + : : +- Scan parquet default.date_dim (26) + : +- BroadcastExchange (54) + : +- * Project (53) + : +- * Filter (52) + : +- * HashAggregate (51) + : +- Exchange (50) + : +- * HashAggregate (49) + : +- * Project (48) + : +- * BroadcastHashJoin Inner BuildRight (47) + : :- * Project (45) + : : +- * BroadcastHashJoin Inner BuildRight (44) + : : :- * Filter (39) + : : : +- * ColumnarToRow (38) + : : : +- Scan parquet default.customer (37) + : : +- BroadcastExchange (43) + : : +- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet default.web_sales (40) + : +- ReusedExchange (46) + +- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * Project (65) + +- * BroadcastHashJoin Inner BuildRight (64) + :- * Project (62) + : +- * BroadcastHashJoin Inner BuildRight (61) + : :- * Filter (59) + : : +- * ColumnarToRow (58) + : : +- Scan parquet default.customer (57) + : +- ReusedExchange (60) + +- ReusedExchange (63) + + +(1) Scan parquet default.customer +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(3) Filter [codegen id : 3] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(4) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] + +(6) Filter [codegen id : 1] +Input [3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] +Condition : (isnotnull(ss_customer_sk#6) AND isnotnull(ss_sold_date_sk#5)) + +(7) BroadcastExchange +Input [3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#6] +Join condition: None + +(9) Project [codegen id : 3] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_net_paid#7] +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#9, d_year#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#9, d_year#10] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#9, d_year#10] +Condition : (((isnotnull(d_year#10) AND (d_year#10 = 2001)) AND d_year#10 IN (2001,2002)) AND isnotnull(d_date_sk#9)) + +(13) BroadcastExchange +Input [2]: [d_date_sk#9, d_year#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(15) Project [codegen id : 3] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#7, d_year#10] +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_net_paid#7, d_date_sk#9, d_year#10] + +(16) HashAggregate [codegen id : 3] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#7, d_year#10] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#7))] +Aggregate Attributes [1]: [sum#12] +Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#13] + +(17) Exchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#13] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, 5), true, [id=#14] + +(18) HashAggregate [codegen id : 16] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#13] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [sum(UnscaledValue(ss_net_paid#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#7))#15] +Results [2]: [c_customer_id#2 AS customer_id#16, MakeDecimal(sum(UnscaledValue(ss_net_paid#7))#15,17,2) AS year_total#17] + +(19) Filter [codegen id : 16] +Input [2]: [customer_id#16, year_total#17] +Condition : (isnotnull(year_total#17) AND (year_total#17 > 0.00)) + +(20) Scan parquet default.customer +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(22) Filter [codegen id : 6] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(23) ReusedExchange [Reuses operator id: 7] +Output [3]: [ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ss_customer_sk#6] +Join condition: None + +(25) Project [codegen id : 6] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_net_paid#7] +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_customer_sk#6, ss_net_paid#7] + +(26) Scan parquet default.date_dim +Output [2]: [d_date_sk#9, d_year#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 5] +Input [2]: [d_date_sk#9, d_year#10] + +(28) Filter [codegen id : 5] +Input [2]: [d_date_sk#9, d_year#10] +Condition : (((isnotnull(d_year#10) AND (d_year#10 = 2002)) AND d_year#10 IN (2001,2002)) AND isnotnull(d_date_sk#9)) + +(29) BroadcastExchange +Input [2]: [d_date_sk#9, d_year#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] + +(30) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(31) Project [codegen id : 6] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#7, d_year#10] +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_sold_date_sk#5, ss_net_paid#7, d_date_sk#9, d_year#10] + +(32) HashAggregate [codegen id : 6] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#7, d_year#10] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#7))] +Aggregate Attributes [1]: [sum#19] +Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#20] + +(33) Exchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#20] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, 5), true, [id=#21] + +(34) HashAggregate [codegen id : 7] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#20] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [sum(UnscaledValue(ss_net_paid#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#7))#22] +Results [4]: [c_customer_id#2 AS customer_id#23, c_first_name#3 AS customer_first_name#24, c_last_name#4 AS customer_last_name#25, MakeDecimal(sum(UnscaledValue(ss_net_paid#7))#22,17,2) AS year_total#26] + +(35) BroadcastExchange +Input [4]: [customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#27] + +(36) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#16] +Right keys [1]: [customer_id#23] +Join condition: None + +(37) Scan parquet default.customer +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(38) ColumnarToRow [codegen id : 10] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(39) Filter [codegen id : 10] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(40) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 8] +Input [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] + +(42) Filter [codegen id : 8] +Input [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] +Condition : (isnotnull(ws_bill_customer_sk#29) AND isnotnull(ws_sold_date_sk#28)) + +(43) BroadcastExchange +Input [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#31] + +(44) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#29] +Join condition: None + +(45) Project [codegen id : 10] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_net_paid#30] +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] + +(46) ReusedExchange [Reuses operator id: 13] +Output [2]: [d_date_sk#9, d_year#10] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [ws_sold_date_sk#28] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(48) Project [codegen id : 10] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_net_paid#30, d_year#10] +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_net_paid#30, d_date_sk#9, d_year#10] + +(49) HashAggregate [codegen id : 10] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_net_paid#30, d_year#10] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#30))] +Aggregate Attributes [1]: [sum#32] +Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#33] + +(50) Exchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#33] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, 5), true, [id=#34] + +(51) HashAggregate [codegen id : 11] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#33] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [sum(UnscaledValue(ws_net_paid#30))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#30))#35] +Results [2]: [c_customer_id#2 AS customer_id#36, MakeDecimal(sum(UnscaledValue(ws_net_paid#30))#35,17,2) AS year_total#37] + +(52) Filter [codegen id : 11] +Input [2]: [customer_id#36, year_total#37] +Condition : (isnotnull(year_total#37) AND (year_total#37 > 0.00)) + +(53) Project [codegen id : 11] +Output [2]: [customer_id#36 AS customer_id#38, year_total#37 AS year_total#39] +Input [2]: [customer_id#36, year_total#37] + +(54) BroadcastExchange +Input [2]: [customer_id#38, year_total#39] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#40] + +(55) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#16] +Right keys [1]: [customer_id#38] +Join condition: None + +(56) Project [codegen id : 16] +Output [7]: [customer_id#16, year_total#17, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#39] +Input [8]: [customer_id#16, year_total#17, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, customer_id#38, year_total#39] + +(57) Scan parquet default.customer +Output [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] +ReadSchema: struct + +(58) ColumnarToRow [codegen id : 14] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] + +(59) Filter [codegen id : 14] +Input [4]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4] +Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_customer_id#2)) + +(60) ReusedExchange [Reuses operator id: 43] +Output [3]: [ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] + +(61) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [ws_bill_customer_sk#29] +Join condition: None + +(62) Project [codegen id : 14] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_net_paid#30] +Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_bill_customer_sk#29, ws_net_paid#30] + +(63) ReusedExchange [Reuses operator id: 29] +Output [2]: [d_date_sk#9, d_year#10] + +(64) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#28] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(65) Project [codegen id : 14] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_net_paid#30, d_year#10] +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_sold_date_sk#28, ws_net_paid#30, d_date_sk#9, d_year#10] + +(66) HashAggregate [codegen id : 14] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ws_net_paid#30, d_year#10] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#30))] +Aggregate Attributes [1]: [sum#41] +Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#42] + +(67) Exchange +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#42] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, 5), true, [id=#43] + +(68) HashAggregate [codegen id : 15] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#42] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] +Functions [1]: [sum(UnscaledValue(ws_net_paid#30))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#30))#44] +Results [2]: [c_customer_id#2 AS customer_id#45, MakeDecimal(sum(UnscaledValue(ws_net_paid#30))#44,17,2) AS year_total#46] + +(69) BroadcastExchange +Input [2]: [customer_id#45, year_total#46] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#47] + +(70) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [customer_id#16] +Right keys [1]: [customer_id#45] +Join condition: (CASE WHEN (year_total#39 > 0.00) THEN CheckOverflow((promote_precision(year_total#46) / promote_precision(year_total#39)), DecimalType(37,20), true) ELSE null END > CASE WHEN (year_total#17 > 0.00) THEN CheckOverflow((promote_precision(year_total#26) / promote_precision(year_total#17)), DecimalType(37,20), true) ELSE null END) + +(71) Project [codegen id : 16] +Output [3]: [customer_id#23, customer_first_name#24, customer_last_name#25] +Input [9]: [customer_id#16, year_total#17, customer_id#23, customer_first_name#24, customer_last_name#25, year_total#26, year_total#39, customer_id#45, year_total#46] + +(72) TakeOrderedAndProject +Input [3]: [customer_id#23, customer_first_name#24, customer_last_name#25] +Arguments: 100, [customer_id#23 ASC NULLS FIRST, customer_id#23 ASC NULLS FIRST, customer_id#23 ASC NULLS FIRST], [customer_id#23, customer_first_name#24, customer_last_name#25] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/simplified.txt index 5167a92d5..add2d43fc 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q74/simplified.txt @@ -1,108 +1,107 @@ -TakeOrderedAndProject [customer_first_name,customer_id,customer_last_name] - WholeStageCodegen - Project [customer_first_name,customer_id,customer_last_name] +TakeOrderedAndProject [customer_id,customer_first_name,customer_last_name] + WholeStageCodegen (16) + Project [customer_id,customer_first_name,customer_last_name] BroadcastHashJoin [customer_id,customer_id,year_total,year_total,year_total,year_total] - Project [customer_first_name,customer_id,customer_id,customer_last_name,year_total,year_total,year_total] + Project [customer_id,year_total,customer_id,customer_first_name,customer_last_name,year_total,year_total] BroadcastHashJoin [customer_id,customer_id] BroadcastHashJoin [customer_id,customer_id] - InputAdapter - Union - WholeStageCodegen - Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ss_net_paid))] [customer_id,sum,sum(UnscaledValue(ss_net_paid)),year_total] - InputAdapter - Exchange [c_customer_id,c_first_name,c_last_name,d_year] #1 - WholeStageCodegen - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid,sum,sum] [sum,sum] - Project [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_customer_id,c_customer_sk,c_first_name,c_last_name] - Filter [c_customer_id,c_customer_sk] - Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] [c_customer_id,c_customer_sk,c_first_name,c_last_name] + Filter [year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ss_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #1 + WholeStageCodegen (3) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ss_sold_date_sk,ss_net_paid] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (1) + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_net_paid] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #2 - WholeStageCodegen - Project [ss_customer_sk,ss_net_paid,ss_sold_date_sk] - Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_net_paid,ss_sold_date_sk] [ss_customer_sk,ss_net_paid,ss_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk,d_year] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - LocalTableScan [customer_id,year_total] [customer_id,year_total] + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #4 - Union - WholeStageCodegen - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ss_net_paid))] [customer_first_name,customer_id,customer_last_name,sum,sum(UnscaledValue(ss_net_paid)),year_total] - InputAdapter - Exchange [c_customer_id,c_first_name,c_last_name,d_year] #5 - WholeStageCodegen - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid,sum,sum] [sum,sum] - Project [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,ss_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [c_customer_id,c_customer_sk,c_first_name,c_last_name] - Filter [c_customer_id,c_customer_sk] - Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] [c_customer_id,c_customer_sk,c_first_name,c_last_name] - InputAdapter - ReusedExchange [ss_customer_sk,ss_net_paid,ss_sold_date_sk] [ss_customer_sk,ss_net_paid,ss_sold_date_sk] #2 - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [d_date_sk,d_year] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] - LocalTableScan [customer_first_name,customer_id,customer_last_name,year_total] [customer_first_name,customer_id,customer_last_name,year_total] + WholeStageCodegen (7) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ss_net_paid)),customer_id,customer_first_name,customer_last_name,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #5 + WholeStageCodegen (6) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ss_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ss_net_paid,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ss_sold_date_sk,ss_net_paid] + BroadcastHashJoin [c_customer_sk,ss_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ss_sold_date_sk,ss_customer_sk,ss_net_paid] #2 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #7 - Union - LocalTableScan [customer_id,year_total] [customer_id,year_total] - WholeStageCodegen + WholeStageCodegen (11) + Project [customer_id,year_total] Filter [year_total] - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ws_net_paid))] [customer_id,sum,sum(UnscaledValue(ws_net_paid)),year_total] + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] InputAdapter Exchange [c_customer_id,c_first_name,c_last_name,d_year] #8 - WholeStageCodegen - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum,ws_net_paid] [sum,sum] - Project [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] + WholeStageCodegen (10) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ws_sold_date_sk,ws_net_paid] BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_customer_id,c_customer_sk,c_first_name,c_last_name] - Filter [c_customer_id,c_customer_sk] - Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] [c_customer_id,c_customer_sk,c_first_name,c_last_name] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] InputAdapter BroadcastExchange #9 - WholeStageCodegen - Project [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] - Filter [ws_bill_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] + WholeStageCodegen (8) + Filter [ws_bill_customer_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 + ReusedExchange [d_date_sk,d_year] #3 InputAdapter BroadcastExchange #10 - Union - LocalTableScan [customer_id,year_total] [customer_id,year_total] - WholeStageCodegen - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum(UnscaledValue(ws_net_paid))] [customer_id,sum,sum(UnscaledValue(ws_net_paid)),year_total] - InputAdapter - Exchange [c_customer_id,c_first_name,c_last_name,d_year] #11 - WholeStageCodegen - HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum,sum,ws_net_paid] [sum,sum] - Project [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,ws_sold_date_sk] - BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [c_customer_id,c_customer_sk,c_first_name,c_last_name] - Filter [c_customer_id,c_customer_sk] - Scan parquet default.customer [c_customer_id,c_customer_sk,c_first_name,c_last_name] [c_customer_id,c_customer_sk,c_first_name,c_last_name] - InputAdapter - ReusedExchange [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] [ws_bill_customer_sk,ws_net_paid,ws_sold_date_sk] #9 - InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #6 + WholeStageCodegen (15) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,sum] [sum(UnscaledValue(ws_net_paid)),customer_id,year_total,sum] + InputAdapter + Exchange [c_customer_id,c_first_name,c_last_name,d_year] #11 + WholeStageCodegen (14) + HashAggregate [c_customer_id,c_first_name,c_last_name,d_year,ws_net_paid] [sum,sum] + Project [c_customer_id,c_first_name,c_last_name,ws_net_paid,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [c_customer_id,c_first_name,c_last_name,ws_sold_date_sk,ws_net_paid] + BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] + Filter [c_customer_sk,c_customer_id] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_customer_id,c_first_name,c_last_name] + InputAdapter + ReusedExchange [ws_sold_date_sk,ws_bill_customer_sk,ws_net_paid] #9 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/explain.txt index e230db432..292a44930 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/explain.txt @@ -1,117 +1,647 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[sales_cnt_diff#1 ASC NULLS FIRST], output=[prev_year#2,year#3,i_brand_id#4,i_class_id#5,i_category_id#6,i_manufact_id#7,prev_yr_cnt#8,curr_yr_cnt#9,sales_cnt_diff#1,sales_amt_diff#10]) -+- *(34) Project [d_year#11 AS prev_year#2, d_year#12 AS year#3, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#13 AS prev_yr_cnt#8, sales_cnt#14 AS curr_yr_cnt#9, (sales_cnt#14 - sales_cnt#13) AS sales_cnt_diff#1, CheckOverflow((promote_precision(cast(sales_amt#15 as decimal(19,2))) - promote_precision(cast(sales_amt#16 as decimal(19,2)))), DecimalType(19,2)) AS sales_amt_diff#10] - +- *(34) BroadcastHashJoin [i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7], [i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], Inner, BuildRight, (CheckOverflow((promote_precision(cast(sales_cnt#14 as decimal(17,2))) / promote_precision(cast(sales_cnt#13 as decimal(17,2)))), DecimalType(37,20)) < 0.90000000000000000000) - :- *(34) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7], functions=[sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))]) - : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, 5) - : +- *(16) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7], functions=[partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))]) - : +- *(16) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) - : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22, 5) - : +- *(15) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) - : +- Union - : :- *(10) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) - : : +- Exchange hashpartitioning(d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22, 5) - : : +- *(9) HashAggregate(keys=[d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, sales_cnt#21, sales_amt#22], functions=[]) - : : +- Union - : : :- *(4) Project [d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, (cs_quantity#23 - coalesce(cr_return_quantity#24, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#25 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#26, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#22] - : : : +- *(4) BroadcastHashJoin [cs_order_number#27, cs_item_sk#28], [cr_order_number#29, cr_item_sk#30], LeftOuter, BuildRight - : : : :- *(4) Project [cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, d_year#12] - : : : : +- *(4) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#32], Inner, BuildRight - : : : : :- *(4) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7] - : : : : : +- *(4) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#33], Inner, BuildRight - : : : : : :- *(4) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25] - : : : : : : +- *(4) Filter (isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#31)) - : : : : : : +- *(4) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#28,cs_order_number#27,cs_quantity#23,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) - : : : +- *(3) Project [cr_item_sk#30, cr_order_number#29, cr_return_quantity#24, cr_return_amount#26] - : : : +- *(3) Filter (isnotnull(cr_item_sk#30) && isnotnull(cr_order_number#29)) - : : : +- *(3) FileScan parquet default.catalog_returns[cr_item_sk#30,cr_order_number#29,cr_return_quantity#24,cr_return_amount#26] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)], ReadSchema: struct - : : +- *(8) Project [d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, (ss_quantity#35 - coalesce(sr_return_quantity#36, 0)) AS sales_cnt#37, CheckOverflow((promote_precision(cast(ss_ext_sales_price#38 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#39, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#40] - : : +- *(8) BroadcastHashJoin [cast(ss_ticket_number#41 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#43, sr_item_sk#44], LeftOuter, BuildRight - : : :- *(8) Project [ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, d_year#12] - : : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#45], [d_date_sk#32], Inner, BuildRight - : : : :- *(8) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7] - : : : : +- *(8) BroadcastHashJoin [ss_item_sk#42], [i_item_sk#33], Inner, BuildRight - : : : : :- *(8) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38] - : : : : : +- *(8) Filter (isnotnull(ss_item_sk#42) && isnotnull(ss_sold_date_sk#45)) - : : : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#45,ss_item_sk#42,ss_ticket_number#41,ss_quantity#35,ss_ext_sales_price#38] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : +- *(14) Project [d_year#12, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, (ws_quantity#46 - coalesce(wr_return_quantity#47, 0)) AS sales_cnt#48, CheckOverflow((promote_precision(cast(ws_ext_sales_price#49 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#50, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#51] - : +- *(14) BroadcastHashJoin [cast(ws_order_number#52 as bigint), cast(ws_item_sk#53 as bigint)], [wr_order_number#54, wr_item_sk#55], LeftOuter, BuildRight - : :- *(14) Project [ws_item_sk#53, ws_order_number#52, ws_quantity#46, ws_ext_sales_price#49, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7, d_year#12] - : : +- *(14) BroadcastHashJoin [ws_sold_date_sk#56], [d_date_sk#32], Inner, BuildRight - : : :- *(14) Project [ws_sold_date_sk#56, ws_item_sk#53, ws_order_number#52, ws_quantity#46, ws_ext_sales_price#49, i_brand_id#4, i_class_id#5, i_category_id#6, i_manufact_id#7] - : : : +- *(14) BroadcastHashJoin [ws_item_sk#53], [i_item_sk#33], Inner, BuildRight - : : : :- *(14) Project [ws_sold_date_sk#56, ws_item_sk#53, ws_order_number#52, ws_quantity#46, ws_ext_sales_price#49] - : : : : +- *(14) Filter (isnotnull(ws_item_sk#53) && isnotnull(ws_sold_date_sk#56)) - : : : : +- *(14) FileScan parquet default.web_sales[ws_sold_date_sk#56,ws_item_sk#53,ws_order_number#52,ws_quantity#46,ws_ext_sales_price#49] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true], input[4, int, true])) - +- *(33) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], functions=[sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))]) - +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, 5) - +- *(32) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20], functions=[partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))]) - +- *(32) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) - +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22, 5) - +- *(31) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) - +- Union - :- *(26) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) - : +- Exchange hashpartitioning(d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22, 5) - : +- *(25) HashAggregate(keys=[d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, sales_cnt#21, sales_amt#22], functions=[]) - : +- Union - : :- *(20) Project [d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, (cs_quantity#23 - coalesce(cr_return_quantity#24, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#25 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#26, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#22] - : : +- *(20) BroadcastHashJoin [cs_order_number#27, cs_item_sk#28], [cr_order_number#29, cr_item_sk#30], LeftOuter, BuildRight - : : :- *(20) Project [cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, d_year#11] - : : : +- *(20) BroadcastHashJoin [cs_sold_date_sk#31], [d_date_sk#57], Inner, BuildRight - : : : :- *(20) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20] - : : : : +- *(20) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#58], Inner, BuildRight - : : : : :- *(20) Project [cs_sold_date_sk#31, cs_item_sk#28, cs_order_number#27, cs_quantity#23, cs_ext_sales_price#25] - : : : : : +- *(20) Filter (isnotnull(cs_item_sk#28) && isnotnull(cs_sold_date_sk#31)) - : : : : : +- *(20) FileScan parquet default.catalog_sales[cs_sold_date_sk#31,cs_item_sk#28,cs_order_number#27,cs_quantity#23,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [cr_item_sk#30, cr_order_number#29, cr_return_quantity#24, cr_return_amount#26], BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295)))) - : +- *(24) Project [d_year#11, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, (ss_quantity#35 - coalesce(sr_return_quantity#36, 0)) AS sales_cnt#37, CheckOverflow((promote_precision(cast(ss_ext_sales_price#38 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#39, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#40] - : +- *(24) BroadcastHashJoin [cast(ss_ticket_number#41 as bigint), cast(ss_item_sk#42 as bigint)], [sr_ticket_number#43, sr_item_sk#44], LeftOuter, BuildRight - : :- *(24) Project [ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20, d_year#11] - : : +- *(24) BroadcastHashJoin [ss_sold_date_sk#45], [d_date_sk#57], Inner, BuildRight - : : :- *(24) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38, i_brand_id#17, i_class_id#18, i_category_id#19, i_manufact_id#20] - : : : +- *(24) BroadcastHashJoin [ss_item_sk#42], [i_item_sk#58], Inner, BuildRight - : : : :- *(24) Project [ss_sold_date_sk#45, ss_item_sk#42, ss_ticket_number#41, ss_quantity#35, ss_ext_sales_price#38] - : : : : +- *(24) Filter (isnotnull(ss_item_sk#42) && isnotnull(ss_sold_date_sk#45)) - : : : : +- *(24) FileScan parquet default.store_sales[ss_sold_date_sk#45,ss_item_sk#42,ss_ticket_number#41,ss_quantity#35,ss_ext_sales_price#38] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] + +(3) Filter [codegen id : 4] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Books), IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id), IsNotNull(i_manufact_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] +Condition : ((((((isnotnull(i_category#10) AND (i_category#10 = Books)) AND isnotnull(i_item_sk#6)) AND isnotnull(i_brand_id#7)) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) AND isnotnull(i_manufact_id#11)) + +(7) Project [codegen id : 1] +Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [6]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_category#10, i_manufact_id#11] + +(8) BroadcastExchange +Input [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(10) Project [codegen id : 4] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#13, d_year#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2002)) AND isnotnull(d_date_sk#13)) + +(14) BroadcastExchange +Input [2]: [d_date_sk#13, d_year#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(16) Project [codegen id : 4] +Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] + +(17) Scan parquet default.catalog_returns +Output [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] + +(19) Filter [codegen id : 3] +Input [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] +Condition : (isnotnull(cr_order_number#17) AND isnotnull(cr_item_sk#16)) + +(20) BroadcastExchange +Input [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#20] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [2]: [cs_order_number#3, cs_item_sk#2] +Right keys [2]: [cr_order_number#17, cr_item_sk#16] +Join condition: None + +(22) Project [codegen id : 4] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (cs_quantity#4 - coalesce(cr_return_quantity#18, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#19, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#22] +Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] + +(23) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 8] +Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] + +(25) Filter [codegen id : 8] +Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] +Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_date_sk#23)) + +(26) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(27) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_item_sk#24] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(28) Project [codegen id : 8] +Output [9]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(29) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#13, d_year#14] + +(30) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#23] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(31) Project [codegen id : 8] +Output [9]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] + +(32) Scan parquet default.store_returns +Output [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(33) ColumnarToRow [codegen id : 7] +Input [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] + +(34) Filter [codegen id : 7] +Input [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] +Condition : (isnotnull(sr_ticket_number#29) AND isnotnull(sr_item_sk#28)) + +(35) BroadcastExchange +Input [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#32] + +(36) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#24 as bigint)] +Right keys [2]: [sr_ticket_number#29, sr_item_sk#28] +Join condition: None + +(37) Project [codegen id : 8] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#33, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#34] +Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] + +(38) Union + +(39) HashAggregate [codegen id : 9] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] + +(40) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), true, [id=#35] + +(41) HashAggregate [codegen id : 10] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] + +(42) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(43) ColumnarToRow [codegen id : 14] +Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] + +(44) Filter [codegen id : 14] +Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +Condition : (isnotnull(ws_item_sk#37) AND isnotnull(ws_sold_date_sk#36)) + +(45) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(46) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_item_sk#37] +Right keys [1]: [i_item_sk#6] +Join condition: None + +(47) Project [codegen id : 14] +Output [9]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] + +(48) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#13, d_year#14] + +(49) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ws_sold_date_sk#36] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(50) Project [codegen id : 14] +Output [9]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] + +(51) Scan parquet default.web_returns +Output [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 13] +Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] + +(53) Filter [codegen id : 13] +Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +Condition : (isnotnull(wr_order_number#42) AND isnotnull(wr_item_sk#41)) + +(54) BroadcastExchange +Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#45] + +(55) BroadcastHashJoin [codegen id : 14] +Left keys [2]: [cast(ws_order_number#38 as bigint), cast(ws_item_sk#37 as bigint)] +Right keys [2]: [wr_order_number#42, wr_item_sk#41] +Join condition: None + +(56) Project [codegen id : 14] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#39 - coalesce(wr_return_quantity#43, 0)) AS sales_cnt#46, CheckOverflow((promote_precision(cast(ws_ext_sales_price#40 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#44, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#47] +Input [13]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] + +(57) Union + +(58) HashAggregate [codegen id : 15] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] + +(59) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), true, [id=#48] + +(60) HashAggregate [codegen id : 16] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] + +(61) HashAggregate [codegen id : 16] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] +Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))] +Aggregate Attributes [2]: [sum#49, sum#50] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] + +(62) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#53] + +(63) HashAggregate [codegen id : 34] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] +Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Functions [2]: [sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))] +Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#54, sum(UnscaledValue(sales_amt#22))#55] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#21 as bigint))#54 AS sales_cnt#56, MakeDecimal(sum(UnscaledValue(sales_amt#22))#55,18,2) AS sales_amt#57] + +(64) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(65) ColumnarToRow [codegen id : 20] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] + +(66) Filter [codegen id : 20] +Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] +Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) + +(67) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] + +(68) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [cs_item_sk#2] +Right keys [1]: [i_item_sk#58] +Join condition: None + +(69) Project [codegen id : 20] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] + +(70) Scan parquet default.date_dim +Output [2]: [d_date_sk#63, d_year#64] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct + +(71) ColumnarToRow [codegen id : 18] +Input [2]: [d_date_sk#63, d_year#64] + +(72) Filter [codegen id : 18] +Input [2]: [d_date_sk#63, d_year#64] +Condition : ((isnotnull(d_year#64) AND (d_year#64 = 2001)) AND isnotnull(d_date_sk#63)) + +(73) BroadcastExchange +Input [2]: [d_date_sk#63, d_year#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#65] + +(74) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#63] +Join condition: None + +(75) Project [codegen id : 20] +Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] + +(76) ReusedExchange [Reuses operator id: 20] +Output [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] + +(77) BroadcastHashJoin [codegen id : 20] +Left keys [2]: [cs_order_number#3, cs_item_sk#2] +Right keys [2]: [cr_order_number#17, cr_item_sk#16] +Join condition: None + +(78) Project [codegen id : 20] +Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (cs_quantity#4 - coalesce(cr_return_quantity#18, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#19, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#22] +Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] + +(79) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(80) ColumnarToRow [codegen id : 24] +Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] + +(81) Filter [codegen id : 24] +Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] +Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_date_sk#23)) + +(82) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] + +(83) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ss_item_sk#24] +Right keys [1]: [i_item_sk#58] +Join condition: None + +(84) Project [codegen id : 24] +Output [9]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +Input [10]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] + +(85) ReusedExchange [Reuses operator id: 73] +Output [2]: [d_date_sk#63, d_year#64] + +(86) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ss_sold_date_sk#23] +Right keys [1]: [d_date_sk#63] +Join condition: None + +(87) Project [codegen id : 24] +Output [9]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] +Input [11]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] + +(88) ReusedExchange [Reuses operator id: 35] +Output [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] + +(89) BroadcastHashJoin [codegen id : 24] +Left keys [2]: [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#24 as bigint)] +Right keys [2]: [sr_ticket_number#29, sr_item_sk#28] +Join condition: None + +(90) Project [codegen id : 24] +Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#66, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#67] +Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] + +(91) Union + +(92) HashAggregate [codegen id : 25] +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] + +(93) Exchange +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22, 5), true, [id=#68] + +(94) HashAggregate [codegen id : 26] +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] + +(95) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(96) ColumnarToRow [codegen id : 30] +Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] + +(97) Filter [codegen id : 30] +Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +Condition : (isnotnull(ws_item_sk#37) AND isnotnull(ws_sold_date_sk#36)) + +(98) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] + +(99) BroadcastHashJoin [codegen id : 30] +Left keys [1]: [ws_item_sk#37] +Right keys [1]: [i_item_sk#58] +Join condition: None + +(100) Project [codegen id : 30] +Output [9]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +Input [10]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] + +(101) ReusedExchange [Reuses operator id: 73] +Output [2]: [d_date_sk#63, d_year#64] + +(102) BroadcastHashJoin [codegen id : 30] +Left keys [1]: [ws_sold_date_sk#36] +Right keys [1]: [d_date_sk#63] +Join condition: None + +(103) Project [codegen id : 30] +Output [9]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] +Input [11]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] + +(104) ReusedExchange [Reuses operator id: 54] +Output [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] + +(105) BroadcastHashJoin [codegen id : 30] +Left keys [2]: [cast(ws_order_number#38 as bigint), cast(ws_item_sk#37 as bigint)] +Right keys [2]: [wr_order_number#42, wr_item_sk#41] +Join condition: None + +(106) Project [codegen id : 30] +Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (ws_quantity#39 - coalesce(wr_return_quantity#43, 0)) AS sales_cnt#69, CheckOverflow((promote_precision(cast(ws_ext_sales_price#40 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#44, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#70] +Input [13]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] + +(107) Union + +(108) HashAggregate [codegen id : 31] +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] + +(109) Exchange +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22, 5), true, [id=#71] + +(110) HashAggregate [codegen id : 32] +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Functions: [] +Aggregate Attributes: [] +Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] + +(111) HashAggregate [codegen id : 32] +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Keys [5]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +Functions [2]: [partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))] +Aggregate Attributes [2]: [sum#72, sum#73] +Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] + +(112) Exchange +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] +Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, 5), true, [id=#76] + +(113) HashAggregate [codegen id : 33] +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] +Keys [5]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +Functions [2]: [sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))] +Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#77, sum(UnscaledValue(sales_amt#22))#78] +Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum(cast(sales_cnt#21 as bigint))#77 AS sales_cnt#79, MakeDecimal(sum(UnscaledValue(sales_amt#22))#78,18,2) AS sales_amt#80] + +(114) BroadcastExchange +Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#79, sales_amt#80] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true], input[4, int, true]),false), [id=#81] + +(115) BroadcastHashJoin [codegen id : 34] +Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Right keys [4]: [i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#56 as decimal(17,2))) / promote_precision(cast(sales_cnt#79 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) + +(116) Project [codegen id : 34] +Output [10]: [d_year#64 AS prev_year#82, d_year#14 AS year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#79 AS prev_yr_cnt#84, sales_cnt#56 AS curr_yr_cnt#85, (sales_cnt#56 - sales_cnt#79) AS sales_cnt_diff#86, CheckOverflow((promote_precision(cast(sales_amt#57 as decimal(19,2))) - promote_precision(cast(sales_amt#80 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#87] +Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#56, sales_amt#57, d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#79, sales_amt#80] + +(117) TakeOrderedAndProject +Input [10]: [prev_year#82, year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#84, curr_yr_cnt#85, sales_cnt_diff#86, sales_amt_diff#87] +Arguments: 100, [sales_cnt_diff#86 ASC NULLS FIRST], [prev_year#82, year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#84, curr_yr_cnt#85, sales_cnt_diff#86, sales_amt_diff#87] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/simplified.txt index 8ea410541..298a06b87 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q75/simplified.txt @@ -1,167 +1,180 @@ -TakeOrderedAndProject [curr_yr_cnt,i_brand_id,i_category_id,i_class_id,i_manufact_id,prev_year,prev_yr_cnt,sales_amt_diff,sales_cnt_diff,year] - WholeStageCodegen - Project [d_year,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_amt,sales_cnt,sales_cnt] - BroadcastHashJoin [i_brand_id,i_brand_id,i_category_id,i_category_id,i_class_id,i_class_id,i_manufact_id,i_manufact_id,sales_cnt,sales_cnt] - HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sum,sum,sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint))] [sales_amt,sales_cnt,sum,sum,sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint))] +TakeOrderedAndProject [sales_cnt_diff,prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt,sales_amt_diff] + WholeStageCodegen (34) + Project [d_year,d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt,sales_amt,sales_amt] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_manufact_id,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(cast(sales_cnt as bigint)),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] InputAdapter - Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] #1 - WholeStageCodegen - HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt,sum,sum,sum,sum] [sum,sum,sum,sum] - HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #1 + WholeStageCodegen (16) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter - Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #2 - WholeStageCodegen - HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #2 + WholeStageCodegen (15) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union - WholeStageCodegen - HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + WholeStageCodegen (10) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter - Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #3 - WholeStageCodegen - HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #3 + WholeStageCodegen (9) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union - WholeStageCodegen - Project [cr_return_amount,cr_return_quantity,cs_ext_sales_price,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] - BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] - Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + WholeStageCodegen (4) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk,i_brand_id,i_category_id,i_class_id,i_manufact_id] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] - Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] - Filter [i_brand_id,i_category,i_category_id,i_class_id,i_item_sk,i_manufact_id] - Scan parquet default.item [i_brand_id,i_category,i_category_id,i_class_id,i_item_sk,i_manufact_id] [i_brand_id,i_category,i_category_id,i_class_id,i_item_sk,i_manufact_id] + WholeStageCodegen (1) + Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [d_date_sk,d_year] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #6 - WholeStageCodegen - Project [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] - Filter [cr_item_sk,cr_order_number] - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] - WholeStageCodegen - Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sr_return_amt,sr_return_quantity,ss_ext_sales_price,ss_quantity] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_ticket_number] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + WholeStageCodegen (3) + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + WholeStageCodegen (8) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] InputAdapter - ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #5 + ReusedExchange [d_date_sk,d_year] #5 InputAdapter BroadcastExchange #7 - WholeStageCodegen - Project [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] - Filter [sr_item_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] - WholeStageCodegen - Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,wr_return_amt,wr_return_quantity,ws_ext_sales_price,ws_quantity] - BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + WholeStageCodegen (7) + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + WholeStageCodegen (14) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] + BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price] InputAdapter - ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #5 + ReusedExchange [d_date_sk,d_year] #5 InputAdapter BroadcastExchange #8 - WholeStageCodegen - Project [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] - Filter [wr_item_sk,wr_order_number] - Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] + WholeStageCodegen (13) + Filter [wr_order_number,wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] InputAdapter BroadcastExchange #9 - WholeStageCodegen - HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sum,sum,sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint))] [sales_amt,sales_cnt,sum,sum,sum(UnscaledValue(sales_amt)),sum(cast(sales_cnt as bigint))] + WholeStageCodegen (33) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(cast(sales_cnt as bigint)),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] InputAdapter - Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] #10 - WholeStageCodegen - HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt,sum,sum,sum,sum] [sum,sum,sum,sum] - HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #10 + WholeStageCodegen (32) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter - Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #11 - WholeStageCodegen - HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #11 + WholeStageCodegen (31) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union - WholeStageCodegen - HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + WholeStageCodegen (26) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter - Exchange [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] #12 - WholeStageCodegen - HashAggregate [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sales_amt,sales_cnt] + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #12 + WholeStageCodegen (25) + HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union - WholeStageCodegen - Project [cr_return_amount,cr_return_quantity,cs_ext_sales_price,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] - BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] - Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id] + WholeStageCodegen (20) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk,i_brand_id,i_category_id,i_class_id,i_manufact_id] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] - Filter [cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] [cs_ext_sales_price,cs_item_sk,cs_order_number,cs_quantity,cs_sold_date_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] InputAdapter - ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 InputAdapter BroadcastExchange #13 - WholeStageCodegen - Project [d_date_sk,d_year] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen (18) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter - ReusedExchange [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] [cr_item_sk,cr_order_number,cr_return_amount,cr_return_quantity] #6 - WholeStageCodegen - Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,sr_return_amt,sr_return_quantity,ss_ext_sales_price,ss_quantity] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_ticket_number] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] [ss_ext_sales_price,ss_item_sk,ss_quantity,ss_sold_date_sk,ss_ticket_number] + ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #6 + WholeStageCodegen (24) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] InputAdapter - ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #13 + ReusedExchange [d_date_sk,d_year] #13 InputAdapter - ReusedExchange [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] [sr_item_sk,sr_return_amt,sr_return_quantity,sr_ticket_number] #7 - WholeStageCodegen - Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,wr_return_amt,wr_return_quantity,ws_ext_sales_price,ws_quantity] - BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Project [d_year,i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [i_brand_id,i_category_id,i_class_id,i_manufact_id,ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] [ws_ext_sales_price,ws_item_sk,ws_order_number,ws_quantity,ws_sold_date_sk] + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #7 + WholeStageCodegen (30) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] + BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price] InputAdapter - ReusedExchange [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] [i_brand_id,i_category_id,i_class_id,i_item_sk,i_manufact_id] #4 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #13 + ReusedExchange [d_date_sk,d_year] #13 InputAdapter - ReusedExchange [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] [wr_item_sk,wr_order_number,wr_return_amt,wr_return_quantity] #8 + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #8 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/explain.txt index 69e96dd02..917e2b028 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/explain.txt @@ -1,39 +1,209 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,col_name#2 ASC NULLS FIRST,d_year#3 ASC NULLS FIRST,d_qoy#4 ASC NULLS FIRST,i_category#5 ASC NULLS FIRST], output=[channel#1,col_name#2,d_year#3,d_qoy#4,i_category#5,sales_cnt#6,sales_amt#7]) -+- *(11) HashAggregate(keys=[channel#1, col_name#2, d_year#3, d_qoy#4, i_category#5], functions=[count(1), sum(UnscaledValue(ext_sales_price#8))]) - +- Exchange hashpartitioning(channel#1, col_name#2, d_year#3, d_qoy#4, i_category#5, 5) - +- *(10) HashAggregate(keys=[channel#1, col_name#2, d_year#3, d_qoy#4, i_category#5], functions=[partial_count(1), partial_sum(UnscaledValue(ext_sales_price#8))]) - +- Union - :- *(3) Project [store AS channel#1, ss_store_sk#9 AS col_name#2, d_year#3, d_qoy#4, i_category#5, ss_ext_sales_price#10 AS ext_sales_price#8] - : +- *(3) BroadcastHashJoin [ss_sold_date_sk#11], [d_date_sk#12], Inner, BuildRight - : :- *(3) Project [ss_sold_date_sk#11, ss_store_sk#9, ss_ext_sales_price#10, i_category#5] - : : +- *(3) BroadcastHashJoin [ss_item_sk#13], [i_item_sk#14], Inner, BuildRight - : : :- *(3) Project [ss_sold_date_sk#11, ss_item_sk#13, ss_store_sk#9, ss_ext_sales_price#10] - : : : +- *(3) Filter ((isnull(ss_store_sk#9) && isnotnull(ss_item_sk#13)) && isnotnull(ss_sold_date_sk#11)) - : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#11,ss_item_sk#13,ss_store_sk#9,ss_ext_sales_price#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [i_item_sk#14, i_category#5] - : : +- *(1) Filter isnotnull(i_item_sk#14) - : : +- *(1) FileScan parquet default.item[i_item_sk#14,i_category#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#12, d_year#3, d_qoy#4] - : +- *(2) Filter isnotnull(d_date_sk#12) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#12,d_year#3,d_qoy#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct - :- *(6) Project [web AS channel#15, ws_ship_customer_sk#16 AS col_name#17, d_year#3, d_qoy#4, i_category#5, ws_ext_sales_price#18 AS ext_sales_price#19] - : +- *(6) BroadcastHashJoin [ws_sold_date_sk#20], [d_date_sk#12], Inner, BuildRight - : :- *(6) Project [ws_sold_date_sk#20, ws_ship_customer_sk#16, ws_ext_sales_price#18, i_category#5] - : : +- *(6) BroadcastHashJoin [ws_item_sk#21], [i_item_sk#14], Inner, BuildRight - : : :- *(6) Project [ws_sold_date_sk#20, ws_item_sk#21, ws_ship_customer_sk#16, ws_ext_sales_price#18] - : : : +- *(6) Filter ((isnull(ws_ship_customer_sk#16) && isnotnull(ws_item_sk#21)) && isnotnull(ws_sold_date_sk#20)) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#20,ws_item_sk#21,ws_ship_customer_sk#16,ws_ext_sales_price#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNull(ws_ship_customer_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : +- ReusedExchange [i_item_sk#14, i_category#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [d_date_sk#12, d_year#3, d_qoy#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(9) Project [catalog AS channel#22, cs_ship_addr_sk#23 AS col_name#24, d_year#3, d_qoy#4, i_category#5, cs_ext_sales_price#25 AS ext_sales_price#26] - +- *(9) BroadcastHashJoin [cs_sold_date_sk#27], [d_date_sk#12], Inner, BuildRight - :- *(9) Project [cs_sold_date_sk#27, cs_ship_addr_sk#23, cs_ext_sales_price#25, i_category#5] - : +- *(9) BroadcastHashJoin [cs_item_sk#28], [i_item_sk#14], Inner, BuildRight - : :- *(9) Project [cs_sold_date_sk#27, cs_ship_addr_sk#23, cs_item_sk#28, cs_ext_sales_price#25] - : : +- *(9) Filter ((isnull(cs_ship_addr_sk#23) && isnotnull(cs_item_sk#28)) && isnotnull(cs_sold_date_sk#27)) - : : +- *(9) FileScan parquet default.catalog_sales[cs_sold_date_sk#27,cs_ship_addr_sk#23,cs_item_sk#28,cs_ext_sales_price#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNull(cs_ship_addr_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : +- ReusedExchange [i_item_sk#14, i_category#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [d_date_sk#12, d_year#3, d_qoy#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +TakeOrderedAndProject (38) ++- * HashAggregate (37) + +- Exchange (36) + +- * HashAggregate (35) + +- Union (34) + :- * Project (15) + : +- * BroadcastHashJoin Inner BuildRight (14) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.item (4) + : +- BroadcastExchange (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet default.date_dim (10) + :- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (21) + : : +- * BroadcastHashJoin Inner BuildRight (20) + : : :- * Filter (18) + : : : +- * ColumnarToRow (17) + : : : +- Scan parquet default.web_sales (16) + : : +- ReusedExchange (19) + : +- ReusedExchange (22) + +- * Project (33) + +- * BroadcastHashJoin Inner BuildRight (32) + :- * Project (30) + : +- * BroadcastHashJoin Inner BuildRight (29) + : :- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet default.catalog_sales (25) + : +- ReusedExchange (28) + +- ReusedExchange (31) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4] + +(3) Filter [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4] +Condition : ((isnull(ss_store_sk#3) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [2]: [i_item_sk#5, i_category#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#5, i_category#6] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#5, i_category#6] +Condition : isnotnull(i_item_sk#5) + +(7) BroadcastExchange +Input [2]: [i_item_sk#5, i_category#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(9) Project [codegen id : 3] +Output [4]: [ss_sold_date_sk#1, ss_store_sk#3, ss_ext_sales_price#4, i_category#6] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_ext_sales_price#4, i_item_sk#5, i_category#6] + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Condition : isnotnull(d_date_sk#8) + +(13) BroadcastExchange +Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(15) Project [codegen id : 3] +Output [6]: [store AS channel#12, ss_store_sk#3 AS col_name#13, d_year#9, d_qoy#10, i_category#6, ss_ext_sales_price#4 AS ext_sales_price#14] +Input [7]: [ss_sold_date_sk#1, ss_store_sk#3, ss_ext_sales_price#4, i_category#6, d_date_sk#8, d_year#9, d_qoy#10] + +(16) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#15, ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNull(ws_ship_customer_sk), IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 6] +Input [4]: [ws_sold_date_sk#15, ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18] + +(18) Filter [codegen id : 6] +Input [4]: [ws_sold_date_sk#15, ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18] +Condition : ((isnull(ws_ship_customer_sk#17) AND isnotnull(ws_item_sk#16)) AND isnotnull(ws_sold_date_sk#15)) + +(19) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#5, i_category#6] + +(20) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_item_sk#16] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(21) Project [codegen id : 6] +Output [4]: [ws_sold_date_sk#15, ws_ship_customer_sk#17, ws_ext_sales_price#18, i_category#6] +Input [6]: [ws_sold_date_sk#15, ws_item_sk#16, ws_ship_customer_sk#17, ws_ext_sales_price#18, i_item_sk#5, i_category#6] + +(22) ReusedExchange [Reuses operator id: 13] +Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(23) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#15] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(24) Project [codegen id : 6] +Output [6]: [web AS channel#19, ws_ship_customer_sk#17 AS col_name#20, d_year#9, d_qoy#10, i_category#6, ws_ext_sales_price#18 AS ext_sales_price#21] +Input [7]: [ws_sold_date_sk#15, ws_ship_customer_sk#17, ws_ext_sales_price#18, i_category#6, d_date_sk#8, d_year#9, d_qoy#10] + +(25) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#22, cs_ship_addr_sk#23, cs_item_sk#24, cs_ext_sales_price#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNull(cs_ship_addr_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(26) ColumnarToRow [codegen id : 9] +Input [4]: [cs_sold_date_sk#22, cs_ship_addr_sk#23, cs_item_sk#24, cs_ext_sales_price#25] + +(27) Filter [codegen id : 9] +Input [4]: [cs_sold_date_sk#22, cs_ship_addr_sk#23, cs_item_sk#24, cs_ext_sales_price#25] +Condition : ((isnull(cs_ship_addr_sk#23) AND isnotnull(cs_item_sk#24)) AND isnotnull(cs_sold_date_sk#22)) + +(28) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#5, i_category#6] + +(29) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_item_sk#24] +Right keys [1]: [i_item_sk#5] +Join condition: None + +(30) Project [codegen id : 9] +Output [4]: [cs_sold_date_sk#22, cs_ship_addr_sk#23, cs_ext_sales_price#25, i_category#6] +Input [6]: [cs_sold_date_sk#22, cs_ship_addr_sk#23, cs_item_sk#24, cs_ext_sales_price#25, i_item_sk#5, i_category#6] + +(31) ReusedExchange [Reuses operator id: 13] +Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cs_sold_date_sk#22] +Right keys [1]: [d_date_sk#8] +Join condition: None + +(33) Project [codegen id : 9] +Output [6]: [catalog AS channel#26, cs_ship_addr_sk#23 AS col_name#27, d_year#9, d_qoy#10, i_category#6, cs_ext_sales_price#25 AS ext_sales_price#28] +Input [7]: [cs_sold_date_sk#22, cs_ship_addr_sk#23, cs_ext_sales_price#25, i_category#6, d_date_sk#8, d_year#9, d_qoy#10] + +(34) Union + +(35) HashAggregate [codegen id : 10] +Input [6]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, ext_sales_price#14] +Keys [5]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6] +Functions [2]: [partial_count(1), partial_sum(UnscaledValue(ext_sales_price#14))] +Aggregate Attributes [2]: [count#29, sum#30] +Results [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, count#31, sum#32] + +(36) Exchange +Input [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, count#31, sum#32] +Arguments: hashpartitioning(channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, 5), true, [id=#33] + +(37) HashAggregate [codegen id : 11] +Input [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, count#31, sum#32] +Keys [5]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6] +Functions [2]: [count(1), sum(UnscaledValue(ext_sales_price#14))] +Aggregate Attributes [2]: [count(1)#34, sum(UnscaledValue(ext_sales_price#14))#35] +Results [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, count(1)#34 AS sales_cnt#36, MakeDecimal(sum(UnscaledValue(ext_sales_price#14))#35,17,2) AS sales_amt#37] + +(38) TakeOrderedAndProject +Input [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, sales_cnt#36, sales_amt#37] +Arguments: 100, [channel#12 ASC NULLS FIRST, col_name#13 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_qoy#10 ASC NULLS FIRST, i_category#6 ASC NULLS FIRST], [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, sales_cnt#36, sales_amt#37] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/simplified.txt index 5b7872ad7..f01916baa 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q76/simplified.txt @@ -1,53 +1,58 @@ -TakeOrderedAndProject [channel,col_name,d_qoy,d_year,i_category,sales_amt,sales_cnt] - WholeStageCodegen - HashAggregate [channel,col_name,count,count(1),d_qoy,d_year,i_category,sum,sum(UnscaledValue(ext_sales_price))] [count,count(1),sales_amt,sales_cnt,sum,sum(UnscaledValue(ext_sales_price))] +TakeOrderedAndProject [channel,col_name,d_year,d_qoy,i_category,sales_cnt,sales_amt] + WholeStageCodegen (11) + HashAggregate [channel,col_name,d_year,d_qoy,i_category,count,sum] [count(1),sum(UnscaledValue(ext_sales_price)),sales_cnt,sales_amt,count,sum] InputAdapter - Exchange [channel,col_name,d_qoy,d_year,i_category] #1 - WholeStageCodegen - HashAggregate [channel,col_name,count,count,d_qoy,d_year,ext_sales_price,i_category,sum,sum] [count,count,sum,sum] + Exchange [channel,col_name,d_year,d_qoy,i_category] #1 + WholeStageCodegen (10) + HashAggregate [channel,col_name,d_year,d_qoy,i_category,ext_sales_price] [count,sum,count,sum] InputAdapter Union - WholeStageCodegen - Project [d_qoy,d_year,i_category,ss_ext_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_category,ss_ext_sales_price,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk,ss_store_sk] + WholeStageCodegen (3) + Project [ss_store_sk,d_year,d_qoy,i_category,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_store_sk,ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [i_category,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_category,i_item_sk] [i_category,i_item_sk] + WholeStageCodegen (1) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_category] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk,d_qoy,d_year] - Filter [d_date_sk] - Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] - WholeStageCodegen - Project [d_qoy,d_year,i_category,ws_ext_sales_price,ws_ship_customer_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [i_category,ws_ext_sales_price,ws_ship_customer_sk,ws_sold_date_sk] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_ext_sales_price,ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] - Filter [ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] [ws_ext_sales_price,ws_item_sk,ws_ship_customer_sk,ws_sold_date_sk] + WholeStageCodegen (2) + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] + WholeStageCodegen (6) + Project [ws_ship_customer_sk,d_year,d_qoy,i_category,ws_ext_sales_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_ship_customer_sk,ws_ext_sales_price,i_category] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_ship_customer_sk,ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ship_customer_sk,ws_ext_sales_price] InputAdapter - ReusedExchange [i_category,i_item_sk] [i_category,i_item_sk] #2 + ReusedExchange [i_item_sk,i_category] #2 InputAdapter - ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #3 - WholeStageCodegen - Project [cs_ext_sales_price,cs_ship_addr_sk,d_qoy,d_year,i_category] + ReusedExchange [d_date_sk,d_year,d_qoy] #3 + WholeStageCodegen (9) + Project [cs_ship_addr_sk,d_year,d_qoy,i_category,cs_ext_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_ext_sales_price,cs_ship_addr_sk,cs_sold_date_sk,i_category] + Project [cs_sold_date_sk,cs_ship_addr_sk,cs_ext_sales_price,i_category] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_ext_sales_price,cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] - Filter [cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_ext_sales_price,cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] [cs_ext_sales_price,cs_item_sk,cs_ship_addr_sk,cs_sold_date_sk] + Filter [cs_ship_addr_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_addr_sk,cs_item_sk,cs_ext_sales_price] InputAdapter - ReusedExchange [i_category,i_item_sk] [i_category,i_item_sk] #2 + ReusedExchange [i_item_sk,i_category] #2 InputAdapter - ReusedExchange [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] #3 + ReusedExchange [d_date_sk,d_year,d_qoy] #3 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/explain.txt index 1fa673f3a..c232055ba 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/explain.txt @@ -1,100 +1,520 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NULLS FIRST], output=[channel#1,id#2,sales#3,returns#4,profit#5]) -+- *(25) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[sum(sales#7), sum(returns#8), sum(profit#9)]) - +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 5) - +- *(24) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[partial_sum(sales#7), partial_sum(returns#8), partial_sum(profit#9)]) - +- *(24) Expand [List(sales#7, returns#8, profit#9, channel#10, id#11, 0), List(sales#7, returns#8, profit#9, channel#10, null, 1), List(sales#7, returns#8, profit#9, null, null, 3)], [sales#7, returns#8, profit#9, channel#1, id#2, spark_grouping_id#6] - +- Union - :- *(8) Project [sales#7, coalesce(returns#12, 0.00) AS returns#8, CheckOverflow((promote_precision(cast(profit#13 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#14, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#9, store channel AS channel#10, s_store_sk#15 AS id#11] - : +- *(8) BroadcastHashJoin [s_store_sk#15], [s_store_sk#16], LeftOuter, BuildRight - : :- *(8) HashAggregate(keys=[s_store_sk#15], functions=[sum(UnscaledValue(ss_ext_sales_price#17)), sum(UnscaledValue(ss_net_profit#18))]) - : : +- Exchange hashpartitioning(s_store_sk#15, 5) - : : +- *(3) HashAggregate(keys=[s_store_sk#15], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#17)), partial_sum(UnscaledValue(ss_net_profit#18))]) - : : +- *(3) Project [ss_ext_sales_price#17, ss_net_profit#18, s_store_sk#15] - : : +- *(3) BroadcastHashJoin [ss_store_sk#19], [s_store_sk#15], Inner, BuildRight - : : :- *(3) Project [ss_store_sk#19, ss_ext_sales_price#17, ss_net_profit#18] - : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#20], [d_date_sk#21], Inner, BuildRight - : : : :- *(3) Project [ss_sold_date_sk#20, ss_store_sk#19, ss_ext_sales_price#17, ss_net_profit#18] - : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#20) && isnotnull(ss_store_sk#19)) - : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#20,ss_store_sk#19,ss_ext_sales_price#17,ss_net_profit#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct= 11172)) && (d_date#22 <= 11202)) && isnotnull(d_date_sk#21)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#21,d_date#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), Is..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [s_store_sk#15] - : : +- *(2) Filter isnotnull(s_store_sk#15) - : : +- *(2) FileScan parquet default.store[s_store_sk#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(7) HashAggregate(keys=[s_store_sk#16], functions=[sum(UnscaledValue(sr_return_amt#23)), sum(UnscaledValue(sr_net_loss#24))]) - : +- Exchange hashpartitioning(s_store_sk#16, 5) - : +- *(6) HashAggregate(keys=[s_store_sk#16], functions=[partial_sum(UnscaledValue(sr_return_amt#23)), partial_sum(UnscaledValue(sr_net_loss#24))]) - : +- *(6) Project [sr_return_amt#23, sr_net_loss#24, s_store_sk#16] - : +- *(6) BroadcastHashJoin [sr_store_sk#25], [cast(s_store_sk#16 as bigint)], Inner, BuildRight - : :- *(6) Project [sr_store_sk#25, sr_return_amt#23, sr_net_loss#24] - : : +- *(6) BroadcastHashJoin [sr_returned_date_sk#26], [cast(d_date_sk#21 as bigint)], Inner, BuildRight - : : :- *(6) Project [sr_returned_date_sk#26, sr_store_sk#25, sr_return_amt#23, sr_net_loss#24] - : : : +- *(6) Filter (isnotnull(sr_returned_date_sk#26) && isnotnull(sr_store_sk#25)) - : : : +- *(6) FileScan parquet default.store_returns[sr_returned_date_sk#26,sr_store_sk#25,sr_return_amt#23,sr_net_loss#24] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)], ReadSchema: struct= 11172)) && (d_date#22 <= 11202)) && isnotnull(d_date_sk#21)) - : : +- *(4) FileScan parquet default.date_dim[d_date_sk#21,d_date#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), Is..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(5) Project [s_store_sk#16] - : +- *(5) Filter isnotnull(s_store_sk#16) - : +- *(5) FileScan parquet default.store[s_store_sk#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - :- *(15) Project [sales#27, returns#28, CheckOverflow((promote_precision(cast(profit#29 as decimal(18,2))) - promote_precision(cast(profit_loss#30 as decimal(18,2)))), DecimalType(18,2)) AS profit#31, catalog channel AS channel#32, cs_call_center_sk#33 AS id#34] - : +- BroadcastNestedLoopJoin BuildLeft, Inner - : :- BroadcastExchange IdentityBroadcastMode - : : +- *(11) HashAggregate(keys=[cs_call_center_sk#33], functions=[sum(UnscaledValue(cs_ext_sales_price#35)), sum(UnscaledValue(cs_net_profit#36))]) - : : +- Exchange hashpartitioning(cs_call_center_sk#33, 5) - : : +- *(10) HashAggregate(keys=[cs_call_center_sk#33], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#35)), partial_sum(UnscaledValue(cs_net_profit#36))]) - : : +- *(10) Project [cs_call_center_sk#33, cs_ext_sales_price#35, cs_net_profit#36] - : : +- *(10) BroadcastHashJoin [cs_sold_date_sk#37], [d_date_sk#21], Inner, BuildRight - : : :- *(10) Project [cs_sold_date_sk#37, cs_call_center_sk#33, cs_ext_sales_price#35, cs_net_profit#36] - : : : +- *(10) Filter isnotnull(cs_sold_date_sk#37) - : : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#37,cs_call_center_sk#33,cs_ext_sales_price#35,cs_net_profit#36] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#21], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(23) Project [sales#41, coalesce(returns#42, 0.00) AS returns#43, CheckOverflow((promote_precision(cast(profit#44 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#45, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#46, web channel AS channel#47, wp_web_page_sk#48 AS id#49] - +- *(23) BroadcastHashJoin [wp_web_page_sk#48], [wp_web_page_sk#50], LeftOuter, BuildRight - :- *(23) HashAggregate(keys=[wp_web_page_sk#48], functions=[sum(UnscaledValue(ws_ext_sales_price#51)), sum(UnscaledValue(ws_net_profit#52))]) - : +- Exchange hashpartitioning(wp_web_page_sk#48, 5) - : +- *(18) HashAggregate(keys=[wp_web_page_sk#48], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#51)), partial_sum(UnscaledValue(ws_net_profit#52))]) - : +- *(18) Project [ws_ext_sales_price#51, ws_net_profit#52, wp_web_page_sk#48] - : +- *(18) BroadcastHashJoin [ws_web_page_sk#53], [wp_web_page_sk#48], Inner, BuildRight - : :- *(18) Project [ws_web_page_sk#53, ws_ext_sales_price#51, ws_net_profit#52] - : : +- *(18) BroadcastHashJoin [ws_sold_date_sk#54], [d_date_sk#21], Inner, BuildRight - : : :- *(18) Project [ws_sold_date_sk#54, ws_web_page_sk#53, ws_ext_sales_price#51, ws_net_profit#52] - : : : +- *(18) Filter (isnotnull(ws_sold_date_sk#54) && isnotnull(ws_web_page_sk#53)) - : : : +- *(18) FileScan parquet default.web_sales[ws_sold_date_sk#54,ws_web_page_sk#53,ws_ext_sales_price#51,ws_net_profit#52] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(22) HashAggregate(keys=[wp_web_page_sk#50], functions=[sum(UnscaledValue(wr_return_amt#55)), sum(UnscaledValue(wr_net_loss#56))]) - +- Exchange hashpartitioning(wp_web_page_sk#50, 5) - +- *(21) HashAggregate(keys=[wp_web_page_sk#50], functions=[partial_sum(UnscaledValue(wr_return_amt#55)), partial_sum(UnscaledValue(wr_net_loss#56))]) - +- *(21) Project [wr_return_amt#55, wr_net_loss#56, wp_web_page_sk#50] - +- *(21) BroadcastHashJoin [wr_web_page_sk#57], [cast(wp_web_page_sk#50 as bigint)], Inner, BuildRight - :- *(21) Project [wr_web_page_sk#57, wr_return_amt#55, wr_net_loss#56] - : +- *(21) BroadcastHashJoin [wr_returned_date_sk#58], [cast(d_date_sk#21 as bigint)], Inner, BuildRight - : :- *(21) Project [wr_returned_date_sk#58, wr_web_page_sk#57, wr_return_amt#55, wr_net_loss#56] - : : +- *(21) Filter (isnotnull(wr_returned_date_sk#58) && isnotnull(wr_web_page_sk#57)) - : : +- *(21) FileScan parquet default.web_returns[wr_returned_date_sk#58,wr_web_page_sk#57,wr_return_amt#55,wr_net_loss#56] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_web_page_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (91) ++- * HashAggregate (90) + +- Exchange (89) + +- * HashAggregate (88) + +- * Expand (87) + +- Union (86) + :- * Project (34) + : +- * BroadcastHashJoin LeftOuter BuildRight (33) + : :- * HashAggregate (19) + : : +- Exchange (18) + : : +- * HashAggregate (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (32) + : +- * HashAggregate (31) + : +- Exchange (30) + : +- * HashAggregate (29) + : +- * Project (28) + : +- * BroadcastHashJoin Inner BuildRight (27) + : :- * Project (25) + : : +- * BroadcastHashJoin Inner BuildRight (24) + : : :- * Filter (22) + : : : +- * ColumnarToRow (21) + : : : +- Scan parquet default.store_returns (20) + : : +- ReusedExchange (23) + : +- ReusedExchange (26) + :- * Project (55) + : +- BroadcastNestedLoopJoin Inner BuildLeft (54) + : :- BroadcastExchange (44) + : : +- * HashAggregate (43) + : : +- Exchange (42) + : : +- * HashAggregate (41) + : : +- * Project (40) + : : +- * BroadcastHashJoin Inner BuildRight (39) + : : :- * Filter (37) + : : : +- * ColumnarToRow (36) + : : : +- Scan parquet default.catalog_sales (35) + : : +- ReusedExchange (38) + : +- * HashAggregate (53) + : +- Exchange (52) + : +- * HashAggregate (51) + : +- * Project (50) + : +- * BroadcastHashJoin Inner BuildRight (49) + : :- * Filter (47) + : : +- * ColumnarToRow (46) + : : +- Scan parquet default.catalog_returns (45) + : +- ReusedExchange (48) + +- * Project (85) + +- * BroadcastHashJoin LeftOuter BuildRight (84) + :- * HashAggregate (70) + : +- Exchange (69) + : +- * HashAggregate (68) + : +- * Project (67) + : +- * BroadcastHashJoin Inner BuildRight (66) + : :- * Project (61) + : : +- * BroadcastHashJoin Inner BuildRight (60) + : : :- * Filter (58) + : : : +- * ColumnarToRow (57) + : : : +- Scan parquet default.web_sales (56) + : : +- ReusedExchange (59) + : +- BroadcastExchange (65) + : +- * Filter (64) + : +- * ColumnarToRow (63) + : +- Scan parquet default.web_page (62) + +- BroadcastExchange (83) + +- * HashAggregate (82) + +- Exchange (81) + +- * HashAggregate (80) + +- * Project (79) + +- * BroadcastHashJoin Inner BuildRight (78) + :- * Project (76) + : +- * BroadcastHashJoin Inner BuildRight (75) + : :- * Filter (73) + : : +- * ColumnarToRow (72) + : : +- Scan parquet default.web_returns (71) + : +- ReusedExchange (74) + +- ReusedExchange (77) + + +(1) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] + +(3) Filter [codegen id : 3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_date#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_date#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_date#6] +Condition : (((isnotnull(d_date#6) AND (d_date#6 >= 11172)) AND (d_date#6 <= 11202)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_date#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4] +Input [5]: [ss_sold_date_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, d_date_sk#5] + +(11) Scan parquet default.store +Output [1]: [s_store_sk#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [s_store_sk#8] + +(13) Filter [codegen id : 2] +Input [1]: [s_store_sk#8] +Condition : isnotnull(s_store_sk#8) + +(14) BroadcastExchange +Input [1]: [s_store_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] +Input [4]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] + +(17) HashAggregate [codegen id : 3] +Input [3]: [ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] +Keys [1]: [s_store_sk#8] +Functions [2]: [partial_sum(UnscaledValue(ss_ext_sales_price#3)), partial_sum(UnscaledValue(ss_net_profit#4))] +Aggregate Attributes [2]: [sum#10, sum#11] +Results [3]: [s_store_sk#8, sum#12, sum#13] + +(18) Exchange +Input [3]: [s_store_sk#8, sum#12, sum#13] +Arguments: hashpartitioning(s_store_sk#8, 5), true, [id=#14] + +(19) HashAggregate [codegen id : 8] +Input [3]: [s_store_sk#8, sum#12, sum#13] +Keys [1]: [s_store_sk#8] +Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#3)), sum(UnscaledValue(ss_net_profit#4))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#3))#15, sum(UnscaledValue(ss_net_profit#4))#16] +Results [3]: [s_store_sk#8, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#15,17,2) AS sales#17, MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#16,17,2) AS profit#18] + +(20) Scan parquet default.store_returns +Output [4]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_returned_date_sk), IsNotNull(sr_store_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [4]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] + +(22) Filter [codegen id : 6] +Input [4]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] +Condition : (isnotnull(sr_returned_date_sk#19) AND isnotnull(sr_store_sk#20)) + +(23) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(24) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_returned_date_sk#19] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(25) Project [codegen id : 6] +Output [3]: [sr_store_sk#20, sr_return_amt#21, sr_net_loss#22] +Input [5]: [sr_returned_date_sk#19, sr_store_sk#20, sr_return_amt#21, sr_net_loss#22, d_date_sk#5] + +(26) ReusedExchange [Reuses operator id: 14] +Output [1]: [s_store_sk#23] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [sr_store_sk#20] +Right keys [1]: [cast(s_store_sk#23 as bigint)] +Join condition: None + +(28) Project [codegen id : 6] +Output [3]: [sr_return_amt#21, sr_net_loss#22, s_store_sk#23] +Input [4]: [sr_store_sk#20, sr_return_amt#21, sr_net_loss#22, s_store_sk#23] + +(29) HashAggregate [codegen id : 6] +Input [3]: [sr_return_amt#21, sr_net_loss#22, s_store_sk#23] +Keys [1]: [s_store_sk#23] +Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#21)), partial_sum(UnscaledValue(sr_net_loss#22))] +Aggregate Attributes [2]: [sum#24, sum#25] +Results [3]: [s_store_sk#23, sum#26, sum#27] + +(30) Exchange +Input [3]: [s_store_sk#23, sum#26, sum#27] +Arguments: hashpartitioning(s_store_sk#23, 5), true, [id=#28] + +(31) HashAggregate [codegen id : 7] +Input [3]: [s_store_sk#23, sum#26, sum#27] +Keys [1]: [s_store_sk#23] +Functions [2]: [sum(UnscaledValue(sr_return_amt#21)), sum(UnscaledValue(sr_net_loss#22))] +Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#21))#29, sum(UnscaledValue(sr_net_loss#22))#30] +Results [3]: [s_store_sk#23, MakeDecimal(sum(UnscaledValue(sr_return_amt#21))#29,17,2) AS returns#31, MakeDecimal(sum(UnscaledValue(sr_net_loss#22))#30,17,2) AS profit_loss#32] + +(32) BroadcastExchange +Input [3]: [s_store_sk#23, returns#31, profit_loss#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] + +(33) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [s_store_sk#8] +Right keys [1]: [s_store_sk#23] +Join condition: None + +(34) Project [codegen id : 8] +Output [5]: [sales#17, coalesce(returns#31, 0.00) AS returns#34, CheckOverflow((promote_precision(cast(profit#18 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#32, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#35, store channel AS channel#36, s_store_sk#8 AS id#37] +Input [6]: [s_store_sk#8, sales#17, profit#18, s_store_sk#23, returns#31, profit_loss#32] + +(35) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 10] +Input [4]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] + +(37) Filter [codegen id : 10] +Input [4]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Condition : isnotnull(cs_sold_date_sk#38) + +(38) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(39) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#38] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(40) Project [codegen id : 10] +Output [3]: [cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Input [5]: [cs_sold_date_sk#38, cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41, d_date_sk#5] + +(41) HashAggregate [codegen id : 10] +Input [3]: [cs_call_center_sk#39, cs_ext_sales_price#40, cs_net_profit#41] +Keys [1]: [cs_call_center_sk#39] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#40)), partial_sum(UnscaledValue(cs_net_profit#41))] +Aggregate Attributes [2]: [sum#42, sum#43] +Results [3]: [cs_call_center_sk#39, sum#44, sum#45] + +(42) Exchange +Input [3]: [cs_call_center_sk#39, sum#44, sum#45] +Arguments: hashpartitioning(cs_call_center_sk#39, 5), true, [id=#46] + +(43) HashAggregate [codegen id : 11] +Input [3]: [cs_call_center_sk#39, sum#44, sum#45] +Keys [1]: [cs_call_center_sk#39] +Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#40)), sum(UnscaledValue(cs_net_profit#41))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#40))#47, sum(UnscaledValue(cs_net_profit#41))#48] +Results [3]: [cs_call_center_sk#39, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#40))#47,17,2) AS sales#49, MakeDecimal(sum(UnscaledValue(cs_net_profit#41))#48,17,2) AS profit#50] + +(44) BroadcastExchange +Input [3]: [cs_call_center_sk#39, sales#49, profit#50] +Arguments: IdentityBroadcastMode, [id=#51] + +(45) Scan parquet default.catalog_returns +Output [3]: [cr_returned_date_sk#52, cr_return_amount#53, cr_net_loss#54] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk)] +ReadSchema: struct + +(46) ColumnarToRow [codegen id : 13] +Input [3]: [cr_returned_date_sk#52, cr_return_amount#53, cr_net_loss#54] + +(47) Filter [codegen id : 13] +Input [3]: [cr_returned_date_sk#52, cr_return_amount#53, cr_net_loss#54] +Condition : isnotnull(cr_returned_date_sk#52) + +(48) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(49) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cr_returned_date_sk#52] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(50) Project [codegen id : 13] +Output [2]: [cr_return_amount#53, cr_net_loss#54] +Input [4]: [cr_returned_date_sk#52, cr_return_amount#53, cr_net_loss#54, d_date_sk#5] + +(51) HashAggregate [codegen id : 13] +Input [2]: [cr_return_amount#53, cr_net_loss#54] +Keys: [] +Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#53)), partial_sum(UnscaledValue(cr_net_loss#54))] +Aggregate Attributes [2]: [sum#55, sum#56] +Results [2]: [sum#57, sum#58] + +(52) Exchange +Input [2]: [sum#57, sum#58] +Arguments: SinglePartition, true, [id=#59] + +(53) HashAggregate [codegen id : 14] +Input [2]: [sum#57, sum#58] +Keys: [] +Functions [2]: [sum(UnscaledValue(cr_return_amount#53)), sum(UnscaledValue(cr_net_loss#54))] +Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#53))#60, sum(UnscaledValue(cr_net_loss#54))#61] +Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#53))#60,17,2) AS returns#62, MakeDecimal(sum(UnscaledValue(cr_net_loss#54))#61,17,2) AS profit_loss#63] + +(54) BroadcastNestedLoopJoin +Join condition: None + +(55) Project [codegen id : 15] +Output [5]: [sales#49, returns#62, CheckOverflow((promote_precision(cast(profit#50 as decimal(18,2))) - promote_precision(cast(profit_loss#63 as decimal(18,2)))), DecimalType(18,2), true) AS profit#64, catalog channel AS channel#65, cs_call_center_sk#39 AS id#66] +Input [5]: [cs_call_center_sk#39, sales#49, profit#50, returns#62, profit_loss#63] + +(56) Scan parquet default.web_sales +Output [4]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(57) ColumnarToRow [codegen id : 18] +Input [4]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] + +(58) Filter [codegen id : 18] +Input [4]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] +Condition : (isnotnull(ws_sold_date_sk#67) AND isnotnull(ws_web_page_sk#68)) + +(59) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(60) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_sold_date_sk#67] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(61) Project [codegen id : 18] +Output [3]: [ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70] +Input [5]: [ws_sold_date_sk#67, ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70, d_date_sk#5] + +(62) Scan parquet default.web_page +Output [1]: [wp_web_page_sk#71] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_page] +PushedFilters: [IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(63) ColumnarToRow [codegen id : 17] +Input [1]: [wp_web_page_sk#71] + +(64) Filter [codegen id : 17] +Input [1]: [wp_web_page_sk#71] +Condition : isnotnull(wp_web_page_sk#71) + +(65) BroadcastExchange +Input [1]: [wp_web_page_sk#71] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#72] + +(66) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_web_page_sk#68] +Right keys [1]: [wp_web_page_sk#71] +Join condition: None + +(67) Project [codegen id : 18] +Output [3]: [ws_ext_sales_price#69, ws_net_profit#70, wp_web_page_sk#71] +Input [4]: [ws_web_page_sk#68, ws_ext_sales_price#69, ws_net_profit#70, wp_web_page_sk#71] + +(68) HashAggregate [codegen id : 18] +Input [3]: [ws_ext_sales_price#69, ws_net_profit#70, wp_web_page_sk#71] +Keys [1]: [wp_web_page_sk#71] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#69)), partial_sum(UnscaledValue(ws_net_profit#70))] +Aggregate Attributes [2]: [sum#73, sum#74] +Results [3]: [wp_web_page_sk#71, sum#75, sum#76] + +(69) Exchange +Input [3]: [wp_web_page_sk#71, sum#75, sum#76] +Arguments: hashpartitioning(wp_web_page_sk#71, 5), true, [id=#77] + +(70) HashAggregate [codegen id : 23] +Input [3]: [wp_web_page_sk#71, sum#75, sum#76] +Keys [1]: [wp_web_page_sk#71] +Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#69)), sum(UnscaledValue(ws_net_profit#70))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#69))#78, sum(UnscaledValue(ws_net_profit#70))#79] +Results [3]: [wp_web_page_sk#71, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#69))#78,17,2) AS sales#80, MakeDecimal(sum(UnscaledValue(ws_net_profit#70))#79,17,2) AS profit#81] + +(71) Scan parquet default.web_returns +Output [4]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_returned_date_sk), IsNotNull(wr_web_page_sk)] +ReadSchema: struct + +(72) ColumnarToRow [codegen id : 21] +Input [4]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] + +(73) Filter [codegen id : 21] +Input [4]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] +Condition : (isnotnull(wr_returned_date_sk#82) AND isnotnull(wr_web_page_sk#83)) + +(74) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(75) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [wr_returned_date_sk#82] +Right keys [1]: [cast(d_date_sk#5 as bigint)] +Join condition: None + +(76) Project [codegen id : 21] +Output [3]: [wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85] +Input [5]: [wr_returned_date_sk#82, wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85, d_date_sk#5] + +(77) ReusedExchange [Reuses operator id: 65] +Output [1]: [wp_web_page_sk#86] + +(78) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [wr_web_page_sk#83] +Right keys [1]: [cast(wp_web_page_sk#86 as bigint)] +Join condition: None + +(79) Project [codegen id : 21] +Output [3]: [wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] +Input [4]: [wr_web_page_sk#83, wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] + +(80) HashAggregate [codegen id : 21] +Input [3]: [wr_return_amt#84, wr_net_loss#85, wp_web_page_sk#86] +Keys [1]: [wp_web_page_sk#86] +Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#84)), partial_sum(UnscaledValue(wr_net_loss#85))] +Aggregate Attributes [2]: [sum#87, sum#88] +Results [3]: [wp_web_page_sk#86, sum#89, sum#90] + +(81) Exchange +Input [3]: [wp_web_page_sk#86, sum#89, sum#90] +Arguments: hashpartitioning(wp_web_page_sk#86, 5), true, [id=#91] + +(82) HashAggregate [codegen id : 22] +Input [3]: [wp_web_page_sk#86, sum#89, sum#90] +Keys [1]: [wp_web_page_sk#86] +Functions [2]: [sum(UnscaledValue(wr_return_amt#84)), sum(UnscaledValue(wr_net_loss#85))] +Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#84))#92, sum(UnscaledValue(wr_net_loss#85))#93] +Results [3]: [wp_web_page_sk#86, MakeDecimal(sum(UnscaledValue(wr_return_amt#84))#92,17,2) AS returns#94, MakeDecimal(sum(UnscaledValue(wr_net_loss#85))#93,17,2) AS profit_loss#95] + +(83) BroadcastExchange +Input [3]: [wp_web_page_sk#86, returns#94, profit_loss#95] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#96] + +(84) BroadcastHashJoin [codegen id : 23] +Left keys [1]: [wp_web_page_sk#71] +Right keys [1]: [wp_web_page_sk#86] +Join condition: None + +(85) Project [codegen id : 23] +Output [5]: [sales#80, coalesce(returns#94, 0.00) AS returns#97, CheckOverflow((promote_precision(cast(profit#81 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#95, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS profit#98, web channel AS channel#99, wp_web_page_sk#71 AS id#100] +Input [6]: [wp_web_page_sk#71, sales#80, profit#81, wp_web_page_sk#86, returns#94, profit_loss#95] + +(86) Union + +(87) Expand [codegen id : 24] +Input [5]: [sales#17, returns#34, profit#35, channel#36, id#37] +Arguments: [List(sales#17, returns#34, profit#35, channel#36, id#37, 0), List(sales#17, returns#34, profit#35, channel#36, null, 1), List(sales#17, returns#34, profit#35, null, null, 3)], [sales#17, returns#34, profit#35, channel#101, id#102, spark_grouping_id#103] + +(88) HashAggregate [codegen id : 24] +Input [6]: [sales#17, returns#34, profit#35, channel#101, id#102, spark_grouping_id#103] +Keys [3]: [channel#101, id#102, spark_grouping_id#103] +Functions [3]: [partial_sum(sales#17), partial_sum(returns#34), partial_sum(profit#35)] +Aggregate Attributes [6]: [sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109] +Results [9]: [channel#101, id#102, spark_grouping_id#103, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115] + +(89) Exchange +Input [9]: [channel#101, id#102, spark_grouping_id#103, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115] +Arguments: hashpartitioning(channel#101, id#102, spark_grouping_id#103, 5), true, [id=#116] + +(90) HashAggregate [codegen id : 25] +Input [9]: [channel#101, id#102, spark_grouping_id#103, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115] +Keys [3]: [channel#101, id#102, spark_grouping_id#103] +Functions [3]: [sum(sales#17), sum(returns#34), sum(profit#35)] +Aggregate Attributes [3]: [sum(sales#17)#117, sum(returns#34)#118, sum(profit#35)#119] +Results [5]: [channel#101, id#102, sum(sales#17)#117 AS sales#120, sum(returns#34)#118 AS returns#121, sum(profit#35)#119 AS profit#122] + +(91) TakeOrderedAndProject +Input [5]: [channel#101, id#102, sales#120, returns#121, profit#122] +Arguments: 100, [channel#101 ASC NULLS FIRST, id#102 ASC NULLS FIRST], [channel#101, id#102, sales#120, returns#121, profit#122] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/simplified.txt index ef07e38e8..bfbeff02b 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q77/simplified.txt @@ -1,141 +1,139 @@ -TakeOrderedAndProject [channel,id,profit,returns,sales] - WholeStageCodegen - HashAggregate [channel,id,spark_grouping_id,sum,sum,sum,sum(profit),sum(returns),sum(sales)] [profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (25) + HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [channel,id,spark_grouping_id] #1 - WholeStageCodegen - HashAggregate [channel,id,profit,returns,sales,spark_grouping_id,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Expand [channel,id,profit,returns,sales] + WholeStageCodegen (24) + HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Expand [sales,returns,profit,channel,id] InputAdapter Union - WholeStageCodegen - Project [profit,profit_loss,returns,s_store_sk,sales] + WholeStageCodegen (8) + Project [sales,returns,profit,profit_loss,s_store_sk] BroadcastHashJoin [s_store_sk,s_store_sk] - HashAggregate [s_store_sk,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] [profit,sales,sum,sum,sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit))] + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),sales,profit,sum,sum] InputAdapter Exchange [s_store_sk] #2 - WholeStageCodegen - HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit,sum,sum,sum,sum] [sum,sum,sum,sum] - Project [s_store_sk,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_ext_sales_price,ss_net_profit,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] - Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_ext_sales_price,ss_net_profit,ss_sold_date_sk,ss_store_sk] + WholeStageCodegen (3) + HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_net_profit,s_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_sk] [s_store_sk] + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] InputAdapter BroadcastExchange #5 - WholeStageCodegen - HashAggregate [s_store_sk,sum,sum,sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(sr_return_amt))] [profit_loss,returns,sum,sum,sum(UnscaledValue(sr_net_loss)),sum(UnscaledValue(sr_return_amt))] + WholeStageCodegen (7) + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),returns,profit_loss,sum,sum] InputAdapter Exchange [s_store_sk] #6 - WholeStageCodegen - HashAggregate [s_store_sk,sr_net_loss,sr_return_amt,sum,sum,sum,sum] [sum,sum,sum,sum] - Project [s_store_sk,sr_net_loss,sr_return_amt] - BroadcastHashJoin [s_store_sk,sr_store_sk] - Project [sr_net_loss,sr_return_amt,sr_store_sk] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] - Project [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] - Filter [sr_returned_date_sk,sr_store_sk] - Scan parquet default.store_returns [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] [sr_net_loss,sr_return_amt,sr_returned_date_sk,sr_store_sk] + WholeStageCodegen (6) + HashAggregate [s_store_sk,sr_return_amt,sr_net_loss] [sum,sum,sum,sum] + Project [sr_return_amt,sr_net_loss,s_store_sk] + BroadcastHashJoin [sr_store_sk,s_store_sk] + Project [sr_store_sk,sr_return_amt,sr_net_loss] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] InputAdapter - BroadcastExchange #7 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + ReusedExchange [d_date_sk] #3 InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_sk] [s_store_sk] - WholeStageCodegen - Project [cs_call_center_sk,profit,profit_loss,returns,sales] + ReusedExchange [s_store_sk] #4 + WholeStageCodegen (15) + Project [sales,returns,profit,profit_loss,cs_call_center_sk] InputAdapter BroadcastNestedLoopJoin - BroadcastExchange #9 - WholeStageCodegen - HashAggregate [cs_call_center_sk,sum,sum,sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit))] [profit,sales,sum,sum,sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit))] + BroadcastExchange #7 + WholeStageCodegen (11) + HashAggregate [cs_call_center_sk,sum,sum] [sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit)),sales,profit,sum,sum] InputAdapter - Exchange [cs_call_center_sk] #10 - WholeStageCodegen - HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,sum,sum,sum,sum] [sum,sum,sum,sum] + Exchange [cs_call_center_sk] #8 + WholeStageCodegen (10) + HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] [cs_call_center_sk,cs_ext_sales_price,cs_net_profit,cs_sold_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 - WholeStageCodegen - HashAggregate [sum,sum,sum(UnscaledValue(cr_net_loss)),sum(UnscaledValue(cr_return_amount))] [profit_loss,returns,sum,sum,sum(UnscaledValue(cr_net_loss)),sum(UnscaledValue(cr_return_amount))] + ReusedExchange [d_date_sk] #3 + WholeStageCodegen (14) + HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum] InputAdapter - Exchange #11 - WholeStageCodegen - HashAggregate [cr_net_loss,cr_return_amount,sum,sum,sum,sum] [sum,sum,sum,sum] - Project [cr_net_loss,cr_return_amount] + Exchange #9 + WholeStageCodegen (13) + HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum] + Project [cr_return_amount,cr_net_loss] BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Project [cr_net_loss,cr_return_amount,cr_returned_date_sk] - Filter [cr_returned_date_sk] - Scan parquet default.catalog_returns [cr_net_loss,cr_return_amount,cr_returned_date_sk] [cr_net_loss,cr_return_amount,cr_returned_date_sk] + Filter [cr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_return_amount,cr_net_loss] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 - WholeStageCodegen - Project [profit,profit_loss,returns,sales,wp_web_page_sk] + ReusedExchange [d_date_sk] #3 + WholeStageCodegen (23) + Project [sales,returns,profit,profit_loss,wp_web_page_sk] BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] - HashAggregate [sum,sum,sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),wp_web_page_sk] [profit,sales,sum,sum,sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit))] + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum] InputAdapter - Exchange [wp_web_page_sk] #12 - WholeStageCodegen - HashAggregate [sum,sum,sum,sum,wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] - Project [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] - BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] - Project [ws_ext_sales_price,ws_net_profit,ws_web_page_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_page_sk] - Filter [ws_sold_date_sk,ws_web_page_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_page_sk] [ws_ext_sales_price,ws_net_profit,ws_sold_date_sk,ws_web_page_sk] + Exchange [wp_web_page_sk] #10 + WholeStageCodegen (18) + HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] + Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 + ReusedExchange [d_date_sk] #3 InputAdapter - BroadcastExchange #13 - WholeStageCodegen - Project [wp_web_page_sk] - Filter [wp_web_page_sk] - Scan parquet default.web_page [wp_web_page_sk] [wp_web_page_sk] + BroadcastExchange #11 + WholeStageCodegen (17) + Filter [wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_page [wp_web_page_sk] InputAdapter - BroadcastExchange #14 - WholeStageCodegen - HashAggregate [sum,sum,sum(UnscaledValue(wr_net_loss)),sum(UnscaledValue(wr_return_amt)),wp_web_page_sk] [profit_loss,returns,sum,sum,sum(UnscaledValue(wr_net_loss)),sum(UnscaledValue(wr_return_amt))] + BroadcastExchange #12 + WholeStageCodegen (22) + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),returns,profit_loss,sum,sum] InputAdapter - Exchange [wp_web_page_sk] #15 - WholeStageCodegen - HashAggregate [sum,sum,sum,sum,wp_web_page_sk,wr_net_loss,wr_return_amt] [sum,sum,sum,sum] - Project [wp_web_page_sk,wr_net_loss,wr_return_amt] - BroadcastHashJoin [wp_web_page_sk,wr_web_page_sk] - Project [wr_net_loss,wr_return_amt,wr_web_page_sk] - BroadcastHashJoin [d_date_sk,wr_returned_date_sk] - Project [wr_net_loss,wr_return_amt,wr_returned_date_sk,wr_web_page_sk] - Filter [wr_returned_date_sk,wr_web_page_sk] - Scan parquet default.web_returns [wr_net_loss,wr_return_amt,wr_returned_date_sk,wr_web_page_sk] [wr_net_loss,wr_return_amt,wr_returned_date_sk,wr_web_page_sk] + Exchange [wp_web_page_sk] #13 + WholeStageCodegen (21) + HashAggregate [wp_web_page_sk,wr_return_amt,wr_net_loss] [sum,sum,sum,sum] + Project [wr_return_amt,wr_net_loss,wp_web_page_sk] + BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk] + Project [wr_web_page_sk,wr_return_amt,wr_net_loss] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Filter [wr_returned_date_sk,wr_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #7 + ReusedExchange [d_date_sk] #3 InputAdapter - BroadcastExchange #16 - WholeStageCodegen - Project [wp_web_page_sk] - Filter [wp_web_page_sk] - Scan parquet default.web_page [wp_web_page_sk] [wp_web_page_sk] + ReusedExchange [wp_web_page_sk] #11 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt index 3a21513d1..1bcf039dd 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/explain.txt @@ -1,61 +1,341 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[ratio#1 ASC NULLS FIRST,ss_qty#2 DESC NULLS LAST,ss_wc#3 DESC NULLS LAST,ss_sp#4 DESC NULLS LAST,other_chan_qty#5 ASC NULLS FIRST,other_chan_wholesale_cost#6 ASC NULLS FIRST,other_chan_sales_price#7 ASC NULLS FIRST,round((cast(ss_qty#2 as double) / cast(coalesce((ws_qty#8 + cs_qty#9), 1) as double)), 2) ASC NULLS FIRST], output=[ratio#1,store_qty#10,store_wholesale_cost#11,store_sales_price#12,other_chan_qty#5,other_chan_wholesale_cost#6,other_chan_sales_price#7]) -+- *(12) Project [round((cast(ss_qty#2 as double) / cast(coalesce((ws_qty#8 + cs_qty#9), 1) as double)), 2) AS ratio#1, ss_qty#2 AS store_qty#10, ss_wc#3 AS store_wholesale_cost#11, ss_sp#4 AS store_sales_price#12, (coalesce(ws_qty#8, 0) + coalesce(cs_qty#9, 0)) AS other_chan_qty#5, CheckOverflow((promote_precision(cast(coalesce(ws_wc#13, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#14, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_wholesale_cost#6, CheckOverflow((promote_precision(cast(coalesce(ws_sp#15, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#16, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_sales_price#7, cs_qty#9, ss_wc#3, ss_qty#2, ss_sp#4, ws_qty#8] - +- *(12) BroadcastHashJoin [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19], [cs_sold_year#20, cs_item_sk#21, cs_customer_sk#22], Inner, BuildRight - :- *(12) Project [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19, ss_qty#2, ss_wc#3, ss_sp#4, ws_qty#8, ws_wc#13, ws_sp#15] - : +- *(12) BroadcastHashJoin [ss_sold_year#17, ss_item_sk#18, ss_customer_sk#19], [ws_sold_year#23, ws_item_sk#24, ws_customer_sk#25], Inner, BuildRight - : :- *(12) HashAggregate(keys=[d_year#26, ss_item_sk#18, ss_customer_sk#19], functions=[sum(cast(ss_quantity#27 as bigint)), sum(UnscaledValue(ss_wholesale_cost#28)), sum(UnscaledValue(ss_sales_price#29))]) - : : +- Exchange hashpartitioning(d_year#26, ss_item_sk#18, ss_customer_sk#19, 5) - : : +- *(3) HashAggregate(keys=[d_year#26, ss_item_sk#18, ss_customer_sk#19], functions=[partial_sum(cast(ss_quantity#27 as bigint)), partial_sum(UnscaledValue(ss_wholesale_cost#28)), partial_sum(UnscaledValue(ss_sales_price#29))]) - : : +- *(3) Project [ss_item_sk#18, ss_customer_sk#19, ss_quantity#27, ss_wholesale_cost#28, ss_sales_price#29, d_year#26] - : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#30], [d_date_sk#31], Inner, BuildRight - : : :- *(3) Project [ss_sold_date_sk#30, ss_item_sk#18, ss_customer_sk#19, ss_quantity#27, ss_wholesale_cost#28, ss_sales_price#29] - : : : +- *(3) Filter isnull(sr_ticket_number#32) - : : : +- *(3) BroadcastHashJoin [cast(ss_ticket_number#33 as bigint), cast(ss_item_sk#18 as bigint)], [sr_ticket_number#32, sr_item_sk#34], LeftOuter, BuildRight - : : : :- *(3) Project [ss_sold_date_sk#30, ss_item_sk#18, ss_customer_sk#19, ss_ticket_number#33, ss_quantity#27, ss_wholesale_cost#28, ss_sales_price#29] - : : : : +- *(3) Filter ((isnotnull(ss_sold_date_sk#30) && isnotnull(ss_item_sk#18)) && isnotnull(ss_customer_sk#19)) - : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#30,ss_item_sk#18,ss_customer_sk#19,ss_ticket_number#33,ss_quantity#27,ss_wholesale_cost#28,ss_sales_price#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [d_date_sk#31, d_year#26] - : : +- *(2) Filter ((isnotnull(d_year#26) && (d_year#26 = 2000)) && isnotnull(d_date_sk#31)) - : : +- *(2) FileScan parquet default.date_dim[d_date_sk#31,d_year#26] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) - : +- *(7) Filter (coalesce(ws_qty#8, 0) > 0) - : +- *(7) HashAggregate(keys=[d_year#26, ws_item_sk#24, ws_bill_customer_sk#35], functions=[sum(cast(ws_quantity#36 as bigint)), sum(UnscaledValue(ws_wholesale_cost#37)), sum(UnscaledValue(ws_sales_price#38))]) - : +- Exchange hashpartitioning(d_year#26, ws_item_sk#24, ws_bill_customer_sk#35, 5) - : +- *(6) HashAggregate(keys=[d_year#26, ws_item_sk#24, ws_bill_customer_sk#35], functions=[partial_sum(cast(ws_quantity#36 as bigint)), partial_sum(UnscaledValue(ws_wholesale_cost#37)), partial_sum(UnscaledValue(ws_sales_price#38))]) - : +- *(6) Project [ws_item_sk#24, ws_bill_customer_sk#35, ws_quantity#36, ws_wholesale_cost#37, ws_sales_price#38, d_year#26] - : +- *(6) BroadcastHashJoin [ws_sold_date_sk#39], [d_date_sk#31], Inner, BuildRight - : :- *(6) Project [ws_sold_date_sk#39, ws_item_sk#24, ws_bill_customer_sk#35, ws_quantity#36, ws_wholesale_cost#37, ws_sales_price#38] - : : +- *(6) Filter isnull(wr_order_number#40) - : : +- *(6) BroadcastHashJoin [cast(ws_order_number#41 as bigint), cast(ws_item_sk#24 as bigint)], [wr_order_number#40, wr_item_sk#42], LeftOuter, BuildRight - : : :- *(6) Project [ws_sold_date_sk#39, ws_item_sk#24, ws_bill_customer_sk#35, ws_order_number#41, ws_quantity#36, ws_wholesale_cost#37, ws_sales_price#38] - : : : +- *(6) Filter ((isnotnull(ws_sold_date_sk#39) && isnotnull(ws_bill_customer_sk#35)) && isnotnull(ws_item_sk#24)) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#39,ws_item_sk#24,ws_bill_customer_sk#35,ws_order_number#41,ws_quantity#36,ws_wholesale_cost#37,ws_sales_price#38] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk), IsNotNull(ws_item_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#31, d_year#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true])) - +- *(11) Filter (coalesce(cs_qty#9, 0) > 0) - +- *(11) HashAggregate(keys=[d_year#26, cs_item_sk#21, cs_bill_customer_sk#43], functions=[sum(cast(cs_quantity#44 as bigint)), sum(UnscaledValue(cs_wholesale_cost#45)), sum(UnscaledValue(cs_sales_price#46))]) - +- Exchange hashpartitioning(d_year#26, cs_item_sk#21, cs_bill_customer_sk#43, 5) - +- *(10) HashAggregate(keys=[d_year#26, cs_item_sk#21, cs_bill_customer_sk#43], functions=[partial_sum(cast(cs_quantity#44 as bigint)), partial_sum(UnscaledValue(cs_wholesale_cost#45)), partial_sum(UnscaledValue(cs_sales_price#46))]) - +- *(10) Project [cs_bill_customer_sk#43, cs_item_sk#21, cs_quantity#44, cs_wholesale_cost#45, cs_sales_price#46, d_year#26] - +- *(10) BroadcastHashJoin [cs_sold_date_sk#47], [d_date_sk#31], Inner, BuildRight - :- *(10) Project [cs_sold_date_sk#47, cs_bill_customer_sk#43, cs_item_sk#21, cs_quantity#44, cs_wholesale_cost#45, cs_sales_price#46] - : +- *(10) Filter isnull(cr_order_number#48) - : +- *(10) BroadcastHashJoin [cs_order_number#49, cs_item_sk#21], [cr_order_number#48, cr_item_sk#50], LeftOuter, BuildRight - : :- *(10) Project [cs_sold_date_sk#47, cs_bill_customer_sk#43, cs_item_sk#21, cs_order_number#49, cs_quantity#44, cs_wholesale_cost#45, cs_sales_price#46] - : : +- *(10) Filter ((isnotnull(cs_sold_date_sk#47) && isnotnull(cs_item_sk#21)) && isnotnull(cs_bill_customer_sk#43)) - : : +- *(10) FileScan parquet default.catalog_sales[cs_sold_date_sk#47,cs_bill_customer_sk#43,cs_item_sk#21,cs_order_number#49,cs_quantity#44,cs_wholesale_cost#45,cs_sales_price#46] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct - +- ReusedExchange [d_date_sk#31, d_year#26], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +TakeOrderedAndProject (60) ++- * Project (59) + +- * BroadcastHashJoin Inner BuildRight (58) + :- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * HashAggregate (19) + : : +- Exchange (18) + : : +- * HashAggregate (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * Filter (9) + : : : +- * BroadcastHashJoin LeftOuter BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.store_returns (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.date_dim (11) + : +- BroadcastExchange (37) + : +- * Filter (36) + : +- * HashAggregate (35) + : +- Exchange (34) + : +- * HashAggregate (33) + : +- * Project (32) + : +- * BroadcastHashJoin Inner BuildRight (31) + : :- * Project (29) + : : +- * Filter (28) + : : +- * BroadcastHashJoin LeftOuter BuildRight (27) + : : :- * Filter (22) + : : : +- * ColumnarToRow (21) + : : : +- Scan parquet default.web_sales (20) + : : +- BroadcastExchange (26) + : : +- * Filter (25) + : : +- * ColumnarToRow (24) + : : +- Scan parquet default.web_returns (23) + : +- ReusedExchange (30) + +- BroadcastExchange (57) + +- * Filter (56) + +- * HashAggregate (55) + +- Exchange (54) + +- * HashAggregate (53) + +- * Project (52) + +- * BroadcastHashJoin Inner BuildRight (51) + :- * Project (49) + : +- * Filter (48) + : +- * BroadcastHashJoin LeftOuter BuildRight (47) + : :- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet default.catalog_sales (40) + : +- BroadcastExchange (46) + : +- * Filter (45) + : +- * ColumnarToRow (44) + : +- Scan parquet default.catalog_returns (43) + +- ReusedExchange (50) + + +(1) Scan parquet default.store_sales +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] + +(3) Filter [codegen id : 3] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] +Condition : ((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_customer_sk#3)) + +(4) Scan parquet default.store_returns +Output [2]: [sr_item_sk#8, sr_ticket_number#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [sr_item_sk#8, sr_ticket_number#9] + +(6) Filter [codegen id : 1] +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Condition : (isnotnull(sr_ticket_number#9) AND isnotnull(sr_item_sk#8)) + +(7) BroadcastExchange +Input [2]: [sr_item_sk#8, sr_ticket_number#9] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [2]: [cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#2 as bigint)] +Right keys [2]: [sr_ticket_number#9, sr_item_sk#8] +Join condition: None + +(9) Filter [codegen id : 3] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, sr_item_sk#8, sr_ticket_number#9] +Condition : isnull(sr_ticket_number#9) + +(10) Project [codegen id : 3] +Output [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#4, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, sr_item_sk#8, sr_ticket_number#9] + +(11) Scan parquet default.date_dim +Output [2]: [d_date_sk#11, d_year#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#11, d_year#12] + +(13) Filter [codegen id : 2] +Input [2]: [d_date_sk#11, d_year#12] +Condition : ((isnotnull(d_year#12) AND (d_year#12 = 2000)) AND isnotnull(d_date_sk#11)) + +(14) BroadcastExchange +Input [2]: [d_date_sk#11, d_year#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, d_year#12] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, d_date_sk#11, d_year#12] + +(17) HashAggregate [codegen id : 3] +Input [6]: [ss_item_sk#2, ss_customer_sk#3, ss_quantity#5, ss_wholesale_cost#6, ss_sales_price#7, d_year#12] +Keys [3]: [d_year#12, ss_item_sk#2, ss_customer_sk#3] +Functions [3]: [partial_sum(cast(ss_quantity#5 as bigint)), partial_sum(UnscaledValue(ss_wholesale_cost#6)), partial_sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [3]: [sum#14, sum#15, sum#16] +Results [6]: [d_year#12, ss_item_sk#2, ss_customer_sk#3, sum#17, sum#18, sum#19] + +(18) Exchange +Input [6]: [d_year#12, ss_item_sk#2, ss_customer_sk#3, sum#17, sum#18, sum#19] +Arguments: hashpartitioning(d_year#12, ss_item_sk#2, ss_customer_sk#3, 5), true, [id=#20] + +(19) HashAggregate [codegen id : 12] +Input [6]: [d_year#12, ss_item_sk#2, ss_customer_sk#3, sum#17, sum#18, sum#19] +Keys [3]: [d_year#12, ss_item_sk#2, ss_customer_sk#3] +Functions [3]: [sum(cast(ss_quantity#5 as bigint)), sum(UnscaledValue(ss_wholesale_cost#6)), sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [3]: [sum(cast(ss_quantity#5 as bigint))#21, sum(UnscaledValue(ss_wholesale_cost#6))#22, sum(UnscaledValue(ss_sales_price#7))#23] +Results [6]: [d_year#12 AS ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3, sum(cast(ss_quantity#5 as bigint))#21 AS ss_qty#25, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#6))#22,17,2) AS ss_wc#26, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#23,17,2) AS ss_sp#27] + +(20) Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 6] +Input [7]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34] + +(22) Filter [codegen id : 6] +Input [7]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34] +Condition : ((isnotnull(ws_sold_date_sk#28) AND isnotnull(ws_item_sk#29)) AND isnotnull(ws_bill_customer_sk#30)) + +(23) Scan parquet default.web_returns +Output [2]: [wr_item_sk#35, wr_order_number#36] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [wr_item_sk#35, wr_order_number#36] + +(25) Filter [codegen id : 4] +Input [2]: [wr_item_sk#35, wr_order_number#36] +Condition : (isnotnull(wr_order_number#36) AND isnotnull(wr_item_sk#35)) + +(26) BroadcastExchange +Input [2]: [wr_item_sk#35, wr_order_number#36] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#37] + +(27) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [cast(ws_order_number#31 as bigint), cast(ws_item_sk#29 as bigint)] +Right keys [2]: [wr_order_number#36, wr_item_sk#35] +Join condition: None + +(28) Filter [codegen id : 6] +Input [9]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, wr_item_sk#35, wr_order_number#36] +Condition : isnull(wr_order_number#36) + +(29) Project [codegen id : 6] +Output [6]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34] +Input [9]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_order_number#31, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, wr_item_sk#35, wr_order_number#36] + +(30) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#11, d_year#12] + +(31) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#28] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(32) Project [codegen id : 6] +Output [6]: [ws_item_sk#29, ws_bill_customer_sk#30, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, d_year#12] +Input [8]: [ws_sold_date_sk#28, ws_item_sk#29, ws_bill_customer_sk#30, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, d_date_sk#11, d_year#12] + +(33) HashAggregate [codegen id : 6] +Input [6]: [ws_item_sk#29, ws_bill_customer_sk#30, ws_quantity#32, ws_wholesale_cost#33, ws_sales_price#34, d_year#12] +Keys [3]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30] +Functions [3]: [partial_sum(cast(ws_quantity#32 as bigint)), partial_sum(UnscaledValue(ws_wholesale_cost#33)), partial_sum(UnscaledValue(ws_sales_price#34))] +Aggregate Attributes [3]: [sum#38, sum#39, sum#40] +Results [6]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30, sum#41, sum#42, sum#43] + +(34) Exchange +Input [6]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30, sum#41, sum#42, sum#43] +Arguments: hashpartitioning(d_year#12, ws_item_sk#29, ws_bill_customer_sk#30, 5), true, [id=#44] + +(35) HashAggregate [codegen id : 7] +Input [6]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30, sum#41, sum#42, sum#43] +Keys [3]: [d_year#12, ws_item_sk#29, ws_bill_customer_sk#30] +Functions [3]: [sum(cast(ws_quantity#32 as bigint)), sum(UnscaledValue(ws_wholesale_cost#33)), sum(UnscaledValue(ws_sales_price#34))] +Aggregate Attributes [3]: [sum(cast(ws_quantity#32 as bigint))#45, sum(UnscaledValue(ws_wholesale_cost#33))#46, sum(UnscaledValue(ws_sales_price#34))#47] +Results [6]: [d_year#12 AS ws_sold_year#48, ws_item_sk#29, ws_bill_customer_sk#30 AS ws_customer_sk#49, sum(cast(ws_quantity#32 as bigint))#45 AS ws_qty#50, MakeDecimal(sum(UnscaledValue(ws_wholesale_cost#33))#46,17,2) AS ws_wc#51, MakeDecimal(sum(UnscaledValue(ws_sales_price#34))#47,17,2) AS ws_sp#52] + +(36) Filter [codegen id : 7] +Input [6]: [ws_sold_year#48, ws_item_sk#29, ws_customer_sk#49, ws_qty#50, ws_wc#51, ws_sp#52] +Condition : (coalesce(ws_qty#50, 0) > 0) + +(37) BroadcastExchange +Input [6]: [ws_sold_year#48, ws_item_sk#29, ws_customer_sk#49, ws_qty#50, ws_wc#51, ws_sp#52] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#53] + +(38) BroadcastHashJoin [codegen id : 12] +Left keys [3]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3] +Right keys [3]: [ws_sold_year#48, ws_item_sk#29, ws_customer_sk#49] +Join condition: None + +(39) Project [codegen id : 12] +Output [9]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3, ss_qty#25, ss_wc#26, ss_sp#27, ws_qty#50, ws_wc#51, ws_sp#52] +Input [12]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3, ss_qty#25, ss_wc#26, ss_sp#27, ws_sold_year#48, ws_item_sk#29, ws_customer_sk#49, ws_qty#50, ws_wc#51, ws_sp#52] + +(40) Scan parquet default.catalog_sales +Output [7]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 10] +Input [7]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60] + +(42) Filter [codegen id : 10] +Input [7]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60] +Condition : ((isnotnull(cs_sold_date_sk#54) AND isnotnull(cs_item_sk#56)) AND isnotnull(cs_bill_customer_sk#55)) + +(43) Scan parquet default.catalog_returns +Output [2]: [cr_item_sk#61, cr_order_number#62] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 8] +Input [2]: [cr_item_sk#61, cr_order_number#62] + +(45) Filter [codegen id : 8] +Input [2]: [cr_item_sk#61, cr_order_number#62] +Condition : (isnotnull(cr_order_number#62) AND isnotnull(cr_item_sk#61)) + +(46) BroadcastExchange +Input [2]: [cr_item_sk#61, cr_order_number#62] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#63] + +(47) BroadcastHashJoin [codegen id : 10] +Left keys [2]: [cs_order_number#57, cs_item_sk#56] +Right keys [2]: [cr_order_number#62, cr_item_sk#61] +Join condition: None + +(48) Filter [codegen id : 10] +Input [9]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, cr_item_sk#61, cr_order_number#62] +Condition : isnull(cr_order_number#62) + +(49) Project [codegen id : 10] +Output [6]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60] +Input [9]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_order_number#57, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, cr_item_sk#61, cr_order_number#62] + +(50) ReusedExchange [Reuses operator id: 14] +Output [2]: [d_date_sk#11, d_year#12] + +(51) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cs_sold_date_sk#54] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(52) Project [codegen id : 10] +Output [6]: [cs_bill_customer_sk#55, cs_item_sk#56, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, d_year#12] +Input [8]: [cs_sold_date_sk#54, cs_bill_customer_sk#55, cs_item_sk#56, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, d_date_sk#11, d_year#12] + +(53) HashAggregate [codegen id : 10] +Input [6]: [cs_bill_customer_sk#55, cs_item_sk#56, cs_quantity#58, cs_wholesale_cost#59, cs_sales_price#60, d_year#12] +Keys [3]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55] +Functions [3]: [partial_sum(cast(cs_quantity#58 as bigint)), partial_sum(UnscaledValue(cs_wholesale_cost#59)), partial_sum(UnscaledValue(cs_sales_price#60))] +Aggregate Attributes [3]: [sum#64, sum#65, sum#66] +Results [6]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55, sum#67, sum#68, sum#69] + +(54) Exchange +Input [6]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55, sum#67, sum#68, sum#69] +Arguments: hashpartitioning(d_year#12, cs_item_sk#56, cs_bill_customer_sk#55, 5), true, [id=#70] + +(55) HashAggregate [codegen id : 11] +Input [6]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55, sum#67, sum#68, sum#69] +Keys [3]: [d_year#12, cs_item_sk#56, cs_bill_customer_sk#55] +Functions [3]: [sum(cast(cs_quantity#58 as bigint)), sum(UnscaledValue(cs_wholesale_cost#59)), sum(UnscaledValue(cs_sales_price#60))] +Aggregate Attributes [3]: [sum(cast(cs_quantity#58 as bigint))#71, sum(UnscaledValue(cs_wholesale_cost#59))#72, sum(UnscaledValue(cs_sales_price#60))#73] +Results [6]: [d_year#12 AS cs_sold_year#74, cs_item_sk#56, cs_bill_customer_sk#55 AS cs_customer_sk#75, sum(cast(cs_quantity#58 as bigint))#71 AS cs_qty#76, MakeDecimal(sum(UnscaledValue(cs_wholesale_cost#59))#72,17,2) AS cs_wc#77, MakeDecimal(sum(UnscaledValue(cs_sales_price#60))#73,17,2) AS cs_sp#78] + +(56) Filter [codegen id : 11] +Input [6]: [cs_sold_year#74, cs_item_sk#56, cs_customer_sk#75, cs_qty#76, cs_wc#77, cs_sp#78] +Condition : (coalesce(cs_qty#76, 0) > 0) + +(57) BroadcastExchange +Input [6]: [cs_sold_year#74, cs_item_sk#56, cs_customer_sk#75, cs_qty#76, cs_wc#77, cs_sp#78] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#79] + +(58) BroadcastHashJoin [codegen id : 12] +Left keys [3]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3] +Right keys [3]: [cs_sold_year#74, cs_item_sk#56, cs_customer_sk#75] +Join condition: None + +(59) Project [codegen id : 12] +Output [12]: [round((cast(ss_qty#25 as double) / cast(coalesce((ws_qty#50 + cs_qty#76), 1) as double)), 2) AS ratio#80, ss_qty#25 AS store_qty#81, ss_wc#26 AS store_wholesale_cost#82, ss_sp#27 AS store_sales_price#83, (coalesce(ws_qty#50, 0) + coalesce(cs_qty#76, 0)) AS other_chan_qty#84, CheckOverflow((promote_precision(cast(coalesce(ws_wc#51, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#77, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS other_chan_wholesale_cost#85, CheckOverflow((promote_precision(cast(coalesce(ws_sp#52, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#78, 0.00) as decimal(18,2)))), DecimalType(18,2), true) AS other_chan_sales_price#86, ss_qty#25, ss_wc#26, ss_sp#27, ws_qty#50, cs_qty#76] +Input [15]: [ss_sold_year#24, ss_item_sk#2, ss_customer_sk#3, ss_qty#25, ss_wc#26, ss_sp#27, ws_qty#50, ws_wc#51, ws_sp#52, cs_sold_year#74, cs_item_sk#56, cs_customer_sk#75, cs_qty#76, cs_wc#77, cs_sp#78] + +(60) TakeOrderedAndProject +Input [12]: [ratio#80, store_qty#81, store_wholesale_cost#82, store_sales_price#83, other_chan_qty#84, other_chan_wholesale_cost#85, other_chan_sales_price#86, ss_qty#25, ss_wc#26, ss_sp#27, ws_qty#50, cs_qty#76] +Arguments: 100, [ratio#80 ASC NULLS FIRST, ss_qty#25 DESC NULLS LAST, ss_wc#26 DESC NULLS LAST, ss_sp#27 DESC NULLS LAST, other_chan_qty#84 ASC NULLS FIRST, other_chan_wholesale_cost#85 ASC NULLS FIRST, other_chan_sales_price#86 ASC NULLS FIRST, round((cast(ss_qty#25 as double) / cast(coalesce((ws_qty#50 + cs_qty#76), 1) as double)), 2) ASC NULLS FIRST], [ratio#80, store_qty#81, store_wholesale_cost#82, store_sales_price#83, other_chan_qty#84, other_chan_wholesale_cost#85, other_chan_sales_price#86] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/simplified.txt index fc8bc4f3f..977070283 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q78/simplified.txt @@ -1,81 +1,88 @@ -TakeOrderedAndProject [cs_qty,other_chan_qty,other_chan_sales_price,other_chan_wholesale_cost,ratio,ss_qty,ss_sp,ss_wc,store_qty,store_sales_price,store_wholesale_cost,ws_qty] - WholeStageCodegen - Project [cs_qty,cs_sp,cs_wc,ss_qty,ss_sp,ss_wc,ws_qty,ws_sp,ws_wc] - BroadcastHashJoin [cs_customer_sk,cs_item_sk,cs_sold_year,ss_customer_sk,ss_item_sk,ss_sold_year] - Project [ss_customer_sk,ss_item_sk,ss_qty,ss_sold_year,ss_sp,ss_wc,ws_qty,ws_sp,ws_wc] - BroadcastHashJoin [ss_customer_sk,ss_item_sk,ss_sold_year,ws_customer_sk,ws_item_sk,ws_sold_year] - HashAggregate [d_year,ss_customer_sk,ss_item_sk,sum,sum,sum,sum(UnscaledValue(ss_sales_price)),sum(UnscaledValue(ss_wholesale_cost)),sum(cast(ss_quantity as bigint))] [ss_qty,ss_sold_year,ss_sp,ss_wc,sum,sum,sum,sum(UnscaledValue(ss_sales_price)),sum(UnscaledValue(ss_wholesale_cost)),sum(cast(ss_quantity as bigint))] +TakeOrderedAndProject [ratio,ss_qty,ss_wc,ss_sp,other_chan_qty,other_chan_wholesale_cost,other_chan_sales_price,ws_qty,cs_qty,store_qty,store_wholesale_cost,store_sales_price] + WholeStageCodegen (12) + Project [ss_qty,ws_qty,cs_qty,ss_wc,ss_sp,ws_wc,cs_wc,ws_sp,cs_sp] + BroadcastHashJoin [ss_sold_year,ss_item_sk,ss_customer_sk,cs_sold_year,cs_item_sk,cs_customer_sk] + Project [ss_sold_year,ss_item_sk,ss_customer_sk,ss_qty,ss_wc,ss_sp,ws_qty,ws_wc,ws_sp] + BroadcastHashJoin [ss_sold_year,ss_item_sk,ss_customer_sk,ws_sold_year,ws_item_sk,ws_customer_sk] + HashAggregate [d_year,ss_item_sk,ss_customer_sk,sum,sum,sum] [sum(cast(ss_quantity as bigint)),sum(UnscaledValue(ss_wholesale_cost)),sum(UnscaledValue(ss_sales_price)),ss_sold_year,ss_qty,ss_wc,ss_sp,sum,sum,sum] InputAdapter - Exchange [d_year,ss_customer_sk,ss_item_sk] #1 - WholeStageCodegen - HashAggregate [d_year,ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_wholesale_cost,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Project [d_year,ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_wholesale_cost] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_wholesale_cost] + Exchange [d_year,ss_item_sk,ss_customer_sk] #1 + WholeStageCodegen (3) + HashAggregate [d_year,ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price] [sum,sum,sum,sum,sum,sum] + Project [ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_quantity,ss_wholesale_cost,ss_sales_price] Filter [sr_ticket_number] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_ticket_number,ss_wholesale_cost] - Filter [ss_customer_sk,ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_ticket_number,ss_wholesale_cost] [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_sold_date_sk,ss_ticket_number,ss_wholesale_cost] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,ss_sales_price] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [sr_item_sk,sr_ticket_number] - Filter [sr_item_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] [sr_item_sk,sr_ticket_number] + WholeStageCodegen (1) + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [d_date_sk,d_year] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (7) Filter [ws_qty] - HashAggregate [d_year,sum,sum,sum,sum(UnscaledValue(ws_sales_price)),sum(UnscaledValue(ws_wholesale_cost)),sum(cast(ws_quantity as bigint)),ws_bill_customer_sk,ws_item_sk] [sum,sum,sum,sum(UnscaledValue(ws_sales_price)),sum(UnscaledValue(ws_wholesale_cost)),sum(cast(ws_quantity as bigint)),ws_customer_sk,ws_qty,ws_sold_year,ws_sp,ws_wc] + HashAggregate [d_year,ws_item_sk,ws_bill_customer_sk,sum,sum,sum] [sum(cast(ws_quantity as bigint)),sum(UnscaledValue(ws_wholesale_cost)),sum(UnscaledValue(ws_sales_price)),ws_sold_year,ws_customer_sk,ws_qty,ws_wc,ws_sp,sum,sum,sum] InputAdapter - Exchange [d_year,ws_bill_customer_sk,ws_item_sk] #5 - WholeStageCodegen - HashAggregate [d_year,sum,sum,sum,sum,sum,sum,ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_wholesale_cost] [sum,sum,sum,sum,sum,sum] - Project [d_year,ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_wholesale_cost] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_customer_sk,ws_item_sk,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] + Exchange [d_year,ws_item_sk,ws_bill_customer_sk] #5 + WholeStageCodegen (6) + HashAggregate [d_year,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price] [sum,sum,sum,sum,sum,sum] + Project [ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_wholesale_cost,ws_sales_price] Filter [wr_order_number] - BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Project [ws_bill_customer_sk,ws_item_sk,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] - Filter [ws_bill_customer_sk,ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_item_sk,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] [ws_bill_customer_sk,ws_item_sk,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_wholesale_cost] + BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] + Filter [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_order_number,ws_quantity,ws_wholesale_cost,ws_sales_price] InputAdapter BroadcastExchange #6 - WholeStageCodegen - Project [wr_item_sk,wr_order_number] - Filter [wr_item_sk,wr_order_number] - Scan parquet default.web_returns [wr_item_sk,wr_order_number] [wr_item_sk,wr_order_number] + WholeStageCodegen (4) + Filter [wr_order_number,wr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_order_number] InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 + ReusedExchange [d_date_sk,d_year] #3 InputAdapter BroadcastExchange #7 - WholeStageCodegen + WholeStageCodegen (11) Filter [cs_qty] - HashAggregate [cs_bill_customer_sk,cs_item_sk,d_year,sum,sum,sum,sum(UnscaledValue(cs_sales_price)),sum(UnscaledValue(cs_wholesale_cost)),sum(cast(cs_quantity as bigint))] [cs_customer_sk,cs_qty,cs_sold_year,cs_sp,cs_wc,sum,sum,sum,sum(UnscaledValue(cs_sales_price)),sum(UnscaledValue(cs_wholesale_cost)),sum(cast(cs_quantity as bigint))] + HashAggregate [d_year,cs_item_sk,cs_bill_customer_sk,sum,sum,sum] [sum(cast(cs_quantity as bigint)),sum(UnscaledValue(cs_wholesale_cost)),sum(UnscaledValue(cs_sales_price)),cs_sold_year,cs_customer_sk,cs_qty,cs_wc,cs_sp,sum,sum,sum] InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk,d_year] #8 - WholeStageCodegen - HashAggregate [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_wholesale_cost,d_year,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_wholesale_cost,d_year] + Exchange [d_year,cs_item_sk,cs_bill_customer_sk] #8 + WholeStageCodegen (10) + HashAggregate [d_year,cs_item_sk,cs_bill_customer_sk,cs_quantity,cs_wholesale_cost,cs_sales_price] [sum,sum,sum,sum,sum,sum] + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price,d_year] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_wholesale_cost,cs_sales_price] Filter [cr_order_number] - BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] - Project [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] [cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_sales_price,cs_sold_date_sk,cs_wholesale_cost] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Filter [cs_sold_date_sk,cs_item_sk,cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_order_number,cs_quantity,cs_wholesale_cost,cs_sales_price] InputAdapter BroadcastExchange #9 - WholeStageCodegen - Project [cr_item_sk,cr_order_number] - Filter [cr_item_sk,cr_order_number] - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] [cr_item_sk,cr_order_number] + WholeStageCodegen (8) + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number] InputAdapter - ReusedExchange [d_date_sk,d_year] [d_date_sk,d_year] #3 + ReusedExchange [d_date_sk,d_year] #3 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/explain.txt index 716a1b741..63cde7268 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/explain.txt @@ -1,32 +1,193 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[c_last_name#1 ASC NULLS FIRST,c_first_name#2 ASC NULLS FIRST,substring(s_city#3, 1, 30) ASC NULLS FIRST,profit#4 ASC NULLS FIRST], output=[c_last_name#1,c_first_name#2,substring(s_city, 1, 30)#5,ss_ticket_number#6,amt#7,profit#4]) -+- *(6) Project [c_last_name#1, c_first_name#2, substring(s_city#3, 1, 30) AS substring(s_city, 1, 30)#5, ss_ticket_number#6, amt#7, profit#4, s_city#3] - +- *(6) BroadcastHashJoin [ss_customer_sk#8], [c_customer_sk#9], Inner, BuildRight - :- *(6) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3], functions=[sum(UnscaledValue(ss_coupon_amt#11)), sum(UnscaledValue(ss_net_profit#12))]) - : +- Exchange hashpartitioning(ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3, 5) - : +- *(4) HashAggregate(keys=[ss_ticket_number#6, ss_customer_sk#8, ss_addr_sk#10, s_city#3], functions=[partial_sum(UnscaledValue(ss_coupon_amt#11)), partial_sum(UnscaledValue(ss_net_profit#12))]) - : +- *(4) Project [ss_customer_sk#8, ss_addr_sk#10, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12, s_city#3] - : +- *(4) BroadcastHashJoin [ss_hdemo_sk#13], [hd_demo_sk#14], Inner, BuildRight - : :- *(4) Project [ss_customer_sk#8, ss_hdemo_sk#13, ss_addr_sk#10, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12, s_city#3] - : : +- *(4) BroadcastHashJoin [ss_store_sk#15], [s_store_sk#16], Inner, BuildRight - : : :- *(4) Project [ss_customer_sk#8, ss_hdemo_sk#13, ss_addr_sk#10, ss_store_sk#15, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12] - : : : +- *(4) BroadcastHashJoin [ss_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight - : : : :- *(4) Project [ss_sold_date_sk#17, ss_customer_sk#8, ss_hdemo_sk#13, ss_addr_sk#10, ss_store_sk#15, ss_ticket_number#6, ss_coupon_amt#11, ss_net_profit#12] - : : : : +- *(4) Filter (((isnotnull(ss_sold_date_sk#17) && isnotnull(ss_store_sk#15)) && isnotnull(ss_hdemo_sk#13)) && isnotnull(ss_customer_sk#8)) - : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_date_sk#17,ss_customer_sk#8,ss_hdemo_sk#13,ss_addr_sk#10,ss_store_sk#15,ss_ticket_number#6,ss_coupon_amt#11,ss_net_profit#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_custome..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [s_store_sk#16, s_city#3] - : : +- *(2) Filter (((isnotnull(s_number_employees#21) && (s_number_employees#21 >= 200)) && (s_number_employees#21 <= 295)) && isnotnull(s_store_sk#16)) - : : +- *(2) FileScan parquet default.store[s_store_sk#16,s_number_employees#21,s_city#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_number_employees), GreaterThanOrEqual(s_number_employees,200), LessThanOrEqual(s_num..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [hd_demo_sk#14] - : +- *(3) Filter (((hd_dep_count#22 = 6) || (hd_vehicle_count#23 > 2)) && isnotnull(hd_demo_sk#14)) - : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#14,hd_dep_count#22,hd_vehicle_count#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [Or(EqualTo(hd_dep_count,6),GreaterThan(hd_vehicle_count,2)), IsNotNull(hd_demo_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(5) Project [c_customer_sk#9, c_first_name#2, c_last_name#1] - +- *(5) Filter isnotnull(c_customer_sk#9) - +- *(5) FileScan parquet default.customer[c_customer_sk#9,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (34) ++- * Project (33) + +- * BroadcastHashJoin Inner BuildRight (32) + :- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.household_demographics (18) + +- BroadcastExchange (31) + +- * Filter (30) + +- * ColumnarToRow (29) + +- Scan parquet default.customer (28) + + +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] + +(3) Filter [codegen id : 4] +Input [8]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#5)) AND isnotnull(ss_hdemo_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#10, d_dow#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_dow), EqualTo(d_dow,1), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] +Condition : (((isnotnull(d_dow#11) AND (d_dow#11 = 1)) AND d_year#10 IN (1999,2000,2001)) AND isnotnull(d_date_sk#9)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#10, d_dow#11] + +(8) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] +Join condition: None + +(10) Project [codegen id : 4] +Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, d_date_sk#9] + +(11) Scan parquet default.store +Output [3]: [s_store_sk#13, s_number_employees#14, s_city#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_number_employees), GreaterThanOrEqual(s_number_employees,200), LessThanOrEqual(s_number_employees,295), IsNotNull(s_store_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [s_store_sk#13, s_number_employees#14, s_city#15] + +(13) Filter [codegen id : 2] +Input [3]: [s_store_sk#13, s_number_employees#14, s_city#15] +Condition : (((isnotnull(s_number_employees#14) AND (s_number_employees#14 >= 200)) AND (s_number_employees#14 <= 295)) AND isnotnull(s_store_sk#13)) + +(14) Project [codegen id : 2] +Output [2]: [s_store_sk#13, s_city#15] +Input [3]: [s_store_sk#13, s_number_employees#14, s_city#15] + +(15) BroadcastExchange +Input [2]: [s_store_sk#13, s_city#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#13] +Join condition: None + +(17) Project [codegen id : 4] +Output [7]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#15] +Input [9]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_store_sk#13, s_city#15] + +(18) Scan parquet default.household_demographics +Output [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [Or(EqualTo(hd_dep_count,6),GreaterThan(hd_vehicle_count,2)), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] + +(20) Filter [codegen id : 3] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] +Condition : (((hd_dep_count#18 = 6) OR (hd_vehicle_count#19 > 2)) AND isnotnull(hd_demo_sk#17)) + +(21) Project [codegen id : 3] +Output [1]: [hd_demo_sk#17] +Input [3]: [hd_demo_sk#17, hd_dep_count#18, hd_vehicle_count#19] + +(22) BroadcastExchange +Input [1]: [hd_demo_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#17] +Join condition: None + +(24) Project [codegen id : 4] +Output [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#15] +Input [8]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#15, hd_demo_sk#17] + +(25) HashAggregate [codegen id : 4] +Input [6]: [ss_customer_sk#2, ss_addr_sk#4, ss_ticket_number#6, ss_coupon_amt#7, ss_net_profit#8, s_city#15] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#7)), partial_sum(UnscaledValue(ss_net_profit#8))] +Aggregate Attributes [2]: [sum#21, sum#22] +Results [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15, sum#23, sum#24] + +(26) Exchange +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15, sum#23, sum#24] +Arguments: hashpartitioning(ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15, 5), true, [id=#25] + +(27) HashAggregate [codegen id : 6] +Input [6]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15, sum#23, sum#24] +Keys [4]: [ss_ticket_number#6, ss_customer_sk#2, ss_addr_sk#4, s_city#15] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#7)), sum(UnscaledValue(ss_net_profit#8))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#7))#26, sum(UnscaledValue(ss_net_profit#8))#27] +Results [5]: [ss_ticket_number#6, ss_customer_sk#2, s_city#15, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#7))#26,17,2) AS amt#28, MakeDecimal(sum(UnscaledValue(ss_net_profit#8))#27,17,2) AS profit#29] + +(28) Scan parquet default.customer +Output [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 5] +Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] + +(30) Filter [codegen id : 5] +Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] +Condition : isnotnull(c_customer_sk#30) + +(31) BroadcastExchange +Input [3]: [c_customer_sk#30, c_first_name#31, c_last_name#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] + +(32) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#30] +Join condition: None + +(33) Project [codegen id : 6] +Output [7]: [c_last_name#32, c_first_name#31, substr(s_city#15, 1, 30) AS substr(s_city, 1, 30)#34, ss_ticket_number#6, amt#28, profit#29, s_city#15] +Input [8]: [ss_ticket_number#6, ss_customer_sk#2, s_city#15, amt#28, profit#29, c_customer_sk#30, c_first_name#31, c_last_name#32] + +(34) TakeOrderedAndProject +Input [7]: [c_last_name#32, c_first_name#31, substr(s_city, 1, 30)#34, ss_ticket_number#6, amt#28, profit#29, s_city#15] +Arguments: 100, [c_last_name#32 ASC NULLS FIRST, c_first_name#31 ASC NULLS FIRST, substr(s_city#15, 1, 30) ASC NULLS FIRST, profit#29 ASC NULLS FIRST], [c_last_name#32, c_first_name#31, substr(s_city, 1, 30)#34, ss_ticket_number#6, amt#28, profit#29] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/simplified.txt index 01ce92730..5b6177488 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q79/simplified.txt @@ -1,42 +1,50 @@ -TakeOrderedAndProject [amt,c_first_name,c_last_name,profit,s_city,ss_ticket_number,substring(s_city, 1, 30)] - WholeStageCodegen - Project [amt,c_first_name,c_last_name,profit,s_city,ss_ticket_number] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - HashAggregate [s_city,ss_addr_sk,ss_customer_sk,ss_ticket_number,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] [amt,profit,sum,sum,sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit))] +TakeOrderedAndProject [c_last_name,c_first_name,s_city,profit,substr(s_city, 1, 30),ss_ticket_number,amt] + WholeStageCodegen (6) + Project [c_last_name,c_first_name,s_city,ss_ticket_number,amt,profit] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city,sum,sum] [sum(UnscaledValue(ss_coupon_amt)),sum(UnscaledValue(ss_net_profit)),amt,profit,sum,sum] InputAdapter - Exchange [s_city,ss_addr_sk,ss_customer_sk,ss_ticket_number] #1 - WholeStageCodegen - HashAggregate [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number,sum,sum,sum,sum] [sum,sum,sum,sum] - Project [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_net_profit,ss_ticket_number] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [s_city,ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_ticket_number] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_store_sk,ss_ticket_number] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Filter [ss_customer_sk,ss_hdemo_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_addr_sk,ss_coupon_amt,ss_customer_sk,ss_hdemo_sk,ss_net_profit,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + Exchange [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city] #1 + WholeStageCodegen (4) + HashAggregate [ss_ticket_number,ss_customer_sk,ss_addr_sk,s_city,ss_coupon_amt,ss_net_profit] [sum,sum,sum,sum] + Project [ss_customer_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_city] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit,s_city] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk,ss_hdemo_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_ticket_number,ss_coupon_amt,ss_net_profit] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_dow,d_year] - Scan parquet default.date_dim [d_date_sk,d_dow,d_year] [d_date_sk,d_dow,d_year] + Filter [d_dow,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_dow] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [s_city,s_store_sk] + WholeStageCodegen (2) + Project [s_store_sk,s_city] Filter [s_number_employees,s_store_sk] - Scan parquet default.store [s_city,s_number_employees,s_store_sk] [s_city,s_number_employees,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_number_employees,s_city] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [c_customer_sk,c_first_name,c_last_name] - Filter [c_customer_sk] - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/explain.txt index 7fe2f14b3..e9f9a92a0 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/explain.txt @@ -1,45 +1,272 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[s_store_name#1 ASC NULLS FIRST], output=[s_store_name#1,sum(ss_net_profit)#2]) -+- *(9) HashAggregate(keys=[s_store_name#1], functions=[sum(UnscaledValue(ss_net_profit#3))]) - +- Exchange hashpartitioning(s_store_name#1, 5) - +- *(8) HashAggregate(keys=[s_store_name#1], functions=[partial_sum(UnscaledValue(ss_net_profit#3))]) - +- *(8) Project [ss_net_profit#3, s_store_name#1] - +- *(8) BroadcastHashJoin [substring(s_zip#4, 1, 2)], [substring(ca_zip#5, 1, 2)], Inner, BuildRight - :- *(8) Project [ss_net_profit#3, s_store_name#1, s_zip#4] - : +- *(8) BroadcastHashJoin [ss_store_sk#6], [s_store_sk#7], Inner, BuildRight - : :- *(8) Project [ss_store_sk#6, ss_net_profit#3] - : : +- *(8) BroadcastHashJoin [ss_sold_date_sk#8], [d_date_sk#9], Inner, BuildRight - : : :- *(8) Project [ss_sold_date_sk#8, ss_store_sk#6, ss_net_profit#3] - : : : +- *(8) Filter (isnotnull(ss_sold_date_sk#8) && isnotnull(ss_store_sk#6)) - : : : +- *(8) FileScan parquet default.store_sales[ss_sold_date_sk#8,ss_store_sk#6,ss_net_profit#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [d_date_sk#9] - : : +- *(1) Filter ((((isnotnull(d_qoy#10) && isnotnull(d_year#11)) && (d_qoy#10 = 2)) && (d_year#11 = 1998)) && isnotnull(d_date_sk#9)) - : : +- *(1) FileScan parquet default.date_dim[d_date_sk#9,d_year#11,d_qoy#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,1998), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [s_store_sk#7, s_store_name#1, s_zip#4] - : +- *(2) Filter (isnotnull(s_store_sk#7) && isnotnull(s_zip#4)) - : +- *(2) FileScan parquet default.store[s_store_sk#7,s_store_name#1,s_zip#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(substring(input[0, string, true], 1, 2))) - +- *(7) HashAggregate(keys=[ca_zip#5], functions=[]) - +- Exchange hashpartitioning(ca_zip#5, 5) - +- *(6) HashAggregate(keys=[ca_zip#5], functions=[]) - +- *(6) BroadcastHashJoin [coalesce(ca_zip#5, )], [coalesce(ca_zip#12, )], LeftSemi, BuildRight, (ca_zip#5 <=> ca_zip#12) - :- *(6) Project [substring(ca_zip#13, 1, 5) AS ca_zip#5] - : +- *(6) Filter (substring(ca_zip#13, 1, 5) INSET (56910,69952,63792,39371,74351,11101,25003,97189,57834,73134,62377,51200,32754,22752,86379,14171,91110,40162,98569,28709,13394,66162,25733,25782,26065,18383,51949,87343,50298,83849,33786,64528,23470,67030,46136,25280,46820,77721,99076,18426,31880,17871,98235,45748,49156,18652,72013,51622,43848,78567,41248,13695,44165,67853,54917,53179,64034,10567,71791,68908,55565,59402,64147,85816,57855,61547,27700,68100,28810,58263,15723,83933,51103,58058,90578,82276,81096,81426,96451,77556,38607,76638,18906,62971,57047,48425,35576,11928,30625,83444,73520,51650,57647,60099,30122,94983,24128,10445,41368,26233,26859,21756,24676,19849,36420,38193,58470,39127,13595,87501,24317,15455,69399,98025,81019,48033,11376,39516,67875,92712,14867,38122,29741,42961,30469,51211,56458,15559,16021,33123,33282,33515,72823,54601,76698,56240,72175,60279,20004,68806,72325,28488,43933,50412,45200,22246,78668,79777,96765,67301,73273,49448,82636,23932,47305,29839,39192,18799,61265,37125,58943,64457,88424,24610,84935,89360,68893,30431,28898,10336,90257,59166,46081,26105,96888,36634,86284,35258,39972,22927,73241,53268,24206,27385,99543,31671,14663,30903,39861,24996,63089,88086,83921,21076,67897,66708,45721,60576,25103,52867,30450,36233,30010,96576,73171,56571,56575,64544,13955,78451,43285,18119,16725,83041,76107,79994,54364,35942,56691,19769,63435,34102,18845,22744,13354,75691,45549,23968,31387,83144,13375,15765,28577,88190,19736,73650,37930,25989,83926,94898,51798,39736,22437,55253,38415,71256,18376,42029,25858,44438,19515,38935,51649,71954,15882,18767,63193,25486,49130,37126,40604,34425,17043,12305,11634,26653,94167,36446,10516,67473,66864,72425,63981,18842,22461,42666,47770,69035,70372,28587,45266,15371,15798,45375,90225,16807,31016,68014,21337,19505,50016,10144,84093,21286,19430,34322,91068,94945,72305,24671,58048,65084,28545,21195,20548,22245,77191,96976,48583,76231,15734,61810,11356,68621,68786,98359,41367,26689,69913,76614,68101,88885,50308,79077,18270,28915,29178,53672,62878,10390,14922,68341,56529,41766,68309,56616,15126,61860,97789,11489,45692,41918,72151,72550,27156,36495,70738,17879,53535,17920,68880,78890,35850,14089,58078,65164,27068,26231,13376,57665,32213,77610,87816,21309,15146,86198,91137,55307,67467,40558,94627,82136,22351,89091,20260,23006,91393,47537,62496,98294,18840,71286,81312,31029,70466,35458,14060,22685,28286,25631,19512,40081,63837,14328,35474,22152,76232,51061,86057,17183) && isnotnull(substring(ca_zip#13, 1, 5))) - : +- *(6) FileScan parquet default.customer_address[ca_zip#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ))) - +- *(5) Project [ca_zip#12] - +- *(5) Filter (count(1)#14 > 10) - +- *(5) HashAggregate(keys=[ca_zip#13], functions=[count(1)]) - +- Exchange hashpartitioning(ca_zip#13, 5) - +- *(4) HashAggregate(keys=[ca_zip#13], functions=[partial_count(1)]) - +- *(4) Project [ca_zip#13] - +- *(4) BroadcastHashJoin [ca_address_sk#15], [c_current_addr_sk#16], Inner, BuildRight - :- *(4) Project [ca_address_sk#15, ca_zip#13] - : +- *(4) Filter isnotnull(ca_address_sk#15) - : +- *(4) FileScan parquet default.customer_address[ca_address_sk#15,ca_zip#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [c_current_addr_sk#16] - +- *(3) Filter ((isnotnull(c_preferred_cust_flag#17) && (c_preferred_cust_flag#17 = Y)) && isnotnull(c_current_addr_sk#16)) - +- *(3) FileScan parquet default.customer[c_current_addr_sk#16,c_preferred_cust_flag#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_preferred_cust_flag), EqualTo(c_preferred_cust_flag,Y), IsNotNull(c_current_addr_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (47) ++- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.store (11) + +- BroadcastExchange (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * Project (37) + +- * BroadcastHashJoin LeftSemi BuildRight (36) + :- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet default.customer_address (17) + +- BroadcastExchange (35) + +- * Project (34) + +- * Filter (33) + +- * HashAggregate (32) + +- Exchange (31) + +- * HashAggregate (30) + +- * Project (29) + +- * BroadcastHashJoin Inner BuildRight (28) + :- * Filter (22) + : +- * ColumnarToRow (21) + : +- Scan parquet default.customer_address (20) + +- BroadcastExchange (27) + +- * Project (26) + +- * Filter (25) + +- * ColumnarToRow (24) + +- Scan parquet default.customer (23) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] + +(3) Filter [codegen id : 8] +Input [3]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 2)) AND (d_year#5 = 1998)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 8] +Output [2]: [ss_store_sk#2, ss_net_profit#3] +Input [4]: [ss_sold_date_sk#1, ss_store_sk#2, ss_net_profit#3, d_date_sk#4] + +(11) Scan parquet default.store +Output [3]: [s_store_sk#8, s_store_name#9, s_zip#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_zip)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [s_store_sk#8, s_store_name#9, s_zip#10] + +(13) Filter [codegen id : 2] +Input [3]: [s_store_sk#8, s_store_name#9, s_zip#10] +Condition : (isnotnull(s_store_sk#8) AND isnotnull(s_zip#10)) + +(14) BroadcastExchange +Input [3]: [s_store_sk#8, s_store_name#9, s_zip#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(15) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ss_store_sk#2] +Right keys [1]: [s_store_sk#8] +Join condition: None + +(16) Project [codegen id : 8] +Output [3]: [ss_net_profit#3, s_store_name#9, s_zip#10] +Input [5]: [ss_store_sk#2, ss_net_profit#3, s_store_sk#8, s_store_name#9, s_zip#10] + +(17) Scan parquet default.customer_address +Output [1]: [ca_zip#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 6] +Input [1]: [ca_zip#12] + +(19) Filter [codegen id : 6] +Input [1]: [ca_zip#12] +Condition : (substr(ca_zip#12, 1, 5) INSET (56910,69952,63792,39371,74351,11101,25003,97189,57834,73134,62377,51200,32754,22752,86379,14171,91110,40162,98569,28709,13394,66162,25733,25782,26065,18383,51949,87343,50298,83849,33786,64528,23470,67030,46136,25280,46820,77721,99076,18426,31880,17871,98235,45748,49156,18652,72013,51622,43848,78567,41248,13695,44165,67853,54917,53179,64034,10567,71791,68908,55565,59402,64147,85816,57855,61547,27700,68100,28810,58263,15723,83933,51103,58058,90578,82276,81096,81426,96451,77556,38607,76638,18906,62971,57047,48425,35576,11928,30625,83444,73520,51650,57647,60099,30122,94983,24128,10445,41368,26233,26859,21756,24676,19849,36420,38193,58470,39127,13595,87501,24317,15455,69399,98025,81019,48033,11376,39516,67875,92712,14867,38122,29741,42961,30469,51211,56458,15559,16021,33123,33282,33515,72823,54601,76698,56240,72175,60279,20004,68806,72325,28488,43933,50412,45200,22246,78668,79777,96765,67301,73273,49448,82636,23932,47305,29839,39192,18799,61265,37125,58943,64457,88424,24610,84935,89360,68893,30431,28898,10336,90257,59166,46081,26105,96888,36634,86284,35258,39972,22927,73241,53268,24206,27385,99543,31671,14663,30903,39861,24996,63089,88086,83921,21076,67897,66708,45721,60576,25103,52867,30450,36233,30010,96576,73171,56571,56575,64544,13955,78451,43285,18119,16725,83041,76107,79994,54364,35942,56691,19769,63435,34102,18845,22744,13354,75691,45549,23968,31387,83144,13375,15765,28577,88190,19736,73650,37930,25989,83926,94898,51798,39736,22437,55253,38415,71256,18376,42029,25858,44438,19515,38935,51649,71954,15882,18767,63193,25486,49130,37126,40604,34425,17043,12305,11634,26653,94167,36446,10516,67473,66864,72425,63981,18842,22461,42666,47770,69035,70372,28587,45266,15371,15798,45375,90225,16807,31016,68014,21337,19505,50016,10144,84093,21286,19430,34322,91068,94945,72305,24671,58048,65084,28545,21195,20548,22245,77191,96976,48583,76231,15734,61810,11356,68621,68786,98359,41367,26689,69913,76614,68101,88885,50308,79077,18270,28915,29178,53672,62878,10390,14922,68341,56529,41766,68309,56616,15126,61860,97789,11489,45692,41918,72151,72550,27156,36495,70738,17879,53535,17920,68880,78890,35850,14089,58078,65164,27068,26231,13376,57665,32213,77610,87816,21309,15146,86198,91137,55307,67467,40558,94627,82136,22351,89091,20260,23006,91393,47537,62496,98294,18840,71286,81312,31029,70466,35458,14060,22685,28286,25631,19512,40081,63837,14328,35474,22152,76232,51061,86057,17183) AND isnotnull(substr(ca_zip#12, 1, 5))) + +(20) Scan parquet default.customer_address +Output [2]: [ca_address_sk#13, ca_zip#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#13, ca_zip#12] + +(22) Filter [codegen id : 4] +Input [2]: [ca_address_sk#13, ca_zip#12] +Condition : isnotnull(ca_address_sk#13) + +(23) Scan parquet default.customer +Output [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_preferred_cust_flag), EqualTo(c_preferred_cust_flag,Y), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 3] +Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] + +(25) Filter [codegen id : 3] +Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] +Condition : ((isnotnull(c_preferred_cust_flag#15) AND (c_preferred_cust_flag#15 = Y)) AND isnotnull(c_current_addr_sk#14)) + +(26) Project [codegen id : 3] +Output [1]: [c_current_addr_sk#14] +Input [2]: [c_current_addr_sk#14, c_preferred_cust_flag#15] + +(27) BroadcastExchange +Input [1]: [c_current_addr_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(28) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ca_address_sk#13] +Right keys [1]: [c_current_addr_sk#14] +Join condition: None + +(29) Project [codegen id : 4] +Output [1]: [ca_zip#12] +Input [3]: [ca_address_sk#13, ca_zip#12, c_current_addr_sk#14] + +(30) HashAggregate [codegen id : 4] +Input [1]: [ca_zip#12] +Keys [1]: [ca_zip#12] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#17] +Results [2]: [ca_zip#12, count#18] + +(31) Exchange +Input [2]: [ca_zip#12, count#18] +Arguments: hashpartitioning(ca_zip#12, 5), true, [id=#19] + +(32) HashAggregate [codegen id : 5] +Input [2]: [ca_zip#12, count#18] +Keys [1]: [ca_zip#12] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#20] +Results [2]: [substr(ca_zip#12, 1, 5) AS ca_zip#21, count(1)#20 AS count(1)#22] + +(33) Filter [codegen id : 5] +Input [2]: [ca_zip#21, count(1)#22] +Condition : (count(1)#22 > 10) + +(34) Project [codegen id : 5] +Output [1]: [ca_zip#21] +Input [2]: [ca_zip#21, count(1)#22] + +(35) BroadcastExchange +Input [1]: [ca_zip#21] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true])),false), [id=#23] + +(36) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [coalesce(substr(ca_zip#12, 1, 5), ), isnull(substr(ca_zip#12, 1, 5))] +Right keys [2]: [coalesce(ca_zip#21, ), isnull(ca_zip#21)] +Join condition: None + +(37) Project [codegen id : 6] +Output [1]: [substr(ca_zip#12, 1, 5) AS ca_zip#24] +Input [1]: [ca_zip#12] + +(38) HashAggregate [codegen id : 6] +Input [1]: [ca_zip#24] +Keys [1]: [ca_zip#24] +Functions: [] +Aggregate Attributes: [] +Results [1]: [ca_zip#24] + +(39) Exchange +Input [1]: [ca_zip#24] +Arguments: hashpartitioning(ca_zip#24, 5), true, [id=#25] + +(40) HashAggregate [codegen id : 7] +Input [1]: [ca_zip#24] +Keys [1]: [ca_zip#24] +Functions: [] +Aggregate Attributes: [] +Results [1]: [ca_zip#24] + +(41) BroadcastExchange +Input [1]: [ca_zip#24] +Arguments: HashedRelationBroadcastMode(List(substr(input[0, string, true], 1, 2)),false), [id=#26] + +(42) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [substr(s_zip#10, 1, 2)] +Right keys [1]: [substr(ca_zip#24, 1, 2)] +Join condition: None + +(43) Project [codegen id : 8] +Output [2]: [ss_net_profit#3, s_store_name#9] +Input [4]: [ss_net_profit#3, s_store_name#9, s_zip#10, ca_zip#24] + +(44) HashAggregate [codegen id : 8] +Input [2]: [ss_net_profit#3, s_store_name#9] +Keys [1]: [s_store_name#9] +Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum#27] +Results [2]: [s_store_name#9, sum#28] + +(45) Exchange +Input [2]: [s_store_name#9, sum#28] +Arguments: hashpartitioning(s_store_name#9, 5), true, [id=#29] + +(46) HashAggregate [codegen id : 9] +Input [2]: [s_store_name#9, sum#28] +Keys [1]: [s_store_name#9] +Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#30] +Results [2]: [s_store_name#9, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#30,17,2) AS sum(ss_net_profit)#31] + +(47) TakeOrderedAndProject +Input [2]: [s_store_name#9, sum(ss_net_profit)#31] +Arguments: 100, [s_store_name#9 ASC NULLS FIRST], [s_store_name#9, sum(ss_net_profit)#31] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/simplified.txt index fd00e42e4..cc62907dc 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q8/simplified.txt @@ -1,61 +1,70 @@ TakeOrderedAndProject [s_store_name,sum(ss_net_profit)] - WholeStageCodegen - HashAggregate [s_store_name,sum,sum(UnscaledValue(ss_net_profit))] [sum,sum(UnscaledValue(ss_net_profit)),sum(ss_net_profit)] + WholeStageCodegen (9) + HashAggregate [s_store_name,sum] [sum(UnscaledValue(ss_net_profit)),sum(ss_net_profit),sum] InputAdapter Exchange [s_store_name] #1 - WholeStageCodegen - HashAggregate [s_store_name,ss_net_profit,sum,sum] [sum,sum] - Project [s_store_name,ss_net_profit] - BroadcastHashJoin [ca_zip,s_zip] - Project [s_store_name,s_zip,ss_net_profit] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [ss_net_profit,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_net_profit,ss_sold_date_sk,ss_store_sk] - Filter [ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_net_profit,ss_sold_date_sk,ss_store_sk] [ss_net_profit,ss_sold_date_sk,ss_store_sk] + WholeStageCodegen (8) + HashAggregate [s_store_name,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_store_name] + BroadcastHashJoin [s_zip,ca_zip] + Project [ss_net_profit,s_store_name,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_qoy,d_year] - Scan parquet default.date_dim [d_date_sk,d_qoy,d_year] [d_date_sk,d_qoy,d_year] + Filter [d_qoy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_qoy] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [s_store_name,s_store_sk,s_zip] - Filter [s_store_sk,s_zip] - Scan parquet default.store [s_store_name,s_store_sk,s_zip] [s_store_name,s_store_sk,s_zip] + WholeStageCodegen (2) + Filter [s_store_sk,s_zip] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_name,s_zip] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (7) HashAggregate [ca_zip] InputAdapter Exchange [ca_zip] #5 - WholeStageCodegen + WholeStageCodegen (6) HashAggregate [ca_zip] - BroadcastHashJoin [ca_zip,ca_zip] - Project [ca_zip] + Project [ca_zip] + BroadcastHashJoin [ca_zip,ca_zip] Filter [ca_zip] - Scan parquet default.customer_address [ca_zip] [ca_zip] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen - Project [ca_zip] - Filter [count(1)] - HashAggregate [ca_zip,count,count(1)] [ca_zip,count,count(1),count(1)] - InputAdapter - Exchange [ca_zip] #7 - WholeStageCodegen - HashAggregate [ca_zip,count,count] [count,count] - Project [ca_zip] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [ca_address_sk,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_zip] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [ca_zip] + Filter [count(1)] + HashAggregate [ca_zip,count] [count(1),ca_zip,count(1),count] + InputAdapter + Exchange [ca_zip] #7 + WholeStageCodegen (4) + HashAggregate [ca_zip] [count,count] + Project [ca_zip] + BroadcastHashJoin [ca_address_sk,c_current_addr_sk] Filter [ca_address_sk] - Scan parquet default.customer_address [ca_address_sk,ca_zip] [ca_address_sk,ca_zip] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [c_current_addr_sk] - Filter [c_current_addr_sk,c_preferred_cust_flag] - Scan parquet default.customer [c_current_addr_sk,c_preferred_cust_flag] [c_current_addr_sk,c_preferred_cust_flag] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_zip] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (3) + Project [c_current_addr_sk] + Filter [c_preferred_cust_flag,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_current_addr_sk,c_preferred_cust_flag] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/explain.txt index f6c20a1a4..36b045bfd 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/explain.txt @@ -1,97 +1,553 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[channel#1 ASC NULLS FIRST,id#2 ASC NULLS FIRST], output=[channel#1,id#2,sales#3,returns#4,profit#5]) -+- *(23) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[sum(sales#7), sum(returns#8), sum(profit#9)]) - +- Exchange hashpartitioning(channel#1, id#2, spark_grouping_id#6, 5) - +- *(22) HashAggregate(keys=[channel#1, id#2, spark_grouping_id#6], functions=[partial_sum(sales#7), partial_sum(returns#8), partial_sum(profit#9)]) - +- *(22) Expand [List(sales#7, returns#8, profit#9, channel#10, id#11, 0), List(sales#7, returns#8, profit#9, channel#10, null, 1), List(sales#7, returns#8, profit#9, null, null, 3)], [sales#7, returns#8, profit#9, channel#1, id#2, spark_grouping_id#6] - +- Union - :- *(7) HashAggregate(keys=[s_store_id#12], functions=[sum(UnscaledValue(ss_ext_sales_price#13)), sum(coalesce(cast(sr_return_amt#14 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#15 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#16 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) - : +- Exchange hashpartitioning(s_store_id#12, 5) - : +- *(6) HashAggregate(keys=[s_store_id#12], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#13)), partial_sum(coalesce(cast(sr_return_amt#14 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#15 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#16 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) - : +- *(6) Project [ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16, s_store_id#12] - : +- *(6) BroadcastHashJoin [ss_promo_sk#17], [p_promo_sk#18], Inner, BuildRight - : :- *(6) Project [ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16, s_store_id#12] - : : +- *(6) BroadcastHashJoin [ss_item_sk#19], [i_item_sk#20], Inner, BuildRight - : : :- *(6) Project [ss_item_sk#19, ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16, s_store_id#12] - : : : +- *(6) BroadcastHashJoin [ss_store_sk#21], [s_store_sk#22], Inner, BuildRight - : : : :- *(6) Project [ss_item_sk#19, ss_store_sk#21, ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16] - : : : : +- *(6) BroadcastHashJoin [ss_sold_date_sk#23], [d_date_sk#24], Inner, BuildRight - : : : : :- *(6) Project [ss_sold_date_sk#23, ss_item_sk#19, ss_store_sk#21, ss_promo_sk#17, ss_ext_sales_price#13, ss_net_profit#15, sr_return_amt#14, sr_net_loss#16] - : : : : : +- *(6) BroadcastHashJoin [cast(ss_item_sk#19 as bigint), cast(ss_ticket_number#25 as bigint)], [sr_item_sk#26, sr_ticket_number#27], LeftOuter, BuildRight - : : : : : :- *(6) Project [ss_sold_date_sk#23, ss_item_sk#19, ss_store_sk#21, ss_promo_sk#17, ss_ticket_number#25, ss_ext_sales_price#13, ss_net_profit#15] - : : : : : : +- *(6) Filter (((isnotnull(ss_sold_date_sk#23) && isnotnull(ss_store_sk#21)) && isnotnull(ss_item_sk#19)) && isnotnull(ss_promo_sk#17)) - : : : : : : +- *(6) FileScan parquet default.store_sales[ss_sold_date_sk#23,ss_item_sk#19,ss_store_sk#21,ss_promo_sk#17,ss_ticket_number#25,ss_ext_sales_price#13,ss_net_profit#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)], ReadSchema: struct= 11192)) && (d_date#28 <= 11222)) && isnotnull(d_date_sk#24)) - : : : : +- *(2) FileScan parquet default.date_dim[d_date_sk#24,d_date#28] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), Is..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(3) Project [s_store_sk#22, s_store_id#12] - : : : +- *(3) Filter isnotnull(s_store_sk#22) - : : : +- *(3) FileScan parquet default.store[s_store_sk#22,s_store_id#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(4) Project [i_item_sk#20] - : : +- *(4) Filter ((isnotnull(i_current_price#29) && (i_current_price#29 > 50.00)) && isnotnull(i_item_sk#20)) - : : +- *(4) FileScan parquet default.item[i_item_sk#20,i_current_price#29] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(5) Project [p_promo_sk#18] - : +- *(5) Filter ((isnotnull(p_channel_tv#30) && (p_channel_tv#30 = N)) && isnotnull(p_promo_sk#18)) - : +- *(5) FileScan parquet default.promotion[p_promo_sk#18,p_channel_tv#30] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/promotion], PartitionFilters: [], PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)], ReadSchema: struct - :- *(14) HashAggregate(keys=[cp_catalog_page_id#31], functions=[sum(UnscaledValue(cs_ext_sales_price#32)), sum(coalesce(cast(cr_return_amount#33 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#34 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#35 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) - : +- Exchange hashpartitioning(cp_catalog_page_id#31, 5) - : +- *(13) HashAggregate(keys=[cp_catalog_page_id#31], functions=[partial_sum(UnscaledValue(cs_ext_sales_price#32)), partial_sum(coalesce(cast(cr_return_amount#33 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#34 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#35 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) - : +- *(13) Project [cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35, cp_catalog_page_id#31] - : +- *(13) BroadcastHashJoin [cs_promo_sk#36], [p_promo_sk#18], Inner, BuildRight - : :- *(13) Project [cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35, cp_catalog_page_id#31] - : : +- *(13) BroadcastHashJoin [cs_item_sk#37], [i_item_sk#20], Inner, BuildRight - : : :- *(13) Project [cs_item_sk#37, cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35, cp_catalog_page_id#31] - : : : +- *(13) BroadcastHashJoin [cs_catalog_page_sk#38], [cp_catalog_page_sk#39], Inner, BuildRight - : : : :- *(13) Project [cs_catalog_page_sk#38, cs_item_sk#37, cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35] - : : : : +- *(13) BroadcastHashJoin [cs_sold_date_sk#40], [d_date_sk#24], Inner, BuildRight - : : : : :- *(13) Project [cs_sold_date_sk#40, cs_catalog_page_sk#38, cs_item_sk#37, cs_promo_sk#36, cs_ext_sales_price#32, cs_net_profit#34, cr_return_amount#33, cr_net_loss#35] - : : : : : +- *(13) BroadcastHashJoin [cs_item_sk#37, cs_order_number#41], [cr_item_sk#42, cr_order_number#43], LeftOuter, BuildRight - : : : : : :- *(13) Project [cs_sold_date_sk#40, cs_catalog_page_sk#38, cs_item_sk#37, cs_promo_sk#36, cs_order_number#41, cs_ext_sales_price#32, cs_net_profit#34] - : : : : : : +- *(13) Filter (((isnotnull(cs_sold_date_sk#40) && isnotnull(cs_catalog_page_sk#38)) && isnotnull(cs_item_sk#37)) && isnotnull(cs_promo_sk#36)) - : : : : : : +- *(13) FileScan parquet default.catalog_sales[cs_sold_date_sk#40,cs_catalog_page_sk#38,cs_item_sk#37,cs_promo_sk#36,cs_order_number#41,cs_ext_sales_price#32,cs_net_profit#34] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_p..., ReadSchema: struct - : : : : +- ReusedExchange [d_date_sk#24], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(10) Project [cp_catalog_page_sk#39, cp_catalog_page_id#31] - : : : +- *(10) Filter isnotnull(cp_catalog_page_sk#39) - : : : +- *(10) FileScan parquet default.catalog_page[cp_catalog_page_sk#39,cp_catalog_page_id#31] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_page], PartitionFilters: [], PushedFilters: [IsNotNull(cp_catalog_page_sk)], ReadSchema: struct - : : +- ReusedExchange [i_item_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [p_promo_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(21) HashAggregate(keys=[web_site_id#44], functions=[sum(UnscaledValue(ws_ext_sales_price#45)), sum(coalesce(cast(wr_return_amt#46 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#47 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#48 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) - +- Exchange hashpartitioning(web_site_id#44, 5) - +- *(20) HashAggregate(keys=[web_site_id#44], functions=[partial_sum(UnscaledValue(ws_ext_sales_price#45)), partial_sum(coalesce(cast(wr_return_amt#46 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#47 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#48 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))]) - +- *(20) Project [ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48, web_site_id#44] - +- *(20) BroadcastHashJoin [ws_promo_sk#49], [p_promo_sk#18], Inner, BuildRight - :- *(20) Project [ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48, web_site_id#44] - : +- *(20) BroadcastHashJoin [ws_item_sk#50], [i_item_sk#20], Inner, BuildRight - : :- *(20) Project [ws_item_sk#50, ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48, web_site_id#44] - : : +- *(20) BroadcastHashJoin [ws_web_site_sk#51], [web_site_sk#52], Inner, BuildRight - : : :- *(20) Project [ws_item_sk#50, ws_web_site_sk#51, ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48] - : : : +- *(20) BroadcastHashJoin [ws_sold_date_sk#53], [d_date_sk#24], Inner, BuildRight - : : : :- *(20) Project [ws_sold_date_sk#53, ws_item_sk#50, ws_web_site_sk#51, ws_promo_sk#49, ws_ext_sales_price#45, ws_net_profit#47, wr_return_amt#46, wr_net_loss#48] - : : : : +- *(20) BroadcastHashJoin [cast(ws_item_sk#50 as bigint), cast(ws_order_number#54 as bigint)], [wr_item_sk#55, wr_order_number#56], LeftOuter, BuildRight - : : : : :- *(20) Project [ws_sold_date_sk#53, ws_item_sk#50, ws_web_site_sk#51, ws_promo_sk#49, ws_order_number#54, ws_ext_sales_price#45, ws_net_profit#47] - : : : : : +- *(20) Filter (((isnotnull(ws_sold_date_sk#53) && isnotnull(ws_web_site_sk#51)) && isnotnull(ws_item_sk#50)) && isnotnull(ws_promo_sk#49)) - : : : : : +- *(20) FileScan parquet default.web_sales[ws_sold_date_sk#53,ws_item_sk#50,ws_web_site_sk#51,ws_promo_sk#49,ws_order_number#54,ws_ext_sales_price#45,ws_net_profit#47] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo..., ReadSchema: struct - : : : +- ReusedExchange [d_date_sk#24], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(17) Project [web_site_sk#52, web_site_id#44] - : : +- *(17) Filter isnotnull(web_site_sk#52) - : : +- *(17) FileScan parquet default.web_site[web_site_sk#52,web_site_id#44] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_site_sk)], ReadSchema: struct - : +- ReusedExchange [i_item_sk#20], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [p_promo_sk#18], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +TakeOrderedAndProject (99) ++- * HashAggregate (98) + +- Exchange (97) + +- * HashAggregate (96) + +- * Expand (95) + +- Union (94) + :- * HashAggregate (39) + : +- Exchange (38) + : +- * HashAggregate (37) + : +- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Project (22) + : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : :- * Project (16) + : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : :- * Project (9) + : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (8) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.store_sales (1) + : : : : : +- BroadcastExchange (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.store_returns (4) + : : : : +- BroadcastExchange (14) + : : : : +- * Project (13) + : : : : +- * Filter (12) + : : : : +- * ColumnarToRow (11) + : : : : +- Scan parquet default.date_dim (10) + : : : +- BroadcastExchange (20) + : : : +- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.store (17) + : : +- BroadcastExchange (27) + : : +- * Project (26) + : : +- * Filter (25) + : : +- * ColumnarToRow (24) + : : +- Scan parquet default.item (23) + : +- BroadcastExchange (34) + : +- * Project (33) + : +- * Filter (32) + : +- * ColumnarToRow (31) + : +- Scan parquet default.promotion (30) + :- * HashAggregate (66) + : +- Exchange (65) + : +- * HashAggregate (64) + : +- * Project (63) + : +- * BroadcastHashJoin Inner BuildRight (62) + : :- * Project (60) + : : +- * BroadcastHashJoin Inner BuildRight (59) + : : :- * Project (57) + : : : +- * BroadcastHashJoin Inner BuildRight (56) + : : : :- * Project (51) + : : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : : :- * Project (48) + : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (47) + : : : : : :- * Filter (42) + : : : : : : +- * ColumnarToRow (41) + : : : : : : +- Scan parquet default.catalog_sales (40) + : : : : : +- BroadcastExchange (46) + : : : : : +- * Filter (45) + : : : : : +- * ColumnarToRow (44) + : : : : : +- Scan parquet default.catalog_returns (43) + : : : : +- ReusedExchange (49) + : : : +- BroadcastExchange (55) + : : : +- * Filter (54) + : : : +- * ColumnarToRow (53) + : : : +- Scan parquet default.catalog_page (52) + : : +- ReusedExchange (58) + : +- ReusedExchange (61) + +- * HashAggregate (93) + +- Exchange (92) + +- * HashAggregate (91) + +- * Project (90) + +- * BroadcastHashJoin Inner BuildRight (89) + :- * Project (87) + : +- * BroadcastHashJoin Inner BuildRight (86) + : :- * Project (84) + : : +- * BroadcastHashJoin Inner BuildRight (83) + : : :- * Project (78) + : : : +- * BroadcastHashJoin Inner BuildRight (77) + : : : :- * Project (75) + : : : : +- * BroadcastHashJoin LeftOuter BuildRight (74) + : : : : :- * Filter (69) + : : : : : +- * ColumnarToRow (68) + : : : : : +- Scan parquet default.web_sales (67) + : : : : +- BroadcastExchange (73) + : : : : +- * Filter (72) + : : : : +- * ColumnarToRow (71) + : : : : +- Scan parquet default.web_returns (70) + : : : +- ReusedExchange (76) + : : +- BroadcastExchange (82) + : : +- * Filter (81) + : : +- * ColumnarToRow (80) + : : +- Scan parquet default.web_site (79) + : +- ReusedExchange (85) + +- ReusedExchange (88) + + +(1) Scan parquet default.store_sales +Output [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] + +(3) Filter [codegen id : 6] +Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] +Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_promo_sk#4)) + +(4) Scan parquet default.store_returns +Output [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] + +(6) Filter [codegen id : 1] +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Condition : (isnotnull(sr_item_sk#8) AND isnotnull(sr_ticket_number#9)) + +(7) BroadcastExchange +Input [4]: [sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [id=#12] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [2]: [cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] +Right keys [2]: [sr_item_sk#8, sr_ticket_number#9] +Join condition: None + +(9) Project [codegen id : 6] +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11] +Input [11]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7, sr_item_sk#8, sr_ticket_number#9, sr_return_amt#10, sr_net_loss#11] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#13, d_date#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#13, d_date#14] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#13, d_date#14] +Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 11192)) AND (d_date#14 <= 11222)) AND isnotnull(d_date_sk#13)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_date#14] + +(14) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(15) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(16) Project [codegen id : 6] +Output [7]: [ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, d_date_sk#13] + +(17) Scan parquet default.store +Output [2]: [s_store_sk#16, s_store_id#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#16, s_store_id#17] + +(19) Filter [codegen id : 3] +Input [2]: [s_store_sk#16, s_store_id#17] +Condition : isnotnull(s_store_sk#16) + +(20) BroadcastExchange +Input [2]: [s_store_sk#16, s_store_id#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] + +(21) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#16] +Join condition: None + +(22) Project [codegen id : 6] +Output [7]: [ss_item_sk#2, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17] +Input [9]: [ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_sk#16, s_store_id#17] + +(23) Scan parquet default.item +Output [2]: [i_item_sk#19, i_current_price#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#19, i_current_price#20] + +(25) Filter [codegen id : 4] +Input [2]: [i_item_sk#19, i_current_price#20] +Condition : ((isnotnull(i_current_price#20) AND (i_current_price#20 > 50.00)) AND isnotnull(i_item_sk#19)) + +(26) Project [codegen id : 4] +Output [1]: [i_item_sk#19] +Input [2]: [i_item_sk#19, i_current_price#20] + +(27) BroadcastExchange +Input [1]: [i_item_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#19] +Join condition: None + +(29) Project [codegen id : 6] +Output [6]: [ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17] +Input [8]: [ss_item_sk#2, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17, i_item_sk#19] + +(30) Scan parquet default.promotion +Output [2]: [p_promo_sk#22, p_channel_tv#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [2]: [p_promo_sk#22, p_channel_tv#23] + +(32) Filter [codegen id : 5] +Input [2]: [p_promo_sk#22, p_channel_tv#23] +Condition : ((isnotnull(p_channel_tv#23) AND (p_channel_tv#23 = N)) AND isnotnull(p_promo_sk#22)) + +(33) Project [codegen id : 5] +Output [1]: [p_promo_sk#22] +Input [2]: [p_promo_sk#22, p_channel_tv#23] + +(34) BroadcastExchange +Input [1]: [p_promo_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] + +(35) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_promo_sk#4] +Right keys [1]: [p_promo_sk#22] +Join condition: None + +(36) Project [codegen id : 6] +Output [5]: [ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17] +Input [7]: [ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17, p_promo_sk#22] + +(37) HashAggregate [codegen id : 6] +Input [5]: [ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#10, sr_net_loss#11, s_store_id#17] +Keys [1]: [s_store_id#17] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#6)), partial_sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [5]: [sum#25, sum#26, isEmpty#27, sum#28, isEmpty#29] +Results [6]: [s_store_id#17, sum#30, sum#31, isEmpty#32, sum#33, isEmpty#34] + +(38) Exchange +Input [6]: [s_store_id#17, sum#30, sum#31, isEmpty#32, sum#33, isEmpty#34] +Arguments: hashpartitioning(s_store_id#17, 5), true, [id=#35] + +(39) HashAggregate [codegen id : 7] +Input [6]: [s_store_id#17, sum#30, sum#31, isEmpty#32, sum#33, isEmpty#34] +Keys [1]: [s_store_id#17] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#6)), sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#6))#36, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#37, sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#38] +Results [5]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#36,17,2) AS sales#39, sum(coalesce(cast(sr_return_amt#10 as decimal(12,2)), 0.00))#37 AS returns#40, sum(CheckOverflow((promote_precision(cast(ss_net_profit#7 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#11 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#38 AS profit#41, store channel AS channel#42, concat(store, s_store_id#17) AS id#43] + +(40) Scan parquet default.catalog_sales +Output [7]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 13] +Input [7]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50] + +(42) Filter [codegen id : 13] +Input [7]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50] +Condition : (((isnotnull(cs_sold_date_sk#44) AND isnotnull(cs_catalog_page_sk#45)) AND isnotnull(cs_item_sk#46)) AND isnotnull(cs_promo_sk#47)) + +(43) Scan parquet default.catalog_returns +Output [4]: [cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 8] +Input [4]: [cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] + +(45) Filter [codegen id : 8] +Input [4]: [cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] +Condition : (isnotnull(cr_item_sk#51) AND isnotnull(cr_order_number#52)) + +(46) BroadcastExchange +Input [4]: [cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [id=#55] + +(47) BroadcastHashJoin [codegen id : 13] +Left keys [2]: [cs_item_sk#46, cs_order_number#48] +Right keys [2]: [cr_item_sk#51, cr_order_number#52] +Join condition: None + +(48) Project [codegen id : 13] +Output [8]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54] +Input [11]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cr_item_sk#51, cr_order_number#52, cr_return_amount#53, cr_net_loss#54] + +(49) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#13] + +(50) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_sold_date_sk#44] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(51) Project [codegen id : 13] +Output [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54] +Input [9]: [cs_sold_date_sk#44, cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, d_date_sk#13] + +(52) Scan parquet default.catalog_page +Output [2]: [cp_catalog_page_sk#56, cp_catalog_page_id#57] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_page] +PushedFilters: [IsNotNull(cp_catalog_page_sk)] +ReadSchema: struct + +(53) ColumnarToRow [codegen id : 10] +Input [2]: [cp_catalog_page_sk#56, cp_catalog_page_id#57] + +(54) Filter [codegen id : 10] +Input [2]: [cp_catalog_page_sk#56, cp_catalog_page_id#57] +Condition : isnotnull(cp_catalog_page_sk#56) + +(55) BroadcastExchange +Input [2]: [cp_catalog_page_sk#56, cp_catalog_page_id#57] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#58] + +(56) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_catalog_page_sk#45] +Right keys [1]: [cp_catalog_page_sk#56] +Join condition: None + +(57) Project [codegen id : 13] +Output [7]: [cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57] +Input [9]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_sk#56, cp_catalog_page_id#57] + +(58) ReusedExchange [Reuses operator id: 27] +Output [1]: [i_item_sk#19] + +(59) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_item_sk#46] +Right keys [1]: [i_item_sk#19] +Join condition: None + +(60) Project [codegen id : 13] +Output [6]: [cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57] +Input [8]: [cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57, i_item_sk#19] + +(61) ReusedExchange [Reuses operator id: 34] +Output [1]: [p_promo_sk#22] + +(62) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_promo_sk#47] +Right keys [1]: [p_promo_sk#22] +Join condition: None + +(63) Project [codegen id : 13] +Output [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57] +Input [7]: [cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57, p_promo_sk#22] + +(64) HashAggregate [codegen id : 13] +Input [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#53, cr_net_loss#54, cp_catalog_page_id#57] +Keys [1]: [cp_catalog_page_id#57] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#49)), partial_sum(coalesce(cast(cr_return_amount#53 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#54 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [5]: [sum#59, sum#60, isEmpty#61, sum#62, isEmpty#63] +Results [6]: [cp_catalog_page_id#57, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] + +(65) Exchange +Input [6]: [cp_catalog_page_id#57, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] +Arguments: hashpartitioning(cp_catalog_page_id#57, 5), true, [id=#69] + +(66) HashAggregate [codegen id : 14] +Input [6]: [cp_catalog_page_id#57, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] +Keys [1]: [cp_catalog_page_id#57] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#49)), sum(coalesce(cast(cr_return_amount#53 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#54 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#49))#70, sum(coalesce(cast(cr_return_amount#53 as decimal(12,2)), 0.00))#71, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#54 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#72] +Results [5]: [MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#49))#70,17,2) AS sales#73, sum(coalesce(cast(cr_return_amount#53 as decimal(12,2)), 0.00))#71 AS returns#74, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#54 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#72 AS profit#75, catalog channel AS channel#76, concat(catalog_page, cp_catalog_page_id#57) AS id#77] + +(67) Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_order_number#82, ws_ext_sales_price#83, ws_net_profit#84] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo_sk)] +ReadSchema: struct + +(68) ColumnarToRow [codegen id : 20] +Input [7]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_order_number#82, ws_ext_sales_price#83, ws_net_profit#84] + +(69) Filter [codegen id : 20] +Input [7]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_order_number#82, ws_ext_sales_price#83, ws_net_profit#84] +Condition : (((isnotnull(ws_sold_date_sk#78) AND isnotnull(ws_web_site_sk#80)) AND isnotnull(ws_item_sk#79)) AND isnotnull(ws_promo_sk#81)) + +(70) Scan parquet default.web_returns +Output [4]: [wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] +ReadSchema: struct + +(71) ColumnarToRow [codegen id : 15] +Input [4]: [wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] + +(72) Filter [codegen id : 15] +Input [4]: [wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] +Condition : (isnotnull(wr_item_sk#85) AND isnotnull(wr_order_number#86)) + +(73) BroadcastExchange +Input [4]: [wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [id=#89] + +(74) BroadcastHashJoin [codegen id : 20] +Left keys [2]: [cast(ws_item_sk#79 as bigint), cast(ws_order_number#82 as bigint)] +Right keys [2]: [wr_item_sk#85, wr_order_number#86] +Join condition: None + +(75) Project [codegen id : 20] +Output [8]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88] +Input [11]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_order_number#82, ws_ext_sales_price#83, ws_net_profit#84, wr_item_sk#85, wr_order_number#86, wr_return_amt#87, wr_net_loss#88] + +(76) ReusedExchange [Reuses operator id: 14] +Output [1]: [d_date_sk#13] + +(77) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ws_sold_date_sk#78] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(78) Project [codegen id : 20] +Output [7]: [ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88] +Input [9]: [ws_sold_date_sk#78, ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, d_date_sk#13] + +(79) Scan parquet default.web_site +Output [2]: [web_site_sk#90, web_site_id#91] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_site_sk)] +ReadSchema: struct + +(80) ColumnarToRow [codegen id : 17] +Input [2]: [web_site_sk#90, web_site_id#91] + +(81) Filter [codegen id : 17] +Input [2]: [web_site_sk#90, web_site_id#91] +Condition : isnotnull(web_site_sk#90) + +(82) BroadcastExchange +Input [2]: [web_site_sk#90, web_site_id#91] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#92] + +(83) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ws_web_site_sk#80] +Right keys [1]: [web_site_sk#90] +Join condition: None + +(84) Project [codegen id : 20] +Output [7]: [ws_item_sk#79, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91] +Input [9]: [ws_item_sk#79, ws_web_site_sk#80, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_sk#90, web_site_id#91] + +(85) ReusedExchange [Reuses operator id: 27] +Output [1]: [i_item_sk#19] + +(86) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ws_item_sk#79] +Right keys [1]: [i_item_sk#19] +Join condition: None + +(87) Project [codegen id : 20] +Output [6]: [ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91] +Input [8]: [ws_item_sk#79, ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91, i_item_sk#19] + +(88) ReusedExchange [Reuses operator id: 34] +Output [1]: [p_promo_sk#22] + +(89) BroadcastHashJoin [codegen id : 20] +Left keys [1]: [ws_promo_sk#81] +Right keys [1]: [p_promo_sk#22] +Join condition: None + +(90) Project [codegen id : 20] +Output [5]: [ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91] +Input [7]: [ws_promo_sk#81, ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91, p_promo_sk#22] + +(91) HashAggregate [codegen id : 20] +Input [5]: [ws_ext_sales_price#83, ws_net_profit#84, wr_return_amt#87, wr_net_loss#88, web_site_id#91] +Keys [1]: [web_site_id#91] +Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#83)), partial_sum(coalesce(cast(wr_return_amt#87 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#84 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#88 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [5]: [sum#93, sum#94, isEmpty#95, sum#96, isEmpty#97] +Results [6]: [web_site_id#91, sum#98, sum#99, isEmpty#100, sum#101, isEmpty#102] + +(92) Exchange +Input [6]: [web_site_id#91, sum#98, sum#99, isEmpty#100, sum#101, isEmpty#102] +Arguments: hashpartitioning(web_site_id#91, 5), true, [id=#103] + +(93) HashAggregate [codegen id : 21] +Input [6]: [web_site_id#91, sum#98, sum#99, isEmpty#100, sum#101, isEmpty#102] +Keys [1]: [web_site_id#91] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#83)), sum(coalesce(cast(wr_return_amt#87 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#84 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#88 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#83))#104, sum(coalesce(cast(wr_return_amt#87 as decimal(12,2)), 0.00))#105, sum(CheckOverflow((promote_precision(cast(ws_net_profit#84 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#88 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#106] +Results [5]: [MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#83))#104,17,2) AS sales#107, sum(coalesce(cast(wr_return_amt#87 as decimal(12,2)), 0.00))#105 AS returns#108, sum(CheckOverflow((promote_precision(cast(ws_net_profit#84 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#88 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true))#106 AS profit#109, web channel AS channel#110, concat(web_site, web_site_id#91) AS id#111] + +(94) Union + +(95) Expand [codegen id : 22] +Input [5]: [sales#39, returns#40, profit#41, channel#42, id#43] +Arguments: [List(sales#39, returns#40, profit#41, channel#42, id#43, 0), List(sales#39, returns#40, profit#41, channel#42, null, 1), List(sales#39, returns#40, profit#41, null, null, 3)], [sales#39, returns#40, profit#41, channel#112, id#113, spark_grouping_id#114] + +(96) HashAggregate [codegen id : 22] +Input [6]: [sales#39, returns#40, profit#41, channel#112, id#113, spark_grouping_id#114] +Keys [3]: [channel#112, id#113, spark_grouping_id#114] +Functions [3]: [partial_sum(sales#39), partial_sum(returns#40), partial_sum(profit#41)] +Aggregate Attributes [6]: [sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Results [9]: [channel#112, id#113, spark_grouping_id#114, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] + +(97) Exchange +Input [9]: [channel#112, id#113, spark_grouping_id#114, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Arguments: hashpartitioning(channel#112, id#113, spark_grouping_id#114, 5), true, [id=#127] + +(98) HashAggregate [codegen id : 23] +Input [9]: [channel#112, id#113, spark_grouping_id#114, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Keys [3]: [channel#112, id#113, spark_grouping_id#114] +Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] +Aggregate Attributes [3]: [sum(sales#39)#128, sum(returns#40)#129, sum(profit#41)#130] +Results [5]: [channel#112, id#113, sum(sales#39)#128 AS sales#131, sum(returns#40)#129 AS returns#132, sum(profit#41)#130 AS profit#133] + +(99) TakeOrderedAndProject +Input [5]: [channel#112, id#113, sales#131, returns#132, profit#133] +Arguments: 100, [channel#112 ASC NULLS FIRST, id#113 ASC NULLS FIRST], [channel#112, id#113, sales#131, returns#132, profit#133] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/simplified.txt index 28b1a009b..37f46f14e 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q80/simplified.txt @@ -1,133 +1,148 @@ -TakeOrderedAndProject [channel,id,profit,returns,sales] - WholeStageCodegen - HashAggregate [channel,id,spark_grouping_id,sum,sum,sum,sum(profit),sum(returns),sum(sales)] [profit,returns,sales,sum,sum,sum,sum(profit),sum(returns),sum(sales)] +TakeOrderedAndProject [channel,id,sales,returns,profit] + WholeStageCodegen (23) + HashAggregate [channel,id,spark_grouping_id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [channel,id,spark_grouping_id] #1 - WholeStageCodegen - HashAggregate [channel,id,profit,returns,sales,spark_grouping_id,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Expand [channel,id,profit,returns,sales] + WholeStageCodegen (22) + HashAggregate [channel,id,spark_grouping_id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Expand [sales,returns,profit,channel,id] InputAdapter Union - WholeStageCodegen - HashAggregate [s_store_id,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00))] [channel,id,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00))] + WholeStageCodegen (7) + HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [s_store_id] #2 - WholeStageCodegen - HashAggregate [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [p_promo_sk,ss_promo_sk] - Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_net_profit,ss_promo_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [s_store_id,sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [sr_net_loss,sr_return_amt,ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Project [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] - Filter [ss_item_sk,ss_promo_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] [ss_ext_sales_price,ss_item_sk,ss_net_profit,ss_promo_sk,ss_sold_date_sk,ss_store_sk,ss_ticket_number] + WholeStageCodegen (6) + HashAggregate [s_store_id,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk,ss_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [sr_item_sk,sr_net_loss,sr_return_amt,sr_ticket_number] - Filter [sr_item_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_net_loss,sr_return_amt,sr_ticket_number] [sr_item_sk,sr_net_loss,sr_return_amt,sr_ticket_number] + WholeStageCodegen (1) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [s_store_id,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_store_id,s_store_sk] [s_store_id,s_store_sk] + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id] InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (4) Project [i_item_sk] Filter [i_current_price,i_item_sk] - Scan parquet default.item [i_current_price,i_item_sk] [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_current_price] InputAdapter BroadcastExchange #7 - WholeStageCodegen + WholeStageCodegen (5) Project [p_promo_sk] Filter [p_channel_tv,p_promo_sk] - Scan parquet default.promotion [p_channel_tv,p_promo_sk] [p_channel_tv,p_promo_sk] - WholeStageCodegen - HashAggregate [cp_catalog_page_id,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00))] [channel,id,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00))] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk,p_channel_tv] + WholeStageCodegen (14) + HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [cp_catalog_page_id] #8 - WholeStageCodegen - HashAggregate [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit] + WholeStageCodegen (13) + HashAggregate [cp_catalog_page_id,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_net_profit,cs_promo_sk] + Project [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cp_catalog_page_id,cr_net_loss,cr_return_amount,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk] - BroadcastHashJoin [cp_catalog_page_sk,cs_catalog_page_sk] - Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk] + Project [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] + Project [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cr_net_loss,cr_return_amount,cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_promo_sk,cs_sold_date_sk] - BroadcastHashJoin [cr_item_sk,cr_order_number,cs_item_sk,cs_order_number] - Project [cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_order_number,cs_promo_sk,cs_sold_date_sk] - Filter [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_order_number,cs_promo_sk,cs_sold_date_sk] [cs_catalog_page_sk,cs_ext_sales_price,cs_item_sk,cs_net_profit,cs_order_number,cs_promo_sk,cs_sold_date_sk] + Project [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + Filter [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit] InputAdapter BroadcastExchange #9 - WholeStageCodegen - Project [cr_item_sk,cr_net_loss,cr_order_number,cr_return_amount] - Filter [cr_item_sk,cr_order_number] - Scan parquet default.catalog_returns [cr_item_sk,cr_net_loss,cr_order_number,cr_return_amount] [cr_item_sk,cr_net_loss,cr_order_number,cr_return_amount] + WholeStageCodegen (8) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #4 + ReusedExchange [d_date_sk] #4 InputAdapter BroadcastExchange #10 - WholeStageCodegen - Project [cp_catalog_page_id,cp_catalog_page_sk] - Filter [cp_catalog_page_sk] - Scan parquet default.catalog_page [cp_catalog_page_id,cp_catalog_page_sk] [cp_catalog_page_id,cp_catalog_page_sk] + WholeStageCodegen (10) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] InputAdapter - ReusedExchange [i_item_sk] [i_item_sk] #6 + ReusedExchange [i_item_sk] #6 InputAdapter - ReusedExchange [p_promo_sk] [p_promo_sk] #7 - WholeStageCodegen - HashAggregate [sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),web_site_id] [channel,id,profit,returns,sales,sum,sum,sum,sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2))),sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00))] + ReusedExchange [p_promo_sk] #7 + WholeStageCodegen (21) + HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),sales,returns,profit,channel,id,sum,sum,isEmpty,sum,isEmpty] InputAdapter Exchange [web_site_id] #11 - WholeStageCodegen - HashAggregate [sum,sum,sum,sum,sum,sum,web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum,sum,sum] - Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit] - BroadcastHashJoin [p_promo_sk,ws_promo_sk] - Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_net_profit,ws_promo_sk] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [web_site_id,wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk] - BroadcastHashJoin [web_site_sk,ws_web_site_sk] - Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk,ws_web_site_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [wr_net_loss,wr_return_amt,ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] - BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Project [ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_order_number,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] - Filter [ws_item_sk,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] - Scan parquet default.web_sales [ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_order_number,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] [ws_ext_sales_price,ws_item_sk,ws_net_profit,ws_order_number,ws_promo_sk,ws_sold_date_sk,ws_web_site_sk] + WholeStageCodegen (20) + HashAggregate [web_site_id,ws_ext_sales_price,wr_return_amt,ws_net_profit,wr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_promo_sk,p_promo_sk] + Project [ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] + Filter [ws_sold_date_sk,ws_web_site_sk,ws_item_sk,ws_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit] InputAdapter BroadcastExchange #12 - WholeStageCodegen - Project [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt] - Filter [wr_item_sk,wr_order_number] - Scan parquet default.web_returns [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt] [wr_item_sk,wr_net_loss,wr_order_number,wr_return_amt] + WholeStageCodegen (15) + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #4 + ReusedExchange [d_date_sk] #4 InputAdapter BroadcastExchange #13 - WholeStageCodegen - Project [web_site_id,web_site_sk] - Filter [web_site_sk] - Scan parquet default.web_site [web_site_id,web_site_sk] [web_site_id,web_site_sk] + WholeStageCodegen (17) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_site_id] InputAdapter - ReusedExchange [i_item_sk] [i_item_sk] #6 + ReusedExchange [i_item_sk] #6 InputAdapter - ReusedExchange [p_promo_sk] [p_promo_sk] #7 + ReusedExchange [p_promo_sk] #7 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/explain.txt index 9951d8388..2530fe9e8 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/explain.txt @@ -1,52 +1,298 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST,c_salutation#2 ASC NULLS FIRST,c_first_name#3 ASC NULLS FIRST,c_last_name#4 ASC NULLS FIRST,ca_street_number#5 ASC NULLS FIRST,ca_street_name#6 ASC NULLS FIRST,ca_street_type#7 ASC NULLS FIRST,ca_suite_number#8 ASC NULLS FIRST,ca_city#9 ASC NULLS FIRST,ca_county#10 ASC NULLS FIRST,ca_state#11 ASC NULLS FIRST,ca_zip#12 ASC NULLS FIRST,ca_country#13 ASC NULLS FIRST,ca_gmt_offset#14 ASC NULLS FIRST,ca_location_type#15 ASC NULLS FIRST,ctr_total_return#16 ASC NULLS FIRST], output=[c_customer_id#1,c_salutation#2,c_first_name#3,c_last_name#4,ca_street_number#5,ca_street_name#6,ca_street_type#7,ca_suite_number#8,ca_city#9,ca_county#10,ca_state#11,ca_zip#12,ca_country#13,ca_gmt_offset#14,ca_location_type#15,ctr_total_return#16]) -+- *(11) Project [c_customer_id#1, c_salutation#2, c_first_name#3, c_last_name#4, ca_street_number#5, ca_street_name#6, ca_street_type#7, ca_suite_number#8, ca_city#9, ca_county#10, ca_state#11, ca_zip#12, ca_country#13, ca_gmt_offset#14, ca_location_type#15, ctr_total_return#16] - +- *(11) BroadcastHashJoin [c_current_addr_sk#17], [ca_address_sk#18], Inner, BuildRight - :- *(11) Project [ctr_total_return#16, c_customer_id#1, c_current_addr_sk#17, c_salutation#2, c_first_name#3, c_last_name#4] - : +- *(11) BroadcastHashJoin [ctr_customer_sk#19], [c_customer_sk#20], Inner, BuildRight - : :- *(11) Project [ctr_customer_sk#19, ctr_total_return#16] - : : +- *(11) BroadcastHashJoin [ctr_state#21], [ctr_state#21#22], Inner, BuildRight, (cast(ctr_total_return#16 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#23) - : : :- *(11) Filter isnotnull(ctr_total_return#16) - : : : +- *(11) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[sum(UnscaledValue(cr_return_amt_inc_tax#25))]) - : : : +- Exchange hashpartitioning(cr_returning_customer_sk#24, ca_state#11, 5) - : : : +- *(3) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[partial_sum(UnscaledValue(cr_return_amt_inc_tax#25))]) - : : : +- *(3) Project [cr_returning_customer_sk#24, cr_return_amt_inc_tax#25, ca_state#11] - : : : +- *(3) BroadcastHashJoin [cr_returning_addr_sk#26], [ca_address_sk#18], Inner, BuildRight - : : : :- *(3) Project [cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] - : : : : +- *(3) BroadcastHashJoin [cr_returned_date_sk#27], [d_date_sk#28], Inner, BuildRight - : : : : :- *(3) Project [cr_returned_date_sk#27, cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] - : : : : : +- *(3) Filter ((isnotnull(cr_returned_date_sk#27) && isnotnull(cr_returning_addr_sk#26)) && isnotnull(cr_returning_customer_sk#24)) - : : : : : +- *(3) FileScan parquet default.catalog_returns[cr_returned_date_sk#27,cr_returning_customer_sk#24,cr_returning_addr_sk#26,cr_return_amt_inc_tax#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk), IsNotNull(cr_returning_customer..., ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [ca_address_sk#18, ca_state#11] - : : : +- *(2) Filter (isnotnull(ca_address_sk#18) && isnotnull(ca_state#11)) - : : : +- *(2) FileScan parquet default.customer_address[ca_address_sk#18,ca_state#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[1, string, true])) - : : +- *(8) Filter isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#23) - : : +- *(8) HashAggregate(keys=[ctr_state#21], functions=[avg(ctr_total_return#16)]) - : : +- Exchange hashpartitioning(ctr_state#21, 5) - : : +- *(7) HashAggregate(keys=[ctr_state#21], functions=[partial_avg(ctr_total_return#16)]) - : : +- *(7) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[sum(UnscaledValue(cr_return_amt_inc_tax#25))]) - : : +- Exchange hashpartitioning(cr_returning_customer_sk#24, ca_state#11, 5) - : : +- *(6) HashAggregate(keys=[cr_returning_customer_sk#24, ca_state#11], functions=[partial_sum(UnscaledValue(cr_return_amt_inc_tax#25))]) - : : +- *(6) Project [cr_returning_customer_sk#24, cr_return_amt_inc_tax#25, ca_state#11] - : : +- *(6) BroadcastHashJoin [cr_returning_addr_sk#26], [ca_address_sk#18], Inner, BuildRight - : : :- *(6) Project [cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] - : : : +- *(6) BroadcastHashJoin [cr_returned_date_sk#27], [d_date_sk#28], Inner, BuildRight - : : : :- *(6) Project [cr_returned_date_sk#27, cr_returning_customer_sk#24, cr_returning_addr_sk#26, cr_return_amt_inc_tax#25] - : : : : +- *(6) Filter (isnotnull(cr_returned_date_sk#27) && isnotnull(cr_returning_addr_sk#26)) - : : : : +- *(6) FileScan parquet default.catalog_returns[cr_returned_date_sk#27,cr_returning_customer_sk#24,cr_returning_addr_sk#26,cr_return_amt_inc_tax#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk)], ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] + +(3) Filter [codegen id : 3] +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Condition : ((isnotnull(cr_returned_date_sk#1) AND isnotnull(cr_returning_addr_sk#3)) AND isnotnull(cr_returning_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_year#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_year#6] + +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cr_returned_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(10) Project [codegen id : 3] +Output [3]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Input [5]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, d_date_sk#5] + +(11) Scan parquet default.customer_address +Output [2]: [ca_address_sk#8, ca_state#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_state#9] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#8, ca_state#9] +Condition : (isnotnull(ca_address_sk#8) AND isnotnull(ca_state#9)) + +(14) BroadcastExchange +Input [2]: [ca_address_sk#8, ca_state#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [cr_returning_addr_sk#3] +Right keys [1]: [ca_address_sk#8] +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#9] +Input [5]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, ca_address_sk#8, ca_state#9] + +(17) HashAggregate [codegen id : 3] +Input [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#9] +Keys [2]: [cr_returning_customer_sk#2, ca_state#9] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#4))] +Aggregate Attributes [1]: [sum#11] +Results [3]: [cr_returning_customer_sk#2, ca_state#9, sum#12] + +(18) Exchange +Input [3]: [cr_returning_customer_sk#2, ca_state#9, sum#12] +Arguments: hashpartitioning(cr_returning_customer_sk#2, ca_state#9, 5), true, [id=#13] + +(19) HashAggregate [codegen id : 11] +Input [3]: [cr_returning_customer_sk#2, ca_state#9, sum#12] +Keys [2]: [cr_returning_customer_sk#2, ca_state#9] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))#14] +Results [3]: [cr_returning_customer_sk#2 AS ctr_customer_sk#15, ca_state#9 AS ctr_state#16, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#4))#14,17,2) AS ctr_total_return#17] + +(20) Filter [codegen id : 11] +Input [3]: [ctr_customer_sk#15, ctr_state#16, ctr_total_return#17] +Condition : isnotnull(ctr_total_return#17) + +(21) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 6] +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] + +(23) Filter [codegen id : 6] +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Condition : (isnotnull(cr_returned_date_sk#1) AND isnotnull(cr_returning_addr_sk#3)) + +(24) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] + +(25) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cr_returned_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None + +(26) Project [codegen id : 6] +Output [3]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Input [5]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, d_date_sk#5] + +(27) ReusedExchange [Reuses operator id: 14] +Output [2]: [ca_address_sk#8, ca_state#9] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cr_returning_addr_sk#3] +Right keys [1]: [ca_address_sk#8] +Join condition: None + +(29) Project [codegen id : 6] +Output [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#9] +Input [5]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, ca_address_sk#8, ca_state#9] + +(30) HashAggregate [codegen id : 6] +Input [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#9] +Keys [2]: [cr_returning_customer_sk#2, ca_state#9] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#4))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [cr_returning_customer_sk#2, ca_state#9, sum#19] + +(31) Exchange +Input [3]: [cr_returning_customer_sk#2, ca_state#9, sum#19] +Arguments: hashpartitioning(cr_returning_customer_sk#2, ca_state#9, 5), true, [id=#20] + +(32) HashAggregate [codegen id : 7] +Input [3]: [cr_returning_customer_sk#2, ca_state#9, sum#19] +Keys [2]: [cr_returning_customer_sk#2, ca_state#9] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))#21] +Results [2]: [ca_state#9 AS ctr_state#16, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#4))#21,17,2) AS ctr_total_return#17] + +(33) HashAggregate [codegen id : 7] +Input [2]: [ctr_state#16, ctr_total_return#17] +Keys [1]: [ctr_state#16] +Functions [1]: [partial_avg(ctr_total_return#17)] +Aggregate Attributes [2]: [sum#22, count#23] +Results [3]: [ctr_state#16, sum#24, count#25] + +(34) Exchange +Input [3]: [ctr_state#16, sum#24, count#25] +Arguments: hashpartitioning(ctr_state#16, 5), true, [id=#26] + +(35) HashAggregate [codegen id : 8] +Input [3]: [ctr_state#16, sum#24, count#25] +Keys [1]: [ctr_state#16] +Functions [1]: [avg(ctr_total_return#17)] +Aggregate Attributes [1]: [avg(ctr_total_return#17)#27] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#17)#27) * 1.200000), DecimalType(24,7), true) AS (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16 AS ctr_state#16#29] + +(36) Filter [codegen id : 8] +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16#29] +Condition : isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28) + +(37) BroadcastExchange +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16#29] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#30] + +(38) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ctr_state#16] +Right keys [1]: [ctr_state#16#29] +Join condition: (cast(ctr_total_return#17 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28) + +(39) Project [codegen id : 11] +Output [2]: [ctr_customer_sk#15, ctr_total_return#17] +Input [5]: [ctr_customer_sk#15, ctr_state#16, ctr_total_return#17, (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#28, ctr_state#16#29] + +(40) Scan parquet default.customer +Output [6]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 9] +Input [6]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36] + +(42) Filter [codegen id : 9] +Input [6]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36] +Condition : (isnotnull(c_customer_sk#31) AND isnotnull(c_current_addr_sk#33)) + +(43) BroadcastExchange +Input [6]: [c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#37] + +(44) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ctr_customer_sk#15] +Right keys [1]: [c_customer_sk#31] +Join condition: None + +(45) Project [codegen id : 11] +Output [6]: [ctr_total_return#17, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36] +Input [8]: [ctr_customer_sk#15, ctr_total_return#17, c_customer_sk#31, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36] + +(46) Scan parquet default.customer_address +Output [12]: [ca_address_sk#8, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 10] +Input [12]: [ca_address_sk#8, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] + +(48) Filter [codegen id : 10] +Input [12]: [ca_address_sk#8, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] +Condition : ((isnotnull(ca_state#9) AND (ca_state#9 = GA)) AND isnotnull(ca_address_sk#8)) + +(49) BroadcastExchange +Input [12]: [ca_address_sk#8, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#48] + +(50) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_current_addr_sk#33] +Right keys [1]: [ca_address_sk#8] +Join condition: None + +(51) Project [codegen id : 11] +Output [16]: [c_customer_id#32, c_salutation#34, c_first_name#35, c_last_name#36, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47, ctr_total_return#17] +Input [18]: [ctr_total_return#17, c_customer_id#32, c_current_addr_sk#33, c_salutation#34, c_first_name#35, c_last_name#36, ca_address_sk#8, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] + +(52) TakeOrderedAndProject +Input [16]: [c_customer_id#32, c_salutation#34, c_first_name#35, c_last_name#36, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47, ctr_total_return#17] +Arguments: 100, [c_customer_id#32 ASC NULLS FIRST, c_salutation#34 ASC NULLS FIRST, c_first_name#35 ASC NULLS FIRST, c_last_name#36 ASC NULLS FIRST, ca_street_number#38 ASC NULLS FIRST, ca_street_name#39 ASC NULLS FIRST, ca_street_type#40 ASC NULLS FIRST, ca_suite_number#41 ASC NULLS FIRST, ca_city#42 ASC NULLS FIRST, ca_county#43 ASC NULLS FIRST, ca_state#9 ASC NULLS FIRST, ca_zip#44 ASC NULLS FIRST, ca_country#45 ASC NULLS FIRST, ca_gmt_offset#46 ASC NULLS FIRST, ca_location_type#47 ASC NULLS FIRST, ctr_total_return#17 ASC NULLS FIRST], [c_customer_id#32, c_salutation#34, c_first_name#35, c_last_name#36, ca_street_number#38, ca_street_name#39, ca_street_type#40, ca_suite_number#41, ca_city#42, ca_county#43, ca_state#9, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47, ctr_total_return#17] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/simplified.txt index f42772616..2b7c6041b 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q81/simplified.txt @@ -1,70 +1,77 @@ -TakeOrderedAndProject [c_customer_id,c_first_name,c_last_name,c_salutation,ca_city,ca_country,ca_county,ca_gmt_offset,ca_location_type,ca_state,ca_street_name,ca_street_number,ca_street_type,ca_suite_number,ca_zip,ctr_total_return] - WholeStageCodegen - Project [c_customer_id,c_first_name,c_last_name,c_salutation,ca_city,ca_country,ca_county,ca_gmt_offset,ca_location_type,ca_state,ca_street_name,ca_street_number,ca_street_type,ca_suite_number,ca_zip,ctr_total_return] +TakeOrderedAndProject [c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type,ctr_total_return] + WholeStageCodegen (11) + Project [c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type,ctr_total_return] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,c_customer_id,c_first_name,c_last_name,c_salutation,ctr_total_return] - BroadcastHashJoin [c_customer_sk,ctr_customer_sk] + Project [ctr_total_return,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] + BroadcastHashJoin [ctr_customer_sk,c_customer_sk] Project [ctr_customer_sk,ctr_total_return] - BroadcastHashJoin [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_state,ctr_state,ctr_total_return] + BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] Filter [ctr_total_return] - HashAggregate [ca_state,cr_returning_customer_sk,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] [ctr_customer_sk,ctr_state,ctr_total_return,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] + HashAggregate [cr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_customer_sk,ctr_state,ctr_total_return,sum] InputAdapter - Exchange [ca_state,cr_returning_customer_sk] #1 - WholeStageCodegen - HashAggregate [ca_state,cr_return_amt_inc_tax,cr_returning_customer_sk,sum,sum] [sum,sum] - Project [ca_state,cr_return_amt_inc_tax,cr_returning_customer_sk] - BroadcastHashJoin [ca_address_sk,cr_returning_addr_sk] - Project [cr_return_amt_inc_tax,cr_returning_addr_sk,cr_returning_customer_sk] + Exchange [cr_returning_customer_sk,ca_state] #1 + WholeStageCodegen (3) + HashAggregate [cr_returning_customer_sk,ca_state,cr_return_amt_inc_tax] [sum,sum] + Project [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] + BroadcastHashJoin [cr_returning_addr_sk,ca_address_sk] + Project [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Project [cr_return_amt_inc_tax,cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] - Filter [cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] - Scan parquet default.catalog_returns [cr_return_amt_inc_tax,cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] [cr_return_amt_inc_tax,cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] + Filter [cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [ca_address_sk,ca_state] - Filter [ca_address_sk,ca_state] - Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + WholeStageCodegen (2) + Filter [ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (8) Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] - HashAggregate [avg(ctr_total_return),count,ctr_state,sum] [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),avg(ctr_total_return),count,ctr_state,sum] + HashAggregate [ctr_state,sum,count] [avg(ctr_total_return),(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_state,sum,count] InputAdapter Exchange [ctr_state] #5 - WholeStageCodegen - HashAggregate [count,count,ctr_state,ctr_total_return,sum,sum] [count,count,sum,sum] - HashAggregate [ca_state,cr_returning_customer_sk,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] [ctr_state,ctr_total_return,sum,sum(UnscaledValue(cr_return_amt_inc_tax))] + WholeStageCodegen (7) + HashAggregate [ctr_state,ctr_total_return] [sum,count,sum,count] + HashAggregate [cr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_state,ctr_total_return,sum] InputAdapter - Exchange [ca_state,cr_returning_customer_sk] #6 - WholeStageCodegen - HashAggregate [ca_state,cr_return_amt_inc_tax,cr_returning_customer_sk,sum,sum] [sum,sum] - Project [ca_state,cr_return_amt_inc_tax,cr_returning_customer_sk] - BroadcastHashJoin [ca_address_sk,cr_returning_addr_sk] - Project [cr_return_amt_inc_tax,cr_returning_addr_sk,cr_returning_customer_sk] + Exchange [cr_returning_customer_sk,ca_state] #6 + WholeStageCodegen (6) + HashAggregate [cr_returning_customer_sk,ca_state,cr_return_amt_inc_tax] [sum,sum] + Project [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] + BroadcastHashJoin [cr_returning_addr_sk,ca_address_sk] + Project [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Project [cr_return_amt_inc_tax,cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] - Filter [cr_returned_date_sk,cr_returning_addr_sk] - Scan parquet default.catalog_returns [cr_return_amt_inc_tax,cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] [cr_return_amt_inc_tax,cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] + Filter [cr_returned_date_sk,cr_returning_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #2 + ReusedExchange [d_date_sk] #2 InputAdapter - ReusedExchange [ca_address_sk,ca_state] [ca_address_sk,ca_state] #3 + ReusedExchange [ca_address_sk,ca_state] #3 InputAdapter BroadcastExchange #7 - WholeStageCodegen - Project [c_current_addr_sk,c_customer_id,c_customer_sk,c_first_name,c_last_name,c_salutation] - Filter [c_current_addr_sk,c_customer_sk] - Scan parquet default.customer [c_current_addr_sk,c_customer_id,c_customer_sk,c_first_name,c_last_name,c_salutation] [c_current_addr_sk,c_customer_id,c_customer_sk,c_first_name,c_last_name,c_salutation] + WholeStageCodegen (9) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] InputAdapter BroadcastExchange #8 - WholeStageCodegen - Project [ca_address_sk,ca_city,ca_country,ca_county,ca_gmt_offset,ca_location_type,ca_state,ca_street_name,ca_street_number,ca_street_type,ca_suite_number,ca_zip] - Filter [ca_address_sk,ca_state] - Scan parquet default.customer_address [ca_address_sk,ca_city,ca_country,ca_county,ca_gmt_offset,ca_location_type,ca_state,ca_street_name,ca_street_number,ca_street_type,ca_suite_number,ca_zip] [ca_address_sk,ca_city,ca_country,ca_county,ca_gmt_offset,ca_location_type,ca_state,ca_street_name,ca_street_number,ca_street_type,ca_suite_number,ca_zip] + WholeStageCodegen (10) + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/explain.txt index 4ac2b54c5..c7240792c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/explain.txt @@ -1,26 +1,160 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[i_item_id#1 ASC NULLS FIRST], output=[i_item_id#1,i_item_desc#2,i_current_price#3]) -+- *(5) HashAggregate(keys=[i_item_id#1, i_item_desc#2, i_current_price#3], functions=[]) - +- Exchange hashpartitioning(i_item_id#1, i_item_desc#2, i_current_price#3, 5) - +- *(4) HashAggregate(keys=[i_item_id#1, i_item_desc#2, i_current_price#3], functions=[]) - +- *(4) Project [i_item_id#1, i_item_desc#2, i_current_price#3] - +- *(4) BroadcastHashJoin [i_item_sk#4], [ss_item_sk#5], Inner, BuildRight - :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3] - : +- *(4) BroadcastHashJoin [inv_date_sk#6], [d_date_sk#7], Inner, BuildRight - : :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3, inv_date_sk#6] - : : +- *(4) BroadcastHashJoin [i_item_sk#4], [inv_item_sk#8], Inner, BuildRight - : : :- *(4) Project [i_item_sk#4, i_item_id#1, i_item_desc#2, i_current_price#3] - : : : +- *(4) Filter ((((isnotnull(i_current_price#3) && (i_current_price#3 >= 62.00)) && (cast(i_current_price#3 as decimal(12,2)) <= 92.00)) && i_manufact_id#9 IN (129,270,821,423)) && isnotnull(i_item_sk#4)) - : : : +- *(4) FileScan parquet default.item[i_item_sk#4,i_item_id#1,i_item_desc#2,i_current_price#3,i_manufact_id#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,62.00), In(i_manufact_id, [129,27..., ReadSchema: struct= 100)) && (inv_quantity_on_hand#10 <= 500)) && isnotnull(inv_item_sk#8)) && isnotnull(inv_date_sk#6)) - : : +- *(1) FileScan parquet default.inventory[inv_date_sk#6,inv_item_sk#8,inv_quantity_on_hand#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/inventory], PartitionFilters: [], PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(i..., ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#7] - : +- *(2) Filter (((isnotnull(d_date#11) && (d_date#11 >= 11102)) && (d_date#11 <= 11162)) && isnotnull(d_date_sk#7)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#7,d_date#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-05-25), LessThanOrEqual(d_date,2000-07-24), Is..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [ss_item_sk#5] - +- *(3) Filter isnotnull(ss_item_sk#5) - +- *(3) FileScan parquet default.store_sales[ss_item_sk#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (28) ++- * HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * Project (24) + +- * BroadcastHashJoin Inner BuildRight (23) + :- * Project (18) + : +- * BroadcastHashJoin Inner BuildRight (17) + : :- * Project (11) + : : +- * BroadcastHashJoin Inner BuildRight (10) + : : :- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- BroadcastExchange (9) + : : +- * Project (8) + : : +- * Filter (7) + : : +- * ColumnarToRow (6) + : : +- Scan parquet default.inventory (5) + : +- BroadcastExchange (16) + : +- * Project (15) + : +- * Filter (14) + : +- * ColumnarToRow (13) + : +- Scan parquet default.date_dim (12) + +- BroadcastExchange (22) + +- * Filter (21) + +- * ColumnarToRow (20) + +- Scan parquet default.store_sales (19) + + +(1) Scan parquet default.item +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,62.00), In(i_manufact_id, [129,270,821,423]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(3) Filter [codegen id : 4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] +Condition : ((((isnotnull(i_current_price#4) AND (i_current_price#4 >= 62.00)) AND (cast(i_current_price#4 as decimal(12,2)) <= 92.00)) AND i_manufact_id#5 IN (129,270,821,423)) AND isnotnull(i_item_sk#1)) + +(4) Project [codegen id : 4] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufact_id#5] + +(5) Scan parquet default.inventory +Output [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/inventory] +PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(inv_quantity_on_hand,500), IsNotNull(inv_item_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] + +(7) Filter [codegen id : 1] +Input [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] +Condition : ((((isnotnull(inv_quantity_on_hand#8) AND (inv_quantity_on_hand#8 >= 100)) AND (inv_quantity_on_hand#8 <= 500)) AND isnotnull(inv_item_sk#7)) AND isnotnull(inv_date_sk#6)) + +(8) Project [codegen id : 1] +Output [2]: [inv_date_sk#6, inv_item_sk#7] +Input [3]: [inv_date_sk#6, inv_item_sk#7, inv_quantity_on_hand#8] + +(9) BroadcastExchange +Input [2]: [inv_date_sk#6, inv_item_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#9] + +(10) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [inv_item_sk#7] +Join condition: None + +(11) Project [codegen id : 4] +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#6] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#6, inv_item_sk#7] + +(12) Scan parquet default.date_dim +Output [2]: [d_date_sk#10, d_date#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-05-25), LessThanOrEqual(d_date,2000-07-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] + +(14) Filter [codegen id : 2] +Input [2]: [d_date_sk#10, d_date#11] +Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 11102)) AND (d_date#11 <= 11162)) AND isnotnull(d_date_sk#10)) + +(15) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#11] + +(16) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(17) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [inv_date_sk#6] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(18) Project [codegen id : 4] +Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#6, d_date_sk#10] + +(19) Scan parquet default.store_sales +Output [1]: [ss_item_sk#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 3] +Input [1]: [ss_item_sk#13] + +(21) Filter [codegen id : 3] +Input [1]: [ss_item_sk#13] +Condition : isnotnull(ss_item_sk#13) + +(22) BroadcastExchange +Input [1]: [ss_item_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#13] +Join condition: None + +(24) Project [codegen id : 4] +Output [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, ss_item_sk#13] + +(25) HashAggregate [codegen id : 4] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(26) Exchange +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), true, [id=#15] + +(27) HashAggregate [codegen id : 5] +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Keys [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] + +(28) TakeOrderedAndProject +Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] +Arguments: 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_current_price#4] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/simplified.txt index e0e54c6c0..92f8729ca 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q82/simplified.txt @@ -1,34 +1,41 @@ -TakeOrderedAndProject [i_current_price,i_item_desc,i_item_id] - WholeStageCodegen - HashAggregate [i_current_price,i_item_desc,i_item_id] +TakeOrderedAndProject [i_item_id,i_item_desc,i_current_price] + WholeStageCodegen (5) + HashAggregate [i_item_id,i_item_desc,i_current_price] InputAdapter - Exchange [i_current_price,i_item_desc,i_item_id] #1 - WholeStageCodegen - HashAggregate [i_current_price,i_item_desc,i_item_id] - Project [i_current_price,i_item_desc,i_item_id] + Exchange [i_item_id,i_item_desc,i_current_price] #1 + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_current_price] + Project [i_item_id,i_item_desc,i_current_price] BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_current_price,i_item_desc,i_item_id,i_item_sk] - BroadcastHashJoin [d_date_sk,inv_date_sk] - Project [i_current_price,i_item_desc,i_item_id,i_item_sk,inv_date_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + BroadcastHashJoin [inv_date_sk,d_date_sk] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price,inv_date_sk] BroadcastHashJoin [i_item_sk,inv_item_sk] - Project [i_current_price,i_item_desc,i_item_id,i_item_sk] - Filter [i_current_price,i_item_sk,i_manufact_id] - Scan parquet default.item [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] [i_current_price,i_item_desc,i_item_id,i_item_sk,i_manufact_id] + Project [i_item_sk,i_item_id,i_item_desc,i_current_price] + Filter [i_current_price,i_manufact_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_manufact_id] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (1) Project [inv_date_sk,inv_item_sk] - Filter [inv_date_sk,inv_item_sk,inv_quantity_on_hand] - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] [inv_date_sk,inv_item_sk,inv_quantity_on_hand] + Filter [inv_quantity_on_hand,inv_item_sk,inv_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [ss_item_sk] - Filter [ss_item_sk] - Scan parquet default.store_sales [ss_item_sk] [ss_item_sk] + WholeStageCodegen (3) + Filter [ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/explain.txt index 06ae0bc8a..0d44a0146 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/explain.txt @@ -1,69 +1,344 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[item_id#1 ASC NULLS FIRST,sr_item_qty#2 ASC NULLS FIRST], output=[item_id#1,sr_item_qty#2,sr_dev#3,cr_item_qty#4,cr_dev#5,wr_item_qty#6,wr_dev#7,average#8]) -+- *(18) Project [item_id#1, sr_item_qty#2, (((cast(sr_item_qty#2 as double) / cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as double)) / 3.0) * 100.0) AS sr_dev#3, cr_item_qty#4, (((cast(cr_item_qty#4 as double) / cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as double)) / 3.0) * 100.0) AS cr_dev#5, wr_item_qty#6, (((cast(wr_item_qty#6 as double) / cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as double)) / 3.0) * 100.0) AS wr_dev#7, CheckOverflow((promote_precision(cast(cast(((sr_item_qty#2 + cr_item_qty#4) + wr_item_qty#6) as decimal(20,0)) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#8] - +- *(18) BroadcastHashJoin [item_id#1], [item_id#9], Inner, BuildRight - :- *(18) Project [item_id#1, sr_item_qty#2, cr_item_qty#4] - : +- *(18) BroadcastHashJoin [item_id#1], [item_id#10], Inner, BuildRight - : :- *(18) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(sr_return_quantity#12 as bigint))]) - : : +- Exchange hashpartitioning(i_item_id#11, 5) - : : +- *(5) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(cast(sr_return_quantity#12 as bigint))]) - : : +- *(5) Project [sr_return_quantity#12, i_item_id#11] - : : +- *(5) BroadcastHashJoin [sr_returned_date_sk#13], [cast(d_date_sk#14 as bigint)], Inner, BuildRight - : : :- *(5) Project [sr_returned_date_sk#13, sr_return_quantity#12, i_item_id#11] - : : : +- *(5) BroadcastHashJoin [sr_item_sk#15], [cast(i_item_sk#16 as bigint)], Inner, BuildRight - : : : :- *(5) Project [sr_returned_date_sk#13, sr_item_sk#15, sr_return_quantity#12] - : : : : +- *(5) Filter (isnotnull(sr_item_sk#15) && isnotnull(sr_returned_date_sk#13)) - : : : : +- *(5) FileScan parquet default.store_returns[sr_returned_date_sk#13,sr_item_sk#15,sr_return_quantity#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [i_item_sk#16, i_item_id#11] - : : : +- *(1) Filter (isnotnull(i_item_sk#16) && isnotnull(i_item_id#11)) - : : : +- *(1) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(4) Project [d_date_sk#14] - : : +- *(4) BroadcastHashJoin [d_date#17], [d_date#17#18], LeftSemi, BuildRight - : : :- *(4) Project [d_date_sk#14, d_date#17] - : : : +- *(4) Filter isnotnull(d_date_sk#14) - : : : +- *(4) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) - : : +- *(3) Project [d_date#17 AS d_date#17#18] - : : +- *(3) BroadcastHashJoin [d_week_seq#19], [d_week_seq#19#20], LeftSemi, BuildRight - : : :- *(3) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [d_week_seq#19 AS d_week_seq#19#20] - : : +- *(2) Filter cast(d_date#17 as string) IN (2000-06-30,2000-09-27,2000-11-17) - : : +- *(2) FileScan parquet default.date_dim[d_date#17,d_week_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - : +- *(11) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(cr_return_quantity#21 as bigint))]) - : +- Exchange hashpartitioning(i_item_id#11, 5) - : +- *(10) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(cast(cr_return_quantity#21 as bigint))]) - : +- *(10) Project [cr_return_quantity#21, i_item_id#11] - : +- *(10) BroadcastHashJoin [cr_returned_date_sk#22], [d_date_sk#14], Inner, BuildRight - : :- *(10) Project [cr_returned_date_sk#22, cr_return_quantity#21, i_item_id#11] - : : +- *(10) BroadcastHashJoin [cr_item_sk#23], [i_item_sk#16], Inner, BuildRight - : : :- *(10) Project [cr_returned_date_sk#22, cr_item_sk#23, cr_return_quantity#21] - : : : +- *(10) Filter (isnotnull(cr_item_sk#23) && isnotnull(cr_returned_date_sk#22)) - : : : +- *(10) FileScan parquet default.catalog_returns[cr_returned_date_sk#22,cr_item_sk#23,cr_return_quantity#21] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_returned_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(6) Project [i_item_sk#16, i_item_id#11] - : : +- *(6) Filter (isnotnull(i_item_sk#16) && isnotnull(i_item_id#11)) - : : +- *(6) FileScan parquet default.item[i_item_sk#16,i_item_id#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(9) Project [d_date_sk#14] - : +- *(9) BroadcastHashJoin [d_date#17], [d_date#17#24], LeftSemi, BuildRight - : :- *(9) Project [d_date_sk#14, d_date#17] - : : +- *(9) Filter isnotnull(d_date_sk#14) - : : +- *(9) FileScan parquet default.date_dim[d_date_sk#14,d_date#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct - : +- ReusedExchange [d_date#17#24], BroadcastExchange HashedRelationBroadcastMode(List(input[0, date, true])) - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])) - +- *(17) HashAggregate(keys=[i_item_id#11], functions=[sum(cast(wr_return_quantity#25 as bigint))]) - +- Exchange hashpartitioning(i_item_id#11, 5) - +- *(16) HashAggregate(keys=[i_item_id#11], functions=[partial_sum(cast(wr_return_quantity#25 as bigint))]) - +- *(16) Project [wr_return_quantity#25, i_item_id#11] - +- *(16) BroadcastHashJoin [wr_returned_date_sk#26], [cast(d_date_sk#14 as bigint)], Inner, BuildRight - :- *(16) Project [wr_returned_date_sk#26, wr_return_quantity#25, i_item_id#11] - : +- *(16) BroadcastHashJoin [wr_item_sk#27], [cast(i_item_sk#16 as bigint)], Inner, BuildRight - : :- *(16) Project [wr_returned_date_sk#26, wr_item_sk#27, wr_return_quantity#25] - : : +- *(16) Filter (isnotnull(wr_item_sk#27) && isnotnull(wr_returned_date_sk#26)) - : : +- *(16) FileScan parquet default.web_returns[wr_returned_date_sk#26,wr_item_sk#27,wr_return_quantity#25] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_returned_date_sk)], ReadSchema: struct - : +- ReusedExchange [i_item_sk#16, i_item_id#11], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [d_date_sk#14], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +TakeOrderedAndProject (61) ++- * Project (60) + +- * BroadcastHashJoin Inner BuildRight (59) + :- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * HashAggregate (30) + : : +- Exchange (29) + : : +- * HashAggregate (28) + : : +- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_returns (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.item (4) + : : +- BroadcastExchange (25) + : : +- * Project (24) + : : +- * BroadcastHashJoin LeftSemi BuildRight (23) + : : :- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (22) + : : +- * Project (21) + : : +- * BroadcastHashJoin LeftSemi BuildRight (20) + : : :- * ColumnarToRow (14) + : : : +- Scan parquet default.date_dim (13) + : : +- BroadcastExchange (19) + : : +- * Project (18) + : : +- * Filter (17) + : : +- * ColumnarToRow (16) + : : +- Scan parquet default.date_dim (15) + : +- BroadcastExchange (43) + : +- * HashAggregate (42) + : +- Exchange (41) + : +- * HashAggregate (40) + : +- * Project (39) + : +- * BroadcastHashJoin Inner BuildRight (38) + : :- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Filter (33) + : : : +- * ColumnarToRow (32) + : : : +- Scan parquet default.catalog_returns (31) + : : +- ReusedExchange (34) + : +- ReusedExchange (37) + +- BroadcastExchange (58) + +- * HashAggregate (57) + +- Exchange (56) + +- * HashAggregate (55) + +- * Project (54) + +- * BroadcastHashJoin Inner BuildRight (53) + :- * Project (51) + : +- * BroadcastHashJoin Inner BuildRight (50) + : :- * Filter (48) + : : +- * ColumnarToRow (47) + : : +- Scan parquet default.web_returns (46) + : +- ReusedExchange (49) + +- ReusedExchange (52) + + +(1) Scan parquet default.store_returns +Output [3]: [sr_returned_date_sk#1, sr_item_sk#2, sr_return_quantity#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [3]: [sr_returned_date_sk#1, sr_item_sk#2, sr_return_quantity#3] + +(3) Filter [codegen id : 5] +Input [3]: [sr_returned_date_sk#1, sr_item_sk#2, sr_return_quantity#3] +Condition : (isnotnull(sr_item_sk#2) AND isnotnull(sr_returned_date_sk#1)) + +(4) Scan parquet default.item +Output [2]: [i_item_sk#4, i_item_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_item_id)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#4, i_item_id#5] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#4, i_item_id#5] +Condition : (isnotnull(i_item_sk#4) AND isnotnull(i_item_id#5)) + +(7) BroadcastExchange +Input [2]: [i_item_sk#4, i_item_id#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#6] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_item_sk#2] +Right keys [1]: [cast(i_item_sk#4 as bigint)] +Join condition: None + +(9) Project [codegen id : 5] +Output [3]: [sr_returned_date_sk#1, sr_return_quantity#3, i_item_id#5] +Input [5]: [sr_returned_date_sk#1, sr_item_sk#2, sr_return_quantity#3, i_item_sk#4, i_item_id#5] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_date#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#7, d_date#8] + +(12) Filter [codegen id : 4] +Input [2]: [d_date_sk#7, d_date#8] +Condition : isnotnull(d_date_sk#7) + +(13) Scan parquet default.date_dim +Output [2]: [d_date#8, d_week_seq#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +ReadSchema: struct + +(14) ColumnarToRow [codegen id : 3] +Input [2]: [d_date#8, d_week_seq#9] + +(15) Scan parquet default.date_dim +Output [2]: [d_date#8, d_week_seq#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 2] +Input [2]: [d_date#8, d_week_seq#9] + +(17) Filter [codegen id : 2] +Input [2]: [d_date#8, d_week_seq#9] +Condition : cast(d_date#8 as string) IN (2000-06-30,2000-09-27,2000-11-17) + +(18) Project [codegen id : 2] +Output [1]: [d_week_seq#9 AS d_week_seq#9#10] +Input [2]: [d_date#8, d_week_seq#9] + +(19) BroadcastExchange +Input [1]: [d_week_seq#9#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(20) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_week_seq#9] +Right keys [1]: [d_week_seq#9#10] +Join condition: None + +(21) Project [codegen id : 3] +Output [1]: [d_date#8 AS d_date#8#12] +Input [2]: [d_date#8, d_week_seq#9] + +(22) BroadcastExchange +Input [1]: [d_date#8#12] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#13] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [d_date#8] +Right keys [1]: [d_date#8#12] +Join condition: None + +(24) Project [codegen id : 4] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#8] + +(25) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(26) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [sr_returned_date_sk#1] +Right keys [1]: [cast(d_date_sk#7 as bigint)] +Join condition: None + +(27) Project [codegen id : 5] +Output [2]: [sr_return_quantity#3, i_item_id#5] +Input [4]: [sr_returned_date_sk#1, sr_return_quantity#3, i_item_id#5, d_date_sk#7] + +(28) HashAggregate [codegen id : 5] +Input [2]: [sr_return_quantity#3, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(cast(sr_return_quantity#3 as bigint))] +Aggregate Attributes [1]: [sum#15] +Results [2]: [i_item_id#5, sum#16] + +(29) Exchange +Input [2]: [i_item_id#5, sum#16] +Arguments: hashpartitioning(i_item_id#5, 5), true, [id=#17] + +(30) HashAggregate [codegen id : 18] +Input [2]: [i_item_id#5, sum#16] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(cast(sr_return_quantity#3 as bigint))] +Aggregate Attributes [1]: [sum(cast(sr_return_quantity#3 as bigint))#18] +Results [2]: [i_item_id#5 AS item_id#19, sum(cast(sr_return_quantity#3 as bigint))#18 AS sr_item_qty#20] + +(31) Scan parquet default.catalog_returns +Output [3]: [cr_returned_date_sk#21, cr_item_sk#22, cr_return_quantity#23] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_returned_date_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 10] +Input [3]: [cr_returned_date_sk#21, cr_item_sk#22, cr_return_quantity#23] + +(33) Filter [codegen id : 10] +Input [3]: [cr_returned_date_sk#21, cr_item_sk#22, cr_return_quantity#23] +Condition : (isnotnull(cr_item_sk#22) AND isnotnull(cr_returned_date_sk#21)) + +(34) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#4, i_item_id#5] + +(35) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cr_item_sk#22] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(36) Project [codegen id : 10] +Output [3]: [cr_returned_date_sk#21, cr_return_quantity#23, i_item_id#5] +Input [5]: [cr_returned_date_sk#21, cr_item_sk#22, cr_return_quantity#23, i_item_sk#4, i_item_id#5] + +(37) ReusedExchange [Reuses operator id: 25] +Output [1]: [d_date_sk#7] + +(38) BroadcastHashJoin [codegen id : 10] +Left keys [1]: [cr_returned_date_sk#21] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(39) Project [codegen id : 10] +Output [2]: [cr_return_quantity#23, i_item_id#5] +Input [4]: [cr_returned_date_sk#21, cr_return_quantity#23, i_item_id#5, d_date_sk#7] + +(40) HashAggregate [codegen id : 10] +Input [2]: [cr_return_quantity#23, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(cast(cr_return_quantity#23 as bigint))] +Aggregate Attributes [1]: [sum#24] +Results [2]: [i_item_id#5, sum#25] + +(41) Exchange +Input [2]: [i_item_id#5, sum#25] +Arguments: hashpartitioning(i_item_id#5, 5), true, [id=#26] + +(42) HashAggregate [codegen id : 11] +Input [2]: [i_item_id#5, sum#25] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(cast(cr_return_quantity#23 as bigint))] +Aggregate Attributes [1]: [sum(cast(cr_return_quantity#23 as bigint))#27] +Results [2]: [i_item_id#5 AS item_id#28, sum(cast(cr_return_quantity#23 as bigint))#27 AS cr_item_qty#29] + +(43) BroadcastExchange +Input [2]: [item_id#28, cr_item_qty#29] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#30] + +(44) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [item_id#19] +Right keys [1]: [item_id#28] +Join condition: None + +(45) Project [codegen id : 18] +Output [3]: [item_id#19, sr_item_qty#20, cr_item_qty#29] +Input [4]: [item_id#19, sr_item_qty#20, item_id#28, cr_item_qty#29] + +(46) Scan parquet default.web_returns +Output [3]: [wr_returned_date_sk#31, wr_item_sk#32, wr_return_quantity#33] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_returned_date_sk)] +ReadSchema: struct + +(47) ColumnarToRow [codegen id : 16] +Input [3]: [wr_returned_date_sk#31, wr_item_sk#32, wr_return_quantity#33] + +(48) Filter [codegen id : 16] +Input [3]: [wr_returned_date_sk#31, wr_item_sk#32, wr_return_quantity#33] +Condition : (isnotnull(wr_item_sk#32) AND isnotnull(wr_returned_date_sk#31)) + +(49) ReusedExchange [Reuses operator id: 7] +Output [2]: [i_item_sk#4, i_item_id#5] + +(50) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [wr_item_sk#32] +Right keys [1]: [cast(i_item_sk#4 as bigint)] +Join condition: None + +(51) Project [codegen id : 16] +Output [3]: [wr_returned_date_sk#31, wr_return_quantity#33, i_item_id#5] +Input [5]: [wr_returned_date_sk#31, wr_item_sk#32, wr_return_quantity#33, i_item_sk#4, i_item_id#5] + +(52) ReusedExchange [Reuses operator id: 25] +Output [1]: [d_date_sk#7] + +(53) BroadcastHashJoin [codegen id : 16] +Left keys [1]: [wr_returned_date_sk#31] +Right keys [1]: [cast(d_date_sk#7 as bigint)] +Join condition: None + +(54) Project [codegen id : 16] +Output [2]: [wr_return_quantity#33, i_item_id#5] +Input [4]: [wr_returned_date_sk#31, wr_return_quantity#33, i_item_id#5, d_date_sk#7] + +(55) HashAggregate [codegen id : 16] +Input [2]: [wr_return_quantity#33, i_item_id#5] +Keys [1]: [i_item_id#5] +Functions [1]: [partial_sum(cast(wr_return_quantity#33 as bigint))] +Aggregate Attributes [1]: [sum#34] +Results [2]: [i_item_id#5, sum#35] + +(56) Exchange +Input [2]: [i_item_id#5, sum#35] +Arguments: hashpartitioning(i_item_id#5, 5), true, [id=#36] + +(57) HashAggregate [codegen id : 17] +Input [2]: [i_item_id#5, sum#35] +Keys [1]: [i_item_id#5] +Functions [1]: [sum(cast(wr_return_quantity#33 as bigint))] +Aggregate Attributes [1]: [sum(cast(wr_return_quantity#33 as bigint))#37] +Results [2]: [i_item_id#5 AS item_id#38, sum(cast(wr_return_quantity#33 as bigint))#37 AS wr_item_qty#39] + +(58) BroadcastExchange +Input [2]: [item_id#38, wr_item_qty#39] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#40] + +(59) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [item_id#19] +Right keys [1]: [item_id#38] +Join condition: None + +(60) Project [codegen id : 18] +Output [8]: [item_id#19, sr_item_qty#20, (((cast(sr_item_qty#20 as double) / cast(((sr_item_qty#20 + cr_item_qty#29) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#29, (((cast(cr_item_qty#29 as double) / cast(((sr_item_qty#20 + cr_item_qty#29) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#20 + cr_item_qty#29) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(cast(((sr_item_qty#20 + cr_item_qty#29) + wr_item_qty#39) as decimal(20,0)) as decimal(21,1))) / 3.0), DecimalType(27,6), true) AS average#44] +Input [5]: [item_id#19, sr_item_qty#20, cr_item_qty#29, item_id#38, wr_item_qty#39] + +(61) TakeOrderedAndProject +Input [8]: [item_id#19, sr_item_qty#20, sr_dev#41, cr_item_qty#29, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] +Arguments: 100, [item_id#19 ASC NULLS FIRST, sr_item_qty#20 ASC NULLS FIRST], [item_id#19, sr_item_qty#20, sr_dev#41, cr_item_qty#29, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/simplified.txt index 94ffc5da9..c12d5ffaa 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q83/simplified.txt @@ -1,94 +1,91 @@ -TakeOrderedAndProject [average,cr_dev,cr_item_qty,item_id,sr_dev,sr_item_qty,wr_dev,wr_item_qty] - WholeStageCodegen - Project [cr_item_qty,item_id,sr_item_qty,wr_item_qty] +TakeOrderedAndProject [item_id,sr_item_qty,sr_dev,cr_item_qty,cr_dev,wr_item_qty,wr_dev,average] + WholeStageCodegen (18) + Project [item_id,sr_item_qty,cr_item_qty,wr_item_qty] BroadcastHashJoin [item_id,item_id] - Project [cr_item_qty,item_id,sr_item_qty] + Project [item_id,sr_item_qty,cr_item_qty] BroadcastHashJoin [item_id,item_id] - HashAggregate [i_item_id,sum,sum(cast(sr_return_quantity as bigint))] [item_id,sr_item_qty,sum,sum(cast(sr_return_quantity as bigint))] + HashAggregate [i_item_id,sum] [sum(cast(sr_return_quantity as bigint)),item_id,sr_item_qty,sum] InputAdapter Exchange [i_item_id] #1 - WholeStageCodegen - HashAggregate [i_item_id,sr_return_quantity,sum,sum] [sum,sum] - Project [i_item_id,sr_return_quantity] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] - Project [i_item_id,sr_return_quantity,sr_returned_date_sk] - BroadcastHashJoin [i_item_sk,sr_item_sk] - Project [sr_item_sk,sr_return_quantity,sr_returned_date_sk] - Filter [sr_item_sk,sr_returned_date_sk] - Scan parquet default.store_returns [sr_item_sk,sr_return_quantity,sr_returned_date_sk] [sr_item_sk,sr_return_quantity,sr_returned_date_sk] + WholeStageCodegen (5) + HashAggregate [i_item_id,sr_return_quantity] [sum,sum] + Project [sr_return_quantity,i_item_id] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [sr_returned_date_sk,sr_return_quantity,i_item_id] + BroadcastHashJoin [sr_item_sk,i_item_sk] + Filter [sr_item_sk,sr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_return_quantity] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [i_item_id,i_item_sk] - Filter [i_item_id,i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] + WholeStageCodegen (1) + Filter [i_item_sk,i_item_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (4) Project [d_date_sk] BroadcastHashJoin [d_date,d_date] - Project [d_date,d_date_sk] - Filter [d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + Filter [d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (3) Project [d_date] BroadcastHashJoin [d_week_seq,d_week_seq] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (2) Project [d_week_seq] Filter [d_date] - Scan parquet default.date_dim [d_date,d_week_seq] [d_date,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date,d_week_seq] InputAdapter BroadcastExchange #6 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(cast(cr_return_quantity as bigint))] [cr_item_qty,item_id,sum,sum(cast(cr_return_quantity as bigint))] + WholeStageCodegen (11) + HashAggregate [i_item_id,sum] [sum(cast(cr_return_quantity as bigint)),item_id,cr_item_qty,sum] InputAdapter Exchange [i_item_id] #7 - WholeStageCodegen - HashAggregate [cr_return_quantity,i_item_id,sum,sum] [sum,sum] + WholeStageCodegen (10) + HashAggregate [i_item_id,cr_return_quantity] [sum,sum] Project [cr_return_quantity,i_item_id] BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Project [cr_return_quantity,cr_returned_date_sk,i_item_id] + Project [cr_returned_date_sk,cr_return_quantity,i_item_id] BroadcastHashJoin [cr_item_sk,i_item_sk] - Project [cr_item_sk,cr_return_quantity,cr_returned_date_sk] - Filter [cr_item_sk,cr_returned_date_sk] - Scan parquet default.catalog_returns [cr_item_sk,cr_return_quantity,cr_returned_date_sk] [cr_item_sk,cr_return_quantity,cr_returned_date_sk] + Filter [cr_item_sk,cr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_item_sk,cr_return_quantity] InputAdapter - BroadcastExchange #8 - WholeStageCodegen - Project [i_item_id,i_item_sk] - Filter [i_item_id,i_item_sk] - Scan parquet default.item [i_item_id,i_item_sk] [i_item_id,i_item_sk] + ReusedExchange [i_item_sk,i_item_id] #2 InputAdapter - BroadcastExchange #9 - WholeStageCodegen - Project [d_date_sk] - BroadcastHashJoin [d_date,d_date] - Project [d_date,d_date_sk] - Filter [d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] - InputAdapter - ReusedExchange [d_date] [d_date] #4 + ReusedExchange [d_date_sk] #3 InputAdapter - BroadcastExchange #10 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum(cast(wr_return_quantity as bigint))] [item_id,sum,sum(cast(wr_return_quantity as bigint)),wr_item_qty] + BroadcastExchange #8 + WholeStageCodegen (17) + HashAggregate [i_item_id,sum] [sum(cast(wr_return_quantity as bigint)),item_id,wr_item_qty,sum] InputAdapter - Exchange [i_item_id] #11 - WholeStageCodegen - HashAggregate [i_item_id,sum,sum,wr_return_quantity] [sum,sum] - Project [i_item_id,wr_return_quantity] - BroadcastHashJoin [d_date_sk,wr_returned_date_sk] - Project [i_item_id,wr_return_quantity,wr_returned_date_sk] - BroadcastHashJoin [i_item_sk,wr_item_sk] - Project [wr_item_sk,wr_return_quantity,wr_returned_date_sk] - Filter [wr_item_sk,wr_returned_date_sk] - Scan parquet default.web_returns [wr_item_sk,wr_return_quantity,wr_returned_date_sk] [wr_item_sk,wr_return_quantity,wr_returned_date_sk] + Exchange [i_item_id] #9 + WholeStageCodegen (16) + HashAggregate [i_item_id,wr_return_quantity] [sum,sum] + Project [wr_return_quantity,i_item_id] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Project [wr_returned_date_sk,wr_return_quantity,i_item_id] + BroadcastHashJoin [wr_item_sk,i_item_sk] + Filter [wr_item_sk,wr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_returned_date_sk,wr_item_sk,wr_return_quantity] InputAdapter - ReusedExchange [i_item_id,i_item_sk] [i_item_id,i_item_sk] #2 + ReusedExchange [i_item_sk,i_item_id] #2 InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #3 + ReusedExchange [d_date_sk] #3 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/explain.txt index 062eebe04..9e5fb4386 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/explain.txt @@ -1,35 +1,200 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[c_customer_id#1 ASC NULLS FIRST], output=[customer_id#2,customername#3]) -+- *(6) Project [c_customer_id#1 AS customer_id#2, concat(c_last_name#4, , , c_first_name#5) AS customername#3, c_customer_id#1] - +- *(6) BroadcastHashJoin [cast(cd_demo_sk#6 as bigint)], [sr_cdemo_sk#7], Inner, BuildRight - :- *(6) Project [c_customer_id#1, c_first_name#5, c_last_name#4, cd_demo_sk#6] - : +- *(6) BroadcastHashJoin [hd_income_band_sk#8], [ib_income_band_sk#9], Inner, BuildRight - : :- *(6) Project [c_customer_id#1, c_first_name#5, c_last_name#4, cd_demo_sk#6, hd_income_band_sk#8] - : : +- *(6) BroadcastHashJoin [c_current_hdemo_sk#10], [hd_demo_sk#11], Inner, BuildRight - : : :- *(6) Project [c_customer_id#1, c_current_hdemo_sk#10, c_first_name#5, c_last_name#4, cd_demo_sk#6] - : : : +- *(6) BroadcastHashJoin [c_current_cdemo_sk#12], [cd_demo_sk#6], Inner, BuildRight - : : : :- *(6) Project [c_customer_id#1, c_current_cdemo_sk#12, c_current_hdemo_sk#10, c_first_name#5, c_last_name#4] - : : : : +- *(6) BroadcastHashJoin [c_current_addr_sk#13], [ca_address_sk#14], Inner, BuildRight - : : : : :- *(6) Project [c_customer_id#1, c_current_cdemo_sk#12, c_current_hdemo_sk#10, c_current_addr_sk#13, c_first_name#5, c_last_name#4] - : : : : : +- *(6) Filter ((isnotnull(c_current_addr_sk#13) && isnotnull(c_current_cdemo_sk#12)) && isnotnull(c_current_hdemo_sk#10)) - : : : : : +- *(6) FileScan parquet default.customer[c_customer_id#1,c_current_cdemo_sk#12,c_current_hdemo_sk#10,c_current_addr_sk#13,c_first_name#5,c_last_name#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(2) Project [cd_demo_sk#6] - : : : +- *(2) Filter isnotnull(cd_demo_sk#6) - : : : +- *(2) FileScan parquet default.customer_demographics[cd_demo_sk#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [hd_demo_sk#11, hd_income_band_sk#8] - : : +- *(3) Filter (isnotnull(hd_demo_sk#11) && isnotnull(hd_income_band_sk#8)) - : : +- *(3) FileScan parquet default.household_demographics[hd_demo_sk#11,hd_income_band_sk#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [ib_income_band_sk#9] - : +- *(4) Filter ((((isnotnull(ib_lower_bound#16) && isnotnull(ib_upper_bound#17)) && (ib_lower_bound#16 >= 38128)) && (ib_upper_bound#17 <= 88128)) && isnotnull(ib_income_band_sk#9)) - : +- *(4) FileScan parquet default.income_band[ib_income_band_sk#9,ib_lower_bound#16,ib_upper_bound#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/income_band], PartitionFilters: [], PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), ..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])) - +- *(5) Project [sr_cdemo_sk#7] - +- *(5) Filter isnotnull(sr_cdemo_sk#7) - +- *(5) FileScan parquet default.store_returns[sr_cdemo_sk#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_returns], PartitionFilters: [], PushedFilters: [IsNotNull(sr_cdemo_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (36) ++- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.customer_address (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.customer_demographics (11) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.household_demographics (17) + : +- BroadcastExchange (27) + : +- * Project (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.income_band (23) + +- BroadcastExchange (33) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet default.store_returns (30) + + +(1) Scan parquet default.customer +Output [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] + +(3) Filter [codegen id : 6] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6] +Condition : ((isnotnull(c_current_addr_sk#4) AND isnotnull(c_current_cdemo_sk#2)) AND isnotnull(c_current_hdemo_sk#3)) + +(4) Scan parquet default.customer_address +Output [2]: [ca_address_sk#7, ca_city#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_city), EqualTo(ca_city,Edgewood), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [ca_address_sk#7, ca_city#8] + +(6) Filter [codegen id : 1] +Input [2]: [ca_address_sk#7, ca_city#8] +Condition : ((isnotnull(ca_city#8) AND (ca_city#8 = Edgewood)) AND isnotnull(ca_address_sk#7)) + +(7) Project [codegen id : 1] +Output [1]: [ca_address_sk#7] +Input [2]: [ca_address_sk#7, ca_city#8] + +(8) BroadcastExchange +Input [1]: [ca_address_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(9) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_current_addr_sk#4] +Right keys [1]: [ca_address_sk#7] +Join condition: None + +(10) Project [codegen id : 6] +Output [5]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6] +Input [7]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6, ca_address_sk#7] + +(11) Scan parquet default.customer_demographics +Output [1]: [cd_demo_sk#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [1]: [cd_demo_sk#10] + +(13) Filter [codegen id : 2] +Input [1]: [cd_demo_sk#10] +Condition : isnotnull(cd_demo_sk#10) + +(14) BroadcastExchange +Input [1]: [cd_demo_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(15) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#10] +Join condition: None + +(16) Project [codegen id : 6] +Output [5]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#10] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#10] + +(17) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#12, hd_income_band_sk#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [2]: [hd_demo_sk#12, hd_income_band_sk#13] + +(19) Filter [codegen id : 3] +Input [2]: [hd_demo_sk#12, hd_income_band_sk#13] +Condition : (isnotnull(hd_demo_sk#12) AND isnotnull(hd_income_band_sk#13)) + +(20) BroadcastExchange +Input [2]: [hd_demo_sk#12, hd_income_band_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(21) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [c_current_hdemo_sk#3] +Right keys [1]: [hd_demo_sk#12] +Join condition: None + +(22) Project [codegen id : 6] +Output [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#10, hd_income_band_sk#13] +Input [7]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#10, hd_demo_sk#12, hd_income_band_sk#13] + +(23) Scan parquet default.income_band +Output [3]: [ib_income_band_sk#15, ib_lower_bound#16, ib_upper_bound#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/income_band] +PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), LessThanOrEqual(ib_upper_bound,88128), IsNotNull(ib_income_band_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [3]: [ib_income_band_sk#15, ib_lower_bound#16, ib_upper_bound#17] + +(25) Filter [codegen id : 4] +Input [3]: [ib_income_band_sk#15, ib_lower_bound#16, ib_upper_bound#17] +Condition : ((((isnotnull(ib_lower_bound#16) AND isnotnull(ib_upper_bound#17)) AND (ib_lower_bound#16 >= 38128)) AND (ib_upper_bound#17 <= 88128)) AND isnotnull(ib_income_band_sk#15)) + +(26) Project [codegen id : 4] +Output [1]: [ib_income_band_sk#15] +Input [3]: [ib_income_band_sk#15, ib_lower_bound#16, ib_upper_bound#17] + +(27) BroadcastExchange +Input [1]: [ib_income_band_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [hd_income_band_sk#13] +Right keys [1]: [ib_income_band_sk#15] +Join condition: None + +(29) Project [codegen id : 6] +Output [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#10] +Input [6]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#10, hd_income_band_sk#13, ib_income_band_sk#15] + +(30) Scan parquet default.store_returns +Output [1]: [sr_cdemo_sk#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_cdemo_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [1]: [sr_cdemo_sk#19] + +(32) Filter [codegen id : 5] +Input [1]: [sr_cdemo_sk#19] +Condition : isnotnull(sr_cdemo_sk#19) + +(33) BroadcastExchange +Input [1]: [sr_cdemo_sk#19] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#20] + +(34) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cast(cd_demo_sk#10 as bigint)] +Right keys [1]: [sr_cdemo_sk#19] +Join condition: None + +(35) Project [codegen id : 6] +Output [3]: [c_customer_id#1 AS customer_id#21, concat(c_last_name#6, , , c_first_name#5) AS customername#22, c_customer_id#1] +Input [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#10, sr_cdemo_sk#19] + +(36) TakeOrderedAndProject +Input [3]: [customer_id#21, customername#22, c_customer_id#1] +Arguments: 100, [c_customer_id#1 ASC NULLS FIRST], [customer_id#21, customername#22] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/simplified.txt index 894b15298..014d808cf 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q84/simplified.txt @@ -1,45 +1,53 @@ TakeOrderedAndProject [c_customer_id,customer_id,customername] - WholeStageCodegen - Project [c_customer_id,c_first_name,c_last_name] + WholeStageCodegen (6) + Project [c_customer_id,c_last_name,c_first_name] BroadcastHashJoin [cd_demo_sk,sr_cdemo_sk] Project [c_customer_id,c_first_name,c_last_name,cd_demo_sk] BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] Project [c_customer_id,c_first_name,c_last_name,cd_demo_sk,hd_income_band_sk] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name,cd_demo_sk] + Project [c_customer_id,c_current_hdemo_sk,c_first_name,c_last_name,cd_demo_sk] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] - Project [c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] + Project [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_first_name,c_last_name] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] - Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] - Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_id,c_first_name,c_last_name] + Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_id,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk,c_first_name,c_last_name] InputAdapter BroadcastExchange #1 - WholeStageCodegen + WholeStageCodegen (1) Project [ca_address_sk] - Filter [ca_address_sk,ca_city] - Scan parquet default.customer_address [ca_address_sk,ca_city] [ca_address_sk,ca_city] + Filter [ca_city,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_city] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [cd_demo_sk] - Filter [cd_demo_sk] - Scan parquet default.customer_demographics [cd_demo_sk] [cd_demo_sk] + WholeStageCodegen (2) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [hd_demo_sk,hd_income_band_sk] - Filter [hd_demo_sk,hd_income_band_sk] - Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] [hd_demo_sk,hd_income_band_sk] + WholeStageCodegen (3) + Filter [hd_demo_sk,hd_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (4) Project [ib_income_band_sk] - Filter [ib_income_band_sk,ib_lower_bound,ib_upper_bound] - Scan parquet default.income_band [ib_income_band_sk,ib_lower_bound,ib_upper_bound] [ib_income_band_sk,ib_lower_bound,ib_upper_bound] + Filter [ib_lower_bound,ib_upper_bound,ib_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet default.income_band [ib_income_band_sk,ib_lower_bound,ib_upper_bound] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [sr_cdemo_sk] - Filter [sr_cdemo_sk] - Scan parquet default.store_returns [sr_cdemo_sk] [sr_cdemo_sk] + WholeStageCodegen (5) + Filter [sr_cdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_cdemo_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/explain.txt index d87dde0ce..23598c2e7 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/explain.txt @@ -1,50 +1,287 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[substring(r_reason_desc, 1, 20)#1 ASC NULLS FIRST,aggOrder#2 ASC NULLS FIRST,avg(wr_refunded_cash)#3 ASC NULLS FIRST,avg(wr_fee)#4 ASC NULLS FIRST], output=[substring(r_reason_desc, 1, 20)#1,avg(ws_quantity)#5,avg(wr_refunded_cash)#3,avg(wr_fee)#4]) -+- *(9) HashAggregate(keys=[r_reason_desc#6], functions=[avg(cast(ws_quantity#7 as bigint)), avg(UnscaledValue(wr_refunded_cash#8)), avg(UnscaledValue(wr_fee#9))]) - +- Exchange hashpartitioning(r_reason_desc#6, 5) - +- *(8) HashAggregate(keys=[r_reason_desc#6], functions=[partial_avg(cast(ws_quantity#7 as bigint)), partial_avg(UnscaledValue(wr_refunded_cash#8)), partial_avg(UnscaledValue(wr_fee#9))]) - +- *(8) Project [ws_quantity#7, wr_fee#9, wr_refunded_cash#8, r_reason_desc#6] - +- *(8) BroadcastHashJoin [wr_reason_sk#10], [cast(r_reason_sk#11 as bigint)], Inner, BuildRight - :- *(8) Project [ws_quantity#7, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] - : +- *(8) BroadcastHashJoin [ws_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight - : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] - : : +- *(8) BroadcastHashJoin [wr_refunded_addr_sk#14], [cast(ca_address_sk#15 as bigint)], Inner, BuildRight, ((((ca_state#16 IN (IN,OH,NJ) && (ws_net_profit#17 >= 100.00)) && (ws_net_profit#17 <= 200.00)) || ((ca_state#16 IN (WI,CT,KY) && (ws_net_profit#17 >= 150.00)) && (ws_net_profit#17 <= 300.00))) || ((ca_state#16 IN (LA,IA,AR) && (ws_net_profit#17 >= 50.00)) && (ws_net_profit#17 <= 250.00))) - : : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, ws_net_profit#17, wr_refunded_addr_sk#14, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] - : : : +- *(8) BroadcastHashJoin [wr_returning_cdemo_sk#18, cd_marital_status#19, cd_education_status#20], [cast(cd_demo_sk#21 as bigint), cd_marital_status#22, cd_education_status#23], Inner, BuildRight - : : : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, ws_net_profit#17, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#18, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8, cd_marital_status#19, cd_education_status#20] - : : : : +- *(8) BroadcastHashJoin [wr_refunded_cdemo_sk#24], [cast(cd_demo_sk#25 as bigint)], Inner, BuildRight, ((((((cd_marital_status#19 = M) && (cd_education_status#20 = Advanced Degree)) && (ws_sales_price#26 >= 100.00)) && (ws_sales_price#26 <= 150.00)) || ((((cd_marital_status#19 = S) && (cd_education_status#20 = College)) && (ws_sales_price#26 >= 50.00)) && (ws_sales_price#26 <= 100.00))) || ((((cd_marital_status#19 = W) && (cd_education_status#20 = 2 yr Degree)) && (ws_sales_price#26 >= 150.00)) && (ws_sales_price#26 <= 200.00))) - : : : : :- *(8) Project [ws_sold_date_sk#12, ws_quantity#7, ws_sales_price#26, ws_net_profit#17, wr_refunded_cdemo_sk#24, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#18, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] - : : : : : +- *(8) BroadcastHashJoin [ws_web_page_sk#27], [wp_web_page_sk#28], Inner, BuildRight - : : : : : :- *(8) Project [ws_sold_date_sk#12, ws_web_page_sk#27, ws_quantity#7, ws_sales_price#26, ws_net_profit#17, wr_refunded_cdemo_sk#24, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#18, wr_reason_sk#10, wr_fee#9, wr_refunded_cash#8] - : : : : : : +- *(8) BroadcastHashJoin [cast(ws_item_sk#29 as bigint), cast(ws_order_number#30 as bigint)], [wr_item_sk#31, wr_order_number#32], Inner, BuildRight - : : : : : : :- *(8) Project [ws_sold_date_sk#12, ws_item_sk#29, ws_web_page_sk#27, ws_order_number#30, ws_quantity#7, ws_sales_price#26, ws_net_profit#17] - : : : : : : : +- *(8) Filter (((isnotnull(ws_item_sk#29) && isnotnull(ws_order_number#30)) && isnotnull(ws_web_page_sk#27)) && isnotnull(ws_sold_date_sk#12)) - : : : : : : : +- *(8) FileScan parquet default.web_sales[ws_sold_date_sk#12,ws_item_sk#29,ws_web_page_sk#27,ws_order_number#30,ws_quantity#7,ws_sales_price#26,ws_net_profit#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), IsNotNull(ws_sold_..., ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : : +- *(3) Project [cd_demo_sk#25, cd_marital_status#19, cd_education_status#20] - : : : : +- *(3) Filter ((isnotnull(cd_demo_sk#25) && isnotnull(cd_education_status#20)) && isnotnull(cd_marital_status#19)) - : : : : +- *(3) FileScan parquet default.customer_demographics[cd_demo_sk#25,cd_marital_status#19,cd_education_status#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_education_status), IsNotNull(cd_marital_status)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint), input[1, string, true], input[2, string, true])) - : : : +- *(4) Project [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] - : : : +- *(4) Filter ((isnotnull(cd_demo_sk#21) && isnotnull(cd_marital_status#22)) && isnotnull(cd_education_status#23)) - : : : +- *(4) FileScan parquet default.customer_demographics[cd_demo_sk#21,cd_marital_status#22,cd_education_status#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(5) Project [ca_address_sk#15, ca_state#16] - : : +- *(5) Filter ((isnotnull(ca_country#33) && (ca_country#33 = United States)) && isnotnull(ca_address_sk#15)) - : : +- *(5) FileScan parquet default.customer_address[ca_address_sk#15,ca_state#16,ca_country#33] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(6) Project [d_date_sk#13] - : +- *(6) Filter ((isnotnull(d_year#34) && (d_year#34 = 2000)) && isnotnull(d_date_sk#13)) - : +- *(6) FileScan parquet default.date_dim[d_date_sk#13,d_year#34] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(7) Project [r_reason_sk#11, r_reason_desc#6] - +- *(7) Filter isnotnull(r_reason_sk#11) - +- *(7) FileScan parquet default.reason[r_reason_sk#11,r_reason_desc#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (51) ++- * HashAggregate (50) + +- Exchange (49) + +- * HashAggregate (48) + +- * Project (47) + +- * BroadcastHashJoin Inner BuildRight (46) + :- * Project (41) + : +- * BroadcastHashJoin Inner BuildRight (40) + : :- * Project (34) + : : +- * BroadcastHashJoin Inner BuildRight (33) + : : :- * Project (27) + : : : +- * BroadcastHashJoin Inner BuildRight (26) + : : : :- * Project (21) + : : : : +- * BroadcastHashJoin Inner BuildRight (20) + : : : : :- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.web_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.web_returns (4) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet default.web_page (10) + : : : : +- BroadcastExchange (19) + : : : : +- * Filter (18) + : : : : +- * ColumnarToRow (17) + : : : : +- Scan parquet default.customer_demographics (16) + : : : +- BroadcastExchange (25) + : : : +- * Filter (24) + : : : +- * ColumnarToRow (23) + : : : +- Scan parquet default.customer_demographics (22) + : : +- BroadcastExchange (32) + : : +- * Project (31) + : : +- * Filter (30) + : : +- * ColumnarToRow (29) + : : +- Scan parquet default.customer_address (28) + : +- BroadcastExchange (39) + : +- * Project (38) + : +- * Filter (37) + : +- * ColumnarToRow (36) + : +- Scan parquet default.date_dim (35) + +- BroadcastExchange (45) + +- * Filter (44) + +- * ColumnarToRow (43) + +- Scan parquet default.reason (42) + + +(1) Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), IsNotNull(ws_sold_date_sk), Or(Or(And(GreaterThanOrEqual(ws_sales_price,100.00),LessThanOrEqual(ws_sales_price,150.00)),And(GreaterThanOrEqual(ws_sales_price,50.00),LessThanOrEqual(ws_sales_price,100.00))),And(GreaterThanOrEqual(ws_sales_price,150.00),LessThanOrEqual(ws_sales_price,200.00))), Or(Or(And(GreaterThanOrEqual(ws_net_profit,100.00),LessThanOrEqual(ws_net_profit,200.00)),And(GreaterThanOrEqual(ws_net_profit,150.00),LessThanOrEqual(ws_net_profit,300.00))),And(GreaterThanOrEqual(ws_net_profit,50.00),LessThanOrEqual(ws_net_profit,250.00)))] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 8] +Input [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] + +(3) Filter [codegen id : 8] +Input [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] +Condition : (((((isnotnull(ws_item_sk#2) AND isnotnull(ws_order_number#4)) AND isnotnull(ws_web_page_sk#3)) AND isnotnull(ws_sold_date_sk#1)) AND ((((ws_sales_price#6 >= 100.00) AND (ws_sales_price#6 <= 150.00)) OR ((ws_sales_price#6 >= 50.00) AND (ws_sales_price#6 <= 100.00))) OR ((ws_sales_price#6 >= 150.00) AND (ws_sales_price#6 <= 200.00)))) AND ((((ws_net_profit#7 >= 100.00) AND (ws_net_profit#7 <= 200.00)) OR ((ws_net_profit#7 >= 150.00) AND (ws_net_profit#7 <= 300.00))) OR ((ws_net_profit#7 >= 50.00) AND (ws_net_profit#7 <= 250.00)))) + +(4) Scan parquet default.web_returns +Output [8]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number), IsNotNull(wr_refunded_cdemo_sk), IsNotNull(wr_returning_cdemo_sk), IsNotNull(wr_refunded_addr_sk), IsNotNull(wr_reason_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [8]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] + +(6) Filter [codegen id : 1] +Input [8]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] +Condition : (((((isnotnull(wr_item_sk#8) AND isnotnull(wr_order_number#13)) AND isnotnull(wr_refunded_cdemo_sk#9)) AND isnotnull(wr_returning_cdemo_sk#11)) AND isnotnull(wr_refunded_addr_sk#10)) AND isnotnull(wr_reason_sk#12)) + +(7) BroadcastExchange +Input [8]: [wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[5, bigint, false]),false), [id=#16] + +(8) BroadcastHashJoin [codegen id : 8] +Left keys [2]: [cast(ws_item_sk#2 as bigint), cast(ws_order_number#4 as bigint)] +Right keys [2]: [wr_item_sk#8, wr_order_number#13] +Join condition: None + +(9) Project [codegen id : 8] +Output [11]: [ws_sold_date_sk#1, ws_web_page_sk#3, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [15]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_item_sk#8, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_order_number#13, wr_fee#14, wr_refunded_cash#15] + +(10) Scan parquet default.web_page +Output [1]: [wp_web_page_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_page] +PushedFilters: [IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [1]: [wp_web_page_sk#17] + +(12) Filter [codegen id : 2] +Input [1]: [wp_web_page_sk#17] +Condition : isnotnull(wp_web_page_sk#17) + +(13) BroadcastExchange +Input [1]: [wp_web_page_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] + +(14) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_web_page_sk#3] +Right keys [1]: [wp_web_page_sk#17] +Join condition: None + +(15) Project [codegen id : 8] +Output [10]: [ws_sold_date_sk#1, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [12]: [ws_sold_date_sk#1, ws_web_page_sk#3, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, wp_web_page_sk#17] + +(16) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree)),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree)))] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] + +(18) Filter [codegen id : 3] +Input [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] +Condition : (((isnotnull(cd_demo_sk#19) AND isnotnull(cd_marital_status#20)) AND isnotnull(cd_education_status#21)) AND ((((cd_marital_status#20 = M) AND (cd_education_status#21 = Advanced Degree)) OR ((cd_marital_status#20 = S) AND (cd_education_status#21 = College))) OR ((cd_marital_status#20 = W) AND (cd_education_status#21 = 2 yr Degree)))) + +(19) BroadcastExchange +Input [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] + +(20) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [wr_refunded_cdemo_sk#9] +Right keys [1]: [cast(cd_demo_sk#19 as bigint)] +Join condition: ((((((cd_marital_status#20 = M) AND (cd_education_status#21 = Advanced Degree)) AND (ws_sales_price#6 >= 100.00)) AND (ws_sales_price#6 <= 150.00)) OR ((((cd_marital_status#20 = S) AND (cd_education_status#21 = College)) AND (ws_sales_price#6 >= 50.00)) AND (ws_sales_price#6 <= 100.00))) OR ((((cd_marital_status#20 = W) AND (cd_education_status#21 = 2 yr Degree)) AND (ws_sales_price#6 >= 150.00)) AND (ws_sales_price#6 <= 200.00))) + +(21) Project [codegen id : 8] +Output [10]: [ws_sold_date_sk#1, ws_quantity#5, ws_net_profit#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#20, cd_education_status#21] +Input [13]: [ws_sold_date_sk#1, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#9, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] + +(22) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#23, cd_marital_status#24, cd_education_status#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [3]: [cd_demo_sk#23, cd_marital_status#24, cd_education_status#25] + +(24) Filter [codegen id : 4] +Input [3]: [cd_demo_sk#23, cd_marital_status#24, cd_education_status#25] +Condition : ((isnotnull(cd_demo_sk#23) AND isnotnull(cd_marital_status#24)) AND isnotnull(cd_education_status#25)) + +(25) BroadcastExchange +Input [3]: [cd_demo_sk#23, cd_marital_status#24, cd_education_status#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint), input[1, string, false], input[2, string, false]),false), [id=#26] + +(26) BroadcastHashJoin [codegen id : 8] +Left keys [3]: [wr_returning_cdemo_sk#11, cd_marital_status#20, cd_education_status#21] +Right keys [3]: [cast(cd_demo_sk#23 as bigint), cd_marital_status#24, cd_education_status#25] +Join condition: None + +(27) Project [codegen id : 8] +Output [7]: [ws_sold_date_sk#1, ws_quantity#5, ws_net_profit#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [13]: [ws_sold_date_sk#1, ws_quantity#5, ws_net_profit#7, wr_refunded_addr_sk#10, wr_returning_cdemo_sk#11, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, cd_marital_status#20, cd_education_status#21, cd_demo_sk#23, cd_marital_status#24, cd_education_status#25] + +(28) Scan parquet default.customer_address +Output [3]: [ca_address_sk#27, ca_state#28, ca_country#29] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [IN,OH,NJ]),In(ca_state, [WI,CT,KY])),In(ca_state, [LA,IA,AR]))] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 5] +Input [3]: [ca_address_sk#27, ca_state#28, ca_country#29] + +(30) Filter [codegen id : 5] +Input [3]: [ca_address_sk#27, ca_state#28, ca_country#29] +Condition : (((isnotnull(ca_country#29) AND (ca_country#29 = United States)) AND isnotnull(ca_address_sk#27)) AND ((ca_state#28 IN (IN,OH,NJ) OR ca_state#28 IN (WI,CT,KY)) OR ca_state#28 IN (LA,IA,AR))) + +(31) Project [codegen id : 5] +Output [2]: [ca_address_sk#27, ca_state#28] +Input [3]: [ca_address_sk#27, ca_state#28, ca_country#29] + +(32) BroadcastExchange +Input [2]: [ca_address_sk#27, ca_state#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] + +(33) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [wr_refunded_addr_sk#10] +Right keys [1]: [cast(ca_address_sk#27 as bigint)] +Join condition: ((((ca_state#28 IN (IN,OH,NJ) AND (ws_net_profit#7 >= 100.00)) AND (ws_net_profit#7 <= 200.00)) OR ((ca_state#28 IN (WI,CT,KY) AND (ws_net_profit#7 >= 150.00)) AND (ws_net_profit#7 <= 300.00))) OR ((ca_state#28 IN (LA,IA,AR) AND (ws_net_profit#7 >= 50.00)) AND (ws_net_profit#7 <= 250.00))) + +(34) Project [codegen id : 8] +Output [5]: [ws_sold_date_sk#1, ws_quantity#5, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [9]: [ws_sold_date_sk#1, ws_quantity#5, ws_net_profit#7, wr_refunded_addr_sk#10, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, ca_address_sk#27, ca_state#28] + +(35) Scan parquet default.date_dim +Output [2]: [d_date_sk#31, d_year#32] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 6] +Input [2]: [d_date_sk#31, d_year#32] + +(37) Filter [codegen id : 6] +Input [2]: [d_date_sk#31, d_year#32] +Condition : ((isnotnull(d_year#32) AND (d_year#32 = 2000)) AND isnotnull(d_date_sk#31)) + +(38) Project [codegen id : 6] +Output [1]: [d_date_sk#31] +Input [2]: [d_date_sk#31, d_year#32] + +(39) BroadcastExchange +Input [1]: [d_date_sk#31] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] + +(40) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#31] +Join condition: None + +(41) Project [codegen id : 8] +Output [4]: [ws_quantity#5, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15] +Input [6]: [ws_sold_date_sk#1, ws_quantity#5, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, d_date_sk#31] + +(42) Scan parquet default.reason +Output [2]: [r_reason_sk#34, r_reason_desc#35] +Batched: true +Location [not included in comparison]/{warehouse_dir}/reason] +PushedFilters: [IsNotNull(r_reason_sk)] +ReadSchema: struct + +(43) ColumnarToRow [codegen id : 7] +Input [2]: [r_reason_sk#34, r_reason_desc#35] + +(44) Filter [codegen id : 7] +Input [2]: [r_reason_sk#34, r_reason_desc#35] +Condition : isnotnull(r_reason_sk#34) + +(45) BroadcastExchange +Input [2]: [r_reason_sk#34, r_reason_desc#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#36] + +(46) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [wr_reason_sk#12] +Right keys [1]: [cast(r_reason_sk#34 as bigint)] +Join condition: None + +(47) Project [codegen id : 8] +Output [4]: [ws_quantity#5, wr_fee#14, wr_refunded_cash#15, r_reason_desc#35] +Input [6]: [ws_quantity#5, wr_reason_sk#12, wr_fee#14, wr_refunded_cash#15, r_reason_sk#34, r_reason_desc#35] + +(48) HashAggregate [codegen id : 8] +Input [4]: [ws_quantity#5, wr_fee#14, wr_refunded_cash#15, r_reason_desc#35] +Keys [1]: [r_reason_desc#35] +Functions [3]: [partial_avg(cast(ws_quantity#5 as bigint)), partial_avg(UnscaledValue(wr_refunded_cash#15)), partial_avg(UnscaledValue(wr_fee#14))] +Aggregate Attributes [6]: [sum#37, count#38, sum#39, count#40, sum#41, count#42] +Results [7]: [r_reason_desc#35, sum#43, count#44, sum#45, count#46, sum#47, count#48] + +(49) Exchange +Input [7]: [r_reason_desc#35, sum#43, count#44, sum#45, count#46, sum#47, count#48] +Arguments: hashpartitioning(r_reason_desc#35, 5), ENSURE_REQUIREMENTS, [id=#49] + +(50) HashAggregate [codegen id : 9] +Input [7]: [r_reason_desc#35, sum#43, count#44, sum#45, count#46, sum#47, count#48] +Keys [1]: [r_reason_desc#35] +Functions [3]: [avg(cast(ws_quantity#5 as bigint)), avg(UnscaledValue(wr_refunded_cash#15)), avg(UnscaledValue(wr_fee#14))] +Aggregate Attributes [3]: [avg(cast(ws_quantity#5 as bigint))#50, avg(UnscaledValue(wr_refunded_cash#15))#51, avg(UnscaledValue(wr_fee#14))#52] +Results [4]: [substr(r_reason_desc#35, 1, 20) AS substr(r_reason_desc, 1, 20)#53, avg(cast(ws_quantity#5 as bigint))#50 AS avg(ws_quantity)#54, cast((avg(UnscaledValue(wr_refunded_cash#15))#51 / 100.0) as decimal(11,6)) AS avg(wr_refunded_cash)#55, cast((avg(UnscaledValue(wr_fee#14))#52 / 100.0) as decimal(11,6)) AS avg(wr_fee)#56] + +(51) TakeOrderedAndProject +Input [4]: [substr(r_reason_desc, 1, 20)#53, avg(ws_quantity)#54, avg(wr_refunded_cash)#55, avg(wr_fee)#56] +Arguments: 100, [substr(r_reason_desc, 1, 20)#53 ASC NULLS FIRST, avg(ws_quantity)#54 ASC NULLS FIRST, avg(wr_refunded_cash)#55 ASC NULLS FIRST, avg(wr_fee)#56 ASC NULLS FIRST], [substr(r_reason_desc, 1, 20)#53, avg(ws_quantity)#54, avg(wr_refunded_cash)#55, avg(wr_fee)#56] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/simplified.txt index 86a427106..b95873716 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q85/simplified.txt @@ -1,66 +1,76 @@ -TakeOrderedAndProject [aggOrder,avg(wr_fee),avg(wr_refunded_cash),avg(ws_quantity),substring(r_reason_desc, 1, 20)] - WholeStageCodegen - HashAggregate [avg(UnscaledValue(wr_fee)),avg(UnscaledValue(wr_refunded_cash)),avg(cast(ws_quantity as bigint)),count,count,count,r_reason_desc,sum,sum,sum] [aggOrder,avg(UnscaledValue(wr_fee)),avg(UnscaledValue(wr_refunded_cash)),avg(cast(ws_quantity as bigint)),avg(wr_fee),avg(wr_refunded_cash),avg(ws_quantity),count,count,count,substring(r_reason_desc, 1, 20),sum,sum,sum] +TakeOrderedAndProject [substr(r_reason_desc, 1, 20),avg(ws_quantity),avg(wr_refunded_cash),avg(wr_fee)] + WholeStageCodegen (9) + HashAggregate [r_reason_desc,sum,count,sum,count,sum,count] [avg(cast(ws_quantity as bigint)),avg(UnscaledValue(wr_refunded_cash)),avg(UnscaledValue(wr_fee)),substr(r_reason_desc, 1, 20),avg(ws_quantity),avg(wr_refunded_cash),avg(wr_fee),sum,count,sum,count,sum,count] InputAdapter Exchange [r_reason_desc] #1 - WholeStageCodegen - HashAggregate [count,count,count,count,count,count,r_reason_desc,sum,sum,sum,sum,sum,sum,wr_fee,wr_refunded_cash,ws_quantity] [count,count,count,count,count,count,sum,sum,sum,sum,sum,sum] - Project [r_reason_desc,wr_fee,wr_refunded_cash,ws_quantity] - BroadcastHashJoin [r_reason_sk,wr_reason_sk] - Project [wr_fee,wr_reason_sk,wr_refunded_cash,ws_quantity] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [wr_fee,wr_reason_sk,wr_refunded_cash,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [ca_address_sk,ca_state,wr_refunded_addr_sk,ws_net_profit] - Project [wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,ws_net_profit,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [cd_demo_sk,cd_education_status,cd_education_status,cd_marital_status,cd_marital_status,wr_returning_cdemo_sk] - Project [cd_education_status,cd_marital_status,wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_returning_cdemo_sk,ws_net_profit,ws_quantity,ws_sold_date_sk] - BroadcastHashJoin [cd_demo_sk,cd_education_status,cd_marital_status,wr_refunded_cdemo_sk,ws_sales_price] - Project [wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,ws_net_profit,ws_quantity,ws_sales_price,ws_sold_date_sk] - BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] - Project [wr_fee,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,ws_net_profit,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] - BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Project [ws_item_sk,ws_net_profit,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] - Filter [ws_item_sk,ws_order_number,ws_sold_date_sk,ws_web_page_sk] - Scan parquet default.web_sales [ws_item_sk,ws_net_profit,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] [ws_item_sk,ws_net_profit,ws_order_number,ws_quantity,ws_sales_price,ws_sold_date_sk,ws_web_page_sk] + WholeStageCodegen (8) + HashAggregate [r_reason_desc,ws_quantity,wr_refunded_cash,wr_fee] [sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] + Project [ws_quantity,wr_fee,wr_refunded_cash,r_reason_desc] + BroadcastHashJoin [wr_reason_sk,r_reason_sk] + Project [ws_quantity,wr_reason_sk,wr_fee,wr_refunded_cash] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,wr_reason_sk,wr_fee,wr_refunded_cash] + BroadcastHashJoin [wr_refunded_addr_sk,ca_address_sk,ca_state,ws_net_profit] + Project [ws_sold_date_sk,ws_quantity,ws_net_profit,wr_refunded_addr_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + BroadcastHashJoin [wr_returning_cdemo_sk,cd_marital_status,cd_education_status,cd_demo_sk,cd_marital_status,cd_education_status] + Project [ws_sold_date_sk,ws_quantity,ws_net_profit,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash,cd_marital_status,cd_education_status] + BroadcastHashJoin [wr_refunded_cdemo_sk,cd_demo_sk,cd_marital_status,cd_education_status,ws_sales_price] + Project [ws_sold_date_sk,ws_quantity,ws_sales_price,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_sold_date_sk,ws_web_page_sk,ws_quantity,ws_sales_price,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash] + BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] + Filter [ws_item_sk,ws_order_number,ws_web_page_sk,ws_sold_date_sk,ws_sales_price,ws_net_profit] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_web_page_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [wr_fee,wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] - Filter [wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] - Scan parquet default.web_returns [wr_fee,wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] [wr_fee,wr_item_sk,wr_order_number,wr_reason_sk,wr_refunded_addr_sk,wr_refunded_cash,wr_refunded_cdemo_sk,wr_returning_cdemo_sk] + WholeStageCodegen (1) + Filter [wr_item_sk,wr_order_number,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,wr_refunded_addr_sk,wr_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [wp_web_page_sk] - Filter [wp_web_page_sk] - Scan parquet default.web_page [wp_web_page_sk] [wp_web_page_sk] + WholeStageCodegen (2) + Filter [wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_page [wp_web_page_sk] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [cd_demo_sk,cd_education_status,cd_marital_status] - Filter [cd_demo_sk,cd_education_status,cd_marital_status] - Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] + WholeStageCodegen (3) + Filter [cd_demo_sk,cd_marital_status,cd_education_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [cd_demo_sk,cd_education_status,cd_marital_status] - Filter [cd_demo_sk,cd_education_status,cd_marital_status] - Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] + WholeStageCodegen (4) + Filter [cd_demo_sk,cd_marital_status,cd_education_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (5) Project [ca_address_sk,ca_state] - Filter [ca_address_sk,ca_country] - Scan parquet default.customer_address [ca_address_sk,ca_country,ca_state] [ca_address_sk,ca_country,ca_state] + Filter [ca_country,ca_address_sk,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] InputAdapter BroadcastExchange #7 - WholeStageCodegen + WholeStageCodegen (6) Project [d_date_sk] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #8 - WholeStageCodegen - Project [r_reason_desc,r_reason_sk] - Filter [r_reason_sk] - Scan parquet default.reason [r_reason_desc,r_reason_sk] [r_reason_desc,r_reason_sk] + WholeStageCodegen (7) + Filter [r_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet default.reason [r_reason_sk,r_reason_desc] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/explain.txt index a72090405..20ae4d244 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/explain.txt @@ -1,25 +1,142 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[lochierarchy#1 DESC NULLS LAST,CASE WHEN (cast(lochierarchy#1 as int) = 0) THEN i_category#2 END ASC NULLS FIRST,rank_within_parent#3 ASC NULLS FIRST], output=[total_sum#4,i_category#2,i_class#5,lochierarchy#1,rank_within_parent#3]) -+- *(6) Project [total_sum#4, i_category#2, i_class#5, lochierarchy#1, rank_within_parent#3] - +- Window [rank(_w3#6) windowspecdefinition(_w1#7, _w2#8, _w3#6 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#3], [_w1#7, _w2#8], [_w3#6 DESC NULLS LAST] - +- *(5) Sort [_w1#7 ASC NULLS FIRST, _w2#8 ASC NULLS FIRST, _w3#6 DESC NULLS LAST], false, 0 - +- Exchange hashpartitioning(_w1#7, _w2#8, 5) - +- *(4) HashAggregate(keys=[i_category#2, i_class#5, spark_grouping_id#9], functions=[sum(UnscaledValue(ws_net_paid#10))]) - +- Exchange hashpartitioning(i_category#2, i_class#5, spark_grouping_id#9, 5) - +- *(3) HashAggregate(keys=[i_category#2, i_class#5, spark_grouping_id#9], functions=[partial_sum(UnscaledValue(ws_net_paid#10))]) - +- *(3) Expand [List(ws_net_paid#10, i_category#11, i_class#12, 0), List(ws_net_paid#10, i_category#11, null, 1), List(ws_net_paid#10, null, null, 3)], [ws_net_paid#10, i_category#2, i_class#5, spark_grouping_id#9] - +- *(3) Project [ws_net_paid#10, i_category#13 AS i_category#11, i_class#14 AS i_class#12] - +- *(3) BroadcastHashJoin [ws_item_sk#15], [i_item_sk#16], Inner, BuildRight - :- *(3) Project [ws_item_sk#15, ws_net_paid#10] - : +- *(3) BroadcastHashJoin [ws_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight - : :- *(3) Project [ws_sold_date_sk#17, ws_item_sk#15, ws_net_paid#10] - : : +- *(3) Filter (isnotnull(ws_sold_date_sk#17) && isnotnull(ws_item_sk#15)) - : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#17,ws_item_sk#15,ws_net_paid#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [d_date_sk#18] - : +- *(1) Filter (((isnotnull(d_month_seq#19) && (d_month_seq#19 >= 1200)) && (d_month_seq#19 <= 1211)) && isnotnull(d_date_sk#18)) - : +- *(1) FileScan parquet default.date_dim[d_date_sk#18,d_month_seq#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(2) Project [i_item_sk#16, i_class#14, i_category#13] - +- *(2) Filter isnotnull(i_item_sk#16) - +- *(2) FileScan parquet default.item[i_item_sk#16,i_class#14,i_category#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_item_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (25) ++- * Project (24) + +- Window (23) + +- * Sort (22) + +- Exchange (21) + +- * HashAggregate (20) + +- Exchange (19) + +- * HashAggregate (18) + +- * Expand (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.web_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- BroadcastExchange (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.item (11) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3] + +(3) Filter [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3] +Condition : (isnotnull(ws_sold_date_sk#1) AND isnotnull(ws_item_sk#2)) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 3] +Output [2]: [ws_item_sk#2, ws_net_paid#3] +Input [4]: [ws_sold_date_sk#1, ws_item_sk#2, ws_net_paid#3, d_date_sk#4] + +(11) Scan parquet default.item +Output [3]: [i_item_sk#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [i_item_sk#7, i_class#8, i_category#9] + +(13) Filter [codegen id : 2] +Input [3]: [i_item_sk#7, i_class#8, i_category#9] +Condition : isnotnull(i_item_sk#7) + +(14) BroadcastExchange +Input [3]: [i_item_sk#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_item_sk#2] +Right keys [1]: [i_item_sk#7] +Join condition: None + +(16) Project [codegen id : 3] +Output [3]: [ws_net_paid#3, i_category#9, i_class#8] +Input [5]: [ws_item_sk#2, ws_net_paid#3, i_item_sk#7, i_class#8, i_category#9] + +(17) Expand [codegen id : 3] +Input [3]: [ws_net_paid#3, i_category#9, i_class#8] +Arguments: [List(ws_net_paid#3, i_category#9, i_class#8, 0), List(ws_net_paid#3, i_category#9, null, 1), List(ws_net_paid#3, null, null, 3)], [ws_net_paid#3, i_category#11, i_class#12, spark_grouping_id#13] + +(18) HashAggregate [codegen id : 3] +Input [4]: [ws_net_paid#3, i_category#11, i_class#12, spark_grouping_id#13] +Keys [3]: [i_category#11, i_class#12, spark_grouping_id#13] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#3))] +Aggregate Attributes [1]: [sum#14] +Results [4]: [i_category#11, i_class#12, spark_grouping_id#13, sum#15] + +(19) Exchange +Input [4]: [i_category#11, i_class#12, spark_grouping_id#13, sum#15] +Arguments: hashpartitioning(i_category#11, i_class#12, spark_grouping_id#13, 5), true, [id=#16] + +(20) HashAggregate [codegen id : 4] +Input [4]: [i_category#11, i_class#12, spark_grouping_id#13, sum#15] +Keys [3]: [i_category#11, i_class#12, spark_grouping_id#13] +Functions [1]: [sum(UnscaledValue(ws_net_paid#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#3))#17] +Results [7]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#17,17,2) AS total_sum#18, i_category#11, i_class#12, (cast((shiftright(spark_grouping_id#13, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#13, 0) & 1) as tinyint)) AS lochierarchy#19, (cast((shiftright(spark_grouping_id#13, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#13, 0) & 1) as tinyint)) AS _w1#20, CASE WHEN (cast(cast((shiftright(spark_grouping_id#13, 0) & 1) as tinyint) as int) = 0) THEN i_category#11 END AS _w2#21, MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#17,17,2) AS _w3#22] + +(21) Exchange +Input [7]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, _w1#20, _w2#21, _w3#22] +Arguments: hashpartitioning(_w1#20, _w2#21, 5), true, [id=#23] + +(22) Sort [codegen id : 5] +Input [7]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, _w1#20, _w2#21, _w3#22] +Arguments: [_w1#20 ASC NULLS FIRST, _w2#21 ASC NULLS FIRST, _w3#22 DESC NULLS LAST], false, 0 + +(23) Window +Input [7]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, _w1#20, _w2#21, _w3#22] +Arguments: [rank(_w3#22) windowspecdefinition(_w1#20, _w2#21, _w3#22 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#24], [_w1#20, _w2#21], [_w3#22 DESC NULLS LAST] + +(24) Project [codegen id : 6] +Output [5]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, rank_within_parent#24] +Input [8]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, _w1#20, _w2#21, _w3#22, rank_within_parent#24] + +(25) TakeOrderedAndProject +Input [5]: [total_sum#18, i_category#11, i_class#12, lochierarchy#19, rank_within_parent#24] +Arguments: 100, [lochierarchy#19 DESC NULLS LAST, CASE WHEN (cast(lochierarchy#19 as int) = 0) THEN i_category#11 END ASC NULLS FIRST, rank_within_parent#24 ASC NULLS FIRST], [total_sum#18, i_category#11, i_class#12, lochierarchy#19, rank_within_parent#24] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/simplified.txt index 5ec040bda..cac22cb0f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q86/simplified.txt @@ -1,35 +1,39 @@ -TakeOrderedAndProject [i_category,i_class,lochierarchy,rank_within_parent,total_sum] - WholeStageCodegen - Project [i_category,i_class,lochierarchy,rank_within_parent,total_sum] +TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,total_sum,i_class] + WholeStageCodegen (6) + Project [total_sum,i_category,i_class,lochierarchy,rank_within_parent] InputAdapter - Window [_w1,_w2,_w3] - WholeStageCodegen + Window [_w3,_w1,_w2] + WholeStageCodegen (5) Sort [_w1,_w2,_w3] InputAdapter Exchange [_w1,_w2] #1 - WholeStageCodegen - HashAggregate [i_category,i_class,spark_grouping_id,sum,sum(UnscaledValue(ws_net_paid))] [_w1,_w2,_w3,lochierarchy,sum,sum(UnscaledValue(ws_net_paid)),total_sum] + WholeStageCodegen (4) + HashAggregate [i_category,i_class,spark_grouping_id,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,lochierarchy,_w1,_w2,_w3,sum] InputAdapter Exchange [i_category,i_class,spark_grouping_id] #2 - WholeStageCodegen - HashAggregate [i_category,i_class,spark_grouping_id,sum,sum,ws_net_paid] [sum,sum] - Expand [i_category,i_class,ws_net_paid] - Project [i_category,i_class,ws_net_paid] - BroadcastHashJoin [i_item_sk,ws_item_sk] + WholeStageCodegen (3) + HashAggregate [i_category,i_class,spark_grouping_id,ws_net_paid] [sum,sum] + Expand [ws_net_paid,i_category,i_class] + Project [ws_net_paid,i_category,i_class] + BroadcastHashJoin [ws_item_sk,i_item_sk] Project [ws_item_sk,ws_net_paid] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_item_sk,ws_net_paid,ws_sold_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_item_sk,ws_net_paid,ws_sold_date_sk] [ws_item_sk,ws_net_paid,ws_sold_date_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_net_paid] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (1) Project [d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [i_category,i_class,i_item_sk] - Filter [i_item_sk] - Scan parquet default.item [i_category,i_class,i_item_sk] [i_category,i_class,i_item_sk] + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_class,i_category] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/explain.txt index e3689bd7b..3d59a670b 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/explain.txt @@ -1,54 +1,323 @@ == Physical Plan == -*(13) HashAggregate(keys=[], functions=[count(1)]) -+- Exchange SinglePartition - +- *(12) HashAggregate(keys=[], functions=[partial_count(1)]) - +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#4, ), coalesce(c_first_name#5, ), coalesce(d_date#6, 0)], LeftAnti, BuildRight, (((c_last_name#1 <=> c_last_name#4) && (c_first_name#2 <=> c_first_name#5)) && (d_date#3 <=> d_date#6)) - :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - : +- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - : +- *(12) BroadcastHashJoin [coalesce(c_last_name#1, ), coalesce(c_first_name#2, ), coalesce(d_date#3, 0)], [coalesce(c_last_name#7, ), coalesce(c_first_name#8, ), coalesce(d_date#9, 0)], LeftAnti, BuildRight, (((c_last_name#1 <=> c_last_name#7) && (c_first_name#2 <=> c_first_name#8)) && (d_date#3 <=> d_date#9)) - : :- *(12) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - : : +- Exchange hashpartitioning(c_last_name#1, c_first_name#2, d_date#3, 5) - : : +- *(3) HashAggregate(keys=[c_last_name#1, c_first_name#2, d_date#3], functions=[]) - : : +- *(3) Project [c_last_name#1, c_first_name#2, d_date#3] - : : +- *(3) BroadcastHashJoin [ss_customer_sk#10], [c_customer_sk#11], Inner, BuildRight - : : :- *(3) Project [ss_customer_sk#10, d_date#3] - : : : +- *(3) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight - : : : :- *(3) Project [ss_sold_date_sk#12, ss_customer_sk#10] - : : : : +- *(3) Filter (isnotnull(ss_sold_date_sk#12) && isnotnull(ss_customer_sk#10)) - : : : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_customer_sk#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [d_date_sk#13, d_date#3] - : : : +- *(1) Filter (((isnotnull(d_month_seq#14) && (d_month_seq#14 >= 1200)) && (d_month_seq#14 <= 1211)) && isnotnull(d_date_sk#13)) - : : : +- *(1) FileScan parquet default.date_dim[d_date_sk#13,d_date#3,d_month_seq#14] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [c_customer_sk#11, c_first_name#2, c_last_name#1] - : : +- *(2) Filter isnotnull(c_customer_sk#11) - : : +- *(2) FileScan parquet default.customer[c_customer_sk#11,c_first_name#2,c_last_name#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) - : +- *(7) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) - : +- Exchange hashpartitioning(c_last_name#7, c_first_name#8, d_date#9, 5) - : +- *(6) HashAggregate(keys=[c_last_name#7, c_first_name#8, d_date#9], functions=[]) - : +- *(6) Project [c_last_name#7, c_first_name#8, d_date#9] - : +- *(6) BroadcastHashJoin [cs_bill_customer_sk#15], [c_customer_sk#16], Inner, BuildRight - : :- *(6) Project [cs_bill_customer_sk#15, d_date#9] - : : +- *(6) BroadcastHashJoin [cs_sold_date_sk#17], [d_date_sk#18], Inner, BuildRight - : : :- *(6) Project [cs_sold_date_sk#17, cs_bill_customer_sk#15] - : : : +- *(6) Filter (isnotnull(cs_sold_date_sk#17) && isnotnull(cs_bill_customer_sk#15)) - : : : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#17,cs_bill_customer_sk#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)], ReadSchema: struct - : : +- ReusedExchange [d_date_sk#18, d_date#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- ReusedExchange [c_customer_sk#16, c_first_name#8, c_last_name#7], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- BroadcastExchange HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), coalesce(input[1, string, true], ), coalesce(input[2, date, true], 0))) - +- *(11) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) - +- Exchange hashpartitioning(c_last_name#4, c_first_name#5, d_date#6, 5) - +- *(10) HashAggregate(keys=[c_last_name#4, c_first_name#5, d_date#6], functions=[]) - +- *(10) Project [c_last_name#4, c_first_name#5, d_date#6] - +- *(10) BroadcastHashJoin [ws_bill_customer_sk#19], [c_customer_sk#20], Inner, BuildRight - :- *(10) Project [ws_bill_customer_sk#19, d_date#6] - : +- *(10) BroadcastHashJoin [ws_sold_date_sk#21], [d_date_sk#22], Inner, BuildRight - : :- *(10) Project [ws_sold_date_sk#21, ws_bill_customer_sk#19] - : : +- *(10) Filter (isnotnull(ws_sold_date_sk#21) && isnotnull(ws_bill_customer_sk#19)) - : : +- *(10) FileScan parquet default.web_sales[ws_sold_date_sk#21,ws_bill_customer_sk#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)], ReadSchema: struct - : +- ReusedExchange [d_date_sk#22, d_date#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- ReusedExchange [c_customer_sk#20, c_first_name#5, c_last_name#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +* HashAggregate (54) ++- Exchange (53) + +- * HashAggregate (52) + +- * HashAggregate (51) + +- * HashAggregate (50) + +- * HashAggregate (49) + +- * HashAggregate (48) + +- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * BroadcastHashJoin LeftAnti BuildRight (44) + :- * BroadcastHashJoin LeftAnti BuildRight (30) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.customer (11) + : +- BroadcastExchange (29) + : +- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.catalog_sales (17) + : : +- ReusedExchange (20) + : +- ReusedExchange (23) + +- BroadcastExchange (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.web_sales (31) + : +- ReusedExchange (34) + +- ReusedExchange (37) + + +(1) Scan parquet default.store_sales +Output [2]: [ss_sold_date_sk#1, ss_customer_sk#2] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 11] +Input [2]: [ss_sold_date_sk#1, ss_customer_sk#2] + +(3) Filter [codegen id : 11] +Input [2]: [ss_sold_date_sk#1, ss_customer_sk#2] +Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_customer_sk#2)) + +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#3)) + +(7) Project [codegen id : 1] +Output [2]: [d_date_sk#3, d_date#4] +Input [3]: [d_date_sk#3, d_date#4, d_month_seq#5] + +(8) BroadcastExchange +Input [2]: [d_date_sk#3, d_date#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#3] +Join condition: None + +(10) Project [codegen id : 11] +Output [2]: [ss_customer_sk#2, d_date#4] +Input [4]: [ss_sold_date_sk#1, ss_customer_sk#2, d_date_sk#3, d_date#4] + +(11) Scan parquet default.customer +Output [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] + +(13) Filter [codegen id : 2] +Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] +Condition : isnotnull(c_customer_sk#7) + +(14) BroadcastExchange +Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(15) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#7] +Join condition: None + +(16) Project [codegen id : 11] +Output [3]: [d_date#4, c_first_name#8, c_last_name#9] +Input [5]: [ss_customer_sk#2, d_date#4, c_customer_sk#7, c_first_name#8, c_last_name#9] + +(17) Scan parquet default.catalog_sales +Output [2]: [cs_sold_date_sk#11, cs_bill_customer_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 5] +Input [2]: [cs_sold_date_sk#11, cs_bill_customer_sk#12] + +(19) Filter [codegen id : 5] +Input [2]: [cs_sold_date_sk#11, cs_bill_customer_sk#12] +Condition : (isnotnull(cs_sold_date_sk#11) AND isnotnull(cs_bill_customer_sk#12)) + +(20) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#13, d_date#14] + +(21) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#11] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(22) Project [codegen id : 5] +Output [2]: [cs_bill_customer_sk#12, d_date#14] +Input [4]: [cs_sold_date_sk#11, cs_bill_customer_sk#12, d_date_sk#13, d_date#14] + +(23) ReusedExchange [Reuses operator id: 14] +Output [3]: [c_customer_sk#15, c_first_name#16, c_last_name#17] + +(24) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_bill_customer_sk#12] +Right keys [1]: [c_customer_sk#15] +Join condition: None + +(25) Project [codegen id : 5] +Output [3]: [c_last_name#17, c_first_name#16, d_date#14] +Input [5]: [cs_bill_customer_sk#12, d_date#14, c_customer_sk#15, c_first_name#16, c_last_name#17] + +(26) HashAggregate [codegen id : 5] +Input [3]: [c_last_name#17, c_first_name#16, d_date#14] +Keys [3]: [c_last_name#17, c_first_name#16, d_date#14] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#17, c_first_name#16, d_date#14] + +(27) Exchange +Input [3]: [c_last_name#17, c_first_name#16, d_date#14] +Arguments: hashpartitioning(c_last_name#17, c_first_name#16, d_date#14, 5), true, [id=#18] + +(28) HashAggregate [codegen id : 6] +Input [3]: [c_last_name#17, c_first_name#16, d_date#14] +Keys [3]: [c_last_name#17, c_first_name#16, d_date#14] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#17, c_first_name#16, d_date#14] + +(29) BroadcastExchange +Input [3]: [c_last_name#17, c_first_name#16, d_date#14] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 0), isnull(input[2, date, true])),false), [id=#19] + +(30) BroadcastHashJoin [codegen id : 11] +Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#4, 0), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#17, ), isnull(c_last_name#17), coalesce(c_first_name#16, ), isnull(c_first_name#16), coalesce(d_date#14, 0), isnull(d_date#14)] +Join condition: None + +(31) Scan parquet default.web_sales +Output [2]: [ws_sold_date_sk#20, ws_bill_customer_sk#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)] +ReadSchema: struct + +(32) ColumnarToRow [codegen id : 9] +Input [2]: [ws_sold_date_sk#20, ws_bill_customer_sk#21] + +(33) Filter [codegen id : 9] +Input [2]: [ws_sold_date_sk#20, ws_bill_customer_sk#21] +Condition : (isnotnull(ws_sold_date_sk#20) AND isnotnull(ws_bill_customer_sk#21)) + +(34) ReusedExchange [Reuses operator id: 8] +Output [2]: [d_date_sk#22, d_date#23] + +(35) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_sold_date_sk#20] +Right keys [1]: [d_date_sk#22] +Join condition: None + +(36) Project [codegen id : 9] +Output [2]: [ws_bill_customer_sk#21, d_date#23] +Input [4]: [ws_sold_date_sk#20, ws_bill_customer_sk#21, d_date_sk#22, d_date#23] + +(37) ReusedExchange [Reuses operator id: 14] +Output [3]: [c_customer_sk#24, c_first_name#25, c_last_name#26] + +(38) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_bill_customer_sk#21] +Right keys [1]: [c_customer_sk#24] +Join condition: None + +(39) Project [codegen id : 9] +Output [3]: [c_last_name#26, c_first_name#25, d_date#23] +Input [5]: [ws_bill_customer_sk#21, d_date#23, c_customer_sk#24, c_first_name#25, c_last_name#26] + +(40) HashAggregate [codegen id : 9] +Input [3]: [c_last_name#26, c_first_name#25, d_date#23] +Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#26, c_first_name#25, d_date#23] + +(41) Exchange +Input [3]: [c_last_name#26, c_first_name#25, d_date#23] +Arguments: hashpartitioning(c_last_name#26, c_first_name#25, d_date#23, 5), true, [id=#27] + +(42) HashAggregate [codegen id : 10] +Input [3]: [c_last_name#26, c_first_name#25, d_date#23] +Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#26, c_first_name#25, d_date#23] + +(43) BroadcastExchange +Input [3]: [c_last_name#26, c_first_name#25, d_date#23] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 0), isnull(input[2, date, true])),false), [id=#28] + +(44) BroadcastHashJoin [codegen id : 11] +Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#4, 0), isnull(d_date#4)] +Right keys [6]: [coalesce(c_last_name#26, ), isnull(c_last_name#26), coalesce(c_first_name#25, ), isnull(c_first_name#25), coalesce(d_date#23, 0), isnull(d_date#23)] +Join condition: None + +(45) HashAggregate [codegen id : 11] +Input [3]: [d_date#4, c_first_name#8, c_last_name#9] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(46) Exchange +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Arguments: hashpartitioning(c_last_name#9, c_first_name#8, d_date#4, 5), true, [id=#29] + +(47) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(48) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(49) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(50) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results [3]: [c_last_name#9, c_first_name#8, d_date#4] + +(51) HashAggregate [codegen id : 12] +Input [3]: [c_last_name#9, c_first_name#8, d_date#4] +Keys [3]: [c_last_name#9, c_first_name#8, d_date#4] +Functions: [] +Aggregate Attributes: [] +Results: [] + +(52) HashAggregate [codegen id : 12] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#30] +Results [1]: [count#31] + +(53) Exchange +Input [1]: [count#31] +Arguments: SinglePartition, true, [id=#32] + +(54) HashAggregate [codegen id : 13] +Input [1]: [count#31] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#33] +Results [1]: [count(1)#33 AS count(1)#34] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/simplified.txt index 1a7672916..a5b57a4ac 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q87/simplified.txt @@ -1,74 +1,80 @@ -WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),count(1)] +WholeStageCodegen (13) + HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 - WholeStageCodegen - HashAggregate [count,count] [count,count] - HashAggregate [c_first_name,c_last_name,d_date] - HashAggregate [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] - HashAggregate [c_first_name,c_last_name,d_date] - HashAggregate [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_first_name,c_first_name,c_last_name,c_last_name,d_date,d_date] - HashAggregate [c_first_name,c_last_name,d_date] - InputAdapter - Exchange [c_first_name,c_last_name,d_date] #2 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - Project [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_customer_sk,ss_customer_sk] - Project [d_date,ss_customer_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_customer_sk,ss_sold_date_sk] - Filter [ss_customer_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] + WholeStageCodegen (12) + HashAggregate [count,count] + HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (11) + HashAggregate [c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] + Project [d_date,c_first_name,c_last_name] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [d_date,d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date,d_date_sk,d_month_seq] [d_date,d_date_sk,d_month_seq] + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [c_customer_sk,c_first_name,c_last_name] - Filter [c_customer_sk] - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - InputAdapter - Exchange [c_first_name,c_last_name,d_date] #6 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - Project [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_customer_sk,cs_bill_customer_sk] - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_bill_customer_sk,cs_sold_date_sk] - Filter [cs_bill_customer_sk,cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_sold_date_sk] [cs_bill_customer_sk,cs_sold_date_sk] + WholeStageCodegen (2) + Filter [c_customer_sk] + ColumnarToRow InputAdapter - ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #3 - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - InputAdapter - Exchange [c_first_name,c_last_name,d_date] #8 - WholeStageCodegen - HashAggregate [c_first_name,c_last_name,d_date] - Project [c_first_name,c_last_name,d_date] - BroadcastHashJoin [c_customer_sk,ws_bill_customer_sk] - Project [d_date,ws_bill_customer_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_bill_customer_sk,ws_sold_date_sk] - Filter [ws_bill_customer_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_bill_customer_sk,ws_sold_date_sk] [ws_bill_customer_sk,ws_sold_date_sk] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (6) + HashAggregate [c_last_name,c_first_name,d_date] InputAdapter - ReusedExchange [d_date,d_date_sk] [d_date,d_date_sk] #3 - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] [c_customer_sk,c_first_name,c_last_name] #4 + Exchange [c_last_name,c_first_name,d_date] #6 + WholeStageCodegen (5) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk,cs_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/explain.txt index 7eea370f8..22297e02e 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/explain.txt @@ -1,165 +1,960 @@ == Physical Plan == -BroadcastNestedLoopJoin BuildRight, Inner -:- BroadcastNestedLoopJoin BuildRight, Inner -: :- BroadcastNestedLoopJoin BuildRight, Inner -: : :- BroadcastNestedLoopJoin BuildRight, Inner -: : : :- BroadcastNestedLoopJoin BuildRight, Inner -: : : : :- BroadcastNestedLoopJoin BuildRight, Inner -: : : : : :- BroadcastNestedLoopJoin BuildRight, Inner -: : : : : : :- *(5) HashAggregate(keys=[], functions=[count(1)]) -: : : : : : : +- Exchange SinglePartition -: : : : : : : +- *(4) HashAggregate(keys=[], functions=[partial_count(1)]) -: : : : : : : +- *(4) Project -: : : : : : : +- *(4) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight -: : : : : : : :- *(4) Project [ss_store_sk#1] -: : : : : : : : +- *(4) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight -: : : : : : : : :- *(4) Project [ss_sold_time_sk#3, ss_store_sk#1] -: : : : : : : : : +- *(4) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight -: : : : : : : : : :- *(4) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] -: : : : : : : : : : +- *(4) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : : : : : : : +- *(4) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct -: : : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : : : : : +- *(1) Project [hd_demo_sk#6] -: : : : : : : : : +- *(1) Filter (((((hd_dep_count#7 = 4) && (hd_vehicle_count#8 <= 6)) || ((hd_dep_count#7 = 2) && (hd_vehicle_count#8 <= 4))) || ((hd_dep_count#7 = 0) && (hd_vehicle_count#8 <= 2))) && isnotnull(hd_demo_sk#6)) -: : : : : : : : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#6,hd_dep_count#7,hd_vehicle_count#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [Or(Or(And(EqualTo(hd_dep_count,4),LessThanOrEqual(hd_vehicle_count,6)),And(EqualTo(hd_dep_count,..., ReadSchema: struct -: : : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : : : : +- *(2) Project [t_time_sk#4] -: : : : : : : : +- *(2) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 8)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) -: : : : : : : : +- *(2) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,8), GreaterThanOrEqual(t_minute,30), IsNo..., ReadSchema: struct -: : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : : : +- *(3) Project [s_store_sk#2] -: : : : : : : +- *(3) Filter ((isnotnull(s_store_name#11) && (s_store_name#11 = ese)) && isnotnull(s_store_sk#2)) -: : : : : : : +- *(3) FileScan parquet default.store[s_store_sk#2,s_store_name#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)], ReadSchema: struct -: : : : : : +- BroadcastExchange IdentityBroadcastMode -: : : : : : +- *(10) HashAggregate(keys=[], functions=[count(1)]) -: : : : : : +- Exchange SinglePartition -: : : : : : +- *(9) HashAggregate(keys=[], functions=[partial_count(1)]) -: : : : : : +- *(9) Project -: : : : : : +- *(9) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight -: : : : : : :- *(9) Project [ss_store_sk#1] -: : : : : : : +- *(9) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight -: : : : : : : :- *(9) Project [ss_sold_time_sk#3, ss_store_sk#1] -: : : : : : : : +- *(9) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight -: : : : : : : : :- *(9) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] -: : : : : : : : : +- *(9) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : : : : : : +- *(9) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct -: : : : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : : : +- *(7) Project [t_time_sk#4] -: : : : : : : +- *(7) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 9)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) -: : : : : : : +- *(7) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), LessThan(t_minute,30), IsNotNull(t_ti..., ReadSchema: struct -: : : : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : +- BroadcastExchange IdentityBroadcastMode -: : : : : +- *(15) HashAggregate(keys=[], functions=[count(1)]) -: : : : : +- Exchange SinglePartition -: : : : : +- *(14) HashAggregate(keys=[], functions=[partial_count(1)]) -: : : : : +- *(14) Project -: : : : : +- *(14) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight -: : : : : :- *(14) Project [ss_store_sk#1] -: : : : : : +- *(14) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight -: : : : : : :- *(14) Project [ss_sold_time_sk#3, ss_store_sk#1] -: : : : : : : +- *(14) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight -: : : : : : : :- *(14) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] -: : : : : : : : +- *(14) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : : : : : +- *(14) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct -: : : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : : +- *(12) Project [t_time_sk#4] -: : : : : : +- *(12) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 9)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) -: : : : : : +- *(12) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), GreaterThanOrEqual(t_minute,30), IsNo..., ReadSchema: struct -: : : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : +- BroadcastExchange IdentityBroadcastMode -: : : : +- *(20) HashAggregate(keys=[], functions=[count(1)]) -: : : : +- Exchange SinglePartition -: : : : +- *(19) HashAggregate(keys=[], functions=[partial_count(1)]) -: : : : +- *(19) Project -: : : : +- *(19) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight -: : : : :- *(19) Project [ss_store_sk#1] -: : : : : +- *(19) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight -: : : : : :- *(19) Project [ss_sold_time_sk#3, ss_store_sk#1] -: : : : : : +- *(19) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight -: : : : : : :- *(19) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] -: : : : : : : +- *(19) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : : : : +- *(19) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct -: : : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : : +- *(17) Project [t_time_sk#4] -: : : : : +- *(17) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 10)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) -: : : : : +- *(17) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct -: : : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : +- BroadcastExchange IdentityBroadcastMode -: : : +- *(25) HashAggregate(keys=[], functions=[count(1)]) -: : : +- Exchange SinglePartition -: : : +- *(24) HashAggregate(keys=[], functions=[partial_count(1)]) -: : : +- *(24) Project -: : : +- *(24) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight -: : : :- *(24) Project [ss_store_sk#1] -: : : : +- *(24) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight -: : : : :- *(24) Project [ss_sold_time_sk#3, ss_store_sk#1] -: : : : : +- *(24) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight -: : : : : :- *(24) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] -: : : : : : +- *(24) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : : : +- *(24) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct -: : : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : : +- *(22) Project [t_time_sk#4] -: : : : +- *(22) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 10)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) -: : : : +- *(22) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct -: : : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : +- BroadcastExchange IdentityBroadcastMode -: : +- *(30) HashAggregate(keys=[], functions=[count(1)]) -: : +- Exchange SinglePartition -: : +- *(29) HashAggregate(keys=[], functions=[partial_count(1)]) -: : +- *(29) Project -: : +- *(29) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight -: : :- *(29) Project [ss_store_sk#1] -: : : +- *(29) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight -: : : :- *(29) Project [ss_sold_time_sk#3, ss_store_sk#1] -: : : : +- *(29) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight -: : : : :- *(29) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] -: : : : : +- *(29) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : : +- *(29) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct -: : : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : : +- *(27) Project [t_time_sk#4] -: : : +- *(27) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 11)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) -: : : +- *(27) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct -: : +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: +- BroadcastExchange IdentityBroadcastMode -: +- *(35) HashAggregate(keys=[], functions=[count(1)]) -: +- Exchange SinglePartition -: +- *(34) HashAggregate(keys=[], functions=[partial_count(1)]) -: +- *(34) Project -: +- *(34) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight -: :- *(34) Project [ss_store_sk#1] -: : +- *(34) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight -: : :- *(34) Project [ss_sold_time_sk#3, ss_store_sk#1] -: : : +- *(34) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight -: : : :- *(34) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] -: : : : +- *(34) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) -: : : : +- *(34) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct -: : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -: : +- *(32) Project [t_time_sk#4] -: : +- *(32) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 11)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#4)) -: : +- *(32) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct -: +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) -+- BroadcastExchange IdentityBroadcastMode - +- *(40) HashAggregate(keys=[], functions=[count(1)]) - +- Exchange SinglePartition - +- *(39) HashAggregate(keys=[], functions=[partial_count(1)]) - +- *(39) Project - +- *(39) BroadcastHashJoin [ss_store_sk#1], [s_store_sk#2], Inner, BuildRight - :- *(39) Project [ss_store_sk#1] - : +- *(39) BroadcastHashJoin [ss_sold_time_sk#3], [t_time_sk#4], Inner, BuildRight - : :- *(39) Project [ss_sold_time_sk#3, ss_store_sk#1] - : : +- *(39) BroadcastHashJoin [ss_hdemo_sk#5], [hd_demo_sk#6], Inner, BuildRight - : : :- *(39) Project [ss_sold_time_sk#3, ss_hdemo_sk#5, ss_store_sk#1] - : : : +- *(39) Filter ((isnotnull(ss_hdemo_sk#5) && isnotnull(ss_sold_time_sk#3)) && isnotnull(ss_store_sk#1)) - : : : +- *(39) FileScan parquet default.store_sales[ss_sold_time_sk#3,ss_hdemo_sk#5,ss_store_sk#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : : +- ReusedExchange [hd_demo_sk#6], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(37) Project [t_time_sk#4] - : +- *(37) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 12)) && (t_minute#10 < 30)) && isnotnull(t_time_sk#4)) - : +- *(37) FileScan parquet default.time_dim[t_time_sk#4,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,12), LessThan(t_minute,30), IsNotNull(t_t..., ReadSchema: struct - +- ReusedExchange [s_store_sk#2], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +BroadcastNestedLoopJoin Inner BuildRight (174) +:- BroadcastNestedLoopJoin Inner BuildRight (153) +: :- BroadcastNestedLoopJoin Inner BuildRight (132) +: : :- BroadcastNestedLoopJoin Inner BuildRight (111) +: : : :- BroadcastNestedLoopJoin Inner BuildRight (90) +: : : : :- BroadcastNestedLoopJoin Inner BuildRight (69) +: : : : : :- BroadcastNestedLoopJoin Inner BuildRight (48) +: : : : : : :- * HashAggregate (27) +: : : : : : : +- Exchange (26) +: : : : : : : +- * HashAggregate (25) +: : : : : : : +- * Project (24) +: : : : : : : +- * BroadcastHashJoin Inner BuildRight (23) +: : : : : : : :- * Project (17) +: : : : : : : : +- * BroadcastHashJoin Inner BuildRight (16) +: : : : : : : : :- * Project (10) +: : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) +: : : : : : : : : :- * Filter (3) +: : : : : : : : : : +- * ColumnarToRow (2) +: : : : : : : : : : +- Scan parquet default.store_sales (1) +: : : : : : : : : +- BroadcastExchange (8) +: : : : : : : : : +- * Project (7) +: : : : : : : : : +- * Filter (6) +: : : : : : : : : +- * ColumnarToRow (5) +: : : : : : : : : +- Scan parquet default.household_demographics (4) +: : : : : : : : +- BroadcastExchange (15) +: : : : : : : : +- * Project (14) +: : : : : : : : +- * Filter (13) +: : : : : : : : +- * ColumnarToRow (12) +: : : : : : : : +- Scan parquet default.time_dim (11) +: : : : : : : +- BroadcastExchange (22) +: : : : : : : +- * Project (21) +: : : : : : : +- * Filter (20) +: : : : : : : +- * ColumnarToRow (19) +: : : : : : : +- Scan parquet default.store (18) +: : : : : : +- BroadcastExchange (47) +: : : : : : +- * HashAggregate (46) +: : : : : : +- Exchange (45) +: : : : : : +- * HashAggregate (44) +: : : : : : +- * Project (43) +: : : : : : +- * BroadcastHashJoin Inner BuildRight (42) +: : : : : : :- * Project (40) +: : : : : : : +- * BroadcastHashJoin Inner BuildRight (39) +: : : : : : : :- * Project (33) +: : : : : : : : +- * BroadcastHashJoin Inner BuildRight (32) +: : : : : : : : :- * Filter (30) +: : : : : : : : : +- * ColumnarToRow (29) +: : : : : : : : : +- Scan parquet default.store_sales (28) +: : : : : : : : +- ReusedExchange (31) +: : : : : : : +- BroadcastExchange (38) +: : : : : : : +- * Project (37) +: : : : : : : +- * Filter (36) +: : : : : : : +- * ColumnarToRow (35) +: : : : : : : +- Scan parquet default.time_dim (34) +: : : : : : +- ReusedExchange (41) +: : : : : +- BroadcastExchange (68) +: : : : : +- * HashAggregate (67) +: : : : : +- Exchange (66) +: : : : : +- * HashAggregate (65) +: : : : : +- * Project (64) +: : : : : +- * BroadcastHashJoin Inner BuildRight (63) +: : : : : :- * Project (61) +: : : : : : +- * BroadcastHashJoin Inner BuildRight (60) +: : : : : : :- * Project (54) +: : : : : : : +- * BroadcastHashJoin Inner BuildRight (53) +: : : : : : : :- * Filter (51) +: : : : : : : : +- * ColumnarToRow (50) +: : : : : : : : +- Scan parquet default.store_sales (49) +: : : : : : : +- ReusedExchange (52) +: : : : : : +- BroadcastExchange (59) +: : : : : : +- * Project (58) +: : : : : : +- * Filter (57) +: : : : : : +- * ColumnarToRow (56) +: : : : : : +- Scan parquet default.time_dim (55) +: : : : : +- ReusedExchange (62) +: : : : +- BroadcastExchange (89) +: : : : +- * HashAggregate (88) +: : : : +- Exchange (87) +: : : : +- * HashAggregate (86) +: : : : +- * Project (85) +: : : : +- * BroadcastHashJoin Inner BuildRight (84) +: : : : :- * Project (82) +: : : : : +- * BroadcastHashJoin Inner BuildRight (81) +: : : : : :- * Project (75) +: : : : : : +- * BroadcastHashJoin Inner BuildRight (74) +: : : : : : :- * Filter (72) +: : : : : : : +- * ColumnarToRow (71) +: : : : : : : +- Scan parquet default.store_sales (70) +: : : : : : +- ReusedExchange (73) +: : : : : +- BroadcastExchange (80) +: : : : : +- * Project (79) +: : : : : +- * Filter (78) +: : : : : +- * ColumnarToRow (77) +: : : : : +- Scan parquet default.time_dim (76) +: : : : +- ReusedExchange (83) +: : : +- BroadcastExchange (110) +: : : +- * HashAggregate (109) +: : : +- Exchange (108) +: : : +- * HashAggregate (107) +: : : +- * Project (106) +: : : +- * BroadcastHashJoin Inner BuildRight (105) +: : : :- * Project (103) +: : : : +- * BroadcastHashJoin Inner BuildRight (102) +: : : : :- * Project (96) +: : : : : +- * BroadcastHashJoin Inner BuildRight (95) +: : : : : :- * Filter (93) +: : : : : : +- * ColumnarToRow (92) +: : : : : : +- Scan parquet default.store_sales (91) +: : : : : +- ReusedExchange (94) +: : : : +- BroadcastExchange (101) +: : : : +- * Project (100) +: : : : +- * Filter (99) +: : : : +- * ColumnarToRow (98) +: : : : +- Scan parquet default.time_dim (97) +: : : +- ReusedExchange (104) +: : +- BroadcastExchange (131) +: : +- * HashAggregate (130) +: : +- Exchange (129) +: : +- * HashAggregate (128) +: : +- * Project (127) +: : +- * BroadcastHashJoin Inner BuildRight (126) +: : :- * Project (124) +: : : +- * BroadcastHashJoin Inner BuildRight (123) +: : : :- * Project (117) +: : : : +- * BroadcastHashJoin Inner BuildRight (116) +: : : : :- * Filter (114) +: : : : : +- * ColumnarToRow (113) +: : : : : +- Scan parquet default.store_sales (112) +: : : : +- ReusedExchange (115) +: : : +- BroadcastExchange (122) +: : : +- * Project (121) +: : : +- * Filter (120) +: : : +- * ColumnarToRow (119) +: : : +- Scan parquet default.time_dim (118) +: : +- ReusedExchange (125) +: +- BroadcastExchange (152) +: +- * HashAggregate (151) +: +- Exchange (150) +: +- * HashAggregate (149) +: +- * Project (148) +: +- * BroadcastHashJoin Inner BuildRight (147) +: :- * Project (145) +: : +- * BroadcastHashJoin Inner BuildRight (144) +: : :- * Project (138) +: : : +- * BroadcastHashJoin Inner BuildRight (137) +: : : :- * Filter (135) +: : : : +- * ColumnarToRow (134) +: : : : +- Scan parquet default.store_sales (133) +: : : +- ReusedExchange (136) +: : +- BroadcastExchange (143) +: : +- * Project (142) +: : +- * Filter (141) +: : +- * ColumnarToRow (140) +: : +- Scan parquet default.time_dim (139) +: +- ReusedExchange (146) ++- BroadcastExchange (173) + +- * HashAggregate (172) + +- Exchange (171) + +- * HashAggregate (170) + +- * Project (169) + +- * BroadcastHashJoin Inner BuildRight (168) + :- * Project (166) + : +- * BroadcastHashJoin Inner BuildRight (165) + : :- * Project (159) + : : +- * BroadcastHashJoin Inner BuildRight (158) + : : :- * Filter (156) + : : : +- * ColumnarToRow (155) + : : : +- Scan parquet default.store_sales (154) + : : +- ReusedExchange (157) + : +- BroadcastExchange (164) + : +- * Project (163) + : +- * Filter (162) + : +- * ColumnarToRow (161) + : +- Scan parquet default.time_dim (160) + +- ReusedExchange (167) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(3) Filter [codegen id : 4] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet default.household_demographics +Output [3]: [hd_demo_sk#4, hd_dep_count#5, hd_vehicle_count#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [Or(Or(And(EqualTo(hd_dep_count,4),LessThanOrEqual(hd_vehicle_count,6)),And(EqualTo(hd_dep_count,2),LessThanOrEqual(hd_vehicle_count,4))),And(EqualTo(hd_dep_count,0),LessThanOrEqual(hd_vehicle_count,2))), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [hd_demo_sk#4, hd_dep_count#5, hd_vehicle_count#6] + +(6) Filter [codegen id : 1] +Input [3]: [hd_demo_sk#4, hd_dep_count#5, hd_vehicle_count#6] +Condition : (((((hd_dep_count#5 = 4) AND (hd_vehicle_count#6 <= 6)) OR ((hd_dep_count#5 = 2) AND (hd_vehicle_count#6 <= 4))) OR ((hd_dep_count#5 = 0) AND (hd_vehicle_count#6 <= 2))) AND isnotnull(hd_demo_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [hd_demo_sk#4] +Input [3]: [hd_demo_sk#4, hd_dep_count#5, hd_vehicle_count#6] + +(8) BroadcastExchange +Input [1]: [hd_demo_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(10) Project [codegen id : 4] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] + +(11) Scan parquet default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,8), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(13) Filter [codegen id : 2] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 8)) AND (t_minute#10 >= 30)) AND isnotnull(t_time_sk#8)) + +(14) Project [codegen id : 2] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(15) BroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#8] +Join condition: None + +(17) Project [codegen id : 4] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] + +(18) Scan parquet default.store +Output [2]: [s_store_sk#12, s_store_name#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#12, s_store_name#13] + +(20) Filter [codegen id : 3] +Input [2]: [s_store_sk#12, s_store_name#13] +Condition : ((isnotnull(s_store_name#13) AND (s_store_name#13 = ese)) AND isnotnull(s_store_sk#12)) + +(21) Project [codegen id : 3] +Output [1]: [s_store_sk#12] +Input [2]: [s_store_sk#12, s_store_name#13] + +(22) BroadcastExchange +Input [1]: [s_store_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(24) Project [codegen id : 4] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#12] + +(25) HashAggregate [codegen id : 4] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#15] +Results [1]: [count#16] + +(26) Exchange +Input [1]: [count#16] +Arguments: SinglePartition, true, [id=#17] + +(27) HashAggregate [codegen id : 5] +Input [1]: [count#16] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#18] +Results [1]: [count(1)#18 AS h8_30_to_9#19] + +(28) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 9] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(30) Filter [codegen id : 9] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(31) ReusedExchange [Reuses operator id: 8] +Output [1]: [hd_demo_sk#4] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(33) Project [codegen id : 9] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] + +(34) Scan parquet default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 7] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(36) Filter [codegen id : 7] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 9)) AND (t_minute#10 < 30)) AND isnotnull(t_time_sk#8)) + +(37) Project [codegen id : 7] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(38) BroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#8] +Join condition: None + +(40) Project [codegen id : 9] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] + +(41) ReusedExchange [Reuses operator id: 22] +Output [1]: [s_store_sk#12] + +(42) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(43) Project [codegen id : 9] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#12] + +(44) HashAggregate [codegen id : 9] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#21] +Results [1]: [count#22] + +(45) Exchange +Input [1]: [count#22] +Arguments: SinglePartition, true, [id=#23] + +(46) HashAggregate [codegen id : 10] +Input [1]: [count#22] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#24] +Results [1]: [count(1)#24 AS h9_to_9_30#25] + +(47) BroadcastExchange +Input [1]: [h9_to_9_30#25] +Arguments: IdentityBroadcastMode, [id=#26] + +(48) BroadcastNestedLoopJoin +Join condition: None + +(49) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 14] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(51) Filter [codegen id : 14] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(52) ReusedExchange [Reuses operator id: 8] +Output [1]: [hd_demo_sk#4] + +(53) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(54) Project [codegen id : 14] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] + +(55) Scan parquet default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(56) ColumnarToRow [codegen id : 12] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(57) Filter [codegen id : 12] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 9)) AND (t_minute#10 >= 30)) AND isnotnull(t_time_sk#8)) + +(58) Project [codegen id : 12] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(59) BroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] + +(60) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#8] +Join condition: None + +(61) Project [codegen id : 14] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] + +(62) ReusedExchange [Reuses operator id: 22] +Output [1]: [s_store_sk#12] + +(63) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(64) Project [codegen id : 14] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#12] + +(65) HashAggregate [codegen id : 14] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#28] +Results [1]: [count#29] + +(66) Exchange +Input [1]: [count#29] +Arguments: SinglePartition, true, [id=#30] + +(67) HashAggregate [codegen id : 15] +Input [1]: [count#29] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#31] +Results [1]: [count(1)#31 AS h9_30_to_10#32] + +(68) BroadcastExchange +Input [1]: [h9_30_to_10#32] +Arguments: IdentityBroadcastMode, [id=#33] + +(69) BroadcastNestedLoopJoin +Join condition: None + +(70) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(71) ColumnarToRow [codegen id : 19] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(72) Filter [codegen id : 19] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(73) ReusedExchange [Reuses operator id: 8] +Output [1]: [hd_demo_sk#4] + +(74) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(75) Project [codegen id : 19] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] + +(76) Scan parquet default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(77) ColumnarToRow [codegen id : 17] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(78) Filter [codegen id : 17] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 10)) AND (t_minute#10 < 30)) AND isnotnull(t_time_sk#8)) + +(79) Project [codegen id : 17] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(80) BroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#34] + +(81) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#8] +Join condition: None + +(82) Project [codegen id : 19] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] + +(83) ReusedExchange [Reuses operator id: 22] +Output [1]: [s_store_sk#12] + +(84) BroadcastHashJoin [codegen id : 19] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(85) Project [codegen id : 19] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#12] + +(86) HashAggregate [codegen id : 19] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#35] +Results [1]: [count#36] + +(87) Exchange +Input [1]: [count#36] +Arguments: SinglePartition, true, [id=#37] + +(88) HashAggregate [codegen id : 20] +Input [1]: [count#36] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#38] +Results [1]: [count(1)#38 AS h10_to_10_30#39] + +(89) BroadcastExchange +Input [1]: [h10_to_10_30#39] +Arguments: IdentityBroadcastMode, [id=#40] + +(90) BroadcastNestedLoopJoin +Join condition: None + +(91) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(92) ColumnarToRow [codegen id : 24] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(93) Filter [codegen id : 24] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(94) ReusedExchange [Reuses operator id: 8] +Output [1]: [hd_demo_sk#4] + +(95) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(96) Project [codegen id : 24] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] + +(97) Scan parquet default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(98) ColumnarToRow [codegen id : 22] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(99) Filter [codegen id : 22] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 10)) AND (t_minute#10 >= 30)) AND isnotnull(t_time_sk#8)) + +(100) Project [codegen id : 22] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(101) BroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#41] + +(102) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#8] +Join condition: None + +(103) Project [codegen id : 24] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] + +(104) ReusedExchange [Reuses operator id: 22] +Output [1]: [s_store_sk#12] + +(105) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(106) Project [codegen id : 24] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#12] + +(107) HashAggregate [codegen id : 24] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#42] +Results [1]: [count#43] + +(108) Exchange +Input [1]: [count#43] +Arguments: SinglePartition, true, [id=#44] + +(109) HashAggregate [codegen id : 25] +Input [1]: [count#43] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#45] +Results [1]: [count(1)#45 AS h10_30_to_11#46] + +(110) BroadcastExchange +Input [1]: [h10_30_to_11#46] +Arguments: IdentityBroadcastMode, [id=#47] + +(111) BroadcastNestedLoopJoin +Join condition: None + +(112) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(113) ColumnarToRow [codegen id : 29] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(114) Filter [codegen id : 29] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(115) ReusedExchange [Reuses operator id: 8] +Output [1]: [hd_demo_sk#4] + +(116) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(117) Project [codegen id : 29] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] + +(118) Scan parquet default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(119) ColumnarToRow [codegen id : 27] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(120) Filter [codegen id : 27] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 11)) AND (t_minute#10 < 30)) AND isnotnull(t_time_sk#8)) + +(121) Project [codegen id : 27] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(122) BroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#48] + +(123) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#8] +Join condition: None + +(124) Project [codegen id : 29] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] + +(125) ReusedExchange [Reuses operator id: 22] +Output [1]: [s_store_sk#12] + +(126) BroadcastHashJoin [codegen id : 29] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(127) Project [codegen id : 29] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#12] + +(128) HashAggregate [codegen id : 29] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#49] +Results [1]: [count#50] + +(129) Exchange +Input [1]: [count#50] +Arguments: SinglePartition, true, [id=#51] + +(130) HashAggregate [codegen id : 30] +Input [1]: [count#50] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#52] +Results [1]: [count(1)#52 AS h11_to_11_30#53] + +(131) BroadcastExchange +Input [1]: [h11_to_11_30#53] +Arguments: IdentityBroadcastMode, [id=#54] + +(132) BroadcastNestedLoopJoin +Join condition: None + +(133) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(134) ColumnarToRow [codegen id : 34] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(135) Filter [codegen id : 34] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(136) ReusedExchange [Reuses operator id: 8] +Output [1]: [hd_demo_sk#4] + +(137) BroadcastHashJoin [codegen id : 34] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(138) Project [codegen id : 34] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] + +(139) Scan parquet default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(140) ColumnarToRow [codegen id : 32] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(141) Filter [codegen id : 32] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 11)) AND (t_minute#10 >= 30)) AND isnotnull(t_time_sk#8)) + +(142) Project [codegen id : 32] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(143) BroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#55] + +(144) BroadcastHashJoin [codegen id : 34] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#8] +Join condition: None + +(145) Project [codegen id : 34] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] + +(146) ReusedExchange [Reuses operator id: 22] +Output [1]: [s_store_sk#12] + +(147) BroadcastHashJoin [codegen id : 34] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(148) Project [codegen id : 34] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#12] + +(149) HashAggregate [codegen id : 34] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#56] +Results [1]: [count#57] + +(150) Exchange +Input [1]: [count#57] +Arguments: SinglePartition, true, [id=#58] + +(151) HashAggregate [codegen id : 35] +Input [1]: [count#57] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#59] +Results [1]: [count(1)#59 AS h11_30_to_12#60] + +(152) BroadcastExchange +Input [1]: [h11_30_to_12#60] +Arguments: IdentityBroadcastMode, [id=#61] + +(153) BroadcastNestedLoopJoin +Join condition: None + +(154) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(155) ColumnarToRow [codegen id : 39] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(156) Filter [codegen id : 39] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(157) ReusedExchange [Reuses operator id: 8] +Output [1]: [hd_demo_sk#4] + +(158) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(159) Project [codegen id : 39] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] + +(160) Scan parquet default.time_dim +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,12), LessThan(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(161) ColumnarToRow [codegen id : 37] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(162) Filter [codegen id : 37] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 12)) AND (t_minute#10 < 30)) AND isnotnull(t_time_sk#8)) + +(163) Project [codegen id : 37] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] + +(164) BroadcastExchange +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#62] + +(165) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#8] +Join condition: None + +(166) Project [codegen id : 39] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] + +(167) ReusedExchange [Reuses operator id: 22] +Output [1]: [s_store_sk#12] + +(168) BroadcastHashJoin [codegen id : 39] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#12] +Join condition: None + +(169) Project [codegen id : 39] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#12] + +(170) HashAggregate [codegen id : 39] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#63] +Results [1]: [count#64] + +(171) Exchange +Input [1]: [count#64] +Arguments: SinglePartition, true, [id=#65] + +(172) HashAggregate [codegen id : 40] +Input [1]: [count#64] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#66] +Results [1]: [count(1)#66 AS h12_to_12_30#67] + +(173) BroadcastExchange +Input [1]: [h12_to_12_30#67] +Arguments: IdentityBroadcastMode, [id=#68] + +(174) BroadcastNestedLoopJoin +Join condition: None + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/simplified.txt index 77bbef312..8e72594b0 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q88/simplified.txt @@ -5,218 +5,246 @@ BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),h8_30_to_9] + WholeStageCodegen (5) + HashAggregate [count] [count(1),h8_30_to_9,count] InputAdapter Exchange #1 - WholeStageCodegen - HashAggregate [count,count] [count,count] + WholeStageCodegen (4) + HashAggregate [count,count] Project - BroadcastHashJoin [s_store_sk,ss_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (1) Project [hd_demo_sk] - Filter [hd_demo_sk,hd_dep_count,hd_vehicle_count] - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] [hd_demo_sk,hd_dep_count,hd_vehicle_count] + Filter [hd_dep_count,hd_vehicle_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count,hd_vehicle_count] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (2) Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (3) Project [s_store_sk] Filter [s_store_name,s_store_sk] - Scan parquet default.store [s_store_name,s_store_sk] [s_store_name,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_name] BroadcastExchange #5 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),h9_to_9_30] + WholeStageCodegen (10) + HashAggregate [count] [count(1),h9_to_9_30,count] InputAdapter Exchange #6 - WholeStageCodegen - HashAggregate [count,count] [count,count] + WholeStageCodegen (9) + HashAggregate [count,count] Project - BroadcastHashJoin [s_store_sk,ss_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] InputAdapter - ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + ReusedExchange [hd_demo_sk] #2 InputAdapter BroadcastExchange #7 - WholeStageCodegen + WholeStageCodegen (7) Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] InputAdapter - ReusedExchange [s_store_sk] [s_store_sk] #4 + ReusedExchange [s_store_sk] #4 BroadcastExchange #8 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),h9_30_to_10] + WholeStageCodegen (15) + HashAggregate [count] [count(1),h9_30_to_10,count] InputAdapter Exchange #9 - WholeStageCodegen - HashAggregate [count,count] [count,count] + WholeStageCodegen (14) + HashAggregate [count,count] Project - BroadcastHashJoin [s_store_sk,ss_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] InputAdapter - ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + ReusedExchange [hd_demo_sk] #2 InputAdapter BroadcastExchange #10 - WholeStageCodegen + WholeStageCodegen (12) Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] InputAdapter - ReusedExchange [s_store_sk] [s_store_sk] #4 + ReusedExchange [s_store_sk] #4 BroadcastExchange #11 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),h10_to_10_30] + WholeStageCodegen (20) + HashAggregate [count] [count(1),h10_to_10_30,count] InputAdapter Exchange #12 - WholeStageCodegen - HashAggregate [count,count] [count,count] + WholeStageCodegen (19) + HashAggregate [count,count] Project - BroadcastHashJoin [s_store_sk,ss_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] InputAdapter - ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + ReusedExchange [hd_demo_sk] #2 InputAdapter BroadcastExchange #13 - WholeStageCodegen + WholeStageCodegen (17) Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] InputAdapter - ReusedExchange [s_store_sk] [s_store_sk] #4 + ReusedExchange [s_store_sk] #4 BroadcastExchange #14 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),h10_30_to_11] + WholeStageCodegen (25) + HashAggregate [count] [count(1),h10_30_to_11,count] InputAdapter Exchange #15 - WholeStageCodegen - HashAggregate [count,count] [count,count] + WholeStageCodegen (24) + HashAggregate [count,count] Project - BroadcastHashJoin [s_store_sk,ss_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] InputAdapter - ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + ReusedExchange [hd_demo_sk] #2 InputAdapter BroadcastExchange #16 - WholeStageCodegen + WholeStageCodegen (22) Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] InputAdapter - ReusedExchange [s_store_sk] [s_store_sk] #4 + ReusedExchange [s_store_sk] #4 BroadcastExchange #17 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),h11_to_11_30] + WholeStageCodegen (30) + HashAggregate [count] [count(1),h11_to_11_30,count] InputAdapter Exchange #18 - WholeStageCodegen - HashAggregate [count,count] [count,count] + WholeStageCodegen (29) + HashAggregate [count,count] Project - BroadcastHashJoin [s_store_sk,ss_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] InputAdapter - ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + ReusedExchange [hd_demo_sk] #2 InputAdapter BroadcastExchange #19 - WholeStageCodegen + WholeStageCodegen (27) Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] InputAdapter - ReusedExchange [s_store_sk] [s_store_sk] #4 + ReusedExchange [s_store_sk] #4 BroadcastExchange #20 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),h11_30_to_12] + WholeStageCodegen (35) + HashAggregate [count] [count(1),h11_30_to_12,count] InputAdapter Exchange #21 - WholeStageCodegen - HashAggregate [count,count] [count,count] + WholeStageCodegen (34) + HashAggregate [count,count] Project - BroadcastHashJoin [s_store_sk,ss_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] InputAdapter - ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + ReusedExchange [hd_demo_sk] #2 InputAdapter BroadcastExchange #22 - WholeStageCodegen + WholeStageCodegen (32) Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] InputAdapter - ReusedExchange [s_store_sk] [s_store_sk] #4 + ReusedExchange [s_store_sk] #4 BroadcastExchange #23 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),h12_to_12_30] + WholeStageCodegen (40) + HashAggregate [count] [count(1),h12_to_12_30,count] InputAdapter Exchange #24 - WholeStageCodegen - HashAggregate [count,count] [count,count] + WholeStageCodegen (39) + HashAggregate [count,count] Project - BroadcastHashJoin [s_store_sk,ss_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] InputAdapter - ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + ReusedExchange [hd_demo_sk] #2 InputAdapter BroadcastExchange #25 - WholeStageCodegen + WholeStageCodegen (37) Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] InputAdapter - ReusedExchange [s_store_sk] [s_store_sk] #4 + ReusedExchange [s_store_sk] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/explain.txt index f34115720..8dca84461 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/explain.txt @@ -1,31 +1,175 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST,s_store_name#3 ASC NULLS FIRST], output=[i_category#4,i_class#5,i_brand#6,s_store_name#3,s_company_name#7,d_moy#8,sum_sales#1,avg_monthly_sales#2]) -+- *(7) Project [i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8, sum_sales#1, avg_monthly_sales#2] - +- *(7) Filter (CASE WHEN NOT (avg_monthly_sales#2 = 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#1 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#2 as decimal(22,6)))), DecimalType(38,16)) ELSE null END > 0.1000000000000000) - +- Window [avg(_w0#9) windowspecdefinition(i_category#4, i_brand#6, s_store_name#3, s_company_name#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#2], [i_category#4, i_brand#6, s_store_name#3, s_company_name#7] - +- *(6) Sort [i_category#4 ASC NULLS FIRST, i_brand#6 ASC NULLS FIRST, s_store_name#3 ASC NULLS FIRST, s_company_name#7 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(i_category#4, i_brand#6, s_store_name#3, s_company_name#7, 5) - +- *(5) HashAggregate(keys=[i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8], functions=[sum(UnscaledValue(ss_sales_price#10))]) - +- Exchange hashpartitioning(i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8, 5) - +- *(4) HashAggregate(keys=[i_category#4, i_class#5, i_brand#6, s_store_name#3, s_company_name#7, d_moy#8], functions=[partial_sum(UnscaledValue(ss_sales_price#10))]) - +- *(4) Project [i_brand#6, i_class#5, i_category#4, ss_sales_price#10, d_moy#8, s_store_name#3, s_company_name#7] - +- *(4) BroadcastHashJoin [ss_store_sk#11], [s_store_sk#12], Inner, BuildRight - :- *(4) Project [i_brand#6, i_class#5, i_category#4, ss_store_sk#11, ss_sales_price#10, d_moy#8] - : +- *(4) BroadcastHashJoin [ss_sold_date_sk#13], [d_date_sk#14], Inner, BuildRight - : :- *(4) Project [i_brand#6, i_class#5, i_category#4, ss_sold_date_sk#13, ss_store_sk#11, ss_sales_price#10] - : : +- *(4) BroadcastHashJoin [i_item_sk#15], [ss_item_sk#16], Inner, BuildRight - : : :- *(4) Project [i_item_sk#15, i_brand#6, i_class#5, i_category#4] - : : : +- *(4) Filter (((i_category#4 IN (Books,Electronics,Sports) && i_class#5 IN (computers,stereo,football)) || (i_category#4 IN (Men,Jewelry,Women) && i_class#5 IN (shirts,birdal,dresses))) && isnotnull(i_item_sk#15)) - : : : +- *(4) FileScan parquet default.item[i_item_sk#15,i_brand#6,i_class#5,i_category#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [Or(And(In(i_category, [Books,Electronics,Sports]),In(i_class, [computers,stereo,football])),And(..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : +- *(1) Project [ss_sold_date_sk#13, ss_item_sk#16, ss_store_sk#11, ss_sales_price#10] - : : +- *(1) Filter ((isnotnull(ss_item_sk#16) && isnotnull(ss_sold_date_sk#13)) && isnotnull(ss_store_sk#11)) - : : +- *(1) FileScan parquet default.store_sales[ss_sold_date_sk#13,ss_item_sk#16,ss_store_sk#11,ss_sales_price#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#14, d_moy#8] - : +- *(2) Filter ((isnotnull(d_year#17) && (d_year#17 = 1999)) && isnotnull(d_date_sk#14)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#14,d_year#17,d_moy#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [s_store_sk#12, s_store_name#3, s_company_name#7] - +- *(3) Filter isnotnull(s_store_sk#12) - +- *(3) FileScan parquet default.store[s_store_sk#12,s_store_name#3,s_company_name#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (31) ++- * Project (30) + +- * Filter (29) + +- Window (28) + +- * Sort (27) + +- Exchange (26) + +- * HashAggregate (25) + +- Exchange (24) + +- * HashAggregate (23) + +- * Project (22) + +- * BroadcastHashJoin Inner BuildRight (21) + :- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.store_sales (4) + : +- BroadcastExchange (14) + : +- * Project (13) + : +- * Filter (12) + : +- * ColumnarToRow (11) + : +- Scan parquet default.date_dim (10) + +- BroadcastExchange (20) + +- * Filter (19) + +- * ColumnarToRow (18) + +- Scan parquet default.store (17) + + +(1) Scan parquet default.item +Output [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [Or(And(In(i_category, [Books,Electronics,Sports]),In(i_class, [computers,stereo,football])),And(In(i_category, [Men,Jewelry,Women]),In(i_class, [shirts,birdal,dresses]))), IsNotNull(i_item_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] + +(3) Filter [codegen id : 4] +Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] +Condition : (((i_category#4 IN (Books,Electronics,Sports) AND i_class#3 IN (computers,stereo,football)) OR (i_category#4 IN (Men,Jewelry,Women) AND i_class#3 IN (shirts,birdal,dresses))) AND isnotnull(i_item_sk#1)) + +(4) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] + +(6) Filter [codegen id : 1] +Input [4]: [ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] +Condition : ((isnotnull(ss_item_sk#6) AND isnotnull(ss_sold_date_sk#5)) AND isnotnull(ss_store_sk#7)) + +(7) BroadcastExchange +Input [4]: [ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#9] + +(8) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_item_sk#1] +Right keys [1]: [ss_item_sk#6] +Join condition: None + +(9) Project [codegen id : 4] +Output [6]: [i_brand#2, i_class#3, i_category#4, ss_sold_date_sk#5, ss_store_sk#7, ss_sales_price#8] +Input [8]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, ss_sold_date_sk#5, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8] + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] +Condition : ((isnotnull(d_year#11) AND (d_year#11 = 1999)) AND isnotnull(d_date_sk#10)) + +(13) Project [codegen id : 2] +Output [2]: [d_date_sk#10, d_moy#12] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(14) BroadcastExchange +Input [2]: [d_date_sk#10, d_moy#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(16) Project [codegen id : 4] +Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#7, ss_sales_price#8, d_moy#12] +Input [8]: [i_brand#2, i_class#3, i_category#4, ss_sold_date_sk#5, ss_store_sk#7, ss_sales_price#8, d_date_sk#10, d_moy#12] + +(17) Scan parquet default.store +Output [3]: [s_store_sk#14, s_store_name#15, s_company_name#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [3]: [s_store_sk#14, s_store_name#15, s_company_name#16] + +(19) Filter [codegen id : 3] +Input [3]: [s_store_sk#14, s_store_name#15, s_company_name#16] +Condition : isnotnull(s_store_sk#14) + +(20) BroadcastExchange +Input [3]: [s_store_sk#14, s_store_name#15, s_company_name#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] + +(21) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#7] +Right keys [1]: [s_store_sk#14] +Join condition: None + +(22) Project [codegen id : 4] +Output [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#8, d_moy#12, s_store_name#15, s_company_name#16] +Input [9]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#7, ss_sales_price#8, d_moy#12, s_store_sk#14, s_store_name#15, s_company_name#16] + +(23) HashAggregate [codegen id : 4] +Input [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#8, d_moy#12, s_store_name#15, s_company_name#16] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#8))] +Aggregate Attributes [1]: [sum#18] +Results [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum#19] + +(24) Exchange +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum#19] +Arguments: hashpartitioning(i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, 5), true, [id=#20] + +(25) HashAggregate [codegen id : 5] +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum#19] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12] +Functions [1]: [sum(UnscaledValue(ss_sales_price#8))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#8))#21] +Results [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, MakeDecimal(sum(UnscaledValue(ss_sales_price#8))#21,17,2) AS sum_sales#22, MakeDecimal(sum(UnscaledValue(ss_sales_price#8))#21,17,2) AS _w0#23] + +(26) Exchange +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23] +Arguments: hashpartitioning(i_category#4, i_brand#2, s_store_name#15, s_company_name#16, 5), true, [id=#24] + +(27) Sort [codegen id : 6] +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23] +Arguments: [i_category#4 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#15 ASC NULLS FIRST, s_company_name#16 ASC NULLS FIRST], false, 0 + +(28) Window +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23] +Arguments: [avg(_w0#23) windowspecdefinition(i_category#4, i_brand#2, s_store_name#15, s_company_name#16, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#25], [i_category#4, i_brand#2, s_store_name#15, s_company_name#16] + +(29) Filter [codegen id : 7] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23, avg_monthly_sales#25] +Condition : (CASE WHEN NOT (avg_monthly_sales#25 = 0.000000) THEN CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true))) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16), true) ELSE null END > 0.1000000000000000) + +(30) Project [codegen id : 7] +Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, avg_monthly_sales#25] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, _w0#23, avg_monthly_sales#25] + +(31) TakeOrderedAndProject +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, avg_monthly_sales#25] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6), true) ASC NULLS FIRST, s_store_name#15 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#15, s_company_name#16, d_moy#12, sum_sales#22, avg_monthly_sales#25] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/simplified.txt index 33e1ef0af..efeab7f69 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q89/simplified.txt @@ -1,43 +1,48 @@ -TakeOrderedAndProject [avg_monthly_sales,d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum_sales] - WholeStageCodegen - Project [avg_monthly_sales,d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum_sales] +TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_class,i_brand,s_company_name,d_moy] + WholeStageCodegen (7) + Project [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,sum_sales,avg_monthly_sales] Filter [avg_monthly_sales,sum_sales] InputAdapter - Window [_w0,i_brand,i_category,s_company_name,s_store_name] - WholeStageCodegen - Sort [i_brand,i_category,s_company_name,s_store_name] + Window [_w0,i_category,i_brand,s_store_name,s_company_name] + WholeStageCodegen (6) + Sort [i_category,i_brand,s_store_name,s_company_name] InputAdapter - Exchange [i_brand,i_category,s_company_name,s_store_name] #1 - WholeStageCodegen - HashAggregate [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,sum,sum(UnscaledValue(ss_sales_price))] [_w0,sum,sum(UnscaledValue(ss_sales_price)),sum_sales] + Exchange [i_category,i_brand,s_store_name,s_company_name] #1 + WholeStageCodegen (5) + HashAggregate [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,sum] [sum(UnscaledValue(ss_sales_price)),sum_sales,_w0,sum] InputAdapter - Exchange [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name] #2 - WholeStageCodegen - HashAggregate [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,ss_sales_price,sum,sum] [sum,sum] - Project [d_moy,i_brand,i_category,i_class,s_company_name,s_store_name,ss_sales_price] - BroadcastHashJoin [s_store_sk,ss_store_sk] - Project [d_moy,i_brand,i_category,i_class,ss_sales_price,ss_store_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_brand,i_category,i_class,ss_sales_price,ss_sold_date_sk,ss_store_sk] + Exchange [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy] #2 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,i_brand,s_store_name,s_company_name,d_moy,ss_sales_price] [sum,sum] + Project [i_brand,i_class,i_category,ss_sales_price,d_moy,s_store_name,s_company_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [i_brand,i_class,i_category,ss_store_sk,ss_sales_price,d_moy] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [i_brand,i_class,i_category,ss_sold_date_sk,ss_store_sk,ss_sales_price] BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [i_brand,i_category,i_class,i_item_sk] - Filter [i_category,i_class,i_item_sk] - Scan parquet default.item [i_brand,i_category,i_class,i_item_sk] [i_brand,i_category,i_class,i_item_sk] + Filter [i_category,i_class,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand,i_class,i_category] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] - Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] - Scan parquet default.store_sales [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] [ss_item_sk,ss_sales_price,ss_sold_date_sk,ss_store_sk] + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_sales_price] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date_sk,d_moy] - Filter [d_date_sk,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [s_company_name,s_store_name,s_store_sk] - Filter [s_store_sk] - Scan parquet default.store [s_company_name,s_store_name,s_store_sk] [s_company_name,s_store_name,s_store_sk] + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_name,s_company_name] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/explain.txt index e822c1ed0..55f7977b2 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/explain.txt @@ -1,109 +1,718 @@ == Physical Plan == -*(1) Project [CASE WHEN (Subquery subquery1150 > 62316685) THEN Subquery subquery1151 ELSE Subquery subquery1152 END AS bucket1#1, CASE WHEN (Subquery subquery1154 > 19045798) THEN Subquery subquery1155 ELSE Subquery subquery1156 END AS bucket2#2, CASE WHEN (Subquery subquery1158 > 365541424) THEN Subquery subquery1159 ELSE Subquery subquery1160 END AS bucket3#3, CASE WHEN (Subquery subquery1162 > 216357808) THEN Subquery subquery1163 ELSE Subquery subquery1164 END AS bucket4#4, CASE WHEN (Subquery subquery1166 > 184483884) THEN Subquery subquery1167 ELSE Subquery subquery1168 END AS bucket5#5] -: :- Subquery subquery1150 -: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) -: : +- *(1) Project -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct -: :- Subquery subquery1151 -: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- *(1) Project [ss_ext_discount_amt#7] -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct -: :- Subquery subquery1152 -: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) -: : +- *(1) Project [ss_net_paid#8] -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 1)) && (ss_quantity#6 <= 20)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)], ReadSchema: struct -: :- Subquery subquery1154 -: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) -: : +- *(1) Project -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct -: :- Subquery subquery1155 -: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- *(1) Project [ss_ext_discount_amt#7] -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct -: :- Subquery subquery1156 -: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) -: : +- *(1) Project [ss_net_paid#8] -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 21)) && (ss_quantity#6 <= 40)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)], ReadSchema: struct -: :- Subquery subquery1158 -: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) -: : +- *(1) Project -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct -: :- Subquery subquery1159 -: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- *(1) Project [ss_ext_discount_amt#7] -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct -: :- Subquery subquery1160 -: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) -: : +- *(1) Project [ss_net_paid#8] -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 41)) && (ss_quantity#6 <= 60)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)], ReadSchema: struct -: :- Subquery subquery1162 -: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) -: : +- *(1) Project -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct -: :- Subquery subquery1163 -: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- *(1) Project [ss_ext_discount_amt#7] -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct -: :- Subquery subquery1164 -: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) -: : +- *(1) Project [ss_net_paid#8] -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 61)) && (ss_quantity#6 <= 80)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)], ReadSchema: struct -: :- Subquery subquery1166 -: : +- *(2) HashAggregate(keys=[], functions=[count(1)]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_count(1)]) -: : +- *(1) Project -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct -: :- Subquery subquery1167 -: : +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- Exchange SinglePartition -: : +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_ext_discount_amt#7))]) -: : +- *(1) Project [ss_ext_discount_amt#7] -: : +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) -: : +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_ext_discount_amt#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct -: +- Subquery subquery1168 -: +- *(2) HashAggregate(keys=[], functions=[avg(UnscaledValue(ss_net_paid#8))]) -: +- Exchange SinglePartition -: +- *(1) HashAggregate(keys=[], functions=[partial_avg(UnscaledValue(ss_net_paid#8))]) -: +- *(1) Project [ss_net_paid#8] -: +- *(1) Filter ((isnotnull(ss_quantity#6) && (ss_quantity#6 >= 81)) && (ss_quantity#6 <= 100)) -: +- *(1) FileScan parquet default.store_sales[ss_quantity#6,ss_net_paid#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)], ReadSchema: struct -+- *(1) Filter (isnotnull(r_reason_sk#9) && (r_reason_sk#9 = 1)) - +- *(1) FileScan parquet default.reason[r_reason_sk#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_sk), EqualTo(r_reason_sk,1)], ReadSchema: struct \ No newline at end of file +* Project (4) ++- * Filter (3) + +- * ColumnarToRow (2) + +- Scan parquet default.reason (1) + + +(1) Scan parquet default.reason +Output [1]: [r_reason_sk#1] +Batched: true +Location [not included in comparison]/{warehouse_dir}/reason] +PushedFilters: [IsNotNull(r_reason_sk), EqualTo(r_reason_sk,1)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input [1]: [r_reason_sk#1] + +(3) Filter [codegen id : 1] +Input [1]: [r_reason_sk#1] +Condition : (isnotnull(r_reason_sk#1) AND (r_reason_sk#1 = 1)) + +(4) Project [codegen id : 1] +Output [5]: [CASE WHEN (Subquery scalar-subquery#2, [id=#3] > 62316685) THEN Subquery scalar-subquery#4, [id=#5] ELSE Subquery scalar-subquery#6, [id=#7] END AS bucket1#8, CASE WHEN (Subquery scalar-subquery#9, [id=#10] > 19045798) THEN Subquery scalar-subquery#11, [id=#12] ELSE Subquery scalar-subquery#13, [id=#14] END AS bucket2#15, CASE WHEN (Subquery scalar-subquery#16, [id=#17] > 365541424) THEN Subquery scalar-subquery#18, [id=#19] ELSE Subquery scalar-subquery#20, [id=#21] END AS bucket3#22, CASE WHEN (Subquery scalar-subquery#23, [id=#24] > 216357808) THEN Subquery scalar-subquery#25, [id=#26] ELSE Subquery scalar-subquery#27, [id=#28] END AS bucket4#29, CASE WHEN (Subquery scalar-subquery#30, [id=#31] > 184483884) THEN Subquery scalar-subquery#32, [id=#33] ELSE Subquery scalar-subquery#34, [id=#35] END AS bucket5#36] +Input [1]: [r_reason_sk#1] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#2, [id=#3] +* HashAggregate (11) ++- Exchange (10) + +- * HashAggregate (9) + +- * Project (8) + +- * Filter (7) + +- * ColumnarToRow (6) + +- Scan parquet default.store_sales (5) + + +(5) Scan parquet default.store_sales +Output [1]: [ss_quantity#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] +ReadSchema: struct + +(6) ColumnarToRow [codegen id : 1] +Input [1]: [ss_quantity#37] + +(7) Filter [codegen id : 1] +Input [1]: [ss_quantity#37] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 1)) AND (ss_quantity#37 <= 20)) + +(8) Project [codegen id : 1] +Output: [] +Input [1]: [ss_quantity#37] + +(9) HashAggregate [codegen id : 1] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#38] +Results [1]: [count#39] + +(10) Exchange +Input [1]: [count#39] +Arguments: SinglePartition, true, [id=#40] + +(11) HashAggregate [codegen id : 2] +Input [1]: [count#39] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#41] +Results [1]: [count(1)#41 AS count(1)#42] + +Subquery:2 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#4, [id=#5] +* HashAggregate (18) ++- Exchange (17) + +- * HashAggregate (16) + +- * Project (15) + +- * Filter (14) + +- * ColumnarToRow (13) + +- Scan parquet default.store_sales (12) + + +(12) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] +ReadSchema: struct + +(13) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(14) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 1)) AND (ss_quantity#37 <= 20)) + +(15) Project [codegen id : 1] +Output [1]: [ss_ext_discount_amt#43] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(16) HashAggregate [codegen id : 1] +Input [1]: [ss_ext_discount_amt#43] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [2]: [sum#44, count#45] +Results [2]: [sum#46, count#47] + +(17) Exchange +Input [2]: [sum#46, count#47] +Arguments: SinglePartition, true, [id=#48] + +(18) HashAggregate [codegen id : 2] +Input [2]: [sum#46, count#47] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#49] +Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#49 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#50] + +Subquery:3 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#6, [id=#7] +* HashAggregate (25) ++- Exchange (24) + +- * HashAggregate (23) + +- * Project (22) + +- * Filter (21) + +- * ColumnarToRow (20) + +- Scan parquet default.store_sales (19) + + +(19) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_net_paid#51] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(21) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 1)) AND (ss_quantity#37 <= 20)) + +(22) Project [codegen id : 1] +Output [1]: [ss_net_paid#51] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(23) HashAggregate [codegen id : 1] +Input [1]: [ss_net_paid#51] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [2]: [sum#52, count#53] +Results [2]: [sum#54, count#55] + +(24) Exchange +Input [2]: [sum#54, count#55] +Arguments: SinglePartition, true, [id=#56] + +(25) HashAggregate [codegen id : 2] +Input [2]: [sum#54, count#55] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#57] +Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#57 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#58] + +Subquery:4 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#9, [id=#10] +* HashAggregate (32) ++- Exchange (31) + +- * HashAggregate (30) + +- * Project (29) + +- * Filter (28) + +- * ColumnarToRow (27) + +- Scan parquet default.store_sales (26) + + +(26) Scan parquet default.store_sales +Output [1]: [ss_quantity#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] +ReadSchema: struct + +(27) ColumnarToRow [codegen id : 1] +Input [1]: [ss_quantity#37] + +(28) Filter [codegen id : 1] +Input [1]: [ss_quantity#37] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 21)) AND (ss_quantity#37 <= 40)) + +(29) Project [codegen id : 1] +Output: [] +Input [1]: [ss_quantity#37] + +(30) HashAggregate [codegen id : 1] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#59] +Results [1]: [count#60] + +(31) Exchange +Input [1]: [count#60] +Arguments: SinglePartition, true, [id=#61] + +(32) HashAggregate [codegen id : 2] +Input [1]: [count#60] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#62] +Results [1]: [count(1)#62 AS count(1)#63] + +Subquery:5 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#11, [id=#12] +* HashAggregate (39) ++- Exchange (38) + +- * HashAggregate (37) + +- * Project (36) + +- * Filter (35) + +- * ColumnarToRow (34) + +- Scan parquet default.store_sales (33) + + +(33) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(35) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 21)) AND (ss_quantity#37 <= 40)) + +(36) Project [codegen id : 1] +Output [1]: [ss_ext_discount_amt#43] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(37) HashAggregate [codegen id : 1] +Input [1]: [ss_ext_discount_amt#43] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [2]: [sum#64, count#65] +Results [2]: [sum#66, count#67] + +(38) Exchange +Input [2]: [sum#66, count#67] +Arguments: SinglePartition, true, [id=#68] + +(39) HashAggregate [codegen id : 2] +Input [2]: [sum#66, count#67] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#69] +Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#69 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#70] + +Subquery:6 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#13, [id=#14] +* HashAggregate (46) ++- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * Filter (42) + +- * ColumnarToRow (41) + +- Scan parquet default.store_sales (40) + + +(40) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_net_paid#51] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] +ReadSchema: struct + +(41) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(42) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 21)) AND (ss_quantity#37 <= 40)) + +(43) Project [codegen id : 1] +Output [1]: [ss_net_paid#51] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(44) HashAggregate [codegen id : 1] +Input [1]: [ss_net_paid#51] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [2]: [sum#71, count#72] +Results [2]: [sum#73, count#74] + +(45) Exchange +Input [2]: [sum#73, count#74] +Arguments: SinglePartition, true, [id=#75] + +(46) HashAggregate [codegen id : 2] +Input [2]: [sum#73, count#74] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#76] +Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#76 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#77] + +Subquery:7 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#16, [id=#17] +* HashAggregate (53) ++- Exchange (52) + +- * HashAggregate (51) + +- * Project (50) + +- * Filter (49) + +- * ColumnarToRow (48) + +- Scan parquet default.store_sales (47) + + +(47) Scan parquet default.store_sales +Output [1]: [ss_quantity#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 1] +Input [1]: [ss_quantity#37] + +(49) Filter [codegen id : 1] +Input [1]: [ss_quantity#37] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 41)) AND (ss_quantity#37 <= 60)) + +(50) Project [codegen id : 1] +Output: [] +Input [1]: [ss_quantity#37] + +(51) HashAggregate [codegen id : 1] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#78] +Results [1]: [count#79] + +(52) Exchange +Input [1]: [count#79] +Arguments: SinglePartition, true, [id=#80] + +(53) HashAggregate [codegen id : 2] +Input [1]: [count#79] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#81] +Results [1]: [count(1)#81 AS count(1)#82] + +Subquery:8 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#18, [id=#19] +* HashAggregate (60) ++- Exchange (59) + +- * HashAggregate (58) + +- * Project (57) + +- * Filter (56) + +- * ColumnarToRow (55) + +- Scan parquet default.store_sales (54) + + +(54) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] +ReadSchema: struct + +(55) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(56) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 41)) AND (ss_quantity#37 <= 60)) + +(57) Project [codegen id : 1] +Output [1]: [ss_ext_discount_amt#43] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(58) HashAggregate [codegen id : 1] +Input [1]: [ss_ext_discount_amt#43] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [2]: [sum#83, count#84] +Results [2]: [sum#85, count#86] + +(59) Exchange +Input [2]: [sum#85, count#86] +Arguments: SinglePartition, true, [id=#87] + +(60) HashAggregate [codegen id : 2] +Input [2]: [sum#85, count#86] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#88] +Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#88 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#89] + +Subquery:9 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#20, [id=#21] +* HashAggregate (67) ++- Exchange (66) + +- * HashAggregate (65) + +- * Project (64) + +- * Filter (63) + +- * ColumnarToRow (62) + +- Scan parquet default.store_sales (61) + + +(61) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_net_paid#51] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] +ReadSchema: struct + +(62) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(63) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 41)) AND (ss_quantity#37 <= 60)) + +(64) Project [codegen id : 1] +Output [1]: [ss_net_paid#51] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(65) HashAggregate [codegen id : 1] +Input [1]: [ss_net_paid#51] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [2]: [sum#90, count#91] +Results [2]: [sum#92, count#93] + +(66) Exchange +Input [2]: [sum#92, count#93] +Arguments: SinglePartition, true, [id=#94] + +(67) HashAggregate [codegen id : 2] +Input [2]: [sum#92, count#93] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#95] +Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#95 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#96] + +Subquery:10 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#23, [id=#24] +* HashAggregate (74) ++- Exchange (73) + +- * HashAggregate (72) + +- * Project (71) + +- * Filter (70) + +- * ColumnarToRow (69) + +- Scan parquet default.store_sales (68) + + +(68) Scan parquet default.store_sales +Output [1]: [ss_quantity#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] +ReadSchema: struct + +(69) ColumnarToRow [codegen id : 1] +Input [1]: [ss_quantity#37] + +(70) Filter [codegen id : 1] +Input [1]: [ss_quantity#37] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 61)) AND (ss_quantity#37 <= 80)) + +(71) Project [codegen id : 1] +Output: [] +Input [1]: [ss_quantity#37] + +(72) HashAggregate [codegen id : 1] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#97] +Results [1]: [count#98] + +(73) Exchange +Input [1]: [count#98] +Arguments: SinglePartition, true, [id=#99] + +(74) HashAggregate [codegen id : 2] +Input [1]: [count#98] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#100] +Results [1]: [count(1)#100 AS count(1)#101] + +Subquery:11 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#25, [id=#26] +* HashAggregate (81) ++- Exchange (80) + +- * HashAggregate (79) + +- * Project (78) + +- * Filter (77) + +- * ColumnarToRow (76) + +- Scan parquet default.store_sales (75) + + +(75) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] +ReadSchema: struct + +(76) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(77) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 61)) AND (ss_quantity#37 <= 80)) + +(78) Project [codegen id : 1] +Output [1]: [ss_ext_discount_amt#43] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(79) HashAggregate [codegen id : 1] +Input [1]: [ss_ext_discount_amt#43] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [2]: [sum#102, count#103] +Results [2]: [sum#104, count#105] + +(80) Exchange +Input [2]: [sum#104, count#105] +Arguments: SinglePartition, true, [id=#106] + +(81) HashAggregate [codegen id : 2] +Input [2]: [sum#104, count#105] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#107] +Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#107 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#108] + +Subquery:12 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#27, [id=#28] +* HashAggregate (88) ++- Exchange (87) + +- * HashAggregate (86) + +- * Project (85) + +- * Filter (84) + +- * ColumnarToRow (83) + +- Scan parquet default.store_sales (82) + + +(82) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_net_paid#51] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] +ReadSchema: struct + +(83) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(84) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 61)) AND (ss_quantity#37 <= 80)) + +(85) Project [codegen id : 1] +Output [1]: [ss_net_paid#51] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(86) HashAggregate [codegen id : 1] +Input [1]: [ss_net_paid#51] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [2]: [sum#109, count#110] +Results [2]: [sum#111, count#112] + +(87) Exchange +Input [2]: [sum#111, count#112] +Arguments: SinglePartition, true, [id=#113] + +(88) HashAggregate [codegen id : 2] +Input [2]: [sum#111, count#112] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#114] +Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#114 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#115] + +Subquery:13 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#30, [id=#31] +* HashAggregate (95) ++- Exchange (94) + +- * HashAggregate (93) + +- * Project (92) + +- * Filter (91) + +- * ColumnarToRow (90) + +- Scan parquet default.store_sales (89) + + +(89) Scan parquet default.store_sales +Output [1]: [ss_quantity#37] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] +ReadSchema: struct + +(90) ColumnarToRow [codegen id : 1] +Input [1]: [ss_quantity#37] + +(91) Filter [codegen id : 1] +Input [1]: [ss_quantity#37] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 81)) AND (ss_quantity#37 <= 100)) + +(92) Project [codegen id : 1] +Output: [] +Input [1]: [ss_quantity#37] + +(93) HashAggregate [codegen id : 1] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#116] +Results [1]: [count#117] + +(94) Exchange +Input [1]: [count#117] +Arguments: SinglePartition, true, [id=#118] + +(95) HashAggregate [codegen id : 2] +Input [1]: [count#117] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#119] +Results [1]: [count(1)#119 AS count(1)#120] + +Subquery:14 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#32, [id=#33] +* HashAggregate (102) ++- Exchange (101) + +- * HashAggregate (100) + +- * Project (99) + +- * Filter (98) + +- * ColumnarToRow (97) + +- Scan parquet default.store_sales (96) + + +(96) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] +ReadSchema: struct + +(97) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(98) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 81)) AND (ss_quantity#37 <= 100)) + +(99) Project [codegen id : 1] +Output [1]: [ss_ext_discount_amt#43] +Input [2]: [ss_quantity#37, ss_ext_discount_amt#43] + +(100) HashAggregate [codegen id : 1] +Input [1]: [ss_ext_discount_amt#43] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [2]: [sum#121, count#122] +Results [2]: [sum#123, count#124] + +(101) Exchange +Input [2]: [sum#123, count#124] +Arguments: SinglePartition, true, [id=#125] + +(102) HashAggregate [codegen id : 2] +Input [2]: [sum#123, count#124] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#43))#126] +Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#43))#126 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#127] + +Subquery:15 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#34, [id=#35] +* HashAggregate (109) ++- Exchange (108) + +- * HashAggregate (107) + +- * Project (106) + +- * Filter (105) + +- * ColumnarToRow (104) + +- Scan parquet default.store_sales (103) + + +(103) Scan parquet default.store_sales +Output [2]: [ss_quantity#37, ss_net_paid#51] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] +ReadSchema: struct + +(104) ColumnarToRow [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(105) Filter [codegen id : 1] +Input [2]: [ss_quantity#37, ss_net_paid#51] +Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 81)) AND (ss_quantity#37 <= 100)) + +(106) Project [codegen id : 1] +Output [1]: [ss_net_paid#51] +Input [2]: [ss_quantity#37, ss_net_paid#51] + +(107) HashAggregate [codegen id : 1] +Input [1]: [ss_net_paid#51] +Keys: [] +Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [2]: [sum#128, count#129] +Results [2]: [sum#130, count#131] + +(108) Exchange +Input [2]: [sum#130, count#131] +Arguments: SinglePartition, true, [id=#132] + +(109) HashAggregate [codegen id : 2] +Input [2]: [sum#130, count#131] +Keys: [] +Functions [1]: [avg(UnscaledValue(ss_net_paid#51))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#51))#133] +Results [1]: [cast((avg(UnscaledValue(ss_net_paid#51))#133 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#134] + + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/simplified.txt index f0bccc7ec..66502080d 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q9/simplified.txt @@ -1,154 +1,186 @@ -WholeStageCodegen +WholeStageCodegen (1) Project Subquery #1 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),count(1)] + WholeStageCodegen (2) + HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 - WholeStageCodegen - HashAggregate [count,count] [count,count] + WholeStageCodegen (1) + HashAggregate [count,count] Project Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity] [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity] Subquery #2 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] + WholeStageCodegen (2) + HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] InputAdapter Exchange #2 - WholeStageCodegen - HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] Project [ss_ext_discount_amt] Filter [ss_quantity] - Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] Subquery #3 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] + WholeStageCodegen (2) + HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] InputAdapter Exchange #3 - WholeStageCodegen - HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] + WholeStageCodegen (1) + HashAggregate [ss_net_paid] [sum,count,sum,count] Project [ss_net_paid] Filter [ss_quantity] - Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_net_paid] Subquery #4 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),count(1)] + WholeStageCodegen (2) + HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #4 - WholeStageCodegen - HashAggregate [count,count] [count,count] + WholeStageCodegen (1) + HashAggregate [count,count] Project Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity] [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity] Subquery #5 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] + WholeStageCodegen (2) + HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] InputAdapter Exchange #5 - WholeStageCodegen - HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] Project [ss_ext_discount_amt] Filter [ss_quantity] - Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] Subquery #6 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] + WholeStageCodegen (2) + HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] InputAdapter Exchange #6 - WholeStageCodegen - HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] + WholeStageCodegen (1) + HashAggregate [ss_net_paid] [sum,count,sum,count] Project [ss_net_paid] Filter [ss_quantity] - Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_net_paid] Subquery #7 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),count(1)] + WholeStageCodegen (2) + HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #7 - WholeStageCodegen - HashAggregate [count,count] [count,count] + WholeStageCodegen (1) + HashAggregate [count,count] Project Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity] [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity] Subquery #8 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] + WholeStageCodegen (2) + HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] InputAdapter Exchange #8 - WholeStageCodegen - HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] Project [ss_ext_discount_amt] Filter [ss_quantity] - Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] Subquery #9 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] + WholeStageCodegen (2) + HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] InputAdapter Exchange #9 - WholeStageCodegen - HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] + WholeStageCodegen (1) + HashAggregate [ss_net_paid] [sum,count,sum,count] Project [ss_net_paid] Filter [ss_quantity] - Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_net_paid] Subquery #10 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),count(1)] + WholeStageCodegen (2) + HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #10 - WholeStageCodegen - HashAggregate [count,count] [count,count] + WholeStageCodegen (1) + HashAggregate [count,count] Project Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity] [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity] Subquery #11 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] + WholeStageCodegen (2) + HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] InputAdapter Exchange #11 - WholeStageCodegen - HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] Project [ss_ext_discount_amt] Filter [ss_quantity] - Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] Subquery #12 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] + WholeStageCodegen (2) + HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] InputAdapter Exchange #12 - WholeStageCodegen - HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] + WholeStageCodegen (1) + HashAggregate [ss_net_paid] [sum,count,sum,count] Project [ss_net_paid] Filter [ss_quantity] - Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_net_paid] Subquery #13 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),count(1)] + WholeStageCodegen (2) + HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #13 - WholeStageCodegen - HashAggregate [count,count] [count,count] + WholeStageCodegen (1) + HashAggregate [count,count] Project Filter [ss_quantity] - Scan parquet default.store_sales [ss_quantity] [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity] Subquery #14 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_ext_discount_amt)),count,sum] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),count,sum] + WholeStageCodegen (2) + HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] InputAdapter Exchange #14 - WholeStageCodegen - HashAggregate [count,count,ss_ext_discount_amt,sum,sum] [count,count,sum,sum] + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] Project [ss_ext_discount_amt] Filter [ss_quantity] - Scan parquet default.store_sales [ss_ext_discount_amt,ss_quantity] [ss_ext_discount_amt,ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt] Subquery #15 - WholeStageCodegen - HashAggregate [avg(UnscaledValue(ss_net_paid)),count,sum] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),count,sum] + WholeStageCodegen (2) + HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] InputAdapter Exchange #15 - WholeStageCodegen - HashAggregate [count,count,ss_net_paid,sum,sum] [count,count,sum,sum] + WholeStageCodegen (1) + HashAggregate [ss_net_paid] [sum,count,sum,count] Project [ss_net_paid] Filter [ss_quantity] - Scan parquet default.store_sales [ss_net_paid,ss_quantity] [ss_net_paid,ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_net_paid] Filter [r_reason_sk] - Scan parquet default.reason [r_reason_sk] [r_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet default.reason [r_reason_sk] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/explain.txt index 909e73efa..550bf89ce 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/explain.txt @@ -1,47 +1,280 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[am_pm_ratio#1 ASC NULLS FIRST], output=[am_pm_ratio#1]) -+- *(11) Project [CheckOverflow((promote_precision(cast(amc#2 as decimal(15,4))) / promote_precision(cast(pmc#3 as decimal(15,4)))), DecimalType(35,20)) AS am_pm_ratio#1] - +- BroadcastNestedLoopJoin BuildRight, Inner - :- *(5) HashAggregate(keys=[], functions=[count(1)]) - : +- Exchange SinglePartition - : +- *(4) HashAggregate(keys=[], functions=[partial_count(1)]) - : +- *(4) Project - : +- *(4) BroadcastHashJoin [ws_web_page_sk#4], [wp_web_page_sk#5], Inner, BuildRight - : :- *(4) Project [ws_web_page_sk#4] - : : +- *(4) BroadcastHashJoin [ws_sold_time_sk#6], [t_time_sk#7], Inner, BuildRight - : : :- *(4) Project [ws_sold_time_sk#6, ws_web_page_sk#4] - : : : +- *(4) BroadcastHashJoin [ws_ship_hdemo_sk#8], [hd_demo_sk#9], Inner, BuildRight - : : : :- *(4) Project [ws_sold_time_sk#6, ws_ship_hdemo_sk#8, ws_web_page_sk#4] - : : : : +- *(4) Filter ((isnotnull(ws_ship_hdemo_sk#8) && isnotnull(ws_sold_time_sk#6)) && isnotnull(ws_web_page_sk#4)) - : : : : +- *(4) FileScan parquet default.web_sales[ws_sold_time_sk#6,ws_ship_hdemo_sk#8,ws_web_page_sk#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(1) Project [hd_demo_sk#9] - : : : +- *(1) Filter ((isnotnull(hd_dep_count#10) && (hd_dep_count#10 = 6)) && isnotnull(hd_demo_sk#9)) - : : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#9,hd_dep_count#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,6), IsNotNull(hd_demo_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [t_time_sk#7] - : : +- *(2) Filter (((isnotnull(t_hour#11) && (t_hour#11 >= 8)) && (t_hour#11 <= 9)) && isnotnull(t_time_sk#7)) - : : +- *(2) FileScan parquet default.time_dim[t_time_sk#7,t_hour#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,8), LessThanOrEqual(t_hour,9), IsNotNull(t_time_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [wp_web_page_sk#5] - : +- *(3) Filter (((isnotnull(wp_char_count#12) && (wp_char_count#12 >= 5000)) && (wp_char_count#12 <= 5200)) && isnotnull(wp_web_page_sk#5)) - : +- *(3) FileScan parquet default.web_page[wp_web_page_sk#5,wp_char_count#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_page], PartitionFilters: [], PushedFilters: [IsNotNull(wp_char_count), GreaterThanOrEqual(wp_char_count,5000), LessThanOrEqual(wp_char_count,..., ReadSchema: struct - +- BroadcastExchange IdentityBroadcastMode - +- *(10) HashAggregate(keys=[], functions=[count(1)]) - +- Exchange SinglePartition - +- *(9) HashAggregate(keys=[], functions=[partial_count(1)]) - +- *(9) Project - +- *(9) BroadcastHashJoin [ws_web_page_sk#4], [wp_web_page_sk#5], Inner, BuildRight - :- *(9) Project [ws_web_page_sk#4] - : +- *(9) BroadcastHashJoin [ws_sold_time_sk#6], [t_time_sk#7], Inner, BuildRight - : :- *(9) Project [ws_sold_time_sk#6, ws_web_page_sk#4] - : : +- *(9) BroadcastHashJoin [ws_ship_hdemo_sk#8], [hd_demo_sk#9], Inner, BuildRight - : : :- *(9) Project [ws_sold_time_sk#6, ws_ship_hdemo_sk#8, ws_web_page_sk#4] - : : : +- *(9) Filter ((isnotnull(ws_ship_hdemo_sk#8) && isnotnull(ws_sold_time_sk#6)) && isnotnull(ws_web_page_sk#4)) - : : : +- *(9) FileScan parquet default.web_sales[ws_sold_time_sk#6,ws_ship_hdemo_sk#8,ws_web_page_sk#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)], ReadSchema: struct - : : +- ReusedExchange [hd_demo_sk#9], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(7) Project [t_time_sk#7] - : +- *(7) Filter (((isnotnull(t_hour#11) && (t_hour#11 >= 19)) && (t_hour#11 <= 20)) && isnotnull(t_time_sk#7)) - : +- *(7) FileScan parquet default.time_dim[t_time_sk#7,t_hour#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,19), LessThanOrEqual(t_hour,20), IsNotNull(t_time_sk)], ReadSchema: struct - +- ReusedExchange [wp_web_page_sk#5], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +TakeOrderedAndProject (50) ++- * Project (49) + +- BroadcastNestedLoopJoin Inner BuildRight (48) + :- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.household_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.time_dim (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.web_page (18) + +- BroadcastExchange (47) + +- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.web_sales (28) + : : +- ReusedExchange (31) + : +- BroadcastExchange (38) + : +- * Project (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet default.time_dim (34) + +- ReusedExchange (41) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] + +(3) Filter [codegen id : 4] +Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] +Condition : ((isnotnull(ws_ship_hdemo_sk#2) AND isnotnull(ws_sold_time_sk#1)) AND isnotnull(ws_web_page_sk#3)) + +(4) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#4, hd_dep_count#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,6), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [hd_demo_sk#4, hd_dep_count#5] + +(6) Filter [codegen id : 1] +Input [2]: [hd_demo_sk#4, hd_dep_count#5] +Condition : ((isnotnull(hd_dep_count#5) AND (hd_dep_count#5 = 6)) AND isnotnull(hd_demo_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [hd_demo_sk#4] +Input [2]: [hd_demo_sk#4, hd_dep_count#5] + +(8) BroadcastExchange +Input [1]: [hd_demo_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_ship_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(10) Project [codegen id : 4] +Output [2]: [ws_sold_time_sk#1, ws_web_page_sk#3] +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, hd_demo_sk#4] + +(11) Scan parquet default.time_dim +Output [2]: [t_time_sk#7, t_hour#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,8), LessThanOrEqual(t_hour,9), IsNotNull(t_time_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [t_time_sk#7, t_hour#8] + +(13) Filter [codegen id : 2] +Input [2]: [t_time_sk#7, t_hour#8] +Condition : (((isnotnull(t_hour#8) AND (t_hour#8 >= 8)) AND (t_hour#8 <= 9)) AND isnotnull(t_time_sk#7)) + +(14) Project [codegen id : 2] +Output [1]: [t_time_sk#7] +Input [2]: [t_time_sk#7, t_hour#8] + +(15) BroadcastExchange +Input [1]: [t_time_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_sold_time_sk#1] +Right keys [1]: [t_time_sk#7] +Join condition: None + +(17) Project [codegen id : 4] +Output [1]: [ws_web_page_sk#3] +Input [3]: [ws_sold_time_sk#1, ws_web_page_sk#3, t_time_sk#7] + +(18) Scan parquet default.web_page +Output [2]: [wp_web_page_sk#10, wp_char_count#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_page] +PushedFilters: [IsNotNull(wp_char_count), GreaterThanOrEqual(wp_char_count,5000), LessThanOrEqual(wp_char_count,5200), IsNotNull(wp_web_page_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [wp_web_page_sk#10, wp_char_count#11] + +(20) Filter [codegen id : 3] +Input [2]: [wp_web_page_sk#10, wp_char_count#11] +Condition : (((isnotnull(wp_char_count#11) AND (wp_char_count#11 >= 5000)) AND (wp_char_count#11 <= 5200)) AND isnotnull(wp_web_page_sk#10)) + +(21) Project [codegen id : 3] +Output [1]: [wp_web_page_sk#10] +Input [2]: [wp_web_page_sk#10, wp_char_count#11] + +(22) BroadcastExchange +Input [1]: [wp_web_page_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_web_page_sk#3] +Right keys [1]: [wp_web_page_sk#10] +Join condition: None + +(24) Project [codegen id : 4] +Output: [] +Input [2]: [ws_web_page_sk#3, wp_web_page_sk#10] + +(25) HashAggregate [codegen id : 4] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#13] +Results [1]: [count#14] + +(26) Exchange +Input [1]: [count#14] +Arguments: SinglePartition, true, [id=#15] + +(27) HashAggregate [codegen id : 5] +Input [1]: [count#14] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#16] +Results [1]: [count(1)#16 AS amc#17] + +(28) Scan parquet default.web_sales +Output [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 9] +Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] + +(30) Filter [codegen id : 9] +Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3] +Condition : ((isnotnull(ws_ship_hdemo_sk#2) AND isnotnull(ws_sold_time_sk#1)) AND isnotnull(ws_web_page_sk#3)) + +(31) ReusedExchange [Reuses operator id: 8] +Output [1]: [hd_demo_sk#4] + +(32) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_ship_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(33) Project [codegen id : 9] +Output [2]: [ws_sold_time_sk#1, ws_web_page_sk#3] +Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, hd_demo_sk#4] + +(34) Scan parquet default.time_dim +Output [2]: [t_time_sk#7, t_hour#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,19), LessThanOrEqual(t_hour,20), IsNotNull(t_time_sk)] +ReadSchema: struct + +(35) ColumnarToRow [codegen id : 7] +Input [2]: [t_time_sk#7, t_hour#8] + +(36) Filter [codegen id : 7] +Input [2]: [t_time_sk#7, t_hour#8] +Condition : (((isnotnull(t_hour#8) AND (t_hour#8 >= 19)) AND (t_hour#8 <= 20)) AND isnotnull(t_time_sk#7)) + +(37) Project [codegen id : 7] +Output [1]: [t_time_sk#7] +Input [2]: [t_time_sk#7, t_hour#8] + +(38) BroadcastExchange +Input [1]: [t_time_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(39) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_sold_time_sk#1] +Right keys [1]: [t_time_sk#7] +Join condition: None + +(40) Project [codegen id : 9] +Output [1]: [ws_web_page_sk#3] +Input [3]: [ws_sold_time_sk#1, ws_web_page_sk#3, t_time_sk#7] + +(41) ReusedExchange [Reuses operator id: 22] +Output [1]: [wp_web_page_sk#10] + +(42) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_web_page_sk#3] +Right keys [1]: [wp_web_page_sk#10] +Join condition: None + +(43) Project [codegen id : 9] +Output: [] +Input [2]: [ws_web_page_sk#3, wp_web_page_sk#10] + +(44) HashAggregate [codegen id : 9] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#19] +Results [1]: [count#20] + +(45) Exchange +Input [1]: [count#20] +Arguments: SinglePartition, true, [id=#21] + +(46) HashAggregate [codegen id : 10] +Input [1]: [count#20] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#22] +Results [1]: [count(1)#22 AS pmc#23] + +(47) BroadcastExchange +Input [1]: [pmc#23] +Arguments: IdentityBroadcastMode, [id=#24] + +(48) BroadcastNestedLoopJoin +Join condition: None + +(49) Project [codegen id : 11] +Output [1]: [CheckOverflow((promote_precision(cast(amc#17 as decimal(15,4))) / promote_precision(cast(pmc#23 as decimal(15,4)))), DecimalType(35,20), true) AS am_pm_ratio#25] +Input [2]: [amc#17, pmc#23] + +(50) TakeOrderedAndProject +Input [1]: [am_pm_ratio#25] +Arguments: 100, [am_pm_ratio#25 ASC NULLS FIRST], [am_pm_ratio#25] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/simplified.txt index 1d0dc7ad2..121d84d9d 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q90/simplified.txt @@ -1,64 +1,74 @@ TakeOrderedAndProject [am_pm_ratio] - WholeStageCodegen + WholeStageCodegen (11) Project [amc,pmc] InputAdapter BroadcastNestedLoopJoin - WholeStageCodegen - HashAggregate [count,count(1)] [amc,count,count(1)] + WholeStageCodegen (5) + HashAggregate [count] [count(1),amc,count] InputAdapter Exchange #1 - WholeStageCodegen - HashAggregate [count,count] [count,count] + WholeStageCodegen (4) + HashAggregate [count,count] Project - BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] Project [ws_web_page_sk] - BroadcastHashJoin [t_time_sk,ws_sold_time_sk] + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] Project [ws_sold_time_sk,ws_web_page_sk] - BroadcastHashJoin [hd_demo_sk,ws_ship_hdemo_sk] - Project [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - Scan parquet default.web_sales [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (1) Project [hd_demo_sk] - Filter [hd_demo_sk,hd_dep_count] - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] [hd_demo_sk,hd_dep_count] + Filter [hd_dep_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (2) Project [t_time_sk] Filter [t_hour,t_time_sk] - Scan parquet default.time_dim [t_hour,t_time_sk] [t_hour,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (3) Project [wp_web_page_sk] Filter [wp_char_count,wp_web_page_sk] - Scan parquet default.web_page [wp_char_count,wp_web_page_sk] [wp_char_count,wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_page [wp_web_page_sk,wp_char_count] BroadcastExchange #5 - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),pmc] + WholeStageCodegen (10) + HashAggregate [count] [count(1),pmc,count] InputAdapter Exchange #6 - WholeStageCodegen - HashAggregate [count,count] [count,count] + WholeStageCodegen (9) + HashAggregate [count,count] Project - BroadcastHashJoin [wp_web_page_sk,ws_web_page_sk] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] Project [ws_web_page_sk] - BroadcastHashJoin [t_time_sk,ws_sold_time_sk] + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] Project [ws_sold_time_sk,ws_web_page_sk] - BroadcastHashJoin [hd_demo_sk,ws_ship_hdemo_sk] - Project [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - Scan parquet default.web_sales [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] InputAdapter - ReusedExchange [hd_demo_sk] [hd_demo_sk] #2 + ReusedExchange [hd_demo_sk] #2 InputAdapter BroadcastExchange #7 - WholeStageCodegen + WholeStageCodegen (7) Project [t_time_sk] Filter [t_hour,t_time_sk] - Scan parquet default.time_dim [t_hour,t_time_sk] [t_hour,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour] InputAdapter - ReusedExchange [wp_web_page_sk] [wp_web_page_sk] #4 + ReusedExchange [wp_web_page_sk] #4 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/explain.txt index 66e681cea..1956baf78 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/explain.txt @@ -1,45 +1,264 @@ == Physical Plan == -*(9) Sort [Returns_Loss#1 DESC NULLS LAST], true, 0 -+- Exchange rangepartitioning(Returns_Loss#1 DESC NULLS LAST, 5) - +- *(8) HashAggregate(keys=[cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6], functions=[sum(UnscaledValue(cr_net_loss#7))]) - +- Exchange hashpartitioning(cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6, 5) - +- *(7) HashAggregate(keys=[cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#5, cd_education_status#6], functions=[partial_sum(UnscaledValue(cr_net_loss#7))]) - +- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#5, cd_education_status#6] - +- *(7) BroadcastHashJoin [c_current_hdemo_sk#8], [hd_demo_sk#9], Inner, BuildRight - :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#8, cd_marital_status#5, cd_education_status#6] - : +- *(7) BroadcastHashJoin [c_current_cdemo_sk#10], [cd_demo_sk#11], Inner, BuildRight - : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#10, c_current_hdemo_sk#8] - : : +- *(7) BroadcastHashJoin [c_current_addr_sk#12], [ca_address_sk#13], Inner, BuildRight - : : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#10, c_current_hdemo_sk#8, c_current_addr_sk#12] - : : : +- *(7) BroadcastHashJoin [cr_returning_customer_sk#14], [c_customer_sk#15], Inner, BuildRight - : : : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#14, cr_net_loss#7] - : : : : +- *(7) BroadcastHashJoin [cr_returned_date_sk#16], [d_date_sk#17], Inner, BuildRight - : : : : :- *(7) Project [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returned_date_sk#16, cr_returning_customer_sk#14, cr_net_loss#7] - : : : : : +- *(7) BroadcastHashJoin [cc_call_center_sk#18], [cr_call_center_sk#19], Inner, BuildRight - : : : : : :- *(7) Project [cc_call_center_sk#18, cc_call_center_id#2, cc_name#3, cc_manager#4] - : : : : : : +- *(7) Filter isnotnull(cc_call_center_sk#18) - : : : : : : +- *(7) FileScan parquet default.call_center[cc_call_center_sk#18,cc_call_center_id#2,cc_name#3,cc_manager#4] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk)], ReadSchema: struct - : : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))) - : : : : : +- *(1) Project [cr_returned_date_sk#16, cr_returning_customer_sk#14, cr_call_center_sk#19, cr_net_loss#7] - : : : : : +- *(1) Filter ((isnotnull(cr_call_center_sk#19) && isnotnull(cr_returned_date_sk#16)) && isnotnull(cr_returning_customer_sk#14)) - : : : : : +- *(1) FileScan parquet default.catalog_returns[cr_returned_date_sk#16,cr_returning_customer_sk#14,cr_call_center_sk#19,cr_net_loss#7] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_returns], PartitionFilters: [], PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_customer_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(3) Project [c_customer_sk#15, c_current_cdemo_sk#10, c_current_hdemo_sk#8, c_current_addr_sk#12] - : : : +- *(3) Filter (((isnotnull(c_customer_sk#15) && isnotnull(c_current_addr_sk#12)) && isnotnull(c_current_cdemo_sk#10)) && isnotnull(c_current_hdemo_sk#8)) - : : : +- *(3) FileScan parquet default.customer[c_customer_sk#15,c_current_cdemo_sk#10,c_current_hdemo_sk#8,c_current_addr_sk#12] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer], PartitionFilters: [], PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(4) Project [ca_address_sk#13] - : : +- *(4) Filter ((isnotnull(ca_gmt_offset#22) && (ca_gmt_offset#22 = -7.00)) && isnotnull(ca_address_sk#13)) - : : +- *(4) FileScan parquet default.customer_address[ca_address_sk#13,ca_gmt_offset#22] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(5) Project [cd_demo_sk#11, cd_marital_status#5, cd_education_status#6] - : +- *(5) Filter ((((cd_marital_status#5 = M) && (cd_education_status#6 = Unknown)) || ((cd_marital_status#5 = W) && (cd_education_status#6 = Advanced Degree))) && isnotnull(cd_demo_sk#11)) - : +- *(5) FileScan parquet default.customer_demographics[cd_demo_sk#11,cd_marital_status#5,cd_education_status#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_demographics], PartitionFilters: [], PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(6) Project [hd_demo_sk#9] - +- *(6) Filter ((isnotnull(hd_buy_potential#23) && StartsWith(hd_buy_potential#23, Unknown)) && isnotnull(hd_demo_sk#9)) - +- *(6) FileScan parquet default.household_demographics[hd_demo_sk#9,hd_buy_potential#23] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)], ReadSchema: struct \ No newline at end of file +* Sort (47) ++- Exchange (46) + +- * HashAggregate (45) + +- Exchange (44) + +- * HashAggregate (43) + +- * Project (42) + +- * BroadcastHashJoin Inner BuildRight (41) + :- * Project (35) + : +- * BroadcastHashJoin Inner BuildRight (34) + : :- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Project (22) + : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : :- * Project (16) + : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : :- * Project (9) + : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.call_center (1) + : : : : : +- BroadcastExchange (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.catalog_returns (4) + : : : : +- BroadcastExchange (14) + : : : : +- * Project (13) + : : : : +- * Filter (12) + : : : : +- * ColumnarToRow (11) + : : : : +- Scan parquet default.date_dim (10) + : : : +- BroadcastExchange (20) + : : : +- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.customer (17) + : : +- BroadcastExchange (27) + : : +- * Project (26) + : : +- * Filter (25) + : : +- * ColumnarToRow (24) + : : +- Scan parquet default.customer_address (23) + : +- BroadcastExchange (33) + : +- * Filter (32) + : +- * ColumnarToRow (31) + : +- Scan parquet default.customer_demographics (30) + +- BroadcastExchange (40) + +- * Project (39) + +- * Filter (38) + +- * ColumnarToRow (37) + +- Scan parquet default.household_demographics (36) + + +(1) Scan parquet default.call_center +Output [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 7] +Input [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] + +(3) Filter [codegen id : 7] +Input [4]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4] +Condition : isnotnull(cc_call_center_sk#1) + +(4) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_call_center_sk#7, cr_net_loss#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_customer_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [4]: [cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_call_center_sk#7, cr_net_loss#8] + +(6) Filter [codegen id : 1] +Input [4]: [cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_call_center_sk#7, cr_net_loss#8] +Condition : ((isnotnull(cr_call_center_sk#7) AND isnotnull(cr_returned_date_sk#5)) AND isnotnull(cr_returning_customer_sk#6)) + +(7) BroadcastExchange +Input [4]: [cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_call_center_sk#7, cr_net_loss#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, false] as bigint)),false), [id=#9] + +(8) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cc_call_center_sk#1] +Right keys [1]: [cr_call_center_sk#7] +Join condition: None + +(9) Project [codegen id : 7] +Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_net_loss#8] +Input [8]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_call_center_sk#7, cr_net_loss#8] + +(10) Scan parquet default.date_dim +Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(12) Filter [codegen id : 2] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] +Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#12)) AND (d_year#11 = 1998)) AND (d_moy#12 = 11)) AND isnotnull(d_date_sk#10)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#11, d_moy#12] + +(14) BroadcastExchange +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cr_returned_date_sk#5] +Right keys [1]: [d_date_sk#10] +Join condition: None + +(16) Project [codegen id : 7] +Output [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#6, cr_net_loss#8] +Input [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returned_date_sk#5, cr_returning_customer_sk#6, cr_net_loss#8, d_date_sk#10] + +(17) Scan parquet default.customer +Output [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] +ReadSchema: struct + +(18) ColumnarToRow [codegen id : 3] +Input [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17] + +(19) Filter [codegen id : 3] +Input [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17] +Condition : (((isnotnull(c_customer_sk#14) AND isnotnull(c_current_addr_sk#17)) AND isnotnull(c_current_cdemo_sk#15)) AND isnotnull(c_current_hdemo_sk#16)) + +(20) BroadcastExchange +Input [4]: [c_customer_sk#14, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] + +(21) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [cr_returning_customer_sk#6] +Right keys [1]: [c_customer_sk#14] +Join condition: None + +(22) Project [codegen id : 7] +Output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17] +Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#6, cr_net_loss#8, c_customer_sk#14, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17] + +(23) Scan parquet default.customer_address +Output [2]: [ca_address_sk#19, ca_gmt_offset#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(24) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#19, ca_gmt_offset#20] + +(25) Filter [codegen id : 4] +Input [2]: [ca_address_sk#19, ca_gmt_offset#20] +Condition : ((isnotnull(ca_gmt_offset#20) AND (ca_gmt_offset#20 = -7.00)) AND isnotnull(ca_address_sk#19)) + +(26) Project [codegen id : 4] +Output [1]: [ca_address_sk#19] +Input [2]: [ca_address_sk#19, ca_gmt_offset#20] + +(27) BroadcastExchange +Input [1]: [ca_address_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(28) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#17] +Right keys [1]: [ca_address_sk#19] +Join condition: None + +(29) Project [codegen id : 7] +Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, c_current_cdemo_sk#15, c_current_hdemo_sk#16] +Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, c_current_cdemo_sk#15, c_current_hdemo_sk#16, c_current_addr_sk#17, ca_address_sk#19] + +(30) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,Advanced Degree))), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(31) ColumnarToRow [codegen id : 5] +Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] + +(32) Filter [codegen id : 5] +Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] +Condition : ((((cd_marital_status#23 = M) AND (cd_education_status#24 = Unknown)) OR ((cd_marital_status#23 = W) AND (cd_education_status#24 = Advanced Degree))) AND isnotnull(cd_demo_sk#22)) + +(33) BroadcastExchange +Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] + +(34) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_cdemo_sk#15] +Right keys [1]: [cd_demo_sk#22] +Join condition: None + +(35) Project [codegen id : 7] +Output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, c_current_hdemo_sk#16, cd_marital_status#23, cd_education_status#24] +Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, c_current_cdemo_sk#15, c_current_hdemo_sk#16, cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] + +(36) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#26, hd_buy_potential#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 6] +Input [2]: [hd_demo_sk#26, hd_buy_potential#27] + +(38) Filter [codegen id : 6] +Input [2]: [hd_demo_sk#26, hd_buy_potential#27] +Condition : ((isnotnull(hd_buy_potential#27) AND StartsWith(hd_buy_potential#27, Unknown)) AND isnotnull(hd_demo_sk#26)) + +(39) Project [codegen id : 6] +Output [1]: [hd_demo_sk#26] +Input [2]: [hd_demo_sk#26, hd_buy_potential#27] + +(40) BroadcastExchange +Input [1]: [hd_demo_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] + +(41) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_hdemo_sk#16] +Right keys [1]: [hd_demo_sk#26] +Join condition: None + +(42) Project [codegen id : 7] +Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, cd_marital_status#23, cd_education_status#24] +Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, c_current_hdemo_sk#16, cd_marital_status#23, cd_education_status#24, hd_demo_sk#26] + +(43) HashAggregate [codegen id : 7] +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#8, cd_marital_status#23, cd_education_status#24] +Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#23, cd_education_status#24] +Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#8))] +Aggregate Attributes [1]: [sum#29] +Results [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#23, cd_education_status#24, sum#30] + +(44) Exchange +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#23, cd_education_status#24, sum#30] +Arguments: hashpartitioning(cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#23, cd_education_status#24, 5), true, [id=#31] + +(45) HashAggregate [codegen id : 8] +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#23, cd_education_status#24, sum#30] +Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#23, cd_education_status#24] +Functions [1]: [sum(UnscaledValue(cr_net_loss#8))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#8))#32] +Results [4]: [cc_call_center_id#2 AS Call_Center#33, cc_name#3 AS Call_Center_Name#34, cc_manager#4 AS Manager#35, MakeDecimal(sum(UnscaledValue(cr_net_loss#8))#32,17,2) AS Returns_Loss#36] + +(46) Exchange +Input [4]: [Call_Center#33, Call_Center_Name#34, Manager#35, Returns_Loss#36] +Arguments: rangepartitioning(Returns_Loss#36 DESC NULLS LAST, 5), true, [id=#37] + +(47) Sort [codegen id : 9] +Input [4]: [Call_Center#33, Call_Center_Name#34, Manager#35, Returns_Loss#36] +Arguments: [Returns_Loss#36 DESC NULLS LAST], true, 0 + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/simplified.txt index 1030b9095..58ebe15d1 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q91/simplified.txt @@ -1,61 +1,71 @@ -WholeStageCodegen +WholeStageCodegen (9) Sort [Returns_Loss] InputAdapter Exchange [Returns_Loss] #1 - WholeStageCodegen - HashAggregate [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,sum,sum(UnscaledValue(cr_net_loss))] [Call_Center,Call_Center_Name,Manager,Returns_Loss,sum,sum(UnscaledValue(cr_net_loss))] + WholeStageCodegen (8) + HashAggregate [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status,sum] [sum(UnscaledValue(cr_net_loss)),Call_Center,Call_Center_Name,Manager,Returns_Loss,sum] InputAdapter - Exchange [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status] #2 - WholeStageCodegen - HashAggregate [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,cr_net_loss,sum,sum] [sum,sum] - Project [cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,cr_net_loss] + Exchange [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status] #2 + WholeStageCodegen (7) + HashAggregate [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status,cr_net_loss] [sum,sum] + Project [cc_call_center_id,cc_name,cc_manager,cr_net_loss,cd_marital_status,cd_education_status] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [c_current_hdemo_sk,cc_call_center_id,cc_manager,cc_name,cd_education_status,cd_marital_status,cr_net_loss] + Project [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_hdemo_sk,cd_marital_status,cd_education_status] BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] - Project [c_current_cdemo_sk,c_current_hdemo_sk,cc_call_center_id,cc_manager,cc_name,cr_net_loss] + Project [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,cc_call_center_id,cc_manager,cc_name,cr_net_loss] - BroadcastHashJoin [c_customer_sk,cr_returning_customer_sk] - Project [cc_call_center_id,cc_manager,cc_name,cr_net_loss,cr_returning_customer_sk] + Project [cc_call_center_id,cc_name,cc_manager,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + BroadcastHashJoin [cr_returning_customer_sk,c_customer_sk] + Project [cc_call_center_id,cc_name,cc_manager,cr_returning_customer_sk,cr_net_loss] BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Project [cc_call_center_id,cc_manager,cc_name,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] + Project [cc_call_center_id,cc_name,cc_manager,cr_returned_date_sk,cr_returning_customer_sk,cr_net_loss] BroadcastHashJoin [cc_call_center_sk,cr_call_center_sk] - Project [cc_call_center_id,cc_call_center_sk,cc_manager,cc_name] - Filter [cc_call_center_sk] - Scan parquet default.call_center [cc_call_center_id,cc_call_center_sk,cc_manager,cc_name] [cc_call_center_id,cc_call_center_sk,cc_manager,cc_name] + Filter [cc_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet default.call_center [cc_call_center_sk,cc_call_center_id,cc_name,cc_manager] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [cr_call_center_sk,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] - Filter [cr_call_center_sk,cr_returned_date_sk,cr_returning_customer_sk] - Scan parquet default.catalog_returns [cr_call_center_sk,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] [cr_call_center_sk,cr_net_loss,cr_returned_date_sk,cr_returning_customer_sk] + WholeStageCodegen (1) + Filter [cr_call_center_sk,cr_returned_date_sk,cr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date_sk] - Filter [d_date_sk,d_moy,d_year] - Scan parquet default.date_dim [d_date_sk,d_moy,d_year] [d_date_sk,d_moy,d_year] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #5 - WholeStageCodegen - Project [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] - Filter [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] - Scan parquet default.customer [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] [c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_customer_sk] + WholeStageCodegen (3) + Filter [c_customer_sk,c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (4) Project [ca_address_sk] - Filter [ca_address_sk,ca_gmt_offset] - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] [ca_address_sk,ca_gmt_offset] + Filter [ca_gmt_offset,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] InputAdapter BroadcastExchange #7 - WholeStageCodegen - Project [cd_demo_sk,cd_education_status,cd_marital_status] - Filter [cd_demo_sk,cd_education_status,cd_marital_status] - Scan parquet default.customer_demographics [cd_demo_sk,cd_education_status,cd_marital_status] [cd_demo_sk,cd_education_status,cd_marital_status] + WholeStageCodegen (5) + Filter [cd_marital_status,cd_education_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #8 - WholeStageCodegen + WholeStageCodegen (6) Project [hd_demo_sk] Filter [hd_buy_potential,hd_demo_sk] - Scan parquet default.household_demographics [hd_buy_potential,hd_demo_sk] [hd_buy_potential,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/explain.txt index 8a696f0e5..8a441392f 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/explain.txt @@ -1,33 +1,196 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[Excess Discount Amount #1 ASC NULLS FIRST], output=[Excess Discount Amount #1]) -+- *(7) HashAggregate(keys=[], functions=[sum(UnscaledValue(ws_ext_discount_amt#2))]) - +- Exchange SinglePartition - +- *(6) HashAggregate(keys=[], functions=[partial_sum(UnscaledValue(ws_ext_discount_amt#2))]) - +- *(6) Project [ws_ext_discount_amt#2] - +- *(6) BroadcastHashJoin [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight - :- *(6) Project [ws_sold_date_sk#3, ws_ext_discount_amt#2] - : +- *(6) BroadcastHashJoin [i_item_sk#5], [ws_item_sk#6#7], Inner, BuildRight, (cast(ws_ext_discount_amt#2 as decimal(14,7)) > (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#8) - : :- *(6) Project [ws_sold_date_sk#3, ws_ext_discount_amt#2, i_item_sk#5] - : : +- *(6) BroadcastHashJoin [ws_item_sk#6], [i_item_sk#5], Inner, BuildRight - : : :- *(6) Project [ws_sold_date_sk#3, ws_item_sk#6, ws_ext_discount_amt#2] - : : : +- *(6) Filter ((isnotnull(ws_item_sk#6) && isnotnull(ws_ext_discount_amt#2)) && isnotnull(ws_sold_date_sk#3)) - : : : +- *(6) FileScan parquet default.web_sales[ws_sold_date_sk#3,ws_item_sk#6,ws_ext_discount_amt#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_ext_discount_amt), IsNotNull(ws_sold_date_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [i_item_sk#5] - : : +- *(1) Filter ((isnotnull(i_manufact_id#9) && (i_manufact_id#9 = 350)) && isnotnull(i_item_sk#5)) - : : +- *(1) FileScan parquet default.item[i_item_sk#5,i_manufact_id#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,350), IsNotNull(i_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : +- *(4) Filter isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#8) - : +- *(4) HashAggregate(keys=[ws_item_sk#6], functions=[avg(UnscaledValue(ws_ext_discount_amt#2))]) - : +- Exchange hashpartitioning(ws_item_sk#6, 5) - : +- *(3) HashAggregate(keys=[ws_item_sk#6], functions=[partial_avg(UnscaledValue(ws_ext_discount_amt#2))]) - : +- *(3) Project [ws_item_sk#6, ws_ext_discount_amt#2] - : +- *(3) BroadcastHashJoin [ws_sold_date_sk#3], [d_date_sk#4], Inner, BuildRight - : :- *(3) Project [ws_sold_date_sk#3, ws_item_sk#6, ws_ext_discount_amt#2] - : : +- *(3) Filter (isnotnull(ws_sold_date_sk#3) && isnotnull(ws_item_sk#6)) - : : +- *(3) FileScan parquet default.web_sales[ws_sold_date_sk#3,ws_item_sk#6,ws_ext_discount_amt#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [d_date_sk#4] - : +- *(2) Filter (((isnotnull(d_date#10) && (cast(d_date#10 as string) >= 2000-01-27)) && (d_date#10 <= 11073)) && isnotnull(d_date_sk#4)) - : +- *(2) FileScan parquet default.date_dim[d_date_sk#4,d_date#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)], ReadSchema: struct - +- ReusedExchange [d_date_sk#4], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +* Sort (34) ++- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Project (27) + : +- * BroadcastHashJoin Inner BuildRight (26) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.web_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.item (4) + : +- BroadcastExchange (25) + : +- * Filter (24) + : +- * HashAggregate (23) + : +- Exchange (22) + : +- * HashAggregate (21) + : +- * Project (20) + : +- * BroadcastHashJoin Inner BuildRight (19) + : :- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.web_sales (11) + : +- BroadcastExchange (18) + : +- * Project (17) + : +- * Filter (16) + : +- * ColumnarToRow (15) + : +- Scan parquet default.date_dim (14) + +- ReusedExchange (28) + + +(1) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_ext_discount_amt), IsNotNull(ws_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3] + +(3) Filter [codegen id : 6] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3] +Condition : ((isnotnull(ws_item_sk#2) AND isnotnull(ws_ext_discount_amt#3)) AND isnotnull(ws_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [2]: [i_item_sk#4, i_manufact_id#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,350), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [i_item_sk#4, i_manufact_id#5] + +(6) Filter [codegen id : 1] +Input [2]: [i_item_sk#4, i_manufact_id#5] +Condition : ((isnotnull(i_manufact_id#5) AND (i_manufact_id#5 = 350)) AND isnotnull(i_item_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [i_item_sk#4] +Input [2]: [i_item_sk#4, i_manufact_id#5] + +(8) BroadcastExchange +Input [1]: [i_item_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(10) Project [codegen id : 6] +Output [3]: [ws_sold_date_sk#1, ws_ext_discount_amt#3, i_item_sk#4] +Input [4]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3, i_item_sk#4] + +(11) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_sold_date_sk), IsNotNull(ws_item_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3] + +(13) Filter [codegen id : 3] +Input [3]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3] +Condition : (isnotnull(ws_sold_date_sk#1) AND isnotnull(ws_item_sk#2)) + +(14) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_date#8] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#7, d_date#8] + +(16) Filter [codegen id : 2] +Input [2]: [d_date_sk#7, d_date#8] +Condition : (((isnotnull(d_date#8) AND (d_date#8 >= 10983)) AND (d_date#8 <= 11073)) AND isnotnull(d_date_sk#7)) + +(17) Project [codegen id : 2] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#8] + +(18) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] + +(19) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(20) Project [codegen id : 3] +Output [2]: [ws_item_sk#2, ws_ext_discount_amt#3] +Input [4]: [ws_sold_date_sk#1, ws_item_sk#2, ws_ext_discount_amt#3, d_date_sk#7] + +(21) HashAggregate [codegen id : 3] +Input [2]: [ws_item_sk#2, ws_ext_discount_amt#3] +Keys [1]: [ws_item_sk#2] +Functions [1]: [partial_avg(UnscaledValue(ws_ext_discount_amt#3))] +Aggregate Attributes [2]: [sum#10, count#11] +Results [3]: [ws_item_sk#2, sum#12, count#13] + +(22) Exchange +Input [3]: [ws_item_sk#2, sum#12, count#13] +Arguments: hashpartitioning(ws_item_sk#2, 5), true, [id=#14] + +(23) HashAggregate [codegen id : 4] +Input [3]: [ws_item_sk#2, sum#12, count#13] +Keys [1]: [ws_item_sk#2] +Functions [1]: [avg(UnscaledValue(ws_ext_discount_amt#3))] +Aggregate Attributes [1]: [avg(UnscaledValue(ws_ext_discount_amt#3))#15] +Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(ws_ext_discount_amt#3))#15 / 100.0) as decimal(11,6)))), DecimalType(14,7), true) AS (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16, ws_item_sk#2 AS ws_item_sk#2#17] + +(24) Filter [codegen id : 4] +Input [2]: [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16, ws_item_sk#2#17] +Condition : isnotnull((CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16) + +(25) BroadcastExchange +Input [2]: [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16, ws_item_sk#2#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#18] + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [i_item_sk#4] +Right keys [1]: [ws_item_sk#2#17] +Join condition: (cast(ws_ext_discount_amt#3 as decimal(14,7)) > (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16) + +(27) Project [codegen id : 6] +Output [2]: [ws_sold_date_sk#1, ws_ext_discount_amt#3] +Input [5]: [ws_sold_date_sk#1, ws_ext_discount_amt#3, i_item_sk#4, (CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))#16, ws_item_sk#2#17] + +(28) ReusedExchange [Reuses operator id: 18] +Output [1]: [d_date_sk#7] + +(29) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] +Join condition: None + +(30) Project [codegen id : 6] +Output [1]: [ws_ext_discount_amt#3] +Input [3]: [ws_sold_date_sk#1, ws_ext_discount_amt#3, d_date_sk#7] + +(31) HashAggregate [codegen id : 6] +Input [1]: [ws_ext_discount_amt#3] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_discount_amt#3))] +Aggregate Attributes [1]: [sum#19] +Results [1]: [sum#20] + +(32) Exchange +Input [1]: [sum#20] +Arguments: SinglePartition, true, [id=#21] + +(33) HashAggregate [codegen id : 7] +Input [1]: [sum#20] +Keys: [] +Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))#22] +Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#3))#22,17,2) AS Excess Discount Amount #23] + +(34) Sort [codegen id : 7] +Input [1]: [Excess Discount Amount #23] +Arguments: [Excess Discount Amount #23 ASC NULLS FIRST], true, 0 + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/simplified.txt index cab3234b8..1f24a7c96 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q92/simplified.txt @@ -1,44 +1,50 @@ -TakeOrderedAndProject [Excess Discount Amount ] - WholeStageCodegen - HashAggregate [sum,sum(UnscaledValue(ws_ext_discount_amt))] [Excess Discount Amount ,sum,sum(UnscaledValue(ws_ext_discount_amt))] +WholeStageCodegen (7) + Sort [Excess Discount Amount ] + HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] InputAdapter Exchange #1 - WholeStageCodegen - HashAggregate [sum,sum,ws_ext_discount_amt] [sum,sum] + WholeStageCodegen (6) + HashAggregate [ws_ext_discount_amt] [sum,sum] Project [ws_ext_discount_amt] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_ext_discount_amt,ws_sold_date_sk] - BroadcastHashJoin [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),i_item_sk,ws_ext_discount_amt,ws_item_sk] - Project [i_item_sk,ws_ext_discount_amt,ws_sold_date_sk] - BroadcastHashJoin [i_item_sk,ws_item_sk] - Project [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] - Filter [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_ext_discount_amt] + BroadcastHashJoin [i_item_sk,ws_item_sk,ws_ext_discount_amt,(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))] + Project [ws_sold_date_sk,ws_ext_discount_amt,i_item_sk] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Filter [ws_item_sk,ws_ext_discount_amt,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (1) Project [i_item_sk] - Filter [i_item_sk,i_manufact_id] - Scan parquet default.item [i_item_sk,i_manufact_id] [i_item_sk,i_manufact_id] + Filter [i_manufact_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_manufact_id] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (4) Filter [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6)))] - HashAggregate [avg(UnscaledValue(ws_ext_discount_amt)),count,sum,ws_item_sk] [(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),avg(UnscaledValue(ws_ext_discount_amt)),count,sum,ws_item_sk] + HashAggregate [ws_item_sk,sum,count] [avg(UnscaledValue(ws_ext_discount_amt)),(CAST(1.3 AS DECIMAL(11,6)) * CAST(avg(ws_ext_discount_amt) AS DECIMAL(11,6))),ws_item_sk,sum,count] InputAdapter Exchange [ws_item_sk] #4 - WholeStageCodegen - HashAggregate [count,count,sum,sum,ws_ext_discount_amt,ws_item_sk] [count,count,sum,sum] - Project [ws_ext_discount_amt,ws_item_sk] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - Project [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - Scan parquet default.web_sales [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] [ws_ext_discount_amt,ws_item_sk,ws_sold_date_sk] + WholeStageCodegen (3) + HashAggregate [ws_item_sk,ws_ext_discount_amt] [sum,count,sum,count] + Project [ws_item_sk,ws_ext_discount_amt] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_ext_discount_amt] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #5 + ReusedExchange [d_date_sk] #5 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/explain.txt index 9da0e4c24..620aa6727 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/explain.txt @@ -1,18 +1,111 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[sumsales#1 ASC NULLS FIRST,ss_customer_sk#2 ASC NULLS FIRST], output=[ss_customer_sk#2,sumsales#1]) -+- *(4) HashAggregate(keys=[ss_customer_sk#2], functions=[sum(act_sales#3)]) - +- Exchange hashpartitioning(ss_customer_sk#2, 5) - +- *(3) HashAggregate(keys=[ss_customer_sk#2], functions=[partial_sum(act_sales#3)]) - +- *(3) Project [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#4) THEN CheckOverflow((promote_precision(cast(cast((ss_quantity#5 - sr_return_quantity#4) as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#6 as decimal(12,2)))), DecimalType(18,2)) ELSE CheckOverflow((promote_precision(cast(cast(ss_quantity#5 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#6 as decimal(12,2)))), DecimalType(18,2)) END AS act_sales#3] - +- *(3) BroadcastHashJoin [sr_reason_sk#7], [cast(r_reason_sk#8 as bigint)], Inner, BuildRight - :- *(3) Project [ss_customer_sk#2, ss_quantity#5, ss_sales_price#6, sr_reason_sk#7, sr_return_quantity#4] - : +- *(3) BroadcastHashJoin [cast(ss_item_sk#9 as bigint), cast(ss_ticket_number#10 as bigint)], [sr_item_sk#11, sr_ticket_number#12], Inner, BuildRight - : :- *(3) FileScan parquet default.store_sales[ss_item_sk#9,ss_customer_sk#2,ss_ticket_number#10,ss_quantity#5,ss_sales_price#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(2) Project [r_reason_sk#8] - +- *(2) Filter ((isnotnull(r_reason_desc#13) && (r_reason_desc#13 = reason 28)) && isnotnull(r_reason_sk#8)) - +- *(2) FileScan parquet default.reason[r_reason_sk#8,r_reason_desc#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/reason], PartitionFilters: [], PushedFilters: [IsNotNull(r_reason_desc), EqualTo(r_reason_desc,reason 28), IsNotNull(r_reason_sk)], ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (19) ++- * HashAggregate (18) + +- Exchange (17) + +- * HashAggregate (16) + +- * Project (15) + +- * BroadcastHashJoin Inner BuildRight (14) + :- * Project (8) + : +- * BroadcastHashJoin Inner BuildRight (7) + : :- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (6) + : +- * Filter (5) + : +- * ColumnarToRow (4) + : +- Scan parquet default.store_returns (3) + +- BroadcastExchange (13) + +- * Project (12) + +- * Filter (11) + +- * ColumnarToRow (10) + +- Scan parquet default.reason (9) + + +(1) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] + +(3) Scan parquet default.store_returns +Output [4]: [sr_item_sk#6, sr_reason_sk#7, sr_ticket_number#8, sr_return_quantity#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_reason_sk)] +ReadSchema: struct + +(4) ColumnarToRow [codegen id : 1] +Input [4]: [sr_item_sk#6, sr_reason_sk#7, sr_ticket_number#8, sr_return_quantity#9] + +(5) Filter [codegen id : 1] +Input [4]: [sr_item_sk#6, sr_reason_sk#7, sr_ticket_number#8, sr_return_quantity#9] +Condition : ((isnotnull(sr_item_sk#6) AND isnotnull(sr_ticket_number#8)) AND isnotnull(sr_reason_sk#7)) + +(6) BroadcastExchange +Input [4]: [sr_item_sk#6, sr_reason_sk#7, sr_ticket_number#8, sr_return_quantity#9] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[2, bigint, false]),false), [id=#10] + +(7) BroadcastHashJoin [codegen id : 3] +Left keys [2]: [cast(ss_item_sk#1 as bigint), cast(ss_ticket_number#3 as bigint)] +Right keys [2]: [sr_item_sk#6, sr_ticket_number#8] +Join condition: None + +(8) Project [codegen id : 3] +Output [5]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#7, sr_return_quantity#9] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, sr_item_sk#6, sr_reason_sk#7, sr_ticket_number#8, sr_return_quantity#9] + +(9) Scan parquet default.reason +Output [2]: [r_reason_sk#11, r_reason_desc#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/reason] +PushedFilters: [IsNotNull(r_reason_desc), EqualTo(r_reason_desc,reason 28), IsNotNull(r_reason_sk)] +ReadSchema: struct + +(10) ColumnarToRow [codegen id : 2] +Input [2]: [r_reason_sk#11, r_reason_desc#12] + +(11) Filter [codegen id : 2] +Input [2]: [r_reason_sk#11, r_reason_desc#12] +Condition : ((isnotnull(r_reason_desc#12) AND (r_reason_desc#12 = reason 28)) AND isnotnull(r_reason_sk#11)) + +(12) Project [codegen id : 2] +Output [1]: [r_reason_sk#11] +Input [2]: [r_reason_sk#11, r_reason_desc#12] + +(13) BroadcastExchange +Input [1]: [r_reason_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [sr_reason_sk#7] +Right keys [1]: [cast(r_reason_sk#11 as bigint)] +Join condition: None + +(15) Project [codegen id : 3] +Output [2]: [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#9) THEN CheckOverflow((promote_precision(cast(cast((ss_quantity#4 - sr_return_quantity#9) as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#5 as decimal(12,2)))), DecimalType(18,2), true) ELSE CheckOverflow((promote_precision(cast(cast(ss_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#5 as decimal(12,2)))), DecimalType(18,2), true) END AS act_sales#14] +Input [6]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#7, sr_return_quantity#9, r_reason_sk#11] + +(16) HashAggregate [codegen id : 3] +Input [2]: [ss_customer_sk#2, act_sales#14] +Keys [1]: [ss_customer_sk#2] +Functions [1]: [partial_sum(act_sales#14)] +Aggregate Attributes [2]: [sum#15, isEmpty#16] +Results [3]: [ss_customer_sk#2, sum#17, isEmpty#18] + +(17) Exchange +Input [3]: [ss_customer_sk#2, sum#17, isEmpty#18] +Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#19] + +(18) HashAggregate [codegen id : 4] +Input [3]: [ss_customer_sk#2, sum#17, isEmpty#18] +Keys [1]: [ss_customer_sk#2] +Functions [1]: [sum(act_sales#14)] +Aggregate Attributes [1]: [sum(act_sales#14)#20] +Results [2]: [ss_customer_sk#2, sum(act_sales#14)#20 AS sumsales#21] + +(19) TakeOrderedAndProject +Input [2]: [ss_customer_sk#2, sumsales#21] +Arguments: 100, [sumsales#21 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], [ss_customer_sk#2, sumsales#21] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/simplified.txt index 6b2505ff0..81de31a44 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q93/simplified.txt @@ -1,24 +1,29 @@ -TakeOrderedAndProject [ss_customer_sk,sumsales] - WholeStageCodegen - HashAggregate [ss_customer_sk,sum,sum(act_sales)] [sum,sum(act_sales),sumsales] +TakeOrderedAndProject [sumsales,ss_customer_sk] + WholeStageCodegen (4) + HashAggregate [ss_customer_sk,sum,isEmpty] [sum(act_sales),sumsales,sum,isEmpty] InputAdapter Exchange [ss_customer_sk] #1 - WholeStageCodegen - HashAggregate [act_sales,ss_customer_sk,sum,sum] [sum,sum] - Project [sr_return_quantity,ss_customer_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [r_reason_sk,sr_reason_sk] - Project [sr_reason_sk,sr_return_quantity,ss_customer_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [sr_item_sk,sr_ticket_number,ss_item_sk,ss_ticket_number] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_ticket_number] [ss_customer_sk,ss_item_sk,ss_quantity,ss_sales_price,ss_ticket_number] + WholeStageCodegen (3) + HashAggregate [ss_customer_sk,act_sales] [sum,isEmpty,sum,isEmpty] + Project [ss_customer_sk,sr_return_quantity,ss_quantity,ss_sales_price] + BroadcastHashJoin [sr_reason_sk,r_reason_sk] + Project [ss_customer_sk,ss_quantity,ss_sales_price,sr_reason_sk,sr_return_quantity] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,ss_sales_price] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [sr_item_sk,sr_reason_sk,sr_return_quantity,sr_ticket_number] - Filter [sr_item_sk,sr_reason_sk,sr_ticket_number] - Scan parquet default.store_returns [sr_item_sk,sr_reason_sk,sr_return_quantity,sr_ticket_number] [sr_item_sk,sr_reason_sk,sr_return_quantity,sr_ticket_number] + WholeStageCodegen (1) + Filter [sr_item_sk,sr_ticket_number,sr_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_reason_sk,sr_ticket_number,sr_return_quantity] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (2) Project [r_reason_sk] Filter [r_reason_desc,r_reason_sk] - Scan parquet default.reason [r_reason_desc,r_reason_sk] [r_reason_desc,r_reason_sk] + ColumnarToRow + InputAdapter + Scan parquet default.reason [r_reason_sk,r_reason_desc] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/explain.txt index 424585ea5..2abbe4f9b 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/explain.txt @@ -1,37 +1,235 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], output=[order count #1,total shipping cost #2,total net profit #3]) -+- *(8) HashAggregate(keys=[], functions=[sum(UnscaledValue(ws_ext_ship_cost#4)), sum(UnscaledValue(ws_net_profit#5)), count(distinct ws_order_number#6)]) - +- Exchange SinglePartition - +- *(7) HashAggregate(keys=[], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5)), partial_count(distinct ws_order_number#6)]) - +- *(7) HashAggregate(keys=[ws_order_number#6], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5))]) - +- Exchange hashpartitioning(ws_order_number#6, 5) - +- *(6) HashAggregate(keys=[ws_order_number#6], functions=[partial_sum(UnscaledValue(ws_ext_ship_cost#4)), partial_sum(UnscaledValue(ws_net_profit#5))]) - +- *(6) Project [ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] - +- *(6) BroadcastHashJoin [ws_web_site_sk#7], [web_site_sk#8], Inner, BuildRight - :- *(6) Project [ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] - : +- *(6) BroadcastHashJoin [ws_ship_addr_sk#9], [ca_address_sk#10], Inner, BuildRight - : :- *(6) Project [ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] - : : +- *(6) BroadcastHashJoin [ws_ship_date_sk#11], [d_date_sk#12], Inner, BuildRight - : : :- *(6) BroadcastHashJoin [cast(ws_order_number#6 as bigint)], [wr_order_number#13], LeftAnti, BuildRight - : : : :- *(6) Project [ws_ship_date_sk#11, ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] - : : : : +- *(6) BroadcastHashJoin [ws_order_number#6], [ws_order_number#6#14], LeftSemi, BuildRight, NOT (ws_warehouse_sk#15 = ws_warehouse_sk#15#16) - : : : : :- *(6) Project [ws_ship_date_sk#11, ws_ship_addr_sk#9, ws_web_site_sk#7, ws_warehouse_sk#15, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] - : : : : : +- *(6) Filter ((isnotnull(ws_ship_date_sk#11) && isnotnull(ws_ship_addr_sk#9)) && isnotnull(ws_web_site_sk#7)) - : : : : : +- *(6) FileScan parquet default.web_sales[ws_ship_date_sk#11,ws_ship_addr_sk#9,ws_web_site_sk#7,ws_warehouse_sk#15,ws_order_number#6,ws_ext_ship_cost#4,ws_net_profit#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])) - : : : +- *(2) FileScan parquet default.web_returns[wr_order_number#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_returns], PartitionFilters: [], PushedFilters: [], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(3) Project [d_date_sk#12] - : : +- *(3) Filter (((isnotnull(d_date#17) && (cast(d_date#17 as string) >= 1999-02-01)) && (d_date#17 <= 10683)) && isnotnull(d_date_sk#12)) - : : +- *(3) FileScan parquet default.date_dim[d_date_sk#12,d_date#17] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(4) Project [ca_address_sk#10] - : +- *(4) Filter ((isnotnull(ca_state#18) && (ca_state#18 = IL)) && isnotnull(ca_address_sk#10)) - : +- *(4) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(5) Project [web_site_sk#8] - +- *(5) Filter ((isnotnull(web_company_name#19) && (web_company_name#19 = pri)) && isnotnull(web_site_sk#8)) - +- *(5) FileScan parquet default.web_site[web_site_sk#8,web_company_name#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file +* Sort (41) ++- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- * HashAggregate (37) + +- Exchange (36) + +- * HashAggregate (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * Project (27) + : +- * BroadcastHashJoin Inner BuildRight (26) + : :- * Project (20) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * BroadcastHashJoin LeftAnti BuildRight (13) + : : : :- * Project (9) + : : : : +- * BroadcastHashJoin LeftSemi BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.web_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Project (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.web_sales (4) + : : : +- BroadcastExchange (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.web_returns (10) + : : +- BroadcastExchange (18) + : : +- * Project (17) + : : +- * Filter (16) + : : +- * ColumnarToRow (15) + : : +- Scan parquet default.date_dim (14) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet default.customer_address (21) + +- BroadcastExchange (32) + +- * Project (31) + +- * Filter (30) + +- * ColumnarToRow (29) + +- Scan parquet default.web_site (28) + + +(1) Scan parquet default.web_sales +Output [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 6] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(3) Filter [codegen id : 6] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Condition : ((isnotnull(ws_ship_date_sk#1) AND isnotnull(ws_ship_addr_sk#2)) AND isnotnull(ws_web_site_sk#3)) + +(4) Scan parquet default.web_sales +Output [2]: [ws_warehouse_sk#4, ws_order_number#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [ws_warehouse_sk#4, ws_order_number#5] + +(6) Project [codegen id : 1] +Output [2]: [ws_warehouse_sk#4 AS ws_warehouse_sk#4#8, ws_order_number#5 AS ws_order_number#5#9] +Input [2]: [ws_warehouse_sk#4, ws_order_number#5] + +(7) BroadcastExchange +Input [2]: [ws_warehouse_sk#4#8, ws_order_number#5#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_order_number#5] +Right keys [1]: [ws_order_number#5#9] +Join condition: NOT (ws_warehouse_sk#4 = ws_warehouse_sk#4#8) + +(9) Project [codegen id : 6] +Output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] + +(10) Scan parquet default.web_returns +Output [1]: [wr_order_number#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [1]: [wr_order_number#11] + +(12) BroadcastExchange +Input [1]: [wr_order_number#11] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#12] + +(13) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [cast(ws_order_number#5 as bigint)] +Right keys [1]: [wr_order_number#11] +Join condition: None + +(14) Scan parquet default.date_dim +Output [2]: [d_date_sk#13, d_date#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(15) ColumnarToRow [codegen id : 3] +Input [2]: [d_date_sk#13, d_date#14] + +(16) Filter [codegen id : 3] +Input [2]: [d_date_sk#13, d_date#14] +Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 10623)) AND (d_date#14 <= 10683)) AND isnotnull(d_date_sk#13)) + +(17) Project [codegen id : 3] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_date#14] + +(18) BroadcastExchange +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] + +(19) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_ship_date_sk#1] +Right keys [1]: [d_date_sk#13] +Join condition: None + +(20) Project [codegen id : 6] +Output [5]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, d_date_sk#13] + +(21) Scan parquet default.customer_address +Output [2]: [ca_address_sk#16, ca_state#17] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(22) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#16, ca_state#17] + +(23) Filter [codegen id : 4] +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : ((isnotnull(ca_state#17) AND (ca_state#17 = IL)) AND isnotnull(ca_address_sk#16)) + +(24) Project [codegen id : 4] +Output [1]: [ca_address_sk#16] +Input [2]: [ca_address_sk#16, ca_state#17] + +(25) BroadcastExchange +Input [1]: [ca_address_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] + +(26) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_ship_addr_sk#2] +Right keys [1]: [ca_address_sk#16] +Join condition: None + +(27) Project [codegen id : 6] +Output [4]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ca_address_sk#16] + +(28) Scan parquet default.web_site +Output [2]: [web_site_sk#19, web_company_name#20] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)] +ReadSchema: struct + +(29) ColumnarToRow [codegen id : 5] +Input [2]: [web_site_sk#19, web_company_name#20] + +(30) Filter [codegen id : 5] +Input [2]: [web_site_sk#19, web_company_name#20] +Condition : ((isnotnull(web_company_name#20) AND (web_company_name#20 = pri)) AND isnotnull(web_site_sk#19)) + +(31) Project [codegen id : 5] +Output [1]: [web_site_sk#19] +Input [2]: [web_site_sk#19, web_company_name#20] + +(32) BroadcastExchange +Input [1]: [web_site_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] + +(33) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ws_web_site_sk#3] +Right keys [1]: [web_site_sk#19] +Join condition: None + +(34) Project [codegen id : 6] +Output [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Input [5]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, web_site_sk#19] + +(35) HashAggregate [codegen id : 6] +Input [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] +Keys [1]: [ws_order_number#5] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#6)), partial_sum(UnscaledValue(ws_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23] +Results [3]: [ws_order_number#5, sum#24, sum#25] + +(36) Exchange +Input [3]: [ws_order_number#5, sum#24, sum#25] +Arguments: hashpartitioning(ws_order_number#5, 5), true, [id=#26] + +(37) HashAggregate [codegen id : 7] +Input [3]: [ws_order_number#5, sum#24, sum#25] +Keys [1]: [ws_order_number#5] +Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23] +Results [3]: [ws_order_number#5, sum#24, sum#25] + +(38) HashAggregate [codegen id : 7] +Input [3]: [ws_order_number#5, sum#24, sum#25] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7)), partial_count(distinct ws_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#27] +Results [3]: [sum#24, sum#25, count#28] + +(39) Exchange +Input [3]: [sum#24, sum#25, count#28] +Arguments: SinglePartition, true, [id=#29] + +(40) HashAggregate [codegen id : 8] +Input [3]: [sum#24, sum#25, count#28] +Keys: [] +Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net_profit#7)), count(distinct ws_order_number#5)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#27] +Results [3]: [count(ws_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#23,17,2) AS total net profit #32] + +(41) Sort [codegen id : 8] +Input [3]: [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/simplified.txt index c7a5e5d90..5e7d7db5c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q94/simplified.txt @@ -1,51 +1,62 @@ -TakeOrderedAndProject [order count ,total net profit ,total shipping cost ] - WholeStageCodegen - HashAggregate [count,count(ws_order_number),sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] [count,count(ws_order_number),order count ,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),total net profit ,total shipping cost ] +WholeStageCodegen (8) + Sort [order count ] + HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 - WholeStageCodegen - HashAggregate [count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_order_number] [count,count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] - HashAggregate [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + WholeStageCodegen (7) + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] InputAdapter Exchange [ws_order_number] #2 - WholeStageCodegen - HashAggregate [sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_ext_ship_cost,ws_net_profit,ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] - Project [ws_ext_ship_cost,ws_net_profit,ws_order_number] - BroadcastHashJoin [web_site_sk,ws_web_site_sk] - Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_web_site_sk] - BroadcastHashJoin [ca_address_sk,ws_ship_addr_sk] - Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_web_site_sk] - BroadcastHashJoin [d_date_sk,ws_ship_date_sk] - BroadcastHashJoin [wr_order_number,ws_order_number] - Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] + WholeStageCodegen (6) + HashAggregate [ws_order_number,ws_ext_ship_cost,ws_net_profit] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] + Project [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_ship_date_sk,d_date_sk] + BroadcastHashJoin [ws_order_number,wr_order_number] + Project [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] - Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_warehouse_sk,ws_web_site_sk] - Filter [ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] - Scan parquet default.web_sales [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_warehouse_sk,ws_web_site_sk] [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_warehouse_sk,ws_web_site_sk] + Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [ws_order_number,ws_warehouse_sk] - Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] + WholeStageCodegen (1) + Project [ws_warehouse_sk,ws_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Scan parquet default.web_returns [wr_order_number] [wr_order_number] + WholeStageCodegen (2) + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_order_number] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (3) Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (4) Project [ca_address_sk] - Filter [ca_address_sk,ca_state] - Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] InputAdapter BroadcastExchange #7 - WholeStageCodegen + WholeStageCodegen (5) Project [web_site_sk] Filter [web_company_name,web_site_sk] - Scan parquet default.web_site [web_company_name,web_site_sk] [web_company_name,web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_company_name] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/explain.txt index d25afb9da..1cc99e296 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/explain.txt @@ -1,54 +1,318 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[order count #1 ASC NULLS FIRST], output=[order count #1,total shipping cost #2,total net profit #3]) -+- *(11) HashAggregate(keys=[], functions=[sum(UnscaledValue(ws_ext_ship_cost#4)), sum(UnscaledValue(ws_net_profit#5)), count(distinct ws_order_number#6)]) - +- Exchange SinglePartition - +- *(10) HashAggregate(keys=[], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5)), partial_count(distinct ws_order_number#6)]) - +- *(10) HashAggregate(keys=[ws_order_number#6], functions=[merge_sum(UnscaledValue(ws_ext_ship_cost#4)), merge_sum(UnscaledValue(ws_net_profit#5))]) - +- Exchange hashpartitioning(ws_order_number#6, 5) - +- *(9) HashAggregate(keys=[ws_order_number#6], functions=[partial_sum(UnscaledValue(ws_ext_ship_cost#4)), partial_sum(UnscaledValue(ws_net_profit#5))]) - +- *(9) Project [ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] - +- *(9) BroadcastHashJoin [ws_web_site_sk#7], [web_site_sk#8], Inner, BuildRight - :- *(9) Project [ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] - : +- *(9) BroadcastHashJoin [ws_ship_addr_sk#9], [ca_address_sk#10], Inner, BuildRight - : :- *(9) Project [ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] - : : +- *(9) BroadcastHashJoin [ws_ship_date_sk#11], [d_date_sk#12], Inner, BuildRight - : : :- *(9) BroadcastHashJoin [cast(ws_order_number#6 as bigint)], [wr_order_number#13], LeftSemi, BuildRight - : : : :- *(9) BroadcastHashJoin [ws_order_number#6], [ws_order_number#6#14], LeftSemi, BuildRight - : : : : :- *(9) Project [ws_ship_date_sk#11, ws_ship_addr_sk#9, ws_web_site_sk#7, ws_order_number#6, ws_ext_ship_cost#4, ws_net_profit#5] - : : : : : +- *(9) Filter ((isnotnull(ws_ship_date_sk#11) && isnotnull(ws_ship_addr_sk#9)) && isnotnull(ws_web_site_sk#7)) - : : : : : +- *(9) FileScan parquet default.web_sales[ws_ship_date_sk#11,ws_ship_addr_sk#9,ws_web_site_sk#7,ws_order_number#6,ws_ext_ship_cost#4,ws_net_profit#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)], ReadSchema: struct - : : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : : : +- *(1) Project [ws_warehouse_sk#17, ws_order_number#15] - : : : : +- *(1) Filter (isnotnull(ws_order_number#15) && isnotnull(ws_warehouse_sk#17)) - : : : : +- *(1) FileScan parquet default.web_sales[ws_warehouse_sk#17,ws_order_number#15] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])) - : : : +- *(5) Project [wr_order_number#13] - : : : +- *(5) BroadcastHashJoin [wr_order_number#13], [cast(ws_order_number#6 as bigint)], Inner, BuildRight - : : : :- *(5) Project [wr_order_number#13] - : : : : +- *(5) Filter isnotnull(wr_order_number#13) - : : : : +- *(5) FileScan parquet default.web_returns[wr_order_number#13] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_returns], PartitionFilters: [], PushedFilters: [IsNotNull(wr_order_number)], ReadSchema: struct - : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : : +- *(4) Project [ws_order_number#6] - : : : +- *(4) BroadcastHashJoin [ws_order_number#6], [ws_order_number#15], Inner, BuildRight, NOT (ws_warehouse_sk#16 = ws_warehouse_sk#17) - : : : :- *(4) Project [ws_warehouse_sk#16, ws_order_number#6] - : : : : +- *(4) Filter (isnotnull(ws_order_number#6) && isnotnull(ws_warehouse_sk#16)) - : : : : +- *(4) FileScan parquet default.web_sales[ws_warehouse_sk#16,ws_order_number#6] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)], ReadSchema: struct - : : : +- ReusedExchange [ws_warehouse_sk#17, ws_order_number#15], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint))) - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(6) Project [d_date_sk#12] - : : +- *(6) Filter (((isnotnull(d_date#18) && (cast(d_date#18 as string) >= 1999-02-01)) && (d_date#18 <= 10683)) && isnotnull(d_date_sk#12)) - : : +- *(6) FileScan parquet default.date_dim[d_date_sk#12,d_date#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(7) Project [ca_address_sk#10] - : +- *(7) Filter ((isnotnull(ca_state#19) && (ca_state#19 = IL)) && isnotnull(ca_address_sk#10)) - : +- *(7) FileScan parquet default.customer_address[ca_address_sk#10,ca_state#19] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/customer_address], PartitionFilters: [], PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(8) Project [web_site_sk#8] - +- *(8) Filter ((isnotnull(web_company_name#20) && (web_company_name#20 = pri)) && isnotnull(web_site_sk#8)) - +- *(8) FileScan parquet default.web_site[web_site_sk#8,web_company_name#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/web_site], PartitionFilters: [], PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)], ReadSchema: struct \ No newline at end of file +* Sort (56) ++- * HashAggregate (55) + +- Exchange (54) + +- * HashAggregate (53) + +- * HashAggregate (52) + +- Exchange (51) + +- * HashAggregate (50) + +- * Project (49) + +- * BroadcastHashJoin Inner BuildRight (48) + :- * Project (42) + : +- * BroadcastHashJoin Inner BuildRight (41) + : :- * Project (35) + : : +- * BroadcastHashJoin Inner BuildRight (34) + : : :- * BroadcastHashJoin LeftSemi BuildRight (28) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (14) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.web_sales (1) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * BroadcastHashJoin Inner BuildRight (11) + : : : : :- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.web_sales (4) + : : : : +- BroadcastExchange (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet default.web_sales (7) + : : : +- BroadcastExchange (27) + : : : +- * Project (26) + : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : :- * Filter (17) + : : : : +- * ColumnarToRow (16) + : : : : +- Scan parquet default.web_returns (15) + : : : +- BroadcastExchange (24) + : : : +- * Project (23) + : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : :- * Filter (20) + : : : : +- * ColumnarToRow (19) + : : : : +- Scan parquet default.web_sales (18) + : : : +- ReusedExchange (21) + : : +- BroadcastExchange (33) + : : +- * Project (32) + : : +- * Filter (31) + : : +- * ColumnarToRow (30) + : : +- Scan parquet default.date_dim (29) + : +- BroadcastExchange (40) + : +- * Project (39) + : +- * Filter (38) + : +- * ColumnarToRow (37) + : +- Scan parquet default.customer_address (36) + +- BroadcastExchange (47) + +- * Project (46) + +- * Filter (45) + +- * ColumnarToRow (44) + +- Scan parquet default.web_site (43) + + +(1) Scan parquet default.web_sales +Output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_ship_date_sk), IsNotNull(ws_ship_addr_sk), IsNotNull(ws_web_site_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 9] +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] + +(3) Filter [codegen id : 9] +Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Condition : ((isnotnull(ws_ship_date_sk#1) AND isnotnull(ws_ship_addr_sk#2)) AND isnotnull(ws_web_site_sk#3)) + +(4) Scan parquet default.web_sales +Output [2]: [ws_warehouse_sk#7, ws_order_number#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 2] +Input [2]: [ws_warehouse_sk#7, ws_order_number#4] + +(6) Filter [codegen id : 2] +Input [2]: [ws_warehouse_sk#7, ws_order_number#4] +Condition : (isnotnull(ws_order_number#4) AND isnotnull(ws_warehouse_sk#7)) + +(7) Scan parquet default.web_sales +Output [2]: [ws_warehouse_sk#8, ws_order_number#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)] +ReadSchema: struct + +(8) ColumnarToRow [codegen id : 1] +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] + +(9) Filter [codegen id : 1] +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Condition : (isnotnull(ws_order_number#9) AND isnotnull(ws_warehouse_sk#8)) + +(10) BroadcastExchange +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#10] + +(11) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ws_order_number#4] +Right keys [1]: [ws_order_number#9] +Join condition: NOT (ws_warehouse_sk#7 = ws_warehouse_sk#8) + +(12) Project [codegen id : 2] +Output [1]: [ws_order_number#4 AS ws_order_number#4#11] +Input [4]: [ws_warehouse_sk#7, ws_order_number#4, ws_warehouse_sk#8, ws_order_number#9] + +(13) BroadcastExchange +Input [1]: [ws_order_number#4#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] + +(14) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_order_number#4] +Right keys [1]: [ws_order_number#4#11] +Join condition: None + +(15) Scan parquet default.web_returns +Output [1]: [wr_order_number#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_returns] +PushedFilters: [IsNotNull(wr_order_number)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 5] +Input [1]: [wr_order_number#13] + +(17) Filter [codegen id : 5] +Input [1]: [wr_order_number#13] +Condition : isnotnull(wr_order_number#13) + +(18) Scan parquet default.web_sales +Output [2]: [ws_warehouse_sk#7, ws_order_number#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 4] +Input [2]: [ws_warehouse_sk#7, ws_order_number#4] + +(20) Filter [codegen id : 4] +Input [2]: [ws_warehouse_sk#7, ws_order_number#4] +Condition : (isnotnull(ws_order_number#4) AND isnotnull(ws_warehouse_sk#7)) + +(21) ReusedExchange [Reuses operator id: 10] +Output [2]: [ws_warehouse_sk#14, ws_order_number#15] + +(22) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ws_order_number#4] +Right keys [1]: [ws_order_number#15] +Join condition: NOT (ws_warehouse_sk#7 = ws_warehouse_sk#14) + +(23) Project [codegen id : 4] +Output [1]: [ws_order_number#4] +Input [4]: [ws_warehouse_sk#7, ws_order_number#4, ws_warehouse_sk#14, ws_order_number#15] + +(24) BroadcastExchange +Input [1]: [ws_order_number#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(25) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [wr_order_number#13] +Right keys [1]: [cast(ws_order_number#4 as bigint)] +Join condition: None + +(26) Project [codegen id : 5] +Output [1]: [wr_order_number#13] +Input [2]: [wr_order_number#13, ws_order_number#4] + +(27) BroadcastExchange +Input [1]: [wr_order_number#13] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#17] + +(28) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [cast(ws_order_number#4 as bigint)] +Right keys [1]: [wr_order_number#13] +Join condition: None + +(29) Scan parquet default.date_dim +Output [2]: [d_date_sk#18, d_date#19] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] +ReadSchema: struct + +(30) ColumnarToRow [codegen id : 6] +Input [2]: [d_date_sk#18, d_date#19] + +(31) Filter [codegen id : 6] +Input [2]: [d_date_sk#18, d_date#19] +Condition : (((isnotnull(d_date#19) AND (d_date#19 >= 10623)) AND (d_date#19 <= 10683)) AND isnotnull(d_date_sk#18)) + +(32) Project [codegen id : 6] +Output [1]: [d_date_sk#18] +Input [2]: [d_date_sk#18, d_date#19] + +(33) BroadcastExchange +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] + +(34) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_ship_date_sk#1] +Right keys [1]: [d_date_sk#18] +Join condition: None + +(35) Project [codegen id : 9] +Output [5]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, d_date_sk#18] + +(36) Scan parquet default.customer_address +Output [2]: [ca_address_sk#21, ca_state#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(37) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#21, ca_state#22] + +(38) Filter [codegen id : 7] +Input [2]: [ca_address_sk#21, ca_state#22] +Condition : ((isnotnull(ca_state#22) AND (ca_state#22 = IL)) AND isnotnull(ca_address_sk#21)) + +(39) Project [codegen id : 7] +Output [1]: [ca_address_sk#21] +Input [2]: [ca_address_sk#21, ca_state#22] + +(40) BroadcastExchange +Input [1]: [ca_address_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] + +(41) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_ship_addr_sk#2] +Right keys [1]: [ca_address_sk#21] +Join condition: None + +(42) Project [codegen id : 9] +Output [4]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ca_address_sk#21] + +(43) Scan parquet default.web_site +Output [2]: [web_site_sk#24, web_company_name#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/web_site] +PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)] +ReadSchema: struct + +(44) ColumnarToRow [codegen id : 8] +Input [2]: [web_site_sk#24, web_company_name#25] + +(45) Filter [codegen id : 8] +Input [2]: [web_site_sk#24, web_company_name#25] +Condition : ((isnotnull(web_company_name#25) AND (web_company_name#25 = pri)) AND isnotnull(web_site_sk#24)) + +(46) Project [codegen id : 8] +Output [1]: [web_site_sk#24] +Input [2]: [web_site_sk#24, web_company_name#25] + +(47) BroadcastExchange +Input [1]: [web_site_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] + +(48) BroadcastHashJoin [codegen id : 9] +Left keys [1]: [ws_web_site_sk#3] +Right keys [1]: [web_site_sk#24] +Join condition: None + +(49) Project [codegen id : 9] +Output [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Input [5]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, web_site_sk#24] + +(50) HashAggregate [codegen id : 9] +Input [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] +Keys [1]: [ws_order_number#4] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#5)), partial_sum(UnscaledValue(ws_net_profit#6))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28] +Results [3]: [ws_order_number#4, sum#29, sum#30] + +(51) Exchange +Input [3]: [ws_order_number#4, sum#29, sum#30] +Arguments: hashpartitioning(ws_order_number#4, 5), true, [id=#31] + +(52) HashAggregate [codegen id : 10] +Input [3]: [ws_order_number#4, sum#29, sum#30] +Keys [1]: [ws_order_number#4] +Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28] +Results [3]: [ws_order_number#4, sum#29, sum#30] + +(53) HashAggregate [codegen id : 10] +Input [3]: [ws_order_number#4, sum#29, sum#30] +Keys: [] +Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6)), partial_count(distinct ws_order_number#4)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#32] +Results [3]: [sum#29, sum#30, count#33] + +(54) Exchange +Input [3]: [sum#29, sum#30, count#33] +Arguments: SinglePartition, true, [id=#34] + +(55) HashAggregate [codegen id : 11] +Input [3]: [sum#29, sum#30, count#33] +Keys: [] +Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net_profit#6)), count(distinct ws_order_number#4)] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#32] +Results [3]: [count(ws_order_number#4)#32 AS order count #35, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#27,17,2) AS total shipping cost #36, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#28,17,2) AS total net profit #37] + +(56) Sort [codegen id : 11] +Input [3]: [order count #35, total shipping cost #36, total net profit #37] +Arguments: [order count #35 ASC NULLS FIRST], true, 0 + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/simplified.txt index 343982d5a..191ff22c1 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q95/simplified.txt @@ -1,73 +1,84 @@ -TakeOrderedAndProject [order count ,total net profit ,total shipping cost ] - WholeStageCodegen - HashAggregate [count,count(ws_order_number),sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] [count,count(ws_order_number),order count ,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),total net profit ,total shipping cost ] +WholeStageCodegen (11) + Sort [order count ] + HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 - WholeStageCodegen - HashAggregate [count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_order_number] [count,count,count(ws_order_number),sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] - HashAggregate [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] + WholeStageCodegen (10) + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] + HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] InputAdapter Exchange [ws_order_number] #2 - WholeStageCodegen - HashAggregate [sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),ws_ext_ship_cost,ws_net_profit,ws_order_number] [sum,sum,sum,sum,sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit))] - Project [ws_ext_ship_cost,ws_net_profit,ws_order_number] - BroadcastHashJoin [web_site_sk,ws_web_site_sk] - Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_web_site_sk] - BroadcastHashJoin [ca_address_sk,ws_ship_addr_sk] - Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_web_site_sk] - BroadcastHashJoin [d_date_sk,ws_ship_date_sk] - BroadcastHashJoin [wr_order_number,ws_order_number] + WholeStageCodegen (9) + HashAggregate [ws_order_number,ws_ext_ship_cost,ws_net_profit] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] + Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] + Project [ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] + BroadcastHashJoin [ws_ship_date_sk,d_date_sk] + BroadcastHashJoin [ws_order_number,wr_order_number] BroadcastHashJoin [ws_order_number,ws_order_number] - Project [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] - Filter [ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] - Scan parquet default.web_sales [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] [ws_ext_ship_cost,ws_net_profit,ws_order_number,ws_ship_addr_sk,ws_ship_date_sk,ws_web_site_sk] + Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (2) Project [ws_order_number] BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] - Project [ws_order_number,ws_warehouse_sk] - Filter [ws_order_number,ws_warehouse_sk] - Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] + Filter [ws_order_number,ws_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [ws_order_number,ws_warehouse_sk] - Filter [ws_order_number,ws_warehouse_sk] - Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] + WholeStageCodegen (1) + Filter [ws_order_number,ws_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (5) Project [wr_order_number] BroadcastHashJoin [wr_order_number,ws_order_number] - Project [wr_order_number] - Filter [wr_order_number] - Scan parquet default.web_returns [wr_order_number] [wr_order_number] + Filter [wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_order_number] InputAdapter BroadcastExchange #6 - WholeStageCodegen + WholeStageCodegen (4) Project [ws_order_number] BroadcastHashJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] - Project [ws_order_number,ws_warehouse_sk] - Filter [ws_order_number,ws_warehouse_sk] - Scan parquet default.web_sales [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] + Filter [ws_order_number,ws_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] InputAdapter - ReusedExchange [ws_order_number,ws_warehouse_sk] [ws_order_number,ws_warehouse_sk] #4 + ReusedExchange [ws_warehouse_sk,ws_order_number] #4 InputAdapter BroadcastExchange #7 - WholeStageCodegen + WholeStageCodegen (6) Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter BroadcastExchange #8 - WholeStageCodegen + WholeStageCodegen (7) Project [ca_address_sk] - Filter [ca_address_sk,ca_state] - Scan parquet default.customer_address [ca_address_sk,ca_state] [ca_address_sk,ca_state] + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_state] InputAdapter BroadcastExchange #9 - WholeStageCodegen + WholeStageCodegen (8) Project [web_site_sk] Filter [web_company_name,web_site_sk] - Scan parquet default.web_site [web_company_name,web_site_sk] [web_company_name,web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_company_name] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/explain.txt index 4700925d1..6729910d9 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/explain.txt @@ -1,26 +1,160 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[count(1)#1 ASC NULLS FIRST], output=[count(1)#1]) -+- *(5) HashAggregate(keys=[], functions=[count(1)]) - +- Exchange SinglePartition - +- *(4) HashAggregate(keys=[], functions=[partial_count(1)]) - +- *(4) Project - +- *(4) BroadcastHashJoin [ss_store_sk#2], [s_store_sk#3], Inner, BuildRight - :- *(4) Project [ss_store_sk#2] - : +- *(4) BroadcastHashJoin [ss_sold_time_sk#4], [t_time_sk#5], Inner, BuildRight - : :- *(4) Project [ss_sold_time_sk#4, ss_store_sk#2] - : : +- *(4) BroadcastHashJoin [ss_hdemo_sk#6], [hd_demo_sk#7], Inner, BuildRight - : : :- *(4) Project [ss_sold_time_sk#4, ss_hdemo_sk#6, ss_store_sk#2] - : : : +- *(4) Filter ((isnotnull(ss_hdemo_sk#6) && isnotnull(ss_sold_time_sk#4)) && isnotnull(ss_store_sk#2)) - : : : +- *(4) FileScan parquet default.store_sales[ss_sold_time_sk#4,ss_hdemo_sk#6,ss_store_sk#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)], ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(1) Project [hd_demo_sk#7] - : : +- *(1) Filter ((isnotnull(hd_dep_count#8) && (hd_dep_count#8 = 7)) && isnotnull(hd_demo_sk#7)) - : : +- *(1) FileScan parquet default.household_demographics[hd_demo_sk#7,hd_dep_count#8] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/household_demographics], PartitionFilters: [], PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,7), IsNotNull(hd_demo_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(2) Project [t_time_sk#5] - : +- *(2) Filter ((((isnotnull(t_hour#9) && isnotnull(t_minute#10)) && (t_hour#9 = 20)) && (t_minute#10 >= 30)) && isnotnull(t_time_sk#5)) - : +- *(2) FileScan parquet default.time_dim[t_time_sk#5,t_hour#9,t_minute#10] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/time_dim], PartitionFilters: [], PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,20), GreaterThanOrEqual(t_minute,30), IsN..., ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(3) Project [s_store_sk#3] - +- *(3) Filter ((isnotnull(s_store_name#11) && (s_store_name#11 = ese)) && isnotnull(s_store_sk#3)) - +- *(3) FileScan parquet default.store[s_store_sk#3,s_store_name#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store], PartitionFilters: [], PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)], ReadSchema: struct \ No newline at end of file +* Sort (28) ++- * HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * Project (24) + +- * BroadcastHashJoin Inner BuildRight (23) + :- * Project (17) + : +- * BroadcastHashJoin Inner BuildRight (16) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.household_demographics (4) + : +- BroadcastExchange (15) + : +- * Project (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.time_dim (11) + +- BroadcastExchange (22) + +- * Project (21) + +- * Filter (20) + +- * ColumnarToRow (19) + +- Scan parquet default.store (18) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 4] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] + +(3) Filter [codegen id : 4] +Input [3]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3] +Condition : ((isnotnull(ss_hdemo_sk#2) AND isnotnull(ss_sold_time_sk#1)) AND isnotnull(ss_store_sk#3)) + +(4) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#4, hd_dep_count#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,7), IsNotNull(hd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [hd_demo_sk#4, hd_dep_count#5] + +(6) Filter [codegen id : 1] +Input [2]: [hd_demo_sk#4, hd_dep_count#5] +Condition : ((isnotnull(hd_dep_count#5) AND (hd_dep_count#5 = 7)) AND isnotnull(hd_demo_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [hd_demo_sk#4] +Input [2]: [hd_demo_sk#4, hd_dep_count#5] + +(8) BroadcastExchange +Input [1]: [hd_demo_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_hdemo_sk#2] +Right keys [1]: [hd_demo_sk#4] +Join condition: None + +(10) Project [codegen id : 4] +Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] +Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#4] + +(11) Scan parquet default.time_dim +Output [3]: [t_time_sk#7, t_hour#8, t_minute#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/time_dim] +PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,20), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] + +(13) Filter [codegen id : 2] +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] +Condition : ((((isnotnull(t_hour#8) AND isnotnull(t_minute#9)) AND (t_hour#8 = 20)) AND (t_minute#9 >= 30)) AND isnotnull(t_time_sk#7)) + +(14) Project [codegen id : 2] +Output [1]: [t_time_sk#7] +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] + +(15) BroadcastExchange +Input [1]: [t_time_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] + +(16) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_sold_time_sk#1] +Right keys [1]: [t_time_sk#7] +Join condition: None + +(17) Project [codegen id : 4] +Output [1]: [ss_store_sk#3] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#7] + +(18) Scan parquet default.store +Output [2]: [s_store_sk#11, s_store_name#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)] +ReadSchema: struct + +(19) ColumnarToRow [codegen id : 3] +Input [2]: [s_store_sk#11, s_store_name#12] + +(20) Filter [codegen id : 3] +Input [2]: [s_store_sk#11, s_store_name#12] +Condition : ((isnotnull(s_store_name#12) AND (s_store_name#12 = ese)) AND isnotnull(s_store_sk#11)) + +(21) Project [codegen id : 3] +Output [1]: [s_store_sk#11] +Input [2]: [s_store_sk#11, s_store_name#12] + +(22) BroadcastExchange +Input [1]: [s_store_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(23) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#11] +Join condition: None + +(24) Project [codegen id : 4] +Output: [] +Input [2]: [ss_store_sk#3, s_store_sk#11] + +(25) HashAggregate [codegen id : 4] +Input: [] +Keys: [] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#14] +Results [1]: [count#15] + +(26) Exchange +Input [1]: [count#15] +Arguments: SinglePartition, true, [id=#16] + +(27) HashAggregate [codegen id : 5] +Input [1]: [count#15] +Keys: [] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#17] +Results [1]: [count(1)#17 AS count(1)#18] + +(28) Sort [codegen id : 5] +Input [1]: [count(1)#18] +Arguments: [count(1)#18 ASC NULLS FIRST], true, 0 + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/simplified.txt index 7f1a66829..45400b6c5 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q96/simplified.txt @@ -1,34 +1,41 @@ -TakeOrderedAndProject [count(1)] - WholeStageCodegen - HashAggregate [count,count(1)] [count,count(1),count(1)] +WholeStageCodegen (5) + Sort [count(1)] + HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 - WholeStageCodegen - HashAggregate [count,count] [count,count] + WholeStageCodegen (4) + HashAggregate [count,count] Project - BroadcastHashJoin [s_store_sk,ss_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_store_sk] BroadcastHashJoin [ss_sold_time_sk,t_time_sk] Project [ss_sold_time_sk,ss_store_sk] - BroadcastHashJoin [hd_demo_sk,ss_hdemo_sk] - Project [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] - Scan parquet default.store_sales [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk] + Filter [ss_hdemo_sk,ss_sold_time_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_time_sk,ss_hdemo_sk,ss_store_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen + WholeStageCodegen (1) Project [hd_demo_sk] - Filter [hd_demo_sk,hd_dep_count] - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] [hd_demo_sk,hd_dep_count] + Filter [hd_dep_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] InputAdapter BroadcastExchange #3 - WholeStageCodegen + WholeStageCodegen (2) Project [t_time_sk] Filter [t_hour,t_minute,t_time_sk] - Scan parquet default.time_dim [t_hour,t_minute,t_time_sk] [t_hour,t_minute,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour,t_minute] InputAdapter BroadcastExchange #4 - WholeStageCodegen + WholeStageCodegen (3) Project [s_store_sk] Filter [s_store_name,s_store_sk] - Scan parquet default.store [s_store_name,s_store_sk] [s_store_name,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_name] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/explain.txt index fa7b0e6ae..e904ad94d 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/explain.txt @@ -1,32 +1,174 @@ == Physical Plan == -CollectLimit 100 -+- *(10) HashAggregate(keys=[], functions=[sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint))]) - +- Exchange SinglePartition - +- *(9) HashAggregate(keys=[], functions=[partial_sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnotnull(customer_sk#1) && isnotnull(customer_sk#2)) THEN 1 ELSE 0 END as bigint))]) - +- *(9) Project [customer_sk#1, customer_sk#2] - +- SortMergeJoin [customer_sk#1, item_sk#3], [customer_sk#2, item_sk#4], FullOuter - :- *(4) Sort [customer_sk#1 ASC NULLS FIRST, item_sk#3 ASC NULLS FIRST], false, 0 - : +- Exchange hashpartitioning(customer_sk#1, item_sk#3, 5) - : +- *(3) HashAggregate(keys=[ss_customer_sk#5, ss_item_sk#6], functions=[]) - : +- Exchange hashpartitioning(ss_customer_sk#5, ss_item_sk#6, 5) - : +- *(2) HashAggregate(keys=[ss_customer_sk#5, ss_item_sk#6], functions=[]) - : +- *(2) Project [ss_item_sk#6, ss_customer_sk#5] - : +- *(2) BroadcastHashJoin [ss_sold_date_sk#7], [d_date_sk#8], Inner, BuildRight - : :- *(2) Project [ss_sold_date_sk#7, ss_item_sk#6, ss_customer_sk#5] - : : +- *(2) Filter isnotnull(ss_sold_date_sk#7) - : : +- *(2) FileScan parquet default.store_sales[ss_sold_date_sk#7,ss_item_sk#6,ss_customer_sk#5] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [d_date_sk#8] - : +- *(1) Filter (((isnotnull(d_month_seq#9) && (d_month_seq#9 >= 1200)) && (d_month_seq#9 <= 1211)) && isnotnull(d_date_sk#8)) - : +- *(1) FileScan parquet default.date_dim[d_date_sk#8,d_month_seq#9] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct - +- *(8) Sort [customer_sk#2 ASC NULLS FIRST, item_sk#4 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(customer_sk#2, item_sk#4, 5) - +- *(7) HashAggregate(keys=[cs_bill_customer_sk#10, cs_item_sk#11], functions=[]) - +- Exchange hashpartitioning(cs_bill_customer_sk#10, cs_item_sk#11, 5) - +- *(6) HashAggregate(keys=[cs_bill_customer_sk#10, cs_item_sk#11], functions=[]) - +- *(6) Project [cs_bill_customer_sk#10, cs_item_sk#11] - +- *(6) BroadcastHashJoin [cs_sold_date_sk#12], [d_date_sk#8], Inner, BuildRight - :- *(6) Project [cs_sold_date_sk#12, cs_bill_customer_sk#10, cs_item_sk#11] - : +- *(6) Filter isnotnull(cs_sold_date_sk#12) - : +- *(6) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_bill_customer_sk#10,cs_item_sk#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_sold_date_sk)], ReadSchema: struct - +- ReusedExchange [d_date_sk#8], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) \ No newline at end of file +* HashAggregate (29) ++- Exchange (28) + +- * HashAggregate (27) + +- * Project (26) + +- SortMergeJoin FullOuter (25) + :- * Sort (14) + : +- * HashAggregate (13) + : +- Exchange (12) + : +- * HashAggregate (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (24) + +- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * Project (20) + +- * BroadcastHashJoin Inner BuildRight (19) + :- * Filter (17) + : +- * ColumnarToRow (16) + : +- Scan parquet default.catalog_sales (15) + +- ReusedExchange (18) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] + +(3) Filter [codegen id : 2] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] +Condition : isnotnull(ss_sold_date_sk#1) + +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#4, d_month_seq#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#4, d_month_seq#5] +Condition : (((isnotnull(d_month_seq#5) AND (d_month_seq#5 >= 1200)) AND (d_month_seq#5 <= 1211)) AND isnotnull(d_date_sk#4)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#4] +Input [2]: [d_date_sk#4, d_month_seq#5] + +(8) BroadcastExchange +Input [1]: [d_date_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] + +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(10) Project [codegen id : 2] +Output [2]: [ss_item_sk#2, ss_customer_sk#3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, d_date_sk#4] + +(11) HashAggregate [codegen id : 2] +Input [2]: [ss_item_sk#2, ss_customer_sk#3] +Keys [2]: [ss_customer_sk#3, ss_item_sk#2] +Functions: [] +Aggregate Attributes: [] +Results [2]: [ss_customer_sk#3, ss_item_sk#2] + +(12) Exchange +Input [2]: [ss_customer_sk#3, ss_item_sk#2] +Arguments: hashpartitioning(ss_customer_sk#3, ss_item_sk#2, 5), true, [id=#7] + +(13) HashAggregate [codegen id : 3] +Input [2]: [ss_customer_sk#3, ss_item_sk#2] +Keys [2]: [ss_customer_sk#3, ss_item_sk#2] +Functions: [] +Aggregate Attributes: [] +Results [2]: [ss_customer_sk#3 AS customer_sk#8, ss_item_sk#2 AS item_sk#9] + +(14) Sort [codegen id : 3] +Input [2]: [customer_sk#8, item_sk#9] +Arguments: [customer_sk#8 ASC NULLS FIRST, item_sk#9 ASC NULLS FIRST], false, 0 + +(15) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#10, cs_bill_customer_sk#11, cs_item_sk#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_sold_date_sk)] +ReadSchema: struct + +(16) ColumnarToRow [codegen id : 5] +Input [3]: [cs_sold_date_sk#10, cs_bill_customer_sk#11, cs_item_sk#12] + +(17) Filter [codegen id : 5] +Input [3]: [cs_sold_date_sk#10, cs_bill_customer_sk#11, cs_item_sk#12] +Condition : isnotnull(cs_sold_date_sk#10) + +(18) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#4] + +(19) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_sold_date_sk#10] +Right keys [1]: [d_date_sk#4] +Join condition: None + +(20) Project [codegen id : 5] +Output [2]: [cs_bill_customer_sk#11, cs_item_sk#12] +Input [4]: [cs_sold_date_sk#10, cs_bill_customer_sk#11, cs_item_sk#12, d_date_sk#4] + +(21) HashAggregate [codegen id : 5] +Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] +Keys [2]: [cs_bill_customer_sk#11, cs_item_sk#12] +Functions: [] +Aggregate Attributes: [] +Results [2]: [cs_bill_customer_sk#11, cs_item_sk#12] + +(22) Exchange +Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] +Arguments: hashpartitioning(cs_bill_customer_sk#11, cs_item_sk#12, 5), true, [id=#13] + +(23) HashAggregate [codegen id : 6] +Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] +Keys [2]: [cs_bill_customer_sk#11, cs_item_sk#12] +Functions: [] +Aggregate Attributes: [] +Results [2]: [cs_bill_customer_sk#11 AS customer_sk#14, cs_item_sk#12 AS item_sk#15] + +(24) Sort [codegen id : 6] +Input [2]: [customer_sk#14, item_sk#15] +Arguments: [customer_sk#14 ASC NULLS FIRST, item_sk#15 ASC NULLS FIRST], false, 0 + +(25) SortMergeJoin +Left keys [2]: [customer_sk#8, item_sk#9] +Right keys [2]: [customer_sk#14, item_sk#15] +Join condition: None + +(26) Project [codegen id : 7] +Output [2]: [customer_sk#8, customer_sk#14] +Input [4]: [customer_sk#8, item_sk#9, customer_sk#14, item_sk#15] + +(27) HashAggregate [codegen id : 7] +Input [2]: [customer_sk#8, customer_sk#14] +Keys: [] +Functions [3]: [partial_sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [3]: [sum#16, sum#17, sum#18] +Results [3]: [sum#19, sum#20, sum#21] + +(28) Exchange +Input [3]: [sum#19, sum#20, sum#21] +Arguments: SinglePartition, true, [id=#22] + +(29) HashAggregate [codegen id : 8] +Input [3]: [sum#19, sum#20, sum#21] +Keys: [] +Functions [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25] +Results [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23 AS store_only#26, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24 AS catalog_only#27, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25 AS store_and_catalog#28] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/simplified.txt index 40d75add7..c5921a11c 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q97/simplified.txt @@ -1,48 +1,45 @@ -CollectLimit - WholeStageCodegen - HashAggregate [sum,sum,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] [catalog_only,store_and_catalog,store_only,sum,sum,sum,sum(cast(CASE WHEN (isnotnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) && isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) && isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint))] - InputAdapter - Exchange #1 - WholeStageCodegen - HashAggregate [customer_sk,customer_sk,sum,sum,sum,sum,sum,sum] [sum,sum,sum,sum,sum,sum] - Project [customer_sk,customer_sk] - InputAdapter - SortMergeJoin [customer_sk,customer_sk,item_sk,item_sk] - WholeStageCodegen - Sort [customer_sk,item_sk] +WholeStageCodegen (8) + HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] + Project [customer_sk,customer_sk] + InputAdapter + SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + WholeStageCodegen (3) + Sort [customer_sk,item_sk] + HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] InputAdapter - Exchange [customer_sk,item_sk] #2 - WholeStageCodegen - HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [ss_customer_sk,ss_item_sk] #3 - WholeStageCodegen - HashAggregate [ss_customer_sk,ss_item_sk] - Project [ss_customer_sk,ss_item_sk] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [ss_customer_sk,ss_item_sk,ss_sold_date_sk] - Filter [ss_sold_date_sk] - Scan parquet default.store_sales [ss_customer_sk,ss_item_sk,ss_sold_date_sk] [ss_customer_sk,ss_item_sk,ss_sold_date_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen - Project [d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] - WholeStageCodegen - Sort [customer_sk,item_sk] + Exchange [ss_customer_sk,ss_item_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_customer_sk,ss_item_sk] + Project [ss_item_sk,ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + WholeStageCodegen (6) + Sort [customer_sk,item_sk] + HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] InputAdapter - Exchange [customer_sk,item_sk] #5 - WholeStageCodegen - HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk] #6 - WholeStageCodegen - HashAggregate [cs_bill_customer_sk,cs_item_sk] - Project [cs_bill_customer_sk,cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - Filter [cs_sold_date_sk] - Scan parquet default.catalog_sales [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] [d_date_sk] #4 + Exchange [cs_bill_customer_sk,cs_item_sk] #4 + WholeStageCodegen (5) + HashAggregate [cs_bill_customer_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/explain.txt index d93c3fdc2..11519207d 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/explain.txt @@ -1,26 +1,147 @@ == Physical Plan == -*(7) Project [i_item_desc#1, i_category#2, i_class#3, i_current_price#4, itemrevenue#5, revenueratio#6] -+- *(7) Sort [i_category#2 ASC NULLS FIRST, i_class#3 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#1 ASC NULLS FIRST, revenueratio#6 ASC NULLS FIRST], true, 0 - +- Exchange rangepartitioning(i_category#2 ASC NULLS FIRST, i_class#3 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#1 ASC NULLS FIRST, revenueratio#6 ASC NULLS FIRST, 5) - +- *(6) Project [i_item_desc#1, i_category#2, i_class#3, i_current_price#4, itemrevenue#5, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#8) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#9)), DecimalType(38,17)) AS revenueratio#6, i_item_id#7] - +- Window [sum(_w1#10) windowspecdefinition(i_class#3, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#9], [i_class#3] - +- *(5) Sort [i_class#3 ASC NULLS FIRST], false, 0 - +- Exchange hashpartitioning(i_class#3, 5) - +- *(4) HashAggregate(keys=[i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4], functions=[sum(UnscaledValue(ss_ext_sales_price#11))]) - +- Exchange hashpartitioning(i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4, 5) - +- *(3) HashAggregate(keys=[i_item_id#7, i_item_desc#1, i_category#2, i_class#3, i_current_price#4], functions=[partial_sum(UnscaledValue(ss_ext_sales_price#11))]) - +- *(3) Project [ss_ext_sales_price#11, i_item_id#7, i_item_desc#1, i_current_price#4, i_class#3, i_category#2] - +- *(3) BroadcastHashJoin [ss_sold_date_sk#12], [d_date_sk#13], Inner, BuildRight - :- *(3) Project [ss_sold_date_sk#12, ss_ext_sales_price#11, i_item_id#7, i_item_desc#1, i_current_price#4, i_class#3, i_category#2] - : +- *(3) BroadcastHashJoin [ss_item_sk#14], [i_item_sk#15], Inner, BuildRight - : :- *(3) Project [ss_sold_date_sk#12, ss_item_sk#14, ss_ext_sales_price#11] - : : +- *(3) Filter (isnotnull(ss_item_sk#14) && isnotnull(ss_sold_date_sk#12)) - : : +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#12,ss_item_sk#14,ss_ext_sales_price#11] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(1) Project [i_item_sk#15, i_item_id#7, i_item_desc#1, i_current_price#4, i_class#3, i_category#2] - : +- *(1) Filter (i_category#2 IN (Sports,Books,Home) && isnotnull(i_item_sk#15)) - : +- *(1) FileScan parquet default.item[i_item_sk#15,i_item_id#7,i_item_desc#1,i_current_price#4,i_class#3,i_category#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/item], PartitionFilters: [], PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)], ReadSchema: struct= 10644)) && (d_date#16 <= 10674)) && isnotnull(d_date_sk#13)) - +- *(2) FileScan parquet default.date_dim[d_date_sk#13,d_date#16] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), Is..., ReadSchema: struct \ No newline at end of file +* Project (26) ++- * Sort (25) + +- Exchange (24) + +- * Project (23) + +- Window (22) + +- * Sort (21) + +- Exchange (20) + +- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * BroadcastHashJoin Inner BuildRight (15) + :- * Project (9) + : +- * BroadcastHashJoin Inner BuildRight (8) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.item (4) + +- BroadcastExchange (14) + +- * Project (13) + +- * Filter (12) + +- * ColumnarToRow (11) + +- Scan parquet default.date_dim (10) + + +(1) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] + +(3) Filter [codegen id : 3] +Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) + +(4) Scan parquet default.item +Output [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [In(i_category, [Sports,Books,Home]), IsNotNull(i_item_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Condition : (i_category#9 IN (Sports,Books,Home) AND isnotnull(i_item_sk#4)) + +(7) BroadcastExchange +Input [6]: [i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] + +(8) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#4] +Join condition: None + +(9) Project [codegen id : 3] +Output [7]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#4, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] + +(10) Scan parquet default.date_dim +Output [2]: [d_date_sk#11, d_date#12] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] + +(12) Filter [codegen id : 2] +Input [2]: [d_date_sk#11, d_date#12] +Condition : (((isnotnull(d_date#12) AND (d_date#12 >= 10644)) AND (d_date#12 <= 10674)) AND isnotnull(d_date_sk#11)) + +(13) Project [codegen id : 2] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#12] + +(14) BroadcastExchange +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] + +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#11] +Join condition: None + +(16) Project [codegen id : 3] +Output [6]: [ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Input [8]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9, d_date_sk#11] + +(17) HashAggregate [codegen id : 3] +Input [6]: [ss_ext_sales_price#3, i_item_id#5, i_item_desc#6, i_current_price#7, i_class#8, i_category#9] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum#14] +Results [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] + +(18) Exchange +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] +Arguments: hashpartitioning(i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, 5), true, [id=#16] + +(19) HashAggregate [codegen id : 4] +Input [6]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7, sum#15] +Keys [5]: [i_item_id#5, i_item_desc#6, i_category#9, i_class#8, i_current_price#7] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#17] +Results [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#17,17,2) AS _w1#20, i_item_id#5] + +(20) Exchange +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: hashpartitioning(i_class#8, 5), true, [id=#21] + +(21) Sort [codegen id : 5] +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: [i_class#8 ASC NULLS FIRST], false, 0 + +(22) Window +Input [8]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5] +Arguments: [sum(_w1#20) windowspecdefinition(i_class#8, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#8] + +(23) Project [codegen id : 6] +Output [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2), true) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17), true) AS revenueratio#23, i_item_id#5] +Input [9]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, _w0#19, _w1#20, i_item_id#5, _we0#22] + +(24) Exchange +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23, i_item_id#5] +Arguments: rangepartitioning(i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST, 5), true, [id=#24] + +(25) Sort [codegen id : 7] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23, i_item_id#5] +Arguments: [i_category#9 ASC NULLS FIRST, i_class#8 ASC NULLS FIRST, i_item_id#5 ASC NULLS FIRST, i_item_desc#6 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], true, 0 + +(26) Project [codegen id : 7] +Output [6]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23] +Input [7]: [i_item_desc#6, i_category#9, i_class#8, i_current_price#7, itemrevenue#18, revenueratio#23, i_item_id#5] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/simplified.txt index 914d8934b..2af712236 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q98/simplified.txt @@ -1,38 +1,42 @@ -WholeStageCodegen - Project [i_category,i_class,i_current_price,i_item_desc,itemrevenue,revenueratio] - Sort [i_category,i_class,i_item_desc,i_item_id,revenueratio] +WholeStageCodegen (7) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,revenueratio] + Sort [i_category,i_class,i_item_id,i_item_desc,revenueratio] InputAdapter - Exchange [i_category,i_class,i_item_desc,i_item_id,revenueratio] #1 - WholeStageCodegen - Project [_w0,_we0,i_category,i_class,i_current_price,i_item_desc,i_item_id,itemrevenue] + Exchange [i_category,i_class,i_item_id,i_item_desc,revenueratio] #1 + WholeStageCodegen (6) + Project [i_item_desc,i_category,i_class,i_current_price,itemrevenue,_w0,_we0,i_item_id] InputAdapter Window [_w1,i_class] - WholeStageCodegen + WholeStageCodegen (5) Sort [i_class] InputAdapter Exchange [i_class] #2 - WholeStageCodegen - HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,sum,sum(UnscaledValue(ss_ext_sales_price))] [_w0,_w1,itemrevenue,sum,sum(UnscaledValue(ss_ext_sales_price))] + WholeStageCodegen (4) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,sum] [sum(UnscaledValue(ss_ext_sales_price)),itemrevenue,_w0,_w1,sum] InputAdapter - Exchange [i_category,i_class,i_current_price,i_item_desc,i_item_id] #3 - WholeStageCodegen - HashAggregate [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price,sum,sum] [sum,sum] - Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,ss_ext_sales_price,ss_sold_date_sk] - BroadcastHashJoin [i_item_sk,ss_item_sk] - Project [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - Scan parquet default.store_sales [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] [ss_ext_sales_price,ss_item_sk,ss_sold_date_sk] + Exchange [i_item_id,i_item_desc,i_category,i_class,i_current_price] #3 + WholeStageCodegen (3) + HashAggregate [i_item_id,i_item_desc,i_category,i_class,i_current_price,ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_ext_sales_price,i_item_id,i_item_desc,i_current_price,i_class,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] - Filter [i_category,i_item_sk] - Scan parquet default.item [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] [i_category,i_class,i_current_price,i_item_desc,i_item_id,i_item_sk] + WholeStageCodegen (1) + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc,i_current_price,i_class,i_category] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (2) Project [d_date_sk] Filter [d_date,d_date_sk] - Scan parquet default.date_dim [d_date,d_date_sk] [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/explain.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/explain.txt index 09cba55e4..595cb2984 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/explain.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/explain.txt @@ -1,32 +1,183 @@ == Physical Plan == -TakeOrderedAndProject(limit=100, orderBy=[substring(w_warehouse_name, 1, 20)#1 ASC NULLS FIRST,sm_type#2 ASC NULLS FIRST,cc_name#3 ASC NULLS FIRST], output=[substring(w_warehouse_name, 1, 20)#1,sm_type#2,cc_name#3,30 days #4,31 - 60 days #5,61 - 90 days #6,91 - 120 days #7,>120 days #8]) -+- *(6) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3], functions=[sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 30) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 60) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 90) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) - +- Exchange hashpartitioning(substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3, 5) - +- *(5) HashAggregate(keys=[substring(w_warehouse_name#9, 1, 20) AS substring(w_warehouse_name#9, 1, 20)#10, sm_type#2, cc_name#3], functions=[partial_sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 30) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 60) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 90) && ((cs_ship_date_sk#11 - cs_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((cs_ship_date_sk#11 - cs_sold_date_sk#12) > 120) THEN 1 ELSE 0 END as bigint))]) - +- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, w_warehouse_name#9, sm_type#2, cc_name#3] - +- *(5) BroadcastHashJoin [cs_ship_date_sk#11], [d_date_sk#13], Inner, BuildRight - :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, w_warehouse_name#9, sm_type#2, cc_name#3] - : +- *(5) BroadcastHashJoin [cs_call_center_sk#14], [cc_call_center_sk#15], Inner, BuildRight - : :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, cs_call_center_sk#14, w_warehouse_name#9, sm_type#2] - : : +- *(5) BroadcastHashJoin [cs_ship_mode_sk#16], [sm_ship_mode_sk#17], Inner, BuildRight - : : :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, cs_call_center_sk#14, cs_ship_mode_sk#16, w_warehouse_name#9] - : : : +- *(5) BroadcastHashJoin [cs_warehouse_sk#18], [w_warehouse_sk#19], Inner, BuildRight - : : : :- *(5) Project [cs_sold_date_sk#12, cs_ship_date_sk#11, cs_call_center_sk#14, cs_ship_mode_sk#16, cs_warehouse_sk#18] - : : : : +- *(5) Filter (((isnotnull(cs_warehouse_sk#18) && isnotnull(cs_ship_mode_sk#16)) && isnotnull(cs_call_center_sk#14)) && isnotnull(cs_ship_date_sk#11)) - : : : : +- *(5) FileScan parquet default.catalog_sales[cs_sold_date_sk#12,cs_ship_date_sk#11,cs_call_center_sk#14,cs_ship_mode_sk#16,cs_warehouse_sk#18] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/catalog_sales], PartitionFilters: [], PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_ship_mode_sk), IsNotNull(cs_call_center_sk), IsNotNull(..., ReadSchema: struct - : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : : +- *(2) Project [sm_ship_mode_sk#17, sm_type#2] - : : +- *(2) Filter isnotnull(sm_ship_mode_sk#17) - : : +- *(2) FileScan parquet default.ship_mode[sm_ship_mode_sk#17,sm_type#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/ship_mode], PartitionFilters: [], PushedFilters: [IsNotNull(sm_ship_mode_sk)], ReadSchema: struct - : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - : +- *(3) Project [cc_call_center_sk#15, cc_name#3] - : +- *(3) Filter isnotnull(cc_call_center_sk#15) - : +- *(3) FileScan parquet default.call_center[cc_call_center_sk#15,cc_name#3] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/call_center], PartitionFilters: [], PushedFilters: [IsNotNull(cc_call_center_sk)], ReadSchema: struct - +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))) - +- *(4) Project [d_date_sk#13] - +- *(4) Filter (((isnotnull(d_month_seq#20) && (d_month_seq#20 >= 1200)) && (d_month_seq#20 <= 1211)) && isnotnull(d_date_sk#13)) - +- *(4) FileScan parquet default.date_dim[d_date_sk#13,d_month_seq#20] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211),..., ReadSchema: struct \ No newline at end of file +TakeOrderedAndProject (32) ++- * HashAggregate (31) + +- Exchange (30) + +- * HashAggregate (29) + +- * Project (28) + +- * BroadcastHashJoin Inner BuildRight (27) + :- * Project (21) + : +- * BroadcastHashJoin Inner BuildRight (20) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- * Project (9) + : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- BroadcastExchange (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.warehouse (4) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.ship_mode (10) + : +- BroadcastExchange (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.call_center (16) + +- BroadcastExchange (26) + +- * Project (25) + +- * Filter (24) + +- * ColumnarToRow (23) + +- Scan parquet default.date_dim (22) + + +(1) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_ship_mode_sk), IsNotNull(cs_call_center_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 5] +Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5] + +(3) Filter [codegen id : 5] +Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5] +Condition : (((isnotnull(cs_warehouse_sk#5) AND isnotnull(cs_ship_mode_sk#4)) AND isnotnull(cs_call_center_sk#3)) AND isnotnull(cs_ship_date_sk#2)) + +(4) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] + +(6) Filter [codegen id : 1] +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Condition : isnotnull(w_warehouse_sk#6) + +(7) BroadcastExchange +Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] + +(8) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_warehouse_sk#5] +Right keys [1]: [w_warehouse_sk#6] +Join condition: None + +(9) Project [codegen id : 5] +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, w_warehouse_name#7] +Input [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5, w_warehouse_sk#6, w_warehouse_name#7] + +(10) Scan parquet default.ship_mode +Output [2]: [sm_ship_mode_sk#9, sm_type#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/ship_mode] +PushedFilters: [IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct + +(11) ColumnarToRow [codegen id : 2] +Input [2]: [sm_ship_mode_sk#9, sm_type#10] + +(12) Filter [codegen id : 2] +Input [2]: [sm_ship_mode_sk#9, sm_type#10] +Condition : isnotnull(sm_ship_mode_sk#9) + +(13) BroadcastExchange +Input [2]: [sm_ship_mode_sk#9, sm_type#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] + +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_ship_mode_sk#4] +Right keys [1]: [sm_ship_mode_sk#9] +Join condition: None + +(15) Project [codegen id : 5] +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, w_warehouse_name#7, sm_type#10] +Input [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, w_warehouse_name#7, sm_ship_mode_sk#9, sm_type#10] + +(16) Scan parquet default.call_center +Output [2]: [cc_call_center_sk#12, cc_name#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/call_center] +PushedFilters: [IsNotNull(cc_call_center_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 3] +Input [2]: [cc_call_center_sk#12, cc_name#13] + +(18) Filter [codegen id : 3] +Input [2]: [cc_call_center_sk#12, cc_name#13] +Condition : isnotnull(cc_call_center_sk#12) + +(19) BroadcastExchange +Input [2]: [cc_call_center_sk#12, cc_name#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] + +(20) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_call_center_sk#3] +Right keys [1]: [cc_call_center_sk#12] +Join condition: None + +(21) Project [codegen id : 5] +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#7, sm_type#10, cc_name#13] +Input [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, w_warehouse_name#7, sm_type#10, cc_call_center_sk#12, cc_name#13] + +(22) Scan parquet default.date_dim +Output [2]: [d_date_sk#15, d_month_seq#16] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct + +(23) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#15, d_month_seq#16] + +(24) Filter [codegen id : 4] +Input [2]: [d_date_sk#15, d_month_seq#16] +Condition : (((isnotnull(d_month_seq#16) AND (d_month_seq#16 >= 1200)) AND (d_month_seq#16 <= 1211)) AND isnotnull(d_date_sk#15)) + +(25) Project [codegen id : 4] +Output [1]: [d_date_sk#15] +Input [2]: [d_date_sk#15, d_month_seq#16] + +(26) BroadcastExchange +Input [1]: [d_date_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] + +(27) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [cs_ship_date_sk#2] +Right keys [1]: [d_date_sk#15] +Join condition: None + +(28) Project [codegen id : 5] +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#7, sm_type#10, cc_name#13] +Input [6]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#7, sm_type#10, cc_name#13, d_date_sk#15] + +(29) HashAggregate [codegen id : 5] +Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#7, sm_type#10, cc_name#13] +Keys [3]: [substr(w_warehouse_name#7, 1, 20) AS substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13] +Functions [5]: [partial_sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] +Results [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] + +(30) Exchange +Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] +Arguments: hashpartitioning(substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, 5), true, [id=#29] + +(31) HashAggregate [codegen id : 6] +Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] +Keys [3]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13] +Functions [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34] +Results [8]: [substr(w_warehouse_name#7, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] + +(32) TakeOrderedAndProject +Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] +Arguments: 100, [substr(w_warehouse_name, 1, 20)#35 ASC NULLS FIRST, sm_type#10 ASC NULLS FIRST, cc_name#13 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] + diff --git a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/simplified.txt b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/simplified.txt index 4e22db2fe..9ebaaac52 100644 --- a/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/simplified.txt +++ b/src/test/resources/tpcds/spark-2.4/approved-plans-v1_4/q99/simplified.txt @@ -1,42 +1,48 @@ -TakeOrderedAndProject [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,cc_name,sm_type,substring(w_warehouse_name, 1, 20)] - WholeStageCodegen - HashAggregate [cc_name,sm_type,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint))] [30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) && ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) && ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) && ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint))] +TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,cc_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] + WholeStageCodegen (6) + HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,cc_name,sum,sum,sum,sum,sum] [sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] InputAdapter - Exchange [cc_name,sm_type,substring(w_warehouse_name, 1, 20)] #1 - WholeStageCodegen - HashAggregate [cc_name,cs_ship_date_sk,cs_sold_date_sk,sm_type,substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,w_warehouse_name] [substring(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [cc_name,cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name] + Exchange [substr(w_warehouse_name, 1, 20),sm_type,cc_name] #1 + WholeStageCodegen (5) + HashAggregate [w_warehouse_name,sm_type,cc_name,cs_ship_date_sk,cs_sold_date_sk] [sum,sum,sum,sum,sum,substr(w_warehouse_name, 1, 20),sum,sum,sum,sum,sum] + Project [cs_sold_date_sk,cs_ship_date_sk,w_warehouse_name,sm_type,cc_name] BroadcastHashJoin [cs_ship_date_sk,d_date_sk] - Project [cc_name,cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name] - BroadcastHashJoin [cc_call_center_sk,cs_call_center_sk] - Project [cs_call_center_sk,cs_ship_date_sk,cs_sold_date_sk,sm_type,w_warehouse_name] + Project [cs_sold_date_sk,cs_ship_date_sk,w_warehouse_name,sm_type,cc_name] + BroadcastHashJoin [cs_call_center_sk,cc_call_center_sk] + Project [cs_sold_date_sk,cs_ship_date_sk,cs_call_center_sk,w_warehouse_name,sm_type] BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] - Project [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,w_warehouse_name] + Project [cs_sold_date_sk,cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,w_warehouse_name] BroadcastHashJoin [cs_warehouse_sk,w_warehouse_sk] - Project [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,cs_warehouse_sk] - Filter [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_warehouse_sk] - Scan parquet default.catalog_sales [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,cs_warehouse_sk] [cs_call_center_sk,cs_ship_date_sk,cs_ship_mode_sk,cs_sold_date_sk,cs_warehouse_sk] + Filter [cs_warehouse_sk,cs_ship_mode_sk,cs_call_center_sk,cs_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk] InputAdapter BroadcastExchange #2 - WholeStageCodegen - Project [w_warehouse_name,w_warehouse_sk] - Filter [w_warehouse_sk] - Scan parquet default.warehouse [w_warehouse_name,w_warehouse_sk] [w_warehouse_name,w_warehouse_sk] + WholeStageCodegen (1) + Filter [w_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] InputAdapter BroadcastExchange #3 - WholeStageCodegen - Project [sm_ship_mode_sk,sm_type] - Filter [sm_ship_mode_sk] - Scan parquet default.ship_mode [sm_ship_mode_sk,sm_type] [sm_ship_mode_sk,sm_type] + WholeStageCodegen (2) + Filter [sm_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet default.ship_mode [sm_ship_mode_sk,sm_type] InputAdapter BroadcastExchange #4 - WholeStageCodegen - Project [cc_call_center_sk,cc_name] - Filter [cc_call_center_sk] - Scan parquet default.call_center [cc_call_center_sk,cc_name] [cc_call_center_sk,cc_name] + WholeStageCodegen (3) + Filter [cc_call_center_sk] + ColumnarToRow + InputAdapter + Scan parquet default.call_center [cc_call_center_sk,cc_name] InputAdapter BroadcastExchange #5 - WholeStageCodegen + WholeStageCodegen (4) Project [d_date_sk] - Filter [d_date_sk,d_month_seq] - Scan parquet default.date_dim [d_date_sk,d_month_seq] [d_date_sk,d_month_seq] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala index 31d742ce7..e644621cb 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala @@ -1,5 +1,21 @@ +/* + * Copyright (2020) The Hyperspace Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package com.microsoft.hyperspace.goldstandard -// scalastyle:off + import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, PartitioningAwareFileIndex} diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/MockSignatureProvider.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/MockSignatureProvider.scala index d08e5e4f5..6aab43c1a 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/MockSignatureProvider.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/MockSignatureProvider.scala @@ -1,3 +1,19 @@ +/* + * Copyright (2020) The Hyperspace Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package com.microsoft.hyperspace.goldstandard import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala index e902bfbc7..225978c73 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala @@ -1,3 +1,19 @@ +/* + * Copyright (2020) The Hyperspace Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package com.microsoft.hyperspace.goldstandard import java.io.File From 1fb6f4bab9333f1804af6cc58b219a77998b0bb2 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Thu, 18 Mar 2021 13:54:01 -0700 Subject: [PATCH 26/31] cleanup --- .../hyperspace/goldstandard/TPCDSBase.scala | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala index b743db6dd..601a30c83 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDSBase.scala @@ -39,16 +39,6 @@ trait TPCDSBase extends SparkFunSuite with SparkInvolvedSuite { // The TPCDS queries below are based on v1.4. // TODO: Fix bulid pipeline for q49 and reenable q49. val tpcdsQueries = Seq("q0") - /* ("q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", - "q12", "q13", "q14a", "q14b", "q15", "q16", "q17", "q18", "q19", "q20", - "q21", "q22", "q23a", "q23b", "q24a", "q24b", "q25", "q26", "q27", "q28", "q29", "q30", - "q31", "q32", "q33", "q34", "q35", "q36", "q37", "q38", "q39a", "q39b", "q40", - "q41", "q42", "q43", "q44", "q45", "q46", "q47", "q48", "q50", - "q51", "q52", "q53", "q54", "q55", "q56", "q57", "q58", "q59", "q60", - "q61", "q62", "q63", "q64", "q65", "q66", "q67", "q68", "q69", "q70", - "q71", "q72", "q73", "q74", "q75", "q76", "q77", "q78", "q79", "q80", - "q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90", - "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99") */ private val tableColumns = Map( "store_sales" -> @@ -572,9 +562,9 @@ trait TPCDSBase extends SparkFunSuite with SparkInvolvedSuite { } override def afterAll(): Unit = { - //tableNames.foreach { tableName => - // spark.sessionState.catalog.dropTable(TableIdentifier(tableName), true, true) - //} + tableNames.foreach { tableName => + spark.sessionState.catalog.dropTable(TableIdentifier(tableName), true, true) + } super.afterAll() } } From e4ad3658d26f512e6c711e329f418215f7f11ea6 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Fri, 19 Mar 2021 11:07:18 -0700 Subject: [PATCH 27/31] code cleanup --- .../goldstandard/IndexLogEntryCreator.scala | 60 ++++++++----------- .../goldstandard/MockSignatureProvider.scala | 4 ++ .../goldstandard/TPCDS_Hyperspace.scala | 2 +- 3 files changed, 31 insertions(+), 35 deletions(-) diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala index e644621cb..fa752a97d 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala @@ -16,41 +16,34 @@ package com.microsoft.hyperspace.goldstandard -import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path +import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, PartitioningAwareFileIndex} import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.{DataFrame, SparkSession} +import com.microsoft.hyperspace.HyperspaceException import com.microsoft.hyperspace.actions.Constants -import com.microsoft.hyperspace.example.SparkApp import com.microsoft.hyperspace.index._ -import com.microsoft.hyperspace.util.{JsonUtils, PathUtils} +import com.microsoft.hyperspace.util.PathUtils -object IndexLogEntryCreator extends SparkApp { - def createIndex(str: String): Unit = { - val splits = str.split(";") - val name = splits(0) +object IndexLogEntryCreator { + def createIndex(indexDefinition: String, spark: SparkSession): Unit = { + val splits = indexDefinition.split(";") + val indexName = splits(0) + val tableName = splits(1) val indexCols = splits(2).split(",").toSeq val includedCols = splits(3).split(",").toSeq - val config = IndexConfig(name, indexCols, includedCols) - val table = splits(1) + val indexConfig = IndexConfig(indexName, indexCols, includedCols) val indexPath = { - val baseResourcePath = { - // use the same way as `SQLQueryTestSuite` to get the resource path - java.nio.file.Paths.get("src", "test", "resources", "tpcds").toFile - } - PathUtils - .makeAbsolute( - new Path(baseResourcePath.toString, s"hyperspace/indexes/$name"), - new Configuration) + PathUtils.makeAbsolute(s"${spark.conf.get(IndexConstants.INDEX_SYSTEM_PATH)}/$indexName") } - new IndexLogManagerImpl(indexPath) - .writeLog(0, toIndex(config, s"${indexPath.toString}/v__=0", table, spark)) + val entry = getIndexLogEntry(indexConfig, indexPath, tableName, spark) + assert(new IndexLogManagerImpl(indexPath).writeLog(0, entry)) } - def toRelation(sourceDf: DataFrame): Option[Relation] = { + private def toRelation(sourceDf: DataFrame): Relation = { val leafPlans = sourceDf.queryExecution.optimizedPlan.collectLeaves() + assert(leafPlans.size == 1) leafPlans.head match { case LogicalRelation( HadoopFsRelation(location: PartitioningAwareFileIndex, _, dataSchema, _, _, _), @@ -59,30 +52,29 @@ object IndexLogEntryCreator extends SparkApp { _) => val sourceDataProperties = Hdfs.Properties(Content.fromDirectory(location.rootPaths.head, new FileIdTracker)) - val fileFormatName = "parquet" - Some( - Relation( - location.rootPaths.map(_.toString), - Hdfs(sourceDataProperties), - dataSchema.json, - fileFormatName, - Map.empty)) - case _ => None + Relation( + location.rootPaths.map(_.toString), + Hdfs(sourceDataProperties), + dataSchema.json, + "parquet", + Map.empty) + case _ => throw HyperspaceException("Unexpected relation found.") } } - def toIndex( + private def getIndexLogEntry( config: IndexConfig, - path: String, + indexPath: Path, tableName: String, spark: SparkSession): IndexLogEntry = { + val indexRootPath = new Path(indexPath, "v__=0") val sourceDf = spark.table(tableName) val indexSchema = { val allCols = config.indexedColumns ++ config.includedColumns StructType(sourceDf.schema.filter(f => allCols.contains(f.name))) } - val relation: Relation = toRelation(sourceDf).get + val relation = toRelation(sourceDf) val sourcePlanProperties = SparkPlan.Properties( Seq(relation), @@ -101,7 +93,7 @@ object IndexLogEntryCreator extends SparkApp { IndexLogEntry.schemaString(indexSchema), 200, Map("hasParquetAsSourceFormat" -> "true"))), - Content(Directory.fromDirectory(new Path(path), new FileIdTracker)), + Content(Directory.fromDirectory(indexRootPath, new FileIdTracker)), Source(SparkPlan(sourcePlanProperties)), Map()) entry.state = Constants.States.ACTIVE diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/MockSignatureProvider.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/MockSignatureProvider.scala index 6aab43c1a..2730b5180 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/MockSignatureProvider.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/MockSignatureProvider.scala @@ -21,6 +21,10 @@ import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRela import com.microsoft.hyperspace.index.LogicalPlanSignatureProvider +/** + * MockSignatureProvider is used in TPCDS_Hyperspace plan stability suite. It returns the + * table name of the source TPCDS table on which the index is created. + */ class MockSignatureProvider extends LogicalPlanSignatureProvider { override def signature(logicalPlan: LogicalPlan): Option[String] = { val leafPlans = logicalPlan.collectLeaves() diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala index 225978c73..46f175e57 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala @@ -49,7 +49,7 @@ class TPCDS_Hyperspace extends PlanStabilitySuite { val indexes = Seq( "dtindex;date_dim;d_date_sk;d_year", "ssIndex;store_sales;ss_sold_date_sk;ss_customer_sk") - indexes.foreach(createIndex) + indexes.foreach(createIndex(_, spark)) // Enable cross join because some queries fail during query optimization phase. withSQLConf( From cb211fb985e32f6b228542fa77ef394de43be1c8 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Mon, 22 Mar 2021 11:24:00 -0700 Subject: [PATCH 28/31] diffs added wrt spark --- .../hyperspace/diffsWithSpark/q0/explain.txt | 28 ++++++++++++++ .../diffsWithSpark/q0/simplified.txt | 38 +++++++++++++++++++ .../withoutHyperspace/q0/explain.txt | 14 +++++++ .../withoutHyperspace/q0/simplified.txt | 22 +++++++++++ 4 files changed, 102 insertions(+) create mode 100644 src/test/resources/tpcds/hyperspace/diffsWithSpark/q0/explain.txt create mode 100644 src/test/resources/tpcds/hyperspace/diffsWithSpark/q0/simplified.txt create mode 100644 src/test/resources/tpcds/hyperspace/withoutHyperspace/q0/explain.txt create mode 100644 src/test/resources/tpcds/hyperspace/withoutHyperspace/q0/simplified.txt diff --git a/src/test/resources/tpcds/hyperspace/diffsWithSpark/q0/explain.txt b/src/test/resources/tpcds/hyperspace/diffsWithSpark/q0/explain.txt new file mode 100644 index 000000000..a9adddcd4 --- /dev/null +++ b/src/test/resources/tpcds/hyperspace/diffsWithSpark/q0/explain.txt @@ -0,0 +1,28 @@ +1,14c1,10 +< == Physical Plan == +< CollectLimit 100 +< +- *(5) Project [d_year#1, ss_customer_sk#2] +< +- *(5) SortMergeJoin [d_date_sk#3], [ss_sold_date_sk#4], Inner +< :- *(2) Sort [d_date_sk#3 ASC NULLS FIRST], false, 0 +< : +- Exchange hashpartitioning(d_date_sk#3, 200) +< : +- *(1) Project [d_date_sk#3, d_year#1] +< : +- *(1) Filter isnotnull(d_date_sk#3) +< : +- *(1) FileScan parquet default.date_dim[d_date_sk#3,d_year#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct +< +- *(4) Sort [ss_sold_date_sk#4 ASC NULLS FIRST], false, 0 +< +- Exchange hashpartitioning(ss_sold_date_sk#4, 200) +< +- *(3) Project [ss_sold_date_sk#4, ss_customer_sk#2] +< +- *(3) Filter isnotnull(ss_sold_date_sk#4) +< +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#4,ss_customer_sk#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct +\ No newline at end of file +--- +> == Physical Plan == +> CollectLimit 100 +> +- *(3) Project [d_year#1, ss_customer_sk#2] +> +- *(3) SortMergeJoin [d_date_sk#3], [ss_sold_date_sk#4], Inner +> :- *(1) Project [d_date_sk#3, d_year#1] +> : +- *(1) Filter isnotnull(d_date_sk#3) +> : +- *(1) FileScan Hyperspace(Type: CI, Name: dtindex, LogVersion: 0) default.date_dim[d_date_sk#3,d_year#1] Batched: true, Format: Parquet, Location: InMemoryFileIndex[], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct, SelectedBucketsCount: 200 out of 200 +> +- *(2) Project [ss_sold_date_sk#4, ss_customer_sk#2] +> +- *(2) Filter isnotnull(ss_sold_date_sk#4) +> +- *(2) FileScan Hyperspace(Type: CI, Name: ssIndex, LogVersion: 0) default.store_sales[ss_sold_date_sk#4,ss_customer_sk#2] Batched: true, Format: Parquet, Location: InMemoryFileIndex[], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct, SelectedBucketsCount: 200 out of 200 +\ No newline at end of file diff --git a/src/test/resources/tpcds/hyperspace/diffsWithSpark/q0/simplified.txt b/src/test/resources/tpcds/hyperspace/diffsWithSpark/q0/simplified.txt new file mode 100644 index 000000000..edb77f0d1 --- /dev/null +++ b/src/test/resources/tpcds/hyperspace/diffsWithSpark/q0/simplified.txt @@ -0,0 +1,38 @@ +1,22c1,14 +< CollectLimit +< WholeStageCodegen +< Project [d_year,ss_customer_sk] +< SortMergeJoin [d_date_sk,ss_sold_date_sk] +< InputAdapter +< WholeStageCodegen +< Sort [d_date_sk] +< InputAdapter +< Exchange [d_date_sk] #1 +< WholeStageCodegen +< Project [d_date_sk,d_year] +< Filter [d_date_sk] +< Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] +< InputAdapter +< WholeStageCodegen +< Sort [ss_sold_date_sk] +< InputAdapter +< Exchange [ss_sold_date_sk] #2 +< WholeStageCodegen +< Project [ss_customer_sk,ss_sold_date_sk] +< Filter [ss_sold_date_sk] +< Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] +--- +> CollectLimit +> WholeStageCodegen +> Project [d_year,ss_customer_sk] +> SortMergeJoin [d_date_sk,ss_sold_date_sk] +> InputAdapter +> WholeStageCodegen +> Project [d_date_sk,d_year] +> Filter [d_date_sk] +> Scan Hyperspace(Type: CI, Name: dtindex, LogVersion: 0) default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] +> InputAdapter +> WholeStageCodegen +> Project [ss_customer_sk,ss_sold_date_sk] +> Filter [ss_sold_date_sk] +> Scan Hyperspace(Type: CI, Name: ssIndex, LogVersion: 0) default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] diff --git a/src/test/resources/tpcds/hyperspace/withoutHyperspace/q0/explain.txt b/src/test/resources/tpcds/hyperspace/withoutHyperspace/q0/explain.txt new file mode 100644 index 000000000..eb4415dd9 --- /dev/null +++ b/src/test/resources/tpcds/hyperspace/withoutHyperspace/q0/explain.txt @@ -0,0 +1,14 @@ +== Physical Plan == +CollectLimit 100 ++- *(5) Project [d_year#1, ss_customer_sk#2] + +- *(5) SortMergeJoin [d_date_sk#3], [ss_sold_date_sk#4], Inner + :- *(2) Sort [d_date_sk#3 ASC NULLS FIRST], false, 0 + : +- Exchange hashpartitioning(d_date_sk#3, 200) + : +- *(1) Project [d_date_sk#3, d_year#1] + : +- *(1) Filter isnotnull(d_date_sk#3) + : +- *(1) FileScan parquet default.date_dim[d_date_sk#3,d_year#1] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/date_dim], PartitionFilters: [], PushedFilters: [IsNotNull(d_date_sk)], ReadSchema: struct + +- *(4) Sort [ss_sold_date_sk#4 ASC NULLS FIRST], false, 0 + +- Exchange hashpartitioning(ss_sold_date_sk#4, 200) + +- *(3) Project [ss_sold_date_sk#4, ss_customer_sk#2] + +- *(3) Filter isnotnull(ss_sold_date_sk#4) + +- *(3) FileScan parquet default.store_sales[ss_sold_date_sk#4,ss_customer_sk#2] Batched: true, Format: Parquet, Location [not included in comparison]/{warehouse_dir}/store_sales], PartitionFilters: [], PushedFilters: [IsNotNull(ss_sold_date_sk)], ReadSchema: struct \ No newline at end of file diff --git a/src/test/resources/tpcds/hyperspace/withoutHyperspace/q0/simplified.txt b/src/test/resources/tpcds/hyperspace/withoutHyperspace/q0/simplified.txt new file mode 100644 index 000000000..b5cd777ca --- /dev/null +++ b/src/test/resources/tpcds/hyperspace/withoutHyperspace/q0/simplified.txt @@ -0,0 +1,22 @@ +CollectLimit + WholeStageCodegen + Project [d_year,ss_customer_sk] + SortMergeJoin [d_date_sk,ss_sold_date_sk] + InputAdapter + WholeStageCodegen + Sort [d_date_sk] + InputAdapter + Exchange [d_date_sk] #1 + WholeStageCodegen + Project [d_date_sk,d_year] + Filter [d_date_sk] + Scan parquet default.date_dim [d_date_sk,d_year] [d_date_sk,d_year] + InputAdapter + WholeStageCodegen + Sort [ss_sold_date_sk] + InputAdapter + Exchange [ss_sold_date_sk] #2 + WholeStageCodegen + Project [ss_customer_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_sold_date_sk] [ss_customer_sk,ss_sold_date_sk] From 659d65c3e3a6261d6d8a4943883e7171101407fa Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Mon, 22 Mar 2021 11:54:01 -0700 Subject: [PATCH 29/31] review comments and refactor --- .../goldstandard/IndexLogEntryCreator.scala | 33 ++++++++--------- .../goldstandard/TPCDS_Hyperspace.scala | 35 +++++++++++++++---- 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala index fa752a97d..b27f752a5 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala @@ -27,17 +27,10 @@ import com.microsoft.hyperspace.index._ import com.microsoft.hyperspace.util.PathUtils object IndexLogEntryCreator { - def createIndex(indexDefinition: String, spark: SparkSession): Unit = { - val splits = indexDefinition.split(";") - val indexName = splits(0) - val tableName = splits(1) - val indexCols = splits(2).split(",").toSeq - val includedCols = splits(3).split(",").toSeq - val indexConfig = IndexConfig(indexName, indexCols, includedCols) - val indexPath = { - PathUtils.makeAbsolute(s"${spark.conf.get(IndexConstants.INDEX_SYSTEM_PATH)}/$indexName") - } - val entry = getIndexLogEntry(indexConfig, indexPath, tableName, spark) + def createIndex(index: IndexDefinition, spark: SparkSession): Unit = { + val indexPath = + PathUtils.makeAbsolute(s"${spark.conf.get(IndexConstants.INDEX_SYSTEM_PATH)}/${index.name}") + val entry = getIndexLogEntry(index, indexPath, spark) assert(new IndexLogManagerImpl(indexPath).writeLog(0, entry)) } @@ -64,14 +57,13 @@ object IndexLogEntryCreator { } private def getIndexLogEntry( - config: IndexConfig, + index: IndexDefinition, indexPath: Path, - tableName: String, spark: SparkSession): IndexLogEntry = { val indexRootPath = new Path(indexPath, "v__=0") - val sourceDf = spark.table(tableName) + val sourceDf = spark.table(index.tableName) val indexSchema = { - val allCols = config.indexedColumns ++ config.includedColumns + val allCols = index.indexedCols ++ index.includedCols StructType(sourceDf.schema.filter(f => allCols.contains(f.name))) } val relation = toRelation(sourceDf) @@ -81,15 +73,18 @@ object IndexLogEntryCreator { null, null, LogicalPlanFingerprint( - LogicalPlanFingerprint.Properties(Seq( - Signature("com.microsoft.hyperspace.goldstandard.MockSignatureProvider", tableName))))) + LogicalPlanFingerprint.Properties( + Seq( + Signature( + "com.microsoft.hyperspace.goldstandard.MockSignatureProvider", + index.tableName))))) val entry = IndexLogEntry( - config.indexName, + index.name, CoveringIndex( CoveringIndex.Properties( CoveringIndex.Properties - .Columns(config.indexedColumns, config.includedColumns), + .Columns(index.indexedCols, index.includedCols), IndexLogEntry.schemaString(indexSchema), 200, Map("hasParquetAsSourceFormat" -> "true"))), diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala index 46f175e57..bf0b677b4 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala @@ -36,6 +36,11 @@ class TPCDS_Hyperspace extends PlanStabilitySuite { super.beforeAll() spark.conf.set(INDEX_SYSTEM_PATH, indexSystemPath) spark.enableHyperspace() + + val indexes = Seq( + "dtindex;date_dim;d_date_sk;d_year", + "ssIndex;store_sales;ss_sold_date_sk;ss_customer_sk") + indexes.foreach(i => createIndex(IndexDefinition.fromString(i), spark)) } override def afterAll(): Unit = { @@ -45,12 +50,6 @@ class TPCDS_Hyperspace extends PlanStabilitySuite { tpcdsQueries.foreach { q => test(s"check simplified (tpcds-v1.4/$q)") { - - val indexes = Seq( - "dtindex;date_dim;d_date_sk;d_year", - "ssIndex;store_sales;ss_sold_date_sk;ss_customer_sk") - indexes.foreach(createIndex(_, spark)) - // Enable cross join because some queries fail during query optimization phase. withSQLConf( ("spark.sql.crossJoin.enabled" -> "true"), @@ -60,3 +59,27 @@ class TPCDS_Hyperspace extends PlanStabilitySuite { } } } + +case class IndexDefinition( + name: String, + indexedCols: Seq[String], + includedCols: Seq[String], + tableName: String) { +} + +object IndexDefinition { + /** + * Index definition from conf files should be provided in the following format: + * "index-name;table-name;comma-separated-indexed-cols;comma-separated-included-cols" + * @param definition: Index definition in string representation mentioned above. + * @return IndexDefinition. + */ + def fromString(definition: String): IndexDefinition = { + val splits = definition.split(";") + val indexName = splits(0) + val tableName = splits(1) + val indexCols = splits(2).split(",").toSeq + val includedCols = splits(3).split(",").toSeq + IndexDefinition(indexName, indexCols, includedCols, tableName) + } +} From 40864ae09ee5dcfc4becd0a0c2fa9086dcb342a1 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Mon, 22 Mar 2021 14:18:21 -0700 Subject: [PATCH 30/31] remove CR and LF chars when comparing plans --- .../hyperspace/goldstandard/PlanStabilitySuite.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala index 00358ebf4..f4b7079d1 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala @@ -99,7 +99,9 @@ trait PlanStabilitySuite extends TPCDSBase with SQLHelper with Logging { private def isApproved(dir: File, actualSimplifiedPlan: String): Boolean = { val file = new File(dir, "simplified.txt") val expected = FileUtils.readFileToString(file, StandardCharsets.UTF_8) - expected == actualSimplifiedPlan + expected.replaceAll("\r", "").replaceAll("\n", "") == actualSimplifiedPlan + .replaceAll("\r", "") + .replaceAll("\n", "") } /** From ad399a3fd495d2aee716ced8c30924f907699812 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Tue, 23 Mar 2021 11:03:11 -0700 Subject: [PATCH 31/31] review comments --- .../hyperspace/goldstandard/IndexLogEntryCreator.scala | 2 +- .../hyperspace/goldstandard/MockSignatureProvider.scala | 2 ++ .../hyperspace/goldstandard/PlanStabilitySuite.scala | 4 +--- .../microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala | 5 +++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala index b27f752a5..a33d929c7 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/IndexLogEntryCreator.scala @@ -87,7 +87,7 @@ object IndexLogEntryCreator { .Columns(index.indexedCols, index.includedCols), IndexLogEntry.schemaString(indexSchema), 200, - Map("hasParquetAsSourceFormat" -> "true"))), + Map(IndexConstants.HAS_PARQUET_AS_SOURCE_FORMAT_PROPERTY -> "true"))), Content(Directory.fromDirectory(indexRootPath, new FileIdTracker)), Source(SparkPlan(sourcePlanProperties)), Map()) diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/MockSignatureProvider.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/MockSignatureProvider.scala index 2730b5180..a18067e12 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/MockSignatureProvider.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/MockSignatureProvider.scala @@ -19,6 +19,7 @@ package com.microsoft.hyperspace.goldstandard import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, PartitioningAwareFileIndex} +import com.microsoft.hyperspace.HyperspaceException import com.microsoft.hyperspace.index.LogicalPlanSignatureProvider /** @@ -37,6 +38,7 @@ class MockSignatureProvider extends LogicalPlanSignatureProvider { _, _) => Some(location.rootPaths.head.getName) + case _ => throw HyperspaceException("Unexpected logical plan found.") } } } diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala index f4b7079d1..1d954f4a1 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/PlanStabilitySuite.scala @@ -99,9 +99,7 @@ trait PlanStabilitySuite extends TPCDSBase with SQLHelper with Logging { private def isApproved(dir: File, actualSimplifiedPlan: String): Boolean = { val file = new File(dir, "simplified.txt") val expected = FileUtils.readFileToString(file, StandardCharsets.UTF_8) - expected.replaceAll("\r", "").replaceAll("\n", "") == actualSimplifiedPlan - .replaceAll("\r", "") - .replaceAll("\n", "") + expected.replaceAll("[\\r\\n]+", "") == actualSimplifiedPlan.replaceAll("[\\r\\n]+", "") } /** diff --git a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala index bf0b677b4..3b6938db4 100644 --- a/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala +++ b/src/test/scala/com/microsoft/hyperspace/goldstandard/TPCDS_Hyperspace.scala @@ -37,10 +37,10 @@ class TPCDS_Hyperspace extends PlanStabilitySuite { spark.conf.set(INDEX_SYSTEM_PATH, indexSystemPath) spark.enableHyperspace() - val indexes = Seq( + val indexDefinitions = Seq( "dtindex;date_dim;d_date_sk;d_year", "ssIndex;store_sales;ss_sold_date_sk;ss_customer_sk") - indexes.foreach(i => createIndex(IndexDefinition.fromString(i), spark)) + indexDefinitions.foreach(i => createIndex(IndexDefinition.fromString(i), spark)) } override def afterAll(): Unit = { @@ -71,6 +71,7 @@ object IndexDefinition { /** * Index definition from conf files should be provided in the following format: * "index-name;table-name;comma-separated-indexed-cols;comma-separated-included-cols" + * * @param definition: Index definition in string representation mentioned above. * @return IndexDefinition. */